diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src')
29 files changed, 3207 insertions, 132 deletions
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index 67f4e42f386..4f2fffdf252 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -7,7 +7,7 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-store.c glusterd-handshake.c glusterd-pmap.c \ glusterd-volgen.c glusterd-rebalance.c glusterd-ganesha.c \ glusterd-quota.c glusterd-bitrot.c glusterd-geo-rep.c \ - glusterd-replace-brick.c glusterd-log-ops.c \ + glusterd-replace-brick.c glusterd-log-ops.c glusterd-tier.c \ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ @@ -17,7 +17,7 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \ glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \ glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \ - glusterd-reset-brick.c + glusterd-reset-brick.c glusterd-tierd-svc.c glusterd-tierd-svc-helper.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ @@ -39,6 +39,7 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \ glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \ glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \ + glusterd-tierd-svc.h glusterd-tierd-svc-helper.h \ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index c2c4cf4606f..08a878388a3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -773,7 +773,7 @@ subvol_matcher_destroy (int *subvols) GF_FREE 
(subvols); } -static int +int glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo) { char key[256] = {0,}; @@ -1934,7 +1934,7 @@ out: return ret; } -static int +int glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count, dict_t *dict, glusterd_volinfo_t *volinfo, @@ -1983,7 +1983,8 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count, goto out; } - if (cmd == GF_OP_CMD_DETACH_COMMIT) { + if (cmd == GF_OP_CMD_DETACH_COMMIT || + cmd == GF_DEFRAG_CMD_DETACH_COMMIT) { snprintf (msg, sizeof (msg), "Bricks in Hot " "tier are not decommissioned yet. Use " "gluster volume tier <VOLNAME> " @@ -1993,7 +1994,8 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count, goto out; } } else { - if (cmd == GF_OP_CMD_DETACH_COMMIT && + if ((cmd == GF_OP_CMD_DETACH_COMMIT || + (cmd == GF_DEFRAG_CMD_DETACH_COMMIT)) && (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED)) { snprintf (msg, sizeof (msg), "Bricks in Hot " "tier are not decommissioned yet. 
Wait for " @@ -2007,7 +2009,8 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count, if (glusterd_is_local_brick (THIS, volinfo, brickinfo)) { if (((cmd == GF_OP_CMD_START) || - (cmd == GF_OP_CMD_DETACH_START)) && + (cmd == GF_OP_CMD_DETACH_START) || + (cmd == GF_DEFRAG_CMD_DETACH_START)) && brickinfo->status != GF_BRICK_STARTED) { snprintf (msg, sizeof (msg), "Found stopped " "brick %s", brick); @@ -2529,7 +2532,7 @@ out: return ret; } -static void +void glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo) { volinfo->type = volinfo->tier_info.cold_type; diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 9a4b1bf38da..364623317ef 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -4385,6 +4385,16 @@ __glusterd_handle_status_volume (rpcsvc_request_t *req) goto out; } + if ((cmd & GF_CLI_STATUS_TIERD) && + (conf->op_version < GD_OP_VERSION_3_10_0)) { + snprintf (err_str, sizeof (err_str), "The cluster is operating " + "at a lesser version than %d. 
Getting the status of " + "tierd is not allowed in this state", + GD_OP_VERSION_3_10_0); /* same op-version the check enforces */ + ret = -1; + goto out; + } + if ((cmd & GF_CLI_STATUS_SCRUB) && (conf->op_version < GD_OP_VERSION_3_7_0)) { snprintf (err_str, sizeof (err_str), "The cluster is operating " @@ -5977,7 +5987,6 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = { [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA}, [GLUSTER_CLI_ATTACH_TIER] = { "ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER, glusterd_handle_attach_tier, NULL, 0, DRC_NA}, - [GLUSTER_CLI_DETACH_TIER] = { "DETACH_TIER", GLUSTER_CLI_DETACH_TIER, glusterd_handle_detach_tier, NULL, 0, DRC_NA}, [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA}, [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA}, [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA}, @@ -6005,6 +6014,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = { [GLUSTER_CLI_BITROT] = {"BITROT", GLUSTER_CLI_BITROT, glusterd_handle_bitrot, NULL, 0, DRC_NA}, [GLUSTER_CLI_GET_STATE] = {"GET_STATE", GLUSTER_CLI_GET_STATE, glusterd_handle_get_state, NULL, 0, DRC_NA}, [GLUSTER_CLI_RESET_BRICK] = {"RESET_BRICK", GLUSTER_CLI_RESET_BRICK, glusterd_handle_reset_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_TIER] = {"TIER", GLUSTER_CLI_TIER, glusterd_handle_tier, NULL, 0, DRC_NA}, + [GLUSTER_CLI_REMOVE_TIER_BRICK] = {"REMOVE_TIER_BRICK", GLUSTER_CLI_REMOVE_TIER_BRICK, glusterd_handle_tier, NULL, 0, DRC_NA}, }; struct rpcsvc_program gd_svc_cli_prog = { diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 550ee2c10b3..c1392734d79 100644 --- 
a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -21,9 +21,10 @@ #include "glusterd-snapshot-utils.h" #include "glusterd-svc-mgmt.h" #include "glusterd-snapd-svc-helper.h" +#include "glusterd-tierd-svc-helper.h" +#include "glusterd-volgen.h" #include "glusterd-quotad-svc.h" #include "glusterd-messages.h" - #include "glusterfs3.h" #include "protocol-common.h" #include "rpcsvc.h" @@ -224,6 +225,28 @@ build_volfile_path (char *volume_id, char *path, } + volid_ptr = strstr (volume_id, "tierd/"); + if (volid_ptr) { + volid_ptr = strchr (volid_ptr, '/'); + if (!volid_ptr) { + ret = -1; + goto out; + } + volid_ptr++; + + ret = glusterd_volinfo_find (volid_ptr, &volinfo); + if (ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_VOLINFO_GET_FAIL, + "Couldn't find volinfo"); + goto out; + } + glusterd_svc_build_tierd_volfile_path (volinfo, path, path_len); + ret = 0; + goto out; + + } + volid_ptr = strstr (volume_id, "gluster/"); if (volid_ptr) { volid_ptr = strchr (volid_ptr, '/'); diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index c8c48d42316..00de88f4e36 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -41,7 +41,7 @@ #define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD -#define GLFS_NUM_MESSAGES 589 +#define GLFS_NUM_MESSAGES 595 #define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ @@ -4761,6 +4761,61 @@ */ #define GD_MSG_NFS_GANESHA_DISABLED (GLUSTERD_COMP_BASE + 589) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_TIERD_STOP_FAIL (GLUSTERD_COMP_BASE + 590) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_TIERD_CREATE_FAIL (GLUSTERD_COMP_BASE + 591) + +/*! 
+ * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_TIERD_START_FAIL (GLUSTERD_COMP_BASE + 592) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_TIERD_OBJ_GET_FAIL (GLUSTERD_COMP_BASE + 593) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_TIERD_NOT_RUNNING (GLUSTERD_COMP_BASE + 594) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_TIERD_INIT_FAIL (GLUSTERD_COMP_BASE + 595) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c index b7b1a3fcbfd..7a7db069b6e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -188,6 +188,18 @@ gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, goto out; } break; + case GD_OP_TIER_START_STOP: + case GD_OP_TIER_STATUS: + case GD_OP_DETACH_TIER_STATUS: + case GD_OP_REMOVE_TIER_BRICK: + ret = glusterd_op_stage_tier (dict, op_errstr, rsp_dict); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_COMMAND_NOT_FOUND, "tier " + "prevalidation failed"); + goto out; + } + break; case GD_OP_RESET_BRICK: ret = glusterd_reset_brick_prevalidate (dict, op_errstr, @@ -256,6 +268,7 @@ gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, { int32_t ret = -1; xlator_t *this = NULL; + int32_t cmd = 0; this = THIS; GF_ASSERT (this); @@ -336,6 +349,49 @@ gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, } break; } + case GD_OP_TIER_START_STOP: + { + ret = glusterd_op_tier_start_stop (dict, op_errstr, + rsp_dict); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_COMMIT_OP_FAIL, + "tier commit failed."); + goto out; + } + break; + } + case GD_OP_REMOVE_TIER_BRICK: + { + ret = glusterd_op_remove_tier_brick 
(dict, op_errstr, + rsp_dict); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_COMMIT_OP_FAIL, + "tier detach commit failed."); + goto out; + } + ret = dict_get_int32 (dict, "rebalance-command", &cmd); + if (ret) { + gf_msg_debug (this->name, 0, "cmd not found"); + goto out; + } + + if (cmd != GF_DEFRAG_CMD_DETACH_STOP) + break; + } + case GD_OP_DETACH_TIER_STATUS: + case GD_OP_TIER_STATUS: + { + ret = glusterd_op_tier_status (dict, op_errstr, + rsp_dict, op); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_COMMIT_OP_FAIL, + "tier status commit failed"); + goto out; + } + } default: break; @@ -355,6 +411,7 @@ gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, xlator_t *this = NULL; char *volname = NULL; glusterd_volinfo_t *volinfo = NULL; + glusterd_svc_t *svc = NULL; this = THIS; @@ -427,13 +484,11 @@ gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, } if (volinfo->type == GF_CLUSTER_TYPE_TIER) { - if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) { - glusterd_defrag_info_set (volinfo, dict, - GF_DEFRAG_CMD_START_TIER, - GF_DEFRAG_CMD_START, - GD_OP_REBALANCE); - } - glusterd_restart_rebalance_for_volume (volinfo); + svc = &(volinfo->tierd.svc); + ret = svc->manager (svc, volinfo, + PROC_START_NO_WAIT); + if (ret) + goto out; } break; } @@ -717,6 +772,10 @@ glusterd_pre_validate_aggr_rsp_dict (glusterd_op_t op, "response dictionaries."); goto out; } + case GD_OP_TIER_STATUS: + case GD_OP_DETACH_TIER_STATUS: + case GD_OP_TIER_START_STOP: + case GD_OP_REMOVE_TIER_BRICK: break; case GD_OP_MAX_OPVERSION: break; @@ -1046,8 +1105,14 @@ glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict, dict_copy (dict, req_dict); } break; + case GD_OP_TIER_START_STOP: + case GD_OP_REMOVE_TIER_BRICK: + case GD_OP_DETACH_TIER_STATUS: + case GD_OP_TIER_STATUS: + dict_copy (dict, req_dict); + break; default: - break; + break; } *req = req_dict; @@ -1435,6 +1500,7 @@ glusterd_mgmt_v3_commit 
(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, uuid_t peer_uuid = {0}; xlator_t *this = NULL; glusterd_conf_t *conf = NULL; + int32_t count = 0; this = THIS; GF_ASSERT (this); @@ -1488,6 +1554,7 @@ glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, goto out; } + dict_unref (rsp_dict); rsp_dict = NULL; @@ -1504,8 +1571,25 @@ glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, if (peerinfo->generation > txn_generation) continue; - if (!peerinfo->connected) + if (!peerinfo->connected) { + if (op == GD_OP_TIER_STATUS || op == + GD_OP_DETACH_TIER_STATUS) { + ret = dict_get_int32 (args.dict, "count", + &count); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_GET_FAILED, + "failed to get index"); + count++; + ret = dict_set_int32 (args.dict, "count", + count); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to set index"); + } continue; + } if (op != GD_OP_SYNC_VOLUME && peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) continue; @@ -1538,6 +1622,7 @@ glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, gf_msg_debug (this->name, 0, "Sent commit req for %s to %d " "peers. Returning %d", gd_op_list[op], peer_cnt, ret); out: + glusterd_op_modify_op_ctx (op, op_ctx); return ret; } @@ -1684,7 +1769,12 @@ glusterd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *dict, } /* Copy the contents of dict like missed snaps info to req_dict */ - dict_copy (dict, req_dict); + if (op != GD_OP_REMOVE_TIER_BRICK) + /* dict and req_dict hold the same values during remove tier + * brick (detach start), so copying dict again would leave the + * remove-brick id empty. + * Skipping the copy retains the value. 
*/ + dict_copy (dict, req_dict); /* Post Validation on local node */ ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr, diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index ffae6296404..5650bb2e7be 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -79,6 +79,10 @@ int glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr, struct cds_list_head *selected); +int +glusterd_bricks_select_tier_volume (dict_t *dict, char **op_errstr, + struct cds_list_head *selected); + int32_t glusterd_txn_opinfo_dict_init () @@ -593,6 +597,8 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin } break; case GD_OP_REBALANCE: + case GD_OP_DETACH_TIER_STATUS: + case GD_OP_TIER_STATUS: case GD_OP_DEFRAG_BRICK_VOLUME: brick_req = GF_CALLOC (1, sizeof (*brick_req), gf_gld_mt_mop_brick_req_t); @@ -1662,6 +1668,16 @@ glusterd_op_stage_status_volume (dict_t *dict, char **op_errstr) goto out; } + if ((cmd & GF_CLI_STATUS_TIERD) && + (priv->op_version < GD_OP_VERSION_3_10_0)) { + snprintf (msg, sizeof (msg), "The cluster is operating at " + "version less than %d. 
Getting the " + "status of tierd is not allowed in this state.", + GD_OP_VERSION_3_10_0); + ret = -1; + goto out; + } + if ((cmd & GF_CLI_STATUS_SNAPD) && (priv->op_version < GD_OP_VERSION_3_6_0)) { snprintf (msg, sizeof (msg), "The cluster is operating at " @@ -1743,6 +1759,13 @@ glusterd_op_stage_status_volume (dict_t *dict, char **op_errstr) "bitrot enabled", volname); goto out; } + } else if ((cmd & GF_CLI_STATUS_TIERD) != 0) { + if (!glusterd_is_tierd_enabled (volinfo)) { + ret = -1; + snprintf (msg, sizeof (msg), "Volume %s does not have " + "tierd enabled.", volname); + goto out; + } } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) { if (!glusterd_is_bitrot_enabled (volinfo)) { ret = -1; @@ -1997,6 +2020,12 @@ glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key, if (ret) goto out; } + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + svc = &(volinfo->tierd.svc); + ret = svc->reconfigure (volinfo); + if (ret) + goto out; + } ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { @@ -2780,6 +2809,12 @@ glusterd_op_set_volume (dict_t *dict, char **errstr) if (ret) goto out; } + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + svc = &(volinfo->tierd.svc); + ret = svc->reconfigure (volinfo); + if (ret) + goto out; + } ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2817,6 +2852,13 @@ glusterd_op_set_volume (dict_t *dict, char **errstr) goto out; } + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + svc = &(volinfo->tierd.svc); + ret = svc->reconfigure (volinfo); + if (ret) + goto out; + } + ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -3144,7 +3186,7 @@ _add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) GF_ASSERT (this); switch (op) { - case GD_OP_DETACH_TIER: + case GD_OP_REMOVE_TIER_BRICK: case GD_OP_REMOVE_BRICK: snprintf (key, sizeof (key), "task%d", index); ret = 
_add_remove_bricks_to_dict (dict, volinfo, key); @@ -3213,20 +3255,26 @@ glusterd_aggregate_task_status (dict_t *rsp_dict, glusterd_volinfo_t *volinfo) int ret = -1; int tasks = 0; xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; this = THIS; GF_ASSERT (this); + conf = this->private; if (!gf_uuid_is_null (volinfo->rebal.rebalance_id)) { if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + if (conf->op_version > GD_OP_VERSION_3_10_0) + goto done; if (volinfo->rebal.op == GD_OP_REMOVE_BRICK) - ret = _add_task_to_dict (rsp_dict, volinfo, - GD_OP_DETACH_TIER, - tasks); + ret = _add_task_to_dict (rsp_dict, + volinfo, + GD_OP_REMOVE_TIER_BRICK, + tasks); else if (volinfo->rebal.op == GD_OP_REBALANCE) - ret = _add_task_to_dict (rsp_dict, volinfo, - GD_OP_TIER_MIGRATE, - tasks); + ret = _add_task_to_dict (rsp_dict, + volinfo, + GD_OP_TIER_MIGRATE, + tasks); } else ret = _add_task_to_dict (rsp_dict, volinfo, volinfo->rebal.op, tasks); @@ -3239,7 +3287,7 @@ glusterd_aggregate_task_status (dict_t *rsp_dict, glusterd_volinfo_t *volinfo) } tasks++; } - +done: ret = dict_set_int32 (rsp_dict, "tasks", tasks); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -3358,6 +3406,13 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, goto out; other_count++; node_count++; + } else if ((cmd & GF_CLI_STATUS_TIERD) != 0) { + ret = glusterd_add_tierd_to_dict (volinfo, rsp_dict, + other_index); + if (ret) + goto out; + other_count++; + node_count++; } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) { ret = glusterd_add_snapd_to_dict (volinfo, rsp_dict, other_index); @@ -3424,6 +3479,17 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, node_count++; } + if (glusterd_is_tierd_enabled (volinfo)) { + ret = glusterd_add_tierd_to_dict (volinfo, + rsp_dict, + other_index); + if (ret) + goto out; + other_count++; + other_index++; + node_count++; + } + nfs_disabled = dict_get_str_boolean (vol_opts, NFS_DISABLE_MAP_KEY, _gf_false); @@ -4181,6 +4247,8 @@ glusterd_op_build_payload 
(dict_t **req, char **op_errstr, dict_t *op_ctx) break; case GD_OP_REMOVE_BRICK: + case GD_OP_DETACH_TIER_STATUS: + case GD_OP_REMOVE_TIER_BRICK: { dict_t *dict = ctx; ret = dict_get_str (dict, "volname", &volname); @@ -4240,6 +4308,8 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) case GD_OP_DEFRAG_BRICK_VOLUME: case GD_OP_BARRIER: case GD_OP_BITROT: + case GD_OP_TIER_START_STOP: + case GD_OP_TIER_STATUS: case GD_OP_SCRUB_STATUS: case GD_OP_SCRUB_ONDEMAND: case GD_OP_RESET_BRICK: @@ -4840,6 +4910,9 @@ glusterd_op_modify_op_ctx (glusterd_op_t op, void *ctx) * same */ case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_TIER_STATUS: + case GD_OP_REMOVE_TIER_BRICK: + case GD_OP_DETACH_TIER_STATUS: case GD_OP_SCRUB_STATUS: case GD_OP_SCRUB_ONDEMAND: ret = dict_get_int32 (op_ctx, "count", &count); @@ -5557,6 +5630,8 @@ glusterd_need_brick_op (glusterd_op_t op) switch (op) { case GD_OP_PROFILE_VOLUME: case GD_OP_STATUS_VOLUME: + case GD_OP_TIER_STATUS: + case GD_OP_DETACH_TIER_STATUS: case GD_OP_DEFRAG_BRICK_VOLUME: case GD_OP_HEAL_VOLUME: case GD_OP_SCRUB_STATUS: @@ -6069,8 +6144,9 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr, goto out; } - if (command == GF_OP_CMD_DETACH_START) - return glusterd_bricks_select_rebalance_volume(dict, op_errstr, selected); + if (command == GF_DEFRAG_CMD_DETACH_START) + return glusterd_bricks_select_tier_volume(dict, op_errstr, + selected); ret = dict_get_int32 (dict, "force", &force); if (ret) { @@ -6825,6 +6901,67 @@ out: } int +glusterd_bricks_select_tier_volume (dict_t *dict, char **op_errstr, + struct cds_list_head *selected) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char msg[2048] = {0,}; + glusterd_pending_node_t *pending_node = NULL; + glusterd_brickinfo_t *brick = NULL; + gf_boolean_t retval = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + + ret = dict_get_str (dict, "volname", &volname); 
+ if (ret) { + gf_msg ("glusterd", GF_LOG_ERROR, 0, + GD_MSG_DICT_GET_FAILED, "volume name get failed"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (msg, sizeof (msg), "Volume %s does not exist", + volname); + + *op_errstr = gf_strdup (msg); + gf_msg ("glusterd", GF_LOG_ERROR, 0, + GD_MSG_VOL_NOT_FOUND, "%s", msg); + goto out; + } + /*check if this node needs tierd*/ + cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) { + if (gf_uuid_compare (MY_UUID, brick->uuid) == 0) { + retval = _gf_true; + break; + } + } + + if (!retval) + goto out; + + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = volinfo; + pending_node->type = GD_NODE_TIERD; + cds_list_add_tail (&pending_node->list, selected); + pending_node = NULL; + } + ret = 0; + +out: + return ret; +} + +int glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr, struct cds_list_head *selected) { @@ -6913,6 +7050,7 @@ glusterd_bricks_select_status_volume (dict_t *dict, char **op_errstr, case GF_CLI_STATUS_SHD: case GF_CLI_STATUS_QUOTAD: case GF_CLI_STATUS_SNAPD: + case GF_CLI_STATUS_TIERD: case GF_CLI_STATUS_BITD: case GF_CLI_STATUS_SCRUB: break; @@ -7061,6 +7199,30 @@ glusterd_bricks_select_status_volume (dict_t *dict, char **op_errstr, cds_list_add_tail (&pending_node->list, selected); ret = 0; + } else if ((cmd & GF_CLI_STATUS_TIERD) != 0) { + if (!volinfo->tierd.svc.online) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_TIERD_NOT_RUNNING, "tierd is not " + "running"); + ret = -1; + goto out; + } + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + gf_msg (this->name, GF_LOG_ERROR, ENOMEM, + GD_MSG_NO_MEMORY, "failed to allocate " + "memory for pending node"); + ret = -1; + goto out; + } + + pending_node->node = (void *)(&volinfo->tierd); + pending_node->type = 
GD_NODE_TIERD; + pending_node->index = 0; + cds_list_add_tail (&pending_node->list, selected); + + ret = 0; } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) { if (!volinfo->snapd.svc.online) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -7384,7 +7546,12 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_bricks_select_status_volume (dict, op_errstr, selected); break; + case GD_OP_TIER_STATUS: + ret = glusterd_bricks_select_tier_volume (dict, op_errstr, + selected); + break; + case GD_OP_DETACH_TIER_STATUS: case GD_OP_DEFRAG_BRICK_VOLUME: ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr, selected); @@ -7971,11 +8138,13 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx) case GD_OP_PROFILE_VOLUME: case GD_OP_STATUS_VOLUME: case GD_OP_REBALANCE: + case GD_OP_TIER_START_STOP: case GD_OP_HEAL_VOLUME: case GD_OP_STATEDUMP_VOLUME: case GD_OP_CLEARLOCKS_VOLUME: case GD_OP_DEFRAG_BRICK_VOLUME: case GD_OP_MAX_OPVERSION: + case GD_OP_TIER_STATUS: dict_unref (ctx); break; default: diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 19b1bd97e04..142f7ba89f7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -260,6 +260,12 @@ glusterd_op_init_commit_rsp_dict (glusterd_op_t op); void glusterd_op_modify_op_ctx (glusterd_op_t op, void *op_ctx); +void +glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo); + +int +glusterd_set_detach_bricks (dict_t *dict, glusterd_volinfo_t *volinfo); + int32_t glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size, int32_t blk_count, double *throughput, double *time); @@ -299,4 +305,7 @@ glusterd_set_opinfo (char *errstr, int32_t op_errno, int32_t op_ret); int glusterd_dict_set_volid (dict_t *dict, char *volname, char **op_errstr); + +int32_t +glusterd_tier_op (xlator_t *this, void *data); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c 
b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 36a98fc87ad..3853e148893 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -1058,18 +1058,25 @@ glusterd_defrag_event_notify_handle (dict_t *dict) volname_ptr = strstr (volname, "rebalance/"); if (volname_ptr) { volname_ptr = strchr (volname_ptr, '/'); - if (!volname_ptr) { + volname = volname_ptr + 1; + } else { + volname_ptr = strstr (volname, "tierd/"); + if (volname_ptr) { + volname_ptr = strchr (volname_ptr, '/'); + if (!volname_ptr) { + ret = -1; + goto out; + } + volname = volname_ptr + 1; + } else { + + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME, + "volname recieved (%s) is not prefixed with " + "rebalance or tierd.", volname); ret = -1; goto out; } - volname = volname_ptr + 1; - } else { - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME, - "volname received (%s) is not prefixed with rebalance.", - volname); - ret = -1; - goto out; } ret = glusterd_volinfo_find (volname, &volinfo); @@ -1081,7 +1088,7 @@ glusterd_defrag_event_notify_handle (dict_t *dict) return ret; } - ret = glusterd_defrag_volume_status_update (volinfo, dict); + ret = glusterd_defrag_volume_status_update (volinfo, dict, 0); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index dc955de635a..7eda25e6b0d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -59,6 +59,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, switch (op) { case GD_OP_DETACH_TIER: + case GD_OP_REMOVE_TIER_BRICK: case GD_OP_REMOVE_BRICK: { if (ctx) @@ -72,6 +73,8 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, break; } case GD_OP_TIER_MIGRATE: + case GD_OP_TIER_STATUS: + case GD_OP_DETACH_TIER_STATUS: case GD_OP_REBALANCE: case 
GD_OP_DEFRAG_BRICK_VOLUME: { @@ -146,6 +149,9 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, case GD_OP_SCRUB_ONDEMAND: case GD_OP_RESET_BRICK: case GD_OP_MAX_OPVERSION: + case GD_OP_TIER_START_STOP: + case GD_OP_DETACH_NOT_STARTED: + { /*nothing specific to be done*/ break; @@ -2332,7 +2338,8 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, rpc = glusterd_pending_node_get_rpc (pending_node); if (!rpc) { - if (pending_node->type == GD_NODE_REBALANCE) { + if (pending_node->type == GD_NODE_REBALANCE || + pending_node->type == GD_NODE_TIERD) { opinfo.brick_pending_count = 0; ret = 0; if (req) { diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index c1fb3181b90..f83e8519ad9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -736,6 +736,15 @@ glusterd_peer_detach_cleanup (glusterd_conf_t *priv) } } + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + svc = &(volinfo->tierd.svc); + ret = svc->stop (svc, SIGTERM); + if (ret) { + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_SVC_STOP_FAIL, "Failed " + "to stop tierd daemon service"); + } + } ret = glusterd_cleanup_snaps_for_volume (volinfo); if (ret) { gf_msg (THIS->name, GF_LOG_ERROR, 0, diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 8e37c671909..710a92c98c3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -1034,6 +1034,12 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) buf); if (ret) goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->is_tier_enabled); + ret = gf_store_save_value (fd, GF_TIER_ENABLED, buf); + if (ret) + goto out; + } ret = glusterd_volume_write_tier_details (fd, volinfo); @@ -1358,6 +1364,91 @@ _gd_store_rebalance_dict (dict_t *dict, char *key, data_t *value, void *data) } int32_t +glusterd_store_state_tier_write (int fd, 
glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char buf[PATH_MAX] = {0, }; + + GF_VALIDATE_OR_GOTO (THIS->name, (fd > 0), out); + GF_VALIDATE_OR_GOTO (THIS->name, volinfo, out); + + /*tier counter values are stored here. so that after restart + * of glusterd tier resumes at the state is was brought down + */ + + if (volinfo->tier.defrag_cmd == GF_DEFRAG_CMD_STATUS) { + ret = 0; + goto out; + } + + snprintf (buf, sizeof (buf), "%d", volinfo->tier.defrag_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TIER_STATUS, + buf); + if (ret) + goto out; + + + snprintf (buf, sizeof (buf), "%d", volinfo->tier.op); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_TIER_DETACH_OP, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->tier.rebalance_files); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_MIGRATED_FILES, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->tier.rebalance_data); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_MIGRATED_SIZE, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->tier.lookedup_files); + ret = gf_store_save_value (fd, + GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SCANNED, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%"PRIu64, + volinfo->tier.rebalance_failures); + ret = gf_store_save_value (fd, + GLUSTERD_STORE_KEY_VOL_MIGRATIONS_FAILURES, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->tier.skipped_files); + ret = gf_store_save_value (fd, + GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%f", volinfo->tier.rebalance_time); + ret = gf_store_save_value (fd, + GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME, + buf); + if (ret) + goto out; + + gf_uuid_unparse (volinfo->tier.rebalance_id, buf); + ret = gf_store_save_value (fd, GF_TIER_TID_KEY, buf); + if (ret) + goto out; + + if (volinfo->tier.dict) { + dict_foreach 
(volinfo->tier.dict, _gd_store_rebalance_dict, + &fd); + } +out: + gf_msg_debug (THIS->name, 0, "Returning %d", ret); + return ret; +} + +int32_t glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo) { int ret = -1; @@ -1454,6 +1545,12 @@ glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo) if (ret) goto out; + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + ret = glusterd_store_state_tier_write (fd, volinfo); + if (ret) + goto out; + } + ret = gf_store_rename_tmppath (volinfo->node_state_shandle); if (ret) goto out; @@ -2559,6 +2656,41 @@ glusterd_store_retrieve_node_state (glusterd_volinfo_t *volinfo) } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) { volinfo->rebal.rebalance_time = atoi (value); + + /* the remaining keys are tier state, stored on volinfo->tier; + * 64-bit counters use strtoull so large values are not truncated + */ + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TIER_STATUS, + strlen (GLUSTERD_STORE_KEY_VOL_TIER_STATUS))) { + volinfo->tier.defrag_status = atoi (value); + } else if (!strncmp (key, GF_TIER_TID_KEY, + strlen (GF_TIER_TID_KEY))) { + gf_uuid_parse (value, volinfo->tier.rebalance_id); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_TIER_DETACH_OP, + strlen (GLUSTERD_STORE_KEY_TIER_DETACH_OP))) { + volinfo->tier.op = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_MIGRATED_FILES, + strlen (GLUSTERD_STORE_KEY_VOL_MIGRATED_FILES))) { + volinfo->tier.rebalance_files = strtoull (value, NULL, 10); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_MIGRATED_SIZE, + strlen (GLUSTERD_STORE_KEY_VOL_MIGRATED_SIZE))) { + volinfo->tier.rebalance_data = strtoull (value, NULL, 10); + } else if (!strncmp (key, + GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SCANNED, + strlen (GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SCANNED))) { + volinfo->tier.lookedup_files = strtoull (value, NULL, 10); + } else if (!strncmp (key, + GLUSTERD_STORE_KEY_VOL_MIGRATIONS_FAILURES, + strlen 
(GLUSTERD_STORE_KEY_VOL_MIGRATIONS_FAILURES))) { + volinfo->tier.rebalance_failures = strtoull (value, NULL, 10); + } else if (!strncmp (key, + GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED, + strlen (GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED))) { + volinfo->tier.skipped_files = strtoull (value, NULL, 10); + } else if (!strncmp (key, + GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME, + strlen (GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME))) { + volinfo->tier.rebalance_time = strtod (value, NULL); } else { if (!tmp_dict) { tmp_dict = dict_new (); @@ -2593,8 +2725,12 @@ glusterd_store_retrieve_node_state (glusterd_volinfo_t *volinfo) ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); } - if (tmp_dict) - volinfo->rebal.dict = dict_ref (tmp_dict); + if (tmp_dict) { + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + volinfo->tier.dict = dict_ref (tmp_dict); + else + volinfo->rebal.dict = dict_ref (tmp_dict); + } if (op_errno != GD_STORE_EOF) { ret = -1; @@ -2609,8 +2745,12 @@ glusterd_store_retrieve_node_state (glusterd_volinfo_t *volinfo) out: if (dup_value) GF_FREE (dup_value); - if (ret && volinfo->rebal.dict) - dict_unref (volinfo->rebal.dict); + if (ret) { + if (volinfo->rebal.dict) + dict_unref (volinfo->rebal.dict); + else if (volinfo->tier.dict) + dict_unref (volinfo->tier.dict); + } if (tmp_dict) dict_unref (tmp_dict); @@ -2757,6 +2897,9 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) { strncpy (volinfo->parent_volname, value, sizeof(volinfo->parent_volname) - 1); + } else if (!strncmp (key, GF_TIER_ENABLED, + strlen (GF_TIER_ENABLED))) { + volinfo->is_tier_enabled = atoi (value); } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_COUNT, strlen (key))) { volinfo->tier_info.cold_brick_count = atoi (value); diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index afb04cb5ec6..1c4ae097663 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ 
-61,6 +61,8 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" #define GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION "quota-version" +#define GLUSTERD_STORE_KEY_VOL_TIER_STATUS "tier_status" +#define GLUSTERD_STORE_KEY_TIER_DETACH_OP "tier_op" #define GLUSTERD_STORE_KEY_COLD_TYPE "cold_type" #define GLUSTERD_STORE_KEY_COLD_COUNT "cold_count" #define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT "cold_replica_count" @@ -110,6 +112,13 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED "skipped" #define GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME "run-time" +#define GLUSTERD_STORE_KEY_VOL_MIGRATED_FILES "migrated-files" +#define GLUSTERD_STORE_KEY_VOL_MIGRATED_SIZE "migration-size" +#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SCANNED "migration-scanned" +#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_FAILURES "migration-failures" +#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED "migration-skipped" +#define GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME "migration-run-time" + int32_t glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac); diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c index 70a12dff499..14c2fce5353 100644 --- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c @@ -18,6 +18,8 @@ #include "glusterd-quotad-svc.h" #include "glusterd-nfs-svc.h" #include "glusterd-bitd-svc.h" +#include "glusterd-tierd-svc.h" +#include "glusterd-tierd-svc-helper.h" #include "glusterd-scrub-svc.h" #include "glusterd-svc-helper.h" #include "syscall.h" @@ -60,8 +62,6 @@ glusterd_svcs_reconfigure () svc_name = "scrubber"; ret = glusterd_scrubsvc_reconfigure (); - if (ret) - goto out; out: if (ret && svc_name) gf_event (EVENT_SVC_RECONFIGURE_FAILED, "svc_name=%s", @@ -99,7 +99,6 @@ glusterd_svcs_stop () goto out; ret = glusterd_svc_stop (&(priv->scrub_svc), SIGTERM); - out: 
return ret; } @@ -152,7 +151,6 @@ glusterd_svcs_manager (glusterd_volinfo_t *volinfo) PROC_START_NO_WAIT); if (ret == -EINVAL) ret = 0; - out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h index b5aafefc1b5..bbba5ce9ee4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h +++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h @@ -33,4 +33,13 @@ glusterd_svc_check_topology_identical (char *svc_name, glusterd_graph_builder_t builder, gf_boolean_t *identical); +int +glusterd_svc_check_tier_volfile_identical (char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical); +int +glusterd_svc_check_tier_topology_identical (char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h index fe7a19385cd..c505d1e897c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h @@ -23,6 +23,7 @@ typedef int (*glusterd_svc_manager_t) (glusterd_svc_t *svc, void *data, int flags); typedef int (*glusterd_svc_start_t) (glusterd_svc_t *svc, int flags); typedef int (*glusterd_svc_stop_t) (glusterd_svc_t *svc, int sig); +typedef int (*glusterd_svc_reconfigure_t) (void *data); struct glusterd_svc_ { char name[PATH_MAX]; @@ -34,6 +35,7 @@ struct glusterd_svc_ { glusterd_svc_stop_t stop; gf_boolean_t online; gf_boolean_t inited; + glusterd_svc_reconfigure_t reconfigure; }; int diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index ffc7216b1da..970aed2924c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -318,6 +318,10 @@ glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) ret = glusterd_max_opversion_use_rsp_dict (aggr, rsp); break; + case GD_OP_TIER_STATUS: + case 
GD_OP_DETACH_TIER_STATUS: + case GD_OP_REMOVE_TIER_BRICK: + ret = glusterd_volume_tier_use_rsp_dict (aggr, rsp); default: break; } @@ -1705,18 +1709,29 @@ gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, ret = dict_get_int32 (req_dict, "command", &cmd); if (!ret) { if (cmd == GF_OP_CMD_DETACH_START) { + /* this change is left to support backward + * compatibility. */ op = GD_OP_REBALANCE; - ret = dict_set_int32 (req_dict, "rebalance-command", + ret = dict_set_int32 (req_dict, + "rebalance-command", GF_DEFRAG_CMD_START_DETACH_TIER); - if (ret) - goto out; + } else if (cmd == GF_DEFRAG_CMD_DETACH_START) { + op = GD_OP_REMOVE_TIER_BRICK; + ret = dict_set_int32 (req_dict, + "rebalance-command", + GF_DEFRAG_CMD_DETACH_START); } + if (ret) + goto out; } ret = gd_syncop_mgmt_brick_op (rpc, pending_node, op, req_dict, op_ctx, op_errstr); if (cmd == GF_OP_CMD_DETACH_START) { op = GD_OP_REMOVE_BRICK; dict_del (req_dict, "rebalance-command"); + } else if (cmd == GF_DEFRAG_CMD_DETACH_START) { + op = GD_OP_REMOVE_TIER_BRICK; + dict_del (req_dict, "rebalance-command"); } if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c new file mode 100644 index 00000000000..03fbbfba8ec --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-tier.c @@ -0,0 +1,1406 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+ */ + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-geo-rep.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "run.h" +#include "syscall.h" +#include "byte-order.h" +#include "glusterd-svc-helper.h" +#include "compat-errno.h" +#include "glusterd-tierd-svc.h" +#include "glusterd-tierd-svc-helper.h" +#include "glusterd-messages.h" +#include "glusterd-mgmt.h" +#include "glusterd-syncop.h" + +#include <sys/wait.h> +#include <dlfcn.h> + +extern struct rpc_clnt_program gd_brick_prog; + +const char *gd_tier_op_list[GF_DEFRAG_CMD_TYPE_MAX] = { + [GF_DEFRAG_CMD_START_TIER] = "start", + [GF_DEFRAG_CMD_STOP_TIER] = "stop", +}; + +int +__glusterd_handle_tier (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = { {0,} }; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_TIER_START_STOP; + char *volname = NULL; + int32_t cmd = 0; + char msg[2048] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + char err_str[2048] = {0}; + + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, req, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (msg, sizeof (msg), "Unable to decode the " + "command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_str (dict, "volname", &volname); + 
if (ret) { + snprintf (msg, sizeof (msg), "Unable to get volume name"); + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_GET_FAILED, "Unable to get volume name, " + "while handling tier command"); + goto out; + } + + ret = dict_get_int32 (dict, "rebalance-command", &cmd); + if (ret) { + snprintf (msg, sizeof (msg), "Unable to get the command"); + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_GET_FAILED, "Unable to get the cmd"); + goto out; + } + + if (conf->op_version < GD_OP_VERSION_3_7_0) { + snprintf (msg, sizeof (msg), "Cannot execute command. The " + "cluster is operating at version %d. Tier command " + "%s is unavailable in this version", conf->op_version, + gd_tier_op_list[cmd]); + ret = -1; + goto out; + } + + if (conf->op_version < GD_OP_VERSION_3_10_0) { + gf_msg_debug (this->name, 0, "The cluster is operating at " + "version less than or equal to %d. Falling back " + "to syncop framework.", + GD_OP_VERSION_3_7_5); + switch (cmd) { + case GF_DEFRAG_CMD_DETACH_STOP: + ret = dict_set_int32 (dict, "rebalance-command", + GF_DEFRAG_CMD_STOP_DETACH_TIER); + break; + + case GF_DEFRAG_CMD_DETACH_COMMIT: + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), "Volume " + "%s does not exist", volname); + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_VOL_NOT_FOUND, "%s", err_str); + goto out; + } + ret = glusterd_set_detach_bricks (dict, volinfo); + ret = dict_set_int32 (dict, "command", + GF_OP_CMD_DETACH_COMMIT); + break; + case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE: + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), "Volume " + "%s does not exist", volname); + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_VOL_NOT_FOUND, "%s", err_str); + goto out; + } + ret = glusterd_set_detach_bricks (dict, volinfo); + ret = dict_set_int32 (dict, "command", + GF_OP_CMD_DETACH_COMMIT_FORCE); + break; + case GF_DEFRAG_CMD_DETACH_START: + ret = 
glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), "Volume " + "%s does not exist", volname); + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_VOL_NOT_FOUND, "%s", err_str); + goto out; + } + ret = glusterd_set_detach_bricks (dict, volinfo); + ret = dict_set_int32 (dict, "command", + GF_OP_CMD_DETACH_START); + break; + + default: + break; + + } + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set dict"); + goto out; + } + if ((cmd == GF_DEFRAG_CMD_STATUS_TIER) || + (cmd == GF_DEFRAG_CMD_DETACH_STATUS) || + (cmd == GF_DEFRAG_CMD_START_TIER) || + (cmd == GF_DEFRAG_CMD_DETACH_STOP)) { + ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, + dict, msg, sizeof (msg)); + } else + ret = glusterd_op_begin (req, GD_OP_REMOVE_BRICK, dict, + msg, sizeof (msg)); + + glusterd_friend_sm (); + glusterd_op_sm (); + + } else { + switch (cmd) { + case GF_DEFRAG_CMD_STATUS_TIER: + cli_op = GD_OP_TIER_STATUS; + break; + + case GF_DEFRAG_CMD_DETACH_STATUS: + cli_op = GD_OP_DETACH_TIER_STATUS; + break; + + case GF_DEFRAG_CMD_DETACH_STOP: + cli_op = GD_OP_REMOVE_TIER_BRICK; + break; + + case GF_DEFRAG_CMD_DETACH_COMMIT: + case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE: + case GF_DEFRAG_CMD_DETACH_START: + cli_op = GD_OP_REMOVE_TIER_BRICK; + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), "Volume " + "%s does not exist", volname); + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_VOL_NOT_FOUND, "%s", err_str); + goto out; + } + ret = glusterd_set_detach_bricks (dict, volinfo); + break; + + default: + break; + } + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, "dict set failed"); + goto out; + } + ret = glusterd_mgmt_v3_initiate_all_phases (req, + cli_op, + dict); + } + +out: + if (ret) { + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Tier operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, msg); + } + + 
return ret; +} + +int +glusterd_handle_tier (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_tier); +} + + +static int +glusterd_manage_tier (glusterd_volinfo_t *volinfo, int opcode) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + switch (opcode) { + case GF_DEFRAG_CMD_START_TIER: + case GF_DEFRAG_CMD_STOP_TIER: + ret = volinfo->tierd.svc.manager (&(volinfo->tierd.svc), + volinfo, PROC_START_NO_WAIT); + break; + default: + ret = 0; + break; + } + +out: + return ret; + +} + +static int +glusterd_tier_enable (glusterd_volinfo_t *volinfo, char **op_errstr) +{ + int32_t ret = -1; + xlator_t *this = NULL; + int32_t tier_online = -1; + char pidfile[PATH_MAX] = {0}; + int32_t pid = -1; + glusterd_conf_t *priv = NULL; + + this = THIS; + + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, op_errstr, out); + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + if (glusterd_is_volume_started (volinfo) == 0) { + *op_errstr = gf_strdup ("Volume is stopped, start volume " + "to enable tier."); + ret = -1; + goto out; + } + + GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv); + tier_online = gf_is_service_running (pidfile, &pid); + + if (tier_online) { + *op_errstr = gf_strdup ("tier is already enabled"); + ret = -1; + goto out; + } + + volinfo->is_tier_enabled = _gf_true; + + ret = 0; +out: + if (ret && op_errstr && !*op_errstr) + gf_asprintf (op_errstr, "Enabling tier on volume %s has been " + "unsuccessful", volinfo->volname); + return ret; +} + +static int +glusterd_tier_disable (glusterd_volinfo_t *volinfo, char **op_errstr) +{ + int32_t ret = -1; + xlator_t *this = NULL; + int32_t tier_online = -1; + char pidfile[PATH_MAX] 
= {0}; + int32_t pid = -1; + glusterd_conf_t *priv = NULL; + + this = THIS; + + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, op_errstr, out); + priv = this->private; + + GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv); + tier_online = gf_is_service_running (pidfile, &pid); + + if (!tier_online) { + *op_errstr = gf_strdup ("tier is already disabled"); + ret = -1; + goto out; + } + + volinfo->is_tier_enabled = _gf_false; + + ret = 0; +out: + if (ret && op_errstr && !*op_errstr) + gf_asprintf (op_errstr, "Disabling tier volume %s has " + "been unsuccessful", volinfo->volname); + return ret; +} + +int +glusterd_op_remove_tier_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char key[256] = {0,}; + int32_t flag = 0; + char err_str[4096] = {0,}; + int need_rebalance = 0; + int force = 0; + int32_t cmd = 0; + int32_t replica_count = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + char *task_id_str = NULL; + dict_t *bricks_dict = NULL; + char *brick_tmpstr = NULL; + uint32_t commit_hash = 0; + int detach_commit = 0; + void *tier_info = NULL; + char *cold_shd_key = NULL; + char *hot_shd_key = NULL; + int delete_key = 1; + glusterd_svc_t *svc = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + GF_VALIDATE_OR_GOTO (this->name, op_errstr, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_GET_FAILED, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_msg (this->name, 
GF_LOG_ERROR, EINVAL, + GD_MSG_VOL_NOT_FOUND, "Unable to get volinfo"); + goto out; + } + + ret = dict_get_int32 (dict, "rebalance-command", &cmd); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "cmd not found"); + goto out; + } + + if (is_origin_glusterd (dict) && + (cmd != GF_DEFRAG_CMD_DETACH_START)) { + if (!gf_uuid_is_null (volinfo->rebal.rebalance_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, dict, + GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_REMOVE_BRICK_ID_SET_FAIL, + "Failed to set remove-brick-id"); + goto out; + } + } + } + /*check only if a tierd is supposed to be running + * if no brick in the tierd volume is a local brick + * skip it */ + cds_list_for_each_entry (brickinfo, &volinfo->bricks, + brick_list) { + if (glusterd_is_local_brick (this, volinfo, + brickinfo)) { + flag = _gf_true; + break; + } + } + if (!flag) + goto out; + + + ret = -1; + + switch (cmd) { + case GF_DEFRAG_CMD_DETACH_STOP: + /* Fall back to the old volume file */ + cds_list_for_each_entry_safe (brickinfo, tmp, + &volinfo->bricks, + brick_list) { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + } + ret = glusterd_create_volfiles_and_notify_services + (volinfo); + + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_VOLFILE_CREATE_FAIL, + "failed to create volfiles"); + goto out; + } + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_VOLINFO_SET_FAIL, + "failed to store volinfo"); + goto out; + } + ret = glusterd_tierdsvc_restart (); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_TIERD_START_FAIL, + "Couldn't restart tierd for " + "vol: %s", volinfo->volname); + goto out; + } + + volinfo->tier.op = GD_OP_DETACH_NOT_STARTED; + ret = 0; + goto out; + + + + case GF_DEFRAG_CMD_DETACH_START: + ret = dict_get_str (dict, 
GF_REMOVE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_msg_debug (this->name, errno, + "Missing remove-brick-id"); + ret = 0; + } else { + ret = dict_set_str (rsp_dict, + GF_REMOVE_BRICK_TID_KEY, + task_id_str); + gf_uuid_parse (task_id_str, + volinfo->tier.rebalance_id); + } + force = 0; + + volinfo->tier.op = GD_OP_DETACH_TIER; + volinfo->tier.defrag_status = GF_DEFRAG_STATUS_STARTED; + break; + + case GF_DEFRAG_CMD_DETACH_COMMIT: + if (volinfo->decommission_in_progress) { + gf_asprintf (op_errstr, "use 'force' option as " + "migration is in progress"); + goto out; + } + if (volinfo->rebal.defrag_status == + GF_DEFRAG_STATUS_FAILED) { + gf_asprintf (op_errstr, "use 'force' option as " + "migration has failed"); + goto out; + } + + case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE: + glusterd_op_perform_detach_tier (volinfo); + detach_commit = 1; + + /* Disabling ctr when detaching a tier, since + * currently tier is the only consumer of ctr. + * Revisit this code when this constraint no + * longer exist. + */ + dict_del (volinfo->dict, "features.ctr-enabled"); + dict_del (volinfo->dict, "cluster.tier-mode"); + + hot_shd_key = gd_get_shd_key + (volinfo->tier_info.hot_type); + cold_shd_key = gd_get_shd_key + (volinfo->tier_info.cold_type); + if (hot_shd_key) { + /* + * Since post detach, shd graph will not + * contain hot tier. So we need to clear + * option set for hot tier. For a tiered + * volume there can be different key + * for both hot and cold. If hot tier is + * shd compatible then we need to remove + * the configured value when detaching a tier, + * only if the key's are different or + * cold key is NULL. So we will set + * delete_key first, and if cold key is not + * null and they are equal then we will clear + * the flag. Otherwise we will delete the + * key. 
+ */ + + if (cold_shd_key) + delete_key = strcmp (hot_shd_key, + cold_shd_key); + if (delete_key) + dict_del (volinfo->dict, hot_shd_key); + } + /* fall through */ + + if (volinfo->decommission_in_progress) { + if (volinfo->tier.defrag) { + LOCK (&volinfo->rebal.defrag->lock); + /* Fake 'rebalance-complete' so the + * graph change + * happens right away */ + volinfo->tier.defrag_status = + GF_DEFRAG_STATUS_COMPLETE; + + UNLOCK (&volinfo->tier.defrag->lock); + } + } + + volinfo->tier.op = GD_OP_DETACH_NOT_STARTED; + ret = 0; + force = 1; + break; + default: + gf_asprintf (op_errstr, "tier command failed. Invalid " + "opcode"); + ret = -1; + goto out; + } + + count = glusterd_set_detach_bricks(dict, volinfo); + + if (cmd == GF_DEFRAG_CMD_DETACH_START) { + bricks_dict = dict_new (); + if (!bricks_dict) { + ret = -1; + goto out; + } + ret = dict_set_int32 (bricks_dict, "count", count); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_SET_FAILED, + "Failed to save remove-brick count"); + goto out; + } + } + + while (i <= count) { + snprintf (key, 256, "brick%d", i); + ret = dict_get_str (dict, key, &brick); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_GET_FAILED, "Unable to get %s", + key); + goto out; + } + + if (cmd == GF_DEFRAG_CMD_DETACH_START) { + brick_tmpstr = gf_strdup (brick); + if (!brick_tmpstr) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, ENOMEM, + GD_MSG_NO_MEMORY, + "Failed to duplicate brick name"); + goto out; + } + ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_SET_FAILED, + "Failed to add brick to dict"); + goto out; + } + brick_tmpstr = NULL; + } + + ret = glusterd_op_perform_remove_brick (volinfo, brick, force, + &need_rebalance); + if (ret) + goto out; + i++; + } + + if (detach_commit) { + /* Clear related information from volinfo */ + tier_info = ((void *)(&volinfo->tier_info)); + memset (tier_info, 0, sizeof 
(volinfo->tier_info)); + } + + if (cmd == GF_DEFRAG_CMD_DETACH_START) + volinfo->tier.dict = dict_ref (bricks_dict); + + ret = dict_get_int32 (dict, "replica-count", &replica_count); + if (!ret) { + gf_msg (this->name, GF_LOG_INFO, errno, + GD_MSG_DICT_GET_FAILED, + "changing replica count %d to %d on volume %s", + volinfo->replica_count, replica_count, + volinfo->volname); + volinfo->replica_count = replica_count; + volinfo->sub_count = replica_count; + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count + (volinfo); + + /* + * volinfo->type and sub_count have already been set for + * volumes undergoing a detach operation, they should not + * be modified here. + */ + if ((replica_count == 1) && (cmd != GF_DEFRAG_CMD_DETACH_COMMIT) + && (cmd != GF_DEFRAG_CMD_DETACH_COMMIT_FORCE)) { + if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { + volinfo->type = GF_CLUSTER_TYPE_NONE; + /* backward compatibility */ + volinfo->sub_count = 0; + } else { + volinfo->type = GF_CLUSTER_TYPE_STRIPE; + /* backward compatibility */ + volinfo->sub_count = volinfo->dist_leaf_count; + } + } + } + volinfo->subvol_count = (volinfo->brick_count / + volinfo->dist_leaf_count); + + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_VOLFILE_CREATE_FAIL, "failed to create" + "volfiles"); + goto out; + } + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_VOLINFO_STORE_FAIL, "failed to store volinfo"); + goto out; + } + + if (cmd == GF_DEFRAG_CMD_DETACH_START && + volinfo->status == GLUSTERD_STATUS_STARTED) { + + svc = &(volinfo->tierd.svc); + ret = svc->reconfigure (volinfo); + if (ret) + goto out; + + ret = glusterd_svcs_reconfigure (); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_NFS_RECONF_FAIL, + "Unable to reconfigure NFS-Server"); + goto out; + } + } + /* Need to reset the defrag/rebalance status 
accordingly */ + switch (volinfo->tier.defrag_status) { + case GF_DEFRAG_STATUS_FAILED: + case GF_DEFRAG_STATUS_COMPLETE: + volinfo->tier.defrag_status = 0; + default: + break; + } + if (!force && need_rebalance) { + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->tier.commit_hash = commit_hash; + } + /* perform the rebalance operations */ + ret = glusterd_handle_defrag_start + (volinfo, err_str, sizeof (err_str), + GF_DEFRAG_CMD_START_DETACH_TIER, + /*change this label to GF_DEFRAG_CMD_DETACH_START + * while removing old code + */ + glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); + + if (!ret) + volinfo->decommission_in_progress = 1; + + else if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_REBALANCE_START_FAIL, + "failed to start the rebalance"); + } + } else { + if (GLUSTERD_STATUS_STARTED == volinfo->status) + ret = glusterd_svcs_manager (volinfo); + } + +out: + if (ret && err_str[0] && op_errstr) + *op_errstr = gf_strdup (err_str); + + GF_FREE (brick_tmpstr); + if (bricks_dict) + dict_unref (bricks_dict); + + return ret; + +} + +int +glusterd_op_tier_start_stop (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + glusterd_volinfo_t *volinfo = NULL; + int32_t ret = -1; + char *volname = NULL; + int cmd = -1; + xlator_t *this = NULL; + glusterd_brickinfo_t *brick = NULL; + gf_boolean_t retval = _gf_false; + glusterd_conf_t *priv = NULL; + int32_t pid = -1; + char pidfile[PATH_MAX] = {0}; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + GF_VALIDATE_OR_GOTO (this->name, op_errstr, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_GET_FAILED, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, 
volname); + goto out; + } + + ret = dict_get_int32 (dict, "rebalance-command", &cmd); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_GET_FAILED, "Unable to get cmd from " + "dict"); + goto out; + } + + cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) { + if (gf_uuid_compare (MY_UUID, brick->uuid) == 0) { + retval = _gf_true; + break; + } + } + /*check if this node needs tierd*/ + + if (!retval) + goto out; + + switch (cmd) { + case GF_DEFRAG_CMD_START_TIER: + GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv); + /* we check if its running and skip so that we dont get a + * failure during force start + */ + if (gf_is_service_running (pidfile, &pid)) + goto out; + ret = glusterd_tier_enable (volinfo, op_errstr); + if (ret < 0) + goto out; + glusterd_store_perform_node_state_store (volinfo); + break; + + case GF_DEFRAG_CMD_STOP_TIER: + ret = glusterd_tier_disable (volinfo, op_errstr); + if (ret < 0) + goto out; + break; + default: + gf_asprintf (op_errstr, "tier command failed. 
Invalid " + "opcode"); + ret = -1; + goto out; + } + + ret = glusterd_manage_tier (volinfo, cmd); + if (ret) + goto out; + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, + "Failed to store volinfo for tier"); + goto out; + } + +out: + return ret; +} + +int +glusterd_op_stage_tier (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + char *volname = NULL; + int ret = -1; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + xlator_t *this = 0; + int32_t is_force = 0; + char pidfile[PATH_MAX] = {0}; + int32_t tier_online = -1; + int32_t pid = -1; + int32_t brick_count = 0; + gsync_status_param_t param = {0,}; + glusterd_conf_t *priv = NULL; + gf_boolean_t flag = _gf_false; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + GF_VALIDATE_OR_GOTO (this->name, op_errstr, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "volname not found"); + goto out; + } + + ret = dict_get_int32 (dict, "rebalance-command", &cmd); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "cmd not found"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, + msg, sizeof (msg)); + if (ret) { + gf_msg_debug (this->name, 0, "cmd validate failed"); + goto out; + } + + if (volinfo->type != GF_CLUSTER_TYPE_TIER) { + snprintf (msg, sizeof(msg), "volume %s is not a tier " + "volume", volinfo->volname); + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_VOL_NOT_TIER, "volume: %s is not a tier " + "volume", volinfo->volname); + ret = -1; + goto out; + } + /* Check if the connected clients are all of version + * glusterfs-3.6 
and higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. This check can be bypassed by using + * 'force' + */ + ret = glusterd_check_client_op_version_support + (volname, GD_OP_VERSION_3_6_0, NULL); + if (ret) { + ret = gf_asprintf (op_errstr, "Volume %s has one or " + "more connected clients of a version" + " lower than GlusterFS-v3.6.0. " + "Tier operations not supported in" + " below this version", volname); + goto out; + } + /*check only if a tierd is supposed to be running + * if no brick in the tierd volume is a local brick + * skip it */ + cds_list_for_each_entry (brickinfo, &volinfo->bricks, + brick_list) { + if (glusterd_is_local_brick (this, volinfo, + brickinfo)) { + flag = _gf_true; + break; + } + } + if (!flag) + goto out; + + GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv); + tier_online = gf_is_service_running (pidfile, &pid); + + switch (cmd) { + case GF_DEFRAG_CMD_START_TIER: + ret = dict_get_int32 (dict, "force", &is_force); + if (ret) + is_force = 0; + + if (brickinfo->status != GF_BRICK_STARTED) { + gf_asprintf (op_errstr, "Received" + " tier start on volume " + "with stopped brick %s", + brickinfo->path); + ret = -1; + goto out; + } + if ((!is_force) && tier_online) { + ret = gf_asprintf (op_errstr, "Tier daemon is " + "already running on volume %s", + volname); + ret = -1; + goto out; + } + ret = glusterd_defrag_start_validate (volinfo, msg, + sizeof (msg), + GD_OP_REBALANCE); + if (ret) { + gf_msg (this->name, 0, GF_LOG_ERROR, + GD_MSG_REBALANCE_START_FAIL, + "start validate failed"); + goto out; + } + break; + + case GF_DEFRAG_CMD_STOP_TIER: + + if (!tier_online) { + ret = gf_asprintf (op_errstr, "Tier daemon is " + "not running on volume %s", + volname); + ret = -1; + goto out; + } + break; + + case GF_DEFRAG_CMD_DETACH_START: + + + ret = dict_get_int32 (dict, "count", &brick_count); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + 
GD_MSG_DICT_GET_FAILED, + "Unable to get brick count"); + goto out; + } + + if (!tier_online) { + ret = gf_asprintf (op_errstr, "Tier daemon is " + "not running on volume %s", + volname); + ret = -1; + goto out; + } + if (volinfo->tier.op == GD_OP_DETACH_TIER) { + snprintf (msg, sizeof (msg), "An earlier detach tier " + "task exists for volume %s. Either commit it" + " or stop it before starting a new task.", + volinfo->volname); + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_OLD_REMOVE_BRICK_EXISTS, + "Earlier remove-brick" + " task exists for volume %s.", + volinfo->volname); + ret = -1; + goto out; + } + if (glusterd_is_defrag_on(volinfo)) { + snprintf (msg, sizeof (msg), "Migration is in progress." + " Please retry after completion"); + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_OIP_RETRY_LATER, "Migration is" + "in progress"); + goto out; + } + + ret = glusterd_remove_brick_validate_bricks (cmd, brick_count, + dict, volinfo, + op_errstr); + if (ret) + goto out; + + if (is_origin_glusterd (dict)) { + ret = glusterd_generate_and_set_task_id + (dict, GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_TASKID_GEN_FAIL, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, errno, + GD_MSG_DICT_GET_FAILED, + "Missing remove-brick-id"); + ret = 0; + } + } + break; + + case GF_DEFRAG_CMD_DETACH_STOP: + if (volinfo->tier.op != GD_OP_DETACH_TIER) { + snprintf (msg, sizeof(msg), "Detach-tier " + "not started"); + ret = -1; + goto out; + } + ret = 0; + break; + + case GF_DEFRAG_CMD_STATUS_TIER: + + if (!tier_online) { + ret = gf_asprintf (op_errstr, "Tier daemon is " + "not running on volume %s", + volname); + ret = -1; + goto out; + } + break; + + case GF_DEFRAG_CMD_DETACH_COMMIT: + + if (volinfo->tier.op != GD_OP_DETACH_TIER) { + snprintf (msg, sizeof(msg), "Detach-tier " + "not started"); + ret = -1; + 
goto out; + } + if ((volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED) + && (volinfo->tier.op == GD_OP_DETACH_TIER)) { + ret = -1; + snprintf (msg, sizeof (msg), "Detach is in progress. " + "Please retry after completion"); + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_OIP_RETRY_LATER, "Detach is in " + "progress"); + goto out; + } + + ret = dict_get_int32 (dict, "count", &brick_count); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_GET_FAILED, + "Unable to get brick count"); + goto out; + } + + ret = glusterd_remove_brick_validate_bricks (cmd, brick_count, + dict, volinfo, + op_errstr); + if (ret) + goto out; + + /* If geo-rep is configured, for this volume, it should be + * stopped. + */ + param.volinfo = volinfo; + ret = glusterd_check_geo_rep_running (¶m, op_errstr); + if (ret || param.is_active) { + ret = -1; + goto out; + } + + break; + case GF_DEFRAG_CMD_DETACH_STATUS: + if (volinfo->tier.op != GD_OP_DETACH_TIER) { + snprintf (msg, sizeof(msg), "Detach-tier " + "not started"); + ret = -1; + goto out; + } + break; + + case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE: + default: + break; + + } + + ret = 0; +out: + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup (msg); + + return ret; +} + +int32_t +glusterd_add_tierd_to_dict (glusterd_volinfo_t *volinfo, + dict_t *dict, int32_t count) +{ + + int ret = -1; + int32_t pid = -1; + int32_t brick_online = -1; + char key[1024] = {0}; + char base_key[1024] = {0}; + char pidfile[PATH_MAX] = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + snprintf (base_key, sizeof (base_key), "brick%d", count); + snprintf (key, sizeof (key), "%s.hostname", base_key); + ret = dict_set_str (dict, key, "Tier Daemon"); + if (ret) + goto out; + + snprintf (key, sizeof (key), "%s.path", base_key); + ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa 
(MY_UUID))); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.port", base_key); + ret = dict_set_int32 (dict, key, volinfo->tierd.port); + if (ret) + goto out; + + glusterd_svc_build_tierd_pidfile (volinfo, pidfile, sizeof (pidfile)); + + brick_online = gf_is_service_running (pidfile, &pid); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.pid", base_key); + ret = dict_set_int32 (dict, key, pid); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.status", base_key); + ret = dict_set_int32 (dict, key, brick_online); + +out: + if (ret) + gf_msg (this ? this->name : "glusterd", + GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Returning %d. adding values to dict failed", ret); + + return ret; +} + +int32_t +__glusterd_tier_status_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_brick_op_rsp rsp = {0}; + int ret = -1; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + struct syncargs *args = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, req, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + frame = myframe; + args = frame->local; + + if (-1 == req->rpc_status) { + args->op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_RES_DECODE_FAIL, + "Failed to decode brick op " + "response received"); + goto out; + } + + if (rsp.output.output_len) { + args->dict = dict_new (); + if (!args->dict) { + ret = -1; + args->op_errno = ENOMEM; + goto out; + } + + ret = dict_unserialize (rsp.output.output_val, + rsp.output.output_len, + &args->dict); + if (ret < 0) + goto out; + } + args->op_ret = rsp.op_ret; + args->op_errno = rsp.op_errno; + args->errstr = gf_strdup (rsp.op_errstr); + +out: + 
if ((rsp.op_errstr) && (strcmp (rsp.op_errstr, "") != 0)) + free (rsp.op_errstr); + free (rsp.output.output_val); + if (req->rpc_status != -1) + GLUSTERD_STACK_DESTROY(frame); + __wake (args); + + return ret; + +} + +int32_t +glusterd_tier_status_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_tier_status_cbk); +} + +int +glusterd_op_tier_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict, + glusterd_op_t op) +{ + int ret = -1; + xlator_t *this = NULL; + struct syncargs args = {0, }; + glusterd_req_ctx_t *data = NULL; + gd1_mgmt_brick_op_req *req = NULL; + glusterd_conf_t *priv = NULL; + int pending_bricks = 0; + glusterd_pending_node_t *pending_node; + glusterd_req_ctx_t *req_ctx = NULL; + struct rpc_clnt *rpc = NULL; + uuid_t *txn_id = NULL; + extern glusterd_op_info_t opinfo; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + GF_VALIDATE_OR_GOTO (this->name, rsp_dict, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + args.op_ret = -1; + args.op_errno = ENOTCONN; + data = GF_CALLOC (1, sizeof (*data), + gf_gld_mt_op_allack_ctx_t); + + gf_uuid_copy (data->uuid, MY_UUID); + + /* we are printing the detach status for issue of detach start + * by then we need the op to be GD_OP_DETACH_TIER_STATUS for it to + * get the status. ad for the rest of the condition it can go as such. 
+ */ + + if (op == GD_OP_REMOVE_TIER_BRICK) + data->op = GD_OP_DETACH_TIER_STATUS; + else + data->op = op; + data->dict = dict; + + txn_id = &priv->global_txn_id; + + req_ctx = data; + GF_VALIDATE_OR_GOTO (this->name, req_ctx, out); + CDS_INIT_LIST_HEAD (&opinfo.pending_bricks); + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + gf_msg_debug (this->name, 0, "transaction ID = %s", + uuid_utoa (*txn_id)); + + ret = glusterd_op_bricks_select (req_ctx->op, req_ctx->dict, op_errstr, + &opinfo.pending_bricks, NULL); + + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_SELECT_FAIL, "Failed to select bricks"); + opinfo.op_errstr = *op_errstr; + goto out; + } + + cds_list_for_each_entry (pending_node, &opinfo.pending_bricks, list) { + ret = glusterd_brick_op_build_payload + (req_ctx->op, pending_node->node, + (gd1_mgmt_brick_op_req **)&req, + req_ctx->dict); + + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL, + "Failed to build brick op payload during " + "'Volume %s'", gd_op_list[req_ctx->op]); + goto out; + } + + + rpc = glusterd_pending_node_get_rpc (pending_node); + if (!rpc) { + opinfo.brick_pending_count = 0; + ret = 0; + if (req) { + GF_FREE (req); + req = NULL; + } + glusterd_defrag_volume_node_rsp (req_ctx->dict, + NULL, rsp_dict); + + goto out; + } + + GD_SYNCOP (rpc, (&args), NULL, glusterd_tier_status_cbk, req, + &gd_brick_prog, req->op, xdr_gd1_mgmt_brick_op_req); + + if (req) { + GF_FREE (req); + req = NULL; + } + if (!ret) + pending_bricks++; + + glusterd_pending_node_put_rpc (pending_node); + } + glusterd_handle_node_rsp (req_ctx->dict, pending_node->node, + req_ctx->op, args.dict, rsp_dict, op_errstr, + pending_node->type); + gf_msg_trace (this->name, 0, "Sent commit op req for operation " + "'Volume %s' to %d bricks", gd_op_list[req_ctx->op], + pending_bricks); + opinfo.brick_pending_count = pending_bricks; + +out: + + if (ret) + opinfo.op_ret = ret; + + ret = 
glusterd_set_txn_opinfo (txn_id, &opinfo); + if (ret) + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set transaction's opinfo"); + + gf_msg_debug (this ? this->name : "glusterd", 0, + "Returning %d. Failed to get tier status", ret); + return ret; + +} diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c new file mode 100644 index 00000000000..b555a1a9ccb --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c @@ -0,0 +1,179 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-tierd-svc-helper.h" +#include "glusterd-messages.h" +#include "syscall.h" +#include "glusterd-volgen.h" + + +void +glusterd_svc_build_tierd_rundir (glusterd_volinfo_t *volinfo, + char *path, int path_len) +{ + char workdir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_TIER_DIR (workdir, volinfo, priv); + + snprintf (path, path_len, "%s/run", workdir); +} + +void +glusterd_svc_build_tierd_socket_filepath (glusterd_volinfo_t *volinfo, + char *path, int path_len) +{ + char sockfilepath[PATH_MAX] = {0,}; + char rundir[PATH_MAX] = {0,}; + + glusterd_svc_build_tierd_rundir (volinfo, rundir, sizeof (rundir)); + snprintf (sockfilepath, sizeof (sockfilepath), "%s/run-%s", + rundir, uuid_utoa (MY_UUID)); + + glusterd_set_socket_filepath (sockfilepath, path, path_len); +} + +void +glusterd_svc_build_tierd_pidfile (glusterd_volinfo_t *volinfo, + char *path, int path_len) +{ + char rundir[PATH_MAX] = {0,}; + + glusterd_svc_build_tierd_rundir (volinfo, rundir, sizeof 
(rundir)); + + snprintf (path, path_len, "%s/%s-tierd.pid", rundir, volinfo->volname); +} + +void +glusterd_svc_build_tierd_volfile_path (glusterd_volinfo_t *volinfo, + char *path, int path_len) +{ + char workdir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR (workdir, volinfo, priv); + + snprintf (path, path_len, "%s/%s-tierd.vol", workdir, + volinfo->volname); +} + +void +glusterd_svc_build_tierd_logdir (char *logdir, char *volname, size_t len) +{ + snprintf (logdir, len, "%s/tier/%s", DEFAULT_LOG_FILE_DIRECTORY, + volname); +} + +void +glusterd_svc_build_tierd_logfile (char *logfile, char *logdir, size_t len) +{ + snprintf (logfile, len, "%s/tierd.log", logdir); +} + +int +glusterd_svc_check_tier_volfile_identical (char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = {0,}; + char tmpvol[PATH_MAX] = {0,}; + xlator_t *this = NULL; + int ret = -1; + int need_unlink = 0; + int tmp_fd = -1; + + this = THIS; + + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, identical, out); + + glusterd_svc_build_tierd_volfile_path (volinfo, orgvol, + sizeof (orgvol)); + + snprintf (tmpvol, sizeof (tmpvol), "/tmp/g%s-XXXXXX", svc_name); + + tmp_fd = mkstemp (tmpvol); + if (tmp_fd < 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + GD_MSG_FILE_OP_FAILED, "Unable to create temp file" + " %s:(%s)", tmpvol, strerror (errno)); + goto out; + } + + need_unlink = 1; + ret = build_rebalance_volfile (volinfo, tmpvol, NULL); + if (ret) + goto out; + + ret = glusterd_check_files_identical (orgvol, tmpvol, + identical); + if (ret) + goto out; + +out: + if (need_unlink) + sys_unlink (tmpvol); + + if (tmp_fd >= 0) + sys_close (tmp_fd); + + return ret; +} + +int +glusterd_svc_check_tier_topology_identical (char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = {0,}; + char tmpvol[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; 
+ xlator_t *this = THIS; + int ret = -1; + int tmpclean = 0; + int tmpfd = -1; + + if ((!identical) || (!this) || (!this->private)) + goto out; + + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + + glusterd_svc_build_tierd_volfile_path (volinfo, orgvol, + sizeof (orgvol)); + + snprintf (tmpvol, sizeof (tmpvol), "/tmp/g%s-XXXXXX", svc_name); + + tmpfd = mkstemp (tmpvol); + if (tmpfd < 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + GD_MSG_FILE_OP_FAILED, "Unable to create temp file" + " %s:(%s)", tmpvol, strerror (errno)); + goto out; + } + + tmpclean = 1; /* SET the flag to unlink() tmpfile */ + ret = build_rebalance_volfile (volinfo, tmpvol, NULL); + if (ret) + goto out; + + /* Compare the topology of volfiles */ + ret = glusterd_check_topology_identical (orgvol, tmpvol, + identical); +out: + if (tmpfd >= 0) + sys_close (tmpfd); + if (tmpclean) + sys_unlink (tmpvol); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.h new file mode 100644 index 00000000000..1f0e33b989c --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.h @@ -0,0 +1,37 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _GLUSTERD_TIERD_SVC_HELPER_H_ +#define _GLUSTERD_TIERD_SVC_HELPER_H_ + +#include "glusterd.h" + +void +glusterd_svc_build_tierd_rundir (glusterd_volinfo_t *volinfo, + char *path, int path_len); + +void +glusterd_svc_build_tierd_socket_filepath (glusterd_volinfo_t *volinfo, + char *path, int path_len); + +void +glusterd_svc_build_tierd_pidfile (glusterd_volinfo_t *volinfo, + char *path, int path_len); + +void +glusterd_svc_build_tierd_volfile_path (glusterd_volinfo_t *volinfo, + char *path, int path_len); + +void +glusterd_svc_build_tierd_logdir (char *logdir, char *volname, size_t len); + +void +glusterd_svc_build_tierd_logfile (char *logfile, char *logdir, size_t len); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c new file mode 100644 index 00000000000..bfc879a3436 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c @@ -0,0 +1,501 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include "globals.h" +#include "run.h" +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-tierd-svc.h" +#include "glusterd-tierd-svc-helper.h" +#include "glusterd-svc-helper.h" +#include "syscall.h" +#include "glusterd-store.h" + +char *tierd_svc_name = "tierd"; + +void +glusterd_tierdsvc_build (glusterd_svc_t *svc) +{ + svc->manager = glusterd_tierdsvc_manager; + svc->start = glusterd_tierdsvc_start; + svc->stop = glusterd_svc_stop; + svc->reconfigure = glusterd_tierdsvc_reconfigure; +} + +/* a separate service framework is used because the tierd is a + * volume based framework while the common services are for node + * based daemons. when volume based common framework is available + * this can be consolidated into it. + */ + +int +glusterd_tierdsvc_init (void *data) +{ + int ret = -1; + char rundir[PATH_MAX] = {0,}; + char sockpath[PATH_MAX] = {0,}; + char pidfile[PATH_MAX] = {0,}; + char volfile[PATH_MAX] = {0,}; + char logdir[PATH_MAX] = {0,}; + char logfile[PATH_MAX] = {0,}; + char volfileid[256] = {0}; + glusterd_svc_t *svc = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_conn_notify_t notify = NULL; + xlator_t *this = NULL; + char *volfileserver = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + volinfo = data; + GF_VALIDATE_OR_GOTO (this->name, data, out); + + svc = &(volinfo->tierd.svc); + + ret = snprintf (svc->name, sizeof (svc->name), "%s", tierd_svc_name); + if (ret < 0) + goto out; + + notify = glusterd_svc_common_rpc_notify; + glusterd_store_perform_node_state_store (volinfo); + + glusterd_svc_build_tierd_rundir (volinfo, rundir, sizeof (rundir)); + glusterd_svc_create_rundir (rundir); + + /* Initialize the connection mgmt */ + glusterd_svc_build_tierd_socket_filepath (volinfo, sockpath, + sizeof (sockpath)); + ret = glusterd_conn_init (&(svc->conn), 
sockpath, 600, notify); + if (ret) + goto out; + + /* Initialize the process mgmt */ + glusterd_svc_build_tierd_pidfile (volinfo, pidfile, sizeof (pidfile)); + glusterd_svc_build_tierd_volfile_path (volinfo, volfile, + sizeof (volfile)); + glusterd_svc_build_tierd_logdir (logdir, volinfo->volname, + sizeof (logdir)); + ret = mkdir_p (logdir, 0755, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_CREATE_DIR_FAILED, "Unable to create logdir %s", + logdir); + goto out; + } + glusterd_svc_build_tierd_logfile (logfile, logdir, sizeof (logfile)); + snprintf (volfileid, sizeof (volfileid), "tierd/%s", volinfo->volname); + + if (dict_get_str (this->options, "transport.socket.bind-address", + &volfileserver) != 0) { + volfileserver = "localhost"; + } + ret = glusterd_proc_init (&(svc->proc), tierd_svc_name, pidfile, logdir, + logfile, volfile, volfileid, volfileserver); + if (ret) + goto out; + +out: + gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_tierdsvc_create_volfile (glusterd_volinfo_t *volinfo) +{ + char filepath[PATH_MAX] = {0,}; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + glusterd_svc_build_tierd_volfile_path (volinfo, filepath, + sizeof (filepath)); + ret = build_rebalance_volfile (volinfo, filepath, NULL); + + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfile"); + goto out; + } + +out: + gf_msg_debug (this ? 
this->name : "glusterd", 0, "Returning %d", ret); + + return ret; + +} + + +int +glusterd_tierdsvc_manager (glusterd_svc_t *svc, void *data, int flags) +{ + int ret = 0; + xlator_t *this = THIS; + glusterd_volinfo_t *volinfo = NULL; + + volinfo = data; + GF_VALIDATE_OR_GOTO (this->name, data, out); + + if (!svc->inited) { + ret = glusterd_tierdsvc_init (volinfo); + if (ret) { + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_TIERD_INIT_FAIL, "Failed to initialize " + "tierd service for volume %s", + volinfo->volname); + goto out; + } else { + svc->inited = _gf_true; + gf_msg_debug (THIS->name, 0, "tierd service " + "initialized"); + } + } + + ret = glusterd_is_tierd_enabled (volinfo); + if (ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_VOLINFO_GET_FAIL, "Failed to read volume " + "options"); + goto out; + } + + if (ret) { + if (!glusterd_is_volume_started (volinfo)) { + if (glusterd_proc_is_running (&svc->proc)) { + ret = svc->stop (svc, SIGTERM); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_TIERD_STOP_FAIL, + "Couldn't stop tierd for " + "volume: %s", + volinfo->volname); + } else { + ret = 0; + } + goto out; + } + + ret = glusterd_tierdsvc_create_volfile (volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_TIERD_CREATE_FAIL, "Couldn't create " + "tierd volfile for volume: %s", + volinfo->volname); + goto out; + } + + ret = svc->start (svc, flags); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_TIERD_START_FAIL, "Couldn't start " + "tierd for volume: %s", volinfo->volname); + goto out; + } + + glusterd_volinfo_ref (volinfo); + ret = glusterd_conn_connect (&(svc->conn)); + if (ret) { + glusterd_volinfo_unref (volinfo); + goto out; + } + + } else if (glusterd_proc_is_running (&svc->proc)) { + ret = svc->stop (svc, SIGTERM); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_TIERD_STOP_FAIL, + "Couldn't stop tierd for volume: %s", + volinfo->volname); + goto out; + } + volinfo->tierd.port = 0; + } 
+ +out: + gf_msg_debug (THIS->name, 0, "Returning %d", ret); + + return ret; +} + + +int32_t +glusterd_tierdsvc_start (glusterd_svc_t *svc, int flags) +{ + int ret = -1; + runner_t runner = {0,}; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char valgrind_logfile[PATH_MAX] = {0}; + int tierd_port = 0; + char msg[1024] = {0,}; + char tierd_id[PATH_MAX] = {0,}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_tierdsvc_t *tierd = NULL; + int cmd = GF_DEFRAG_CMD_START_TIER; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + if (glusterd_proc_is_running (&svc->proc)) { + ret = 0; + goto out; + } + + /* Get volinfo->tierd from svc object */ + tierd = cds_list_entry (svc, glusterd_tierdsvc_t, svc); + if (!tierd) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_TIERD_OBJ_GET_FAIL, "Failed to get tierd object " + "from tierd service"); + goto out; + } + + /* Get volinfo from tierd */ + volinfo = cds_list_entry (tierd, glusterd_volinfo_t, tierd); + if (!volinfo) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo from " + "from tierd"); + goto out; + } + + ret = sys_access (svc->proc.volfile, F_OK); + if (ret) { + gf_msg (this->name, GF_LOG_DEBUG, 0, + GD_MSG_VOLINFO_GET_FAIL, + "tierd Volfile %s is not present", svc->proc.volfile); + /* If glusterd is down on one of the nodes and during + * that time if tier is started for the first time. After some + * time when the glusterd which was down comes back it tries + * to look for the tierd volfile and it does not find tierd + * volfile and because of this starting of tierd fails. + * Therefore, if volfile is not present then create a fresh + * volfile. 
+ */ + ret = glusterd_tierdsvc_create_volfile (volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_VOLFILE_CREATE_FAIL, "Couldn't create " + "tierd volfile for volume: %s", + volinfo->volname); + goto out; + } + } + runinit (&runner); + + if (priv->valgrind) { + snprintf (valgrind_logfile, PATH_MAX, "%s/valgrind-tierd.log", + svc->proc.logdir); + + runner_add_args (&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); + } + + snprintf (tierd_id, sizeof (tierd_id), "tierd-%s", volinfo->volname); + runner_add_args (&runner, SBIN_DIR"/glusterfs", + "-s", svc->proc.volfileserver, + "--volfile-id", svc->proc.volfileid, + "-p", svc->proc.pidfile, + "-l", svc->proc.logfile, + "--brick-name", tierd_id, + "-S", svc->conn.sockpath, + "--xlator-option", "*dht.use-readdirp=yes", + "--xlator-option", "*dht.lookup-unhashed=yes", + "--xlator-option", "*dht.assert-no-child-down=yes", + "--xlator-option", "*replicate*.data-self-heal=off", + "--xlator-option", + "*replicate*.metadata-self-heal=off", + "--xlator-option", "*replicate*.entry-self-heal=off", + "--xlator-option", "*dht.readdir-optimize=on", + "--xlator-option", + "*tier-dht.xattr-name=trusted.tier.tier-dht", + NULL); + + + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, "*dht.rebalance-cmd=%d", cmd); + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID)); + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, "*dht.commit-hash=%u", + volinfo->rebal.commit_hash); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + + /* Do a pmap registry remove on the older connected port */ + if (volinfo->tierd.port) { + ret = pmap_registry_remove (this, volinfo->tierd.port, + tierd_id, GF_PMAP_PORT_BRICKSERVER, + NULL); + if (ret) { + snprintf (msg, sizeof (msg), "Failed 
to remove pmap " + "registry for older signin"); + goto out; + } + } + + + + tierd_port = pmap_registry_alloc (this); + if (!tierd_port) { + snprintf (msg, sizeof (msg), "Could not allocate port " + "for tierd service for volume %s", + volinfo->volname); + runner_log (&runner, this->name, GF_LOG_DEBUG, msg); + ret = -1; + goto out; + } + + volinfo->tierd.port = tierd_port; + + snprintf (msg, sizeof (msg), + "Starting the tierd service for volume %s", volinfo->volname); + runner_log (&runner, this->name, GF_LOG_DEBUG, msg); + + if (flags == PROC_START_NO_WAIT) { + ret = runner_run_nowait (&runner); + } else { + synclock_unlock (&priv->big_lock); + { + ret = runner_run (&runner); + } + synclock_lock (&priv->big_lock); + } + +out: + return ret; +} + + +int +glusterd_tierdsvc_restart () +{ + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; + glusterd_svc_t *svc = NULL; + + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) { + /* Start per volume tierd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED && + glusterd_is_tierd_enabled (volinfo)) { + svc = &(volinfo->tierd.svc); + ret = svc->manager (svc, volinfo, PROC_START_NO_WAIT); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_TIERD_START_FAIL, + "Couldn't restart tierd for " + "vol: %s", volinfo->volname); + goto out; + } + } + } +out: + return ret; +} + + +int +glusterd_tierdsvc_reconfigure (void *data) +{ + int ret = -1; + xlator_t *this = NULL; + gf_boolean_t identical_topology = _gf_false; + gf_boolean_t identical_volfile = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + + volinfo = data; + GF_VALIDATE_OR_GOTO (this->name, data, out); + + /* reconfigure function is not available for other volume based + * service. 
but it has been implemented for tier because there can be + * changes on the volfile that need not be related to topology. + * during these changes it is better not to restart the tierd. + * So reconfigure is written to avoid calling restart at such + * situations. + */ + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + + if (glusterd_is_tierd_enabled (volinfo)) + goto manager; + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ + + ret = glusterd_svc_check_tier_volfile_identical + (volinfo->tierd.svc.name, volinfo, &identical_volfile); + if (ret) + goto out; + if (identical_volfile) { + ret = 0; + goto out; + } + + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. + */ + ret = glusterd_svc_check_tier_topology_identical + (volinfo->tierd.svc.name, volinfo, &identical_topology); + if (ret) + goto out; /*not able to compare due to some corruption */ + + /* Topology is not changed, but just the options. But write the + * options to tierd volfile, so that tierd will be reconfigured. + */ + if (identical_topology) { + ret = glusterd_tierdsvc_create_volfile (volinfo); + if (ret == 0) {/* Only if above PASSES */ + ret = glusterd_fetchspec_notify (this); + } + goto out; + } + goto out; + /*pending add/remove brick functionality*/ + +manager: + /* + * tierd volfile's topology has been changed. tierd server needs + * to be RESTARTED to ACT on the changed volfile. 
+ */ + ret = volinfo->tierd.svc.manager (&(volinfo->tierd.svc), + volinfo, PROC_START_NO_WAIT); + +out: + gf_msg_debug (THIS->name, 0, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.h b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.h new file mode 100644 index 00000000000..dfc63d25eb2 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.h @@ -0,0 +1,43 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_TIERD_SVC_H_ +#define _GLUSTERD_TIERD_SVC_H_ + +#include "glusterd-svc-mgmt.h" + + +typedef struct glusterd_tierdsvc_ glusterd_tierdsvc_t; + +struct glusterd_tierdsvc_ { + glusterd_svc_t svc; + int port; + gf_store_handle_t *handle; +}; + +void +glusterd_tierdsvc_build (glusterd_svc_t *svc); + +int +glusterd_tierdsvc_init (void *data); + +int +glusterd_tierdsvc_manager (glusterd_svc_t *svc, void *data, int flags); + +int +glusterd_tierdsvc_start (glusterd_svc_t *svc, int flags); + +int +glusterd_tierdsvc_reconfigure (void *data); + +int +glusterd_tierdsvc_restart (); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 3b3effac1d3..7d1835236fe 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -576,6 +576,9 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) new_volinfo->snapd.svc.build = glusterd_snapdsvc_build; new_volinfo->snapd.svc.build (&(new_volinfo->snapd.svc)); + new_volinfo->tierd.svc.build = glusterd_tierdsvc_build; + new_volinfo->tierd.svc.build (&(new_volinfo->tierd.svc)); + pthread_mutex_init (&new_volinfo->reflock, NULL); *volinfo = 
glusterd_volinfo_ref (new_volinfo); @@ -3075,6 +3078,7 @@ glusterd_spawn_daemons (void *opaque) glusterd_restart_gsyncds (conf); glusterd_restart_rebalance (conf); ret = glusterd_snapdsvc_restart (); + ret = glusterd_tierdsvc_restart (); return ret; } @@ -4455,6 +4459,9 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node) } else if (pending_node->type == GD_NODE_SNAPD) { volinfo = pending_node->node; rpc = volinfo->snapd.svc.conn.rpc; + } else if (pending_node->type == GD_NODE_TIERD) { + volinfo = pending_node->node; + rpc = volinfo->tierd.svc.conn.rpc; } else { GF_ASSERT (0); } @@ -4474,6 +4481,10 @@ glusterd_pending_node_put_rpc (glusterd_pending_node_t *pending_node) glusterd_defrag_rpc_put (volinfo->rebal.defrag); break; + case GD_NODE_TIERD: + volinfo = pending_node->node; + glusterd_defrag_rpc_put (volinfo->tier.defrag); + break; default: break; } @@ -7175,6 +7186,15 @@ glusterd_friend_remove_cleanup_vols (uuid_t uuid) "to stop snapd daemon service"); } } + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + svc = &(volinfo->tierd.svc); + ret = svc->stop (svc, SIGTERM); + if (ret) { + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_SVC_STOP_FAIL, "Failed " + "to stop tierd daemon service"); + } + } } if (glusterd_friend_contains_vol_bricks (volinfo, uuid) == 2) { @@ -7590,7 +7610,7 @@ out: int glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, - dict_t *rsp_dict) + dict_t *rsp_dict, int32_t cmd) { int ret = 0; uint64_t files = 0; @@ -7651,25 +7671,43 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, gf_msg_trace (this->name, 0, "failed to get run-time"); - if (files) - volinfo->rebal.rebalance_files = files; - if (size) - volinfo->rebal.rebalance_data = size; - if (lookup) - volinfo->rebal.lookedup_files = lookup; - if (status) - volinfo->rebal.defrag_status = status; - if (failures) - volinfo->rebal.rebalance_failures = failures; - if (skipped) - volinfo->rebal.skipped_files = skipped; - if (run_time) - 
volinfo->rebal.rebalance_time = run_time; + if (cmd == GF_DEFRAG_CMD_STATUS_TIER) { + if (files) + volinfo->tier.rebalance_files = files; + if (size) + volinfo->tier.rebalance_data = size; + if (lookup) + volinfo->tier.lookedup_files = lookup; + if (status) + volinfo->tier.defrag_status = status; + if (failures) + volinfo->tier.rebalance_failures = failures; + if (skipped) + volinfo->tier.skipped_files = skipped; + if (run_time) + volinfo->tier.rebalance_time = run_time; + } else { + if (files) + volinfo->rebal.rebalance_files = files; + if (size) + volinfo->rebal.rebalance_data = size; + if (lookup) + volinfo->rebal.lookedup_files = lookup; + if (status) + volinfo->rebal.defrag_status = status; + if (failures) + volinfo->rebal.rebalance_failures = failures; + if (skipped) + volinfo->rebal.skipped_files = skipped; + if (run_time) + volinfo->rebal.rebalance_time = run_time; + } if (promoted) volinfo->tier_info.promoted = promoted; if (demoted) volinfo->tier_info.demoted = demoted; + return ret; } @@ -9373,6 +9411,212 @@ out: } int +glusterd_volume_tier_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + char key[256] = {0,}; + char *node_uuid = NULL; + char *node_uuid_str = NULL; + char *volname = NULL; + dict_t *ctx_dict = NULL; + double elapsed_time = 0; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int32_t index = 0; + int32_t count = 0; + int32_t value32 = 0; + uint64_t value = 0; + xlator_t *this = NULL; + char *task_id_str = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO (this->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, rsp_dict, out); + + if (aggr) { + ctx_dict = aggr; + + } else { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_OPCTX_GET_FAIL, + "Operation Context is not present"); + goto out; + } + + if (!ctx_dict) + goto out; + + ret = dict_get_str (ctx_dict, "volname", &volname); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find 
(volname, &volinfo); + + if (ret) + goto out; + + ret = dict_get_int32 (rsp_dict, "count", &index); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_GET_FAILED, + "failed to get index"); + + memset (key, 0, 256); + snprintf (key, 256, "node-uuid-%d", index); + ret = dict_get_str (rsp_dict, key, &node_uuid); + if (!ret) { + node_uuid_str = gf_strdup (node_uuid); + + } + ret = dict_get_int32 (ctx_dict, "count", &count); + count++; + ret = dict_set_int32 (ctx_dict, "count", count); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "Failed to set count"); + + memset (key, 0, 256); + snprintf (key, 256, "node-uuid-%d", count); + ret = dict_set_dynstr (ctx_dict, key, node_uuid_str); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set node-uuid"); + } + + snprintf (key, 256, "files-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "files-%d", count); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set the file count"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "size-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "size-%d", count); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set the size of migration"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "lookups-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "lookups-%d", count); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set lookuped file count"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "status-%d", index); + ret = dict_get_int32 (rsp_dict, key, &value32); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "status-%d", count); + ret 
= dict_set_int32 (ctx_dict, key, value32); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set status"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "failures-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "failures-%d", count); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set failure count"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "skipped-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "skipped-%d", count); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set skipped count"); + } + } + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", index); + ret = dict_get_double (rsp_dict, key, &elapsed_time); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", count); + ret = dict_set_double (ctx_dict, key, elapsed_time); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set run-time"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "demoted-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "demoted-%d", count); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set demoted count"); + } + } + memset (key, 0, 256); + snprintf (key, 256, "promoted-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "promoted-%d", count); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_msg_debug (this->name, 0, + "failed to set promoted count"); + } + } + + ret = dict_get_str (rsp_dict, GF_REMOVE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_msg_debug (this->name, errno, + "Missing remove-brick-id"); + } else + ret = dict_set_str 
(ctx_dict, GF_REMOVE_BRICK_TID_KEY, + task_id_str); + + ret = 0; + +out: + return ret; +} + +int glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src) { char output_name[PATH_MAX] = ""; @@ -9892,6 +10136,71 @@ out: } int +glusterd_tier_or_rebalance_rsp (dict_t *op_ctx, glusterd_rebalance_t *index, int32_t i) +{ + int ret = 0; + char key[256] = {0,}; + + memset (key, 0 , 256); + snprintf (key, 256, "files-%d", i); + ret = dict_set_uint64 (op_ctx, key, index->rebalance_files); + if (ret) + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to set file count"); + + memset (key, 0 , 256); + snprintf (key, 256, "size-%d", i); + ret = dict_set_uint64 (op_ctx, key, index->rebalance_data); + if (ret) + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to set size of xfer"); + + memset (key, 0 , 256); + snprintf (key, 256, "lookups-%d", i); + ret = dict_set_uint64 (op_ctx, key, index->lookedup_files); + if (ret) + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to set lookedup file count"); + + memset (key, 0 , 256); + snprintf (key, 256, "status-%d", i); + ret = dict_set_int32 (op_ctx, key, index->defrag_status); + if (ret) + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to set status"); + + memset (key, 0 , 256); + snprintf (key, 256, "failures-%d", i); + ret = dict_set_uint64 (op_ctx, key, index->rebalance_failures); + if (ret) + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to set failure count"); + + memset (key, 0 , 256); + snprintf (key, 256, "skipped-%d", i); + ret = dict_set_uint64 (op_ctx, key, index->skipped_files); + if (ret) + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to set skipped count"); + + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", i); + ret = dict_set_double (op_ctx, key, index->rebalance_time); + if (ret) + gf_msg (THIS->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed 
to set run-time"); + + return ret; +} + +int glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, dict_t *op_ctx) { @@ -9902,6 +10211,7 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, int32_t i = 0; char buf[1024] = {0,}; char *node_str = NULL; + int32_t cmd = 0; GF_ASSERT (req_dict); @@ -9915,12 +10225,20 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, ret = glusterd_volinfo_find (volname, &volinfo); + ret = dict_get_int32 (req_dict, "rebalance-command", &cmd); + if (ret) { + gf_msg (THIS->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_GET_FAILED, "Unable to get the cmd"); + goto out; + } + if (ret) goto out; if (rsp_dict) { ret = glusterd_defrag_volume_status_update (volinfo, - rsp_dict); + rsp_dict, + cmd); } if (!op_ctx) { @@ -9947,61 +10265,10 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, GD_MSG_DICT_SET_FAILED, "failed to set node-uuid"); - memset (key, 0 , 256); - snprintf (key, 256, "files-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_files); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SET_FAILED, - "failed to set file count"); - - memset (key, 0 , 256); - snprintf (key, 256, "size-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_data); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SET_FAILED, - "failed to set size of xfer"); - - memset (key, 0 , 256); - snprintf (key, 256, "lookups-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.lookedup_files); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SET_FAILED, - "failed to set lookedup file count"); - - memset (key, 0 , 256); - snprintf (key, 256, "status-%d", i); - ret = dict_set_int32 (op_ctx, key, volinfo->rebal.defrag_status); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SET_FAILED, - "failed to set status"); - - memset (key, 0 , 256); - snprintf (key, 256, "failures-%d", i); - ret = 
dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_failures); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SET_FAILED, - "failed to set failure count"); - - memset (key, 0 , 256); - snprintf (key, 256, "skipped-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.skipped_files); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SET_FAILED, - "failed to set skipped count"); - - memset (key, 0, 256); - snprintf (key, 256, "run-time-%d", i); - ret = dict_set_double (op_ctx, key, volinfo->rebal.rebalance_time); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SET_FAILED, - "failed to set run-time"); + if (cmd == GF_DEFRAG_CMD_STATUS_TIER) + glusterd_tier_or_rebalance_rsp (op_ctx, &volinfo->tier, i); + else + glusterd_tier_or_rebalance_rsp (op_ctx, &volinfo->rebal, i); memset (key, 0 , 256); snprintf (key, 256, "promoted-%d", i); @@ -10041,7 +10308,8 @@ glusterd_handle_node_rsp (dict_t *req_dict, void *pending_entry, ret = glusterd_status_volume_brick_rsp (rsp_dict, op_ctx, op_errstr); break; - + case GD_OP_TIER_STATUS: + case GD_OP_DETACH_TIER_STATUS: case GD_OP_DEFRAG_BRICK_VOLUME: glusterd_defrag_volume_node_rsp (req_dict, rsp_dict, op_ctx); @@ -10407,6 +10675,12 @@ glusterd_is_volume_inode_quota_enabled (glusterd_volinfo_t *volinfo) } int +glusterd_is_tierd_enabled (glusterd_volinfo_t *volinfo) +{ + return volinfo->is_tier_enabled; +} + +int glusterd_is_bitrot_enabled (glusterd_volinfo_t *volinfo) { return glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_BITROT); @@ -11666,6 +11940,11 @@ glusterd_disallow_op_for_tier (glusterd_volinfo_t *volinfo, glusterd_op_t op, case GF_DEFRAG_CMD_STOP_DETACH_TIER: case GF_DEFRAG_CMD_STATUS: case GF_DEFRAG_CMD_DETACH_STATUS: + case GF_DEFRAG_CMD_STOP_TIER: + case GF_DEFRAG_CMD_DETACH_START: + case GF_DEFRAG_CMD_DETACH_COMMIT: + case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE: + case GF_DEFRAG_CMD_DETACH_STOP: ret = 0; break; default: @@ -11679,6 +11958,7 @@ 
glusterd_disallow_op_for_tier (glusterd_volinfo_t *volinfo, glusterd_op_t op, break; case GD_OP_REMOVE_BRICK: switch (cmd) { + case GF_DEFRAG_CMD_DETACH_START: case GF_OP_CMD_DETACH_COMMIT_FORCE: case GF_OP_CMD_DETACH_COMMIT: case GF_OP_CMD_DETACH_START: diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 53f4d971998..5f490534ef5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -268,6 +268,13 @@ glusterd_brick_stop (glusterd_volinfo_t *volinfo, gf_boolean_t glusterd_is_tier_daemon_running (glusterd_volinfo_t *volinfo); +int32_t +glusterd_add_tierd_to_dict (glusterd_volinfo_t *volinfo, + dict_t *dict, int32_t count); + +int +glusterd_op_tier_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict, + glusterd_op_t op); int glusterd_is_defrag_on (glusterd_volinfo_t *volinfo); @@ -445,7 +452,7 @@ glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo); int glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, - dict_t *rsp_dict); + dict_t *rsp_dict, int32_t cmd); int glusterd_check_files_identical (char *filename1, char *filename2, @@ -474,6 +481,8 @@ glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict); int glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); int +glusterd_volume_tier_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); int glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); @@ -540,6 +549,11 @@ gf_boolean_t gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo); int +glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count, + dict_t *dict, + glusterd_volinfo_t *volinfo, + char **errstr); +int glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict, char **slave_url, char **slave_host, char **slave_vol, @@ -579,6 +593,9 @@ gf_boolean_t 
gd_should_i_start_rebalance (glusterd_volinfo_t *volinfo); int +glusterd_is_tierd_enabled (glusterd_volinfo_t *volinfo); + +int glusterd_is_volume_quota_enabled (glusterd_volinfo_t *volinfo); int diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index a270f136c92..9537156b6c3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -4738,7 +4738,7 @@ out: return shd_enabled; } -static int +int build_rebalance_volfile (glusterd_volinfo_t *volinfo, char *filepath, dict_t *mod_dict) { diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index f90177372dc..2d62f720c8a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -196,6 +196,10 @@ int build_quotad_graph (volgen_graph_t *graph, dict_t *mod_dict); int +build_rebalance_volfile (glusterd_volinfo_t *volinfo, char *filepath, + dict_t *mod_dict); + +int build_bitd_graph (volgen_graph_t *graph, dict_t *mod_dict); int diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index e2b7fc1377b..0c3ac5816e7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -2654,9 +2654,9 @@ glusterd_op_start_volume (dict_t *dict, char **op_errstr) if (volinfo->type == GF_CLUSTER_TYPE_TIER) { if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) { glusterd_defrag_info_set (volinfo, dict, - GF_DEFRAG_CMD_START_TIER, - GF_DEFRAG_CMD_START, - GD_OP_REBALANCE); + GF_DEFRAG_CMD_START_TIER, + GF_DEFRAG_CMD_START, + GD_OP_REBALANCE); } glusterd_restart_rebalance_for_volume (volinfo); } @@ -2735,6 +2735,13 @@ glusterd_stop_volume (glusterd_volinfo_t *volinfo) goto out; } + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + svc = &(volinfo->tierd.svc); + ret = svc->manager (svc, volinfo, PROC_START_NO_WAIT); + if (ret) + goto out; + 
} + ret = glusterd_svcs_manager (volinfo); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index b8f92e4fcd5..a75770220b3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2146,7 +2146,6 @@ struct volopt_map_entry glusterd_volopt_map[] = { .flags = OPT_FLAG_NEVER_RESET, .op_version = 1 }, - { .key = VKEY_FEATURES_BITROT, .voltype = "features/bit-rot", .option = "bitrot", diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 9d3d867e64c..e6a4d8b65a8 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -28,6 +28,7 @@ #include "rpcsvc.h" #include "glusterd-sm.h" #include "glusterd-snapd-svc.h" +#include "glusterd-tierd-svc.h" #include "glusterd-bitd-svc.h" #include "glusterd1-xdr.h" #include "protocol-common.h" @@ -125,6 +126,11 @@ typedef enum glusterd_op_ { GD_OP_SCRUB_ONDEMAND, GD_OP_RESET_BRICK, GD_OP_MAX_OPVERSION, + GD_OP_TIER_START_STOP, + GD_OP_TIER_STATUS, + GD_OP_DETACH_TIER_STATUS, + GD_OP_DETACH_NOT_STARTED, + GD_OP_REMOVE_TIER_BRICK, GD_OP_MAX, } glusterd_op_t; @@ -300,7 +306,6 @@ struct glusterd_bitrot_scrub_ { typedef struct glusterd_bitrot_scrub_ glusterd_bitrot_scrub_t; - struct glusterd_rebalance_ { gf_defrag_status_t defrag_status; uint64_t rebalance_files; @@ -354,6 +359,7 @@ struct glusterd_volinfo_ { glusterd_snap_t *snapshot; uuid_t restored_from_snap; gd_tier_info_t tier_info; + gf_boolean_t is_tier_enabled; char parent_volname[GD_VOLUME_NAME_MAX]; /* In case of a snap volume i.e (is_snap_volume == TRUE) this @@ -410,6 +416,8 @@ struct glusterd_volinfo_ { /* Bitrot scrub status*/ glusterd_bitrot_scrub_t bitrot_scrub; + glusterd_rebalance_t tier; + int version; uint32_t quota_conf_version; uint32_t cksum; @@ -438,6 +446,7 @@ struct glusterd_volinfo_ { gd_quorum_status_t quorum_status; 
glusterd_snapdsvc_t snapd; + glusterd_tierdsvc_t tierd; int32_t quota_xattr_version; }; @@ -489,6 +498,7 @@ typedef enum gd_node_type_ { GD_NODE_SNAPD, GD_NODE_BITD, GD_NODE_SCRUB, + GD_NODE_TIERD } gd_node_type; typedef enum missed_snap_stat { @@ -574,6 +584,17 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir, \ volinfo->volname); \ } +#define GLUSTERD_GET_TIER_DIR(path, volinfo, priv) do { \ + snprintf (path, PATH_MAX, "%s/tier/%s", priv->workdir, \ + volinfo->volname); \ + } while (0) + +#define GLUSTERD_GET_TIER_PID_FILE(path, volinfo, priv) do { \ + char tier_path[PATH_MAX]; \ + GLUSTERD_GET_TIER_DIR(tier_path, volinfo, priv); \ + snprintf (path, PATH_MAX, "%s/run/%s-tierd.pid", tier_path,\ + volinfo->volname); \ + } while (0) #define GLUSTERD_GET_SNAP_DIR(path, snap, priv) \ snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, \ @@ -895,6 +916,9 @@ int glusterd_handle_add_brick (rpcsvc_request_t *req); int +glusterd_handle_tier (rpcsvc_request_t *req); + +int glusterd_handle_attach_tier (rpcsvc_request_t *req); int @@ -912,6 +936,15 @@ glusterd_handle_log_rotate (rpcsvc_request_t *req); int glusterd_handle_sync_volume (rpcsvc_request_t *req); +int +glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, glusterd_op_t op); + +int +glusterd_rebalance_cmd_validate (int cmd, char *volname, + glusterd_volinfo_t **volinfo, + char *op_errstr, size_t len); + int32_t glusterd_log_filename (rpcsvc_request_t *req, dict_t *dict); @@ -1207,7 +1240,16 @@ glusterd_should_i_stop_bitd (); int glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo, gf_defrag_status_t status); +/* tier */ int __glusterd_handle_reset_brick (rpcsvc_request_t *req); +int glusterd_op_stage_tier (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int glusterd_op_tier_start_stop (dict_t *dict, char **op_errstr, + dict_t *rsp_dict); +int glusterd_op_remove_tier_brick (dict_t 
*dict, char **op_errstr, + dict_t *rsp_dict); +int +glusterd_tier_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict, uint32_t *op_errno); #endif |