diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-rebalance.c')
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 447 |
1 files changed, 261 insertions, 186 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 7e35f06dd..b7b974c68 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -48,61 +38,31 @@ #include "xdr-generic.h" int32_t -glusterd3_1_brick_op_cbk (struct rpc_req *req, struct iovec *iov, +glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe); - -void -glusterd_rebalance_cmd_attempted_log (int cmd, char *volname) -{ - switch (cmd) { - case GF_DEFRAG_CMD_START_LAYOUT_FIX: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start fix layout , attempted", - volname); - gf_log ("glusterd", GF_LOG_INFO, "Received rebalance " - "volume start layout fix on %s", volname); - break; - case GF_DEFRAG_CMD_START_FORCE: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start data force attempted", - volname); - gf_log ("glusterd", GF_LOG_INFO, "Received rebalance " - "volume start migrate data on %s", volname); - break; - case GF_DEFRAG_CMD_START: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start, attempted", volname); - gf_log ("glusterd", GF_LOG_INFO, "Received rebalance " - "volume start on %s", volname); - break; - case GF_DEFRAG_CMD_STOP: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: stop, attempted", volname); - gf_log ("glusterd", GF_LOG_INFO, "Received rebalance " - "volume stop on %s", volname); - break; - default: - break; - } -} - -void -glusterd_rebalance_cmd_log (int cmd, char *volname, int status) -{ - if (cmd != GF_DEFRAG_CMD_STATUS) { - gf_cmd_log ("volume rebalance"," on volname: %s %d %s", - volname, cmd, ((status)?"FAILED":"SUCCESS")); - } -} - int glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len) + size_t len, glusterd_op_t op) { - int ret = -1; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + /* Check only if operation is not remove-brick */ + if ((GD_OP_REMOVE_BRICK != op) && + !gd_is_remove_brick_committed (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on " + "volume %s is not yet committed", volinfo->volname); + snprintf (op_errstr, len, "A remove-brick task on volume %s is" + " not yet committed. Either commit or stop the " + "remove-brick task.", volinfo->volname); + goto out; + } if (glusterd_is_defrag_on (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "rebalance on volume %s already started", volinfo->volname); snprintf (op_errstr, len, "Rebalance on %s is already started", @@ -112,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, if (glusterd_is_rb_started (volinfo) || glusterd_is_rb_paused (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Rebalance failed as replace brick is in progress on volume %s", volinfo->volname); snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on " @@ -121,13 +81,14 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, } ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } + int32_t -glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, void *data) +__glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { glusterd_volinfo_t *volinfo = NULL; glusterd_defrag_info_t *defrag = NULL; @@ -143,12 +104,12 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, if (!volinfo) return 0; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; if (!defrag) return 0; if ((event == RPC_CLNT_DISCONNECT) && defrag->connected) - volinfo->defrag = NULL; + volinfo->rebal.defrag = NULL; GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); @@ -181,27 +142,24 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, UNLOCK (&defrag->lock); if (!glusterd_is_service_running (pidfile, NULL)) { - if (volinfo->defrag_status == - GF_DEFRAG_STATUS_STARTED) { - volinfo->defrag_status = - GF_DEFRAG_STATUS_FAILED; - } else { - volinfo->defrag_cmd = 0; + if (volinfo->rebal.defrag_status == + GF_DEFRAG_STATUS_STARTED) { + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_FAILED; } } - glusterd_store_volinfo (volinfo, - GLUSTERD_VOLINFO_VER_AC_INCREMENT); + glusterd_store_perform_node_state_store (volinfo); if (defrag->rpc) { rpc_clnt_unref (defrag->rpc); defrag->rpc = NULL; } if (defrag->cbk_fn) - defrag->cbk_fn (volinfo, volinfo->defrag_status); + defrag->cbk_fn (volinfo, + volinfo->rebal.defrag_status); - if (defrag) - GF_FREE (defrag); + GF_FREE (defrag); gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", rpc->conn.trans->name); break; @@ -216,66 +174,65 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, return ret; } +int32_t +glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, + data, __glusterd_defrag_notify); +} + int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len, int cmd, defrag_cbk_fn_t cbk) + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op) { int ret = -1; glusterd_defrag_info_t *defrag = NULL; runner_t runner = {0,}; glusterd_conf_t *priv = NULL; char defrag_path[PATH_MAX]; - struct stat buf = {0,}; char sockfile[PATH_MAX] = {0,}; char pidfile[PATH_MAX] = {0,}; char logfile[PATH_MAX] = {0,}; dict_t *options = NULL; -#ifdef DEBUG char valgrind_logfile[PATH_MAX] = {0,}; -#endif + priv = THIS->private; GF_ASSERT (volinfo); GF_ASSERT (op_errstr); - ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); + ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op); if (ret) goto out; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + if (!volinfo->rebal.defrag) goto out; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; defrag->cmd = cmd; - LOCK_INIT (&defrag->lock); + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.op = op; - volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED; + LOCK_INIT (&defrag->lock); - volinfo->rebalance_files = 0; - volinfo->rebalance_data = 0; - volinfo->lookedup_files = 0; - volinfo->rebalance_failures = 0; + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; - volinfo->defrag_cmd = cmd; - glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + glusterd_volinfo_reset_defrag_stats (volinfo); + glusterd_store_perform_node_state_store (volinfo); GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv); - ret = stat (defrag_path, &buf); - if (ret && (errno == ENOENT)) { - runinit (&runner); - runner_add_args (&runner, "mkdir", "-p", defrag_path, NULL); - ret = runner_run_reuse (&runner); - if (ret) { - runner_log (&runner, "glusterd", GF_LOG_DEBUG, - "command failed"); - runner_end (&runner); - goto out; - } - runner_end (&runner); + ret = mkdir_p (defrag_path, 0777, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create " + "directory %s", defrag_path); + goto out; } GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); @@ -283,7 +240,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log", DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname); runinit (&runner); -#ifdef DEBUG + if (priv->valgrind) { snprintf (valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log", @@ -291,21 +248,27 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, volinfo->volname); runner_add_args (&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", NULL); + "--trace-children=yes", "--track-origins=yes", + NULL); runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); } -#endif runner_add_args (&runner, SBIN_DIR"/glusterfs", "-s", "localhost", "--volfile-id", volinfo->volname, "--xlator-option", "*dht.use-readdirp=yes", "--xlator-option", "*dht.lookup-unhashed=yes", "--xlator-option", "*dht.assert-no-child-down=yes", + "--xlator-option", "*replicate*.data-self-heal=off", + "--xlator-option", + "*replicate*.metadata-self-heal=off", + "--xlator-option", "*replicate*.entry-self-heal=off", + "--xlator-option", "*replicate*.readdir-failover=off", + "--xlator-option", "*dht.readdir-optimize=on", NULL); runner_add_arg (&runner, "--xlator-option"); runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); runner_add_arg (&runner, "--xlator-option"); - runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(priv->uuid)); + runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID)); runner_add_arg (&runner, "--socket-file"); runner_argprintf (&runner, "%s",sockfile); runner_add_arg (&runner, "--pid-file"); @@ -315,22 +278,29 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, if (volinfo->memory_accounting) runner_add_arg (&runner, "--mem-accounting"); - ret = runner_run_reuse (&runner); + ret = runner_run_nowait (&runner); if (ret) { - runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed"); - runner_end (&runner); + gf_log ("glusterd", GF_LOG_DEBUG, "rebalance command failed"); goto out; } sleep (5); - ret = rpc_clnt_transport_unix_options_build (&options, sockfile); + + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. The + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. + */ + ret = rpc_transport_unix_options_build (&options, sockfile, 600); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); goto out; } + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&defrag->rpc, options, glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); goto out; @@ -354,27 +324,37 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, int ret = -1; glusterd_defrag_info_t *defrag = NULL; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + + if (!volinfo->rebal.defrag) goto out; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; defrag->cmd = cmd; LOCK_INIT (&defrag->lock); GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); - ret = rpc_clnt_transport_unix_options_build (&options, sockfile); + + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. The + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. + */ + ret = rpc_transport_unix_options_build (&options, sockfile, 600); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); goto out; } + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&defrag->rpc, options, glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); goto out; @@ -423,26 +403,32 @@ out: } int -glusterd_handle_defrag_volume (rpcsvc_request_t *req) +__glusterd_handle_defrag_volume (rpcsvc_request_t *req) { - int32_t ret = -1; - gf_cli_req cli_req = {{0,}}; - glusterd_conf_t *priv = NULL; - dict_t *dict = NULL; - char *volname = NULL; - gf_cli_defrag_type cmd = 0; + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + char *volname = NULL; + gf_cli_defrag_type cmd = 0; + char msg[2048] = {0,}; + xlator_t *this = NULL; GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); - priv = THIS->private; + priv = this->private; + GF_ASSERT (priv); - if (!xdr_to_generic (req->msg[0], &cli_req, - (xdrproc_t)xdr_gf_cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - if (cli_req.dict.dict_len) { + + if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ dict = dict_new (); @@ -450,39 +436,39 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req) cli_req.dict.dict_len, &dict); if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " + gf_log (this->name, GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); + snprintf (msg, sizeof (msg), "Unable to decode the " + "command"); goto out; } } ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, - "Failed to get volname"); + snprintf (msg, sizeof (msg), "Failed to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); goto out; } ret = dict_get_int32 (dict, "rebalance-command", (int32_t*)&cmd); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, - "Failed to get command"); + snprintf (msg, sizeof (msg), "Failed to get command"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); goto out; } - glusterd_rebalance_cmd_attempted_log (cmd, volname); - - ret = dict_set_static_bin (dict, "node-uuid", priv->uuid, 16); + ret = dict_set_static_bin (dict, "node-uuid", MY_UUID, 16); if (ret) goto out; if ((cmd == GF_DEFRAG_CMD_STATUS) || (cmd == GF_DEFRAG_CMD_STOP)) { ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, - dict); + dict, msg, sizeof (msg)); } else - ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict); + ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict, + msg, sizeof (msg)); out: @@ -490,54 +476,94 @@ out: glusterd_op_sm (); if (ret) { - if (dict) - dict_unref (dict); - ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, req, - NULL, "operation failed"); + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Operation failed"); + ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, + req, dict, msg); + } - if (cli_req.dict.dict_val) - free (cli_req.dict.dict_val);//malloced by xdr + free (cli_req.dict.dict_val);//malloced by xdr return 0; } +int +glusterd_handle_defrag_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_defrag_volume); +} + int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + dict_t *op_ctx = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not found"); + gf_log (this->name, GF_LOG_DEBUG, "volname not found"); goto out; } + ret = dict_get_int32 (dict, "rebalance-command", &cmd); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd not found"); + gf_log (this->name, GF_LOG_DEBUG, "cmd not found"); goto out; } ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "failed to validate"); + gf_log (this->name, GF_LOG_DEBUG, "failed to validate"); goto out; } switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: - ret = glusterd_defrag_start_validate (volinfo, - msg, sizeof (msg)); + if (is_origin_glusterd (dict)) { + op_ctx = glusterd_op_get_ctx (); + if (!op_ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (op_ctx, GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, + &task_id_str); + if (ret) { + snprintf (msg, sizeof (msg), + "Missing rebalance-id"); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); + ret = 0; + } + } + ret = glusterd_defrag_start_validate (volinfo, msg, + sizeof (msg), + GD_OP_REBALANCE); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, - "start validate failed"); + gf_log (this->name, GF_LOG_DEBUG, + "start validate failed"); goto out; } break; @@ -560,45 +586,96 @@ out: int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; - gf_boolean_t volfile_update = _gf_false; - - priv = THIS->private; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + dict_t *ctx = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not given"); + gf_log (this->name, GF_LOG_DEBUG, "volname not given"); goto out; } ret = dict_get_int32 (dict, "rebalance-command", &cmd); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "command not given"); + gf_log (this->name, GF_LOG_DEBUG, "command not given"); goto out; } + ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd validate failed"); + gf_log (this->name, GF_LOG_DEBUG, "cmd validate failed"); goto out; } + /* Set task-id, if available, in op_ctx dict for operations other than + * start + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + ret = -1; + goto out; + } + + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REMOVE_BRICK_TID_KEY); + else + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set task-id"); + goto out; + } + } + } + switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Missing rebalance " + "id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + volinfo->rebal.op = GD_OP_REBALANCE; + } ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), - cmd, NULL); + cmd, NULL, GD_OP_REBALANCE); break; case GF_DEFRAG_CMD_STOP: + /* Clear task-id only on explicitly stopping rebalance. + * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts + */ + uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + /* Fall back to the old volume file in case of decommission*/ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) { @@ -615,7 +692,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } @@ -623,7 +700,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } @@ -637,8 +714,6 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) break; } - glusterd_rebalance_cmd_log (cmd, volname, ret); - out: if (ret && op_errstr && msg[0]) *op_errstr = gf_strdup (msg); |
