diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-rebalance.c')
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 1812 |
1 files changed, 1282 insertions, 530 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 20142a1cc4b..458bf168ede 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -1,670 +1,1422 @@ /* - Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #include <inttypes.h> #include <sys/types.h> #include <unistd.h> #include <sys/resource.h> #include <sys/statvfs.h> -#include "globals.h" -#include "compat.h" +#include <glusterfs/compat.h> #include "protocol-common.h" -#include "xlator.h" -#include "logging.h" -#include "timer.h" +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/timer.h> #include "glusterd-mem-types.h" #include "glusterd.h" #include "glusterd-sm.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" +#include "glusterd-mgmt.h" +#include "glusterd-messages.h" #include "glusterd-store.h" -#include "run.h" +#include <glusterfs/run.h> #include "glusterd-volgen.h" +#include "glusterd-messages.h" -#include "syscall.h" +#include <glusterfs/syscall.h> #include "cli1-xdr.h" #include "xdr-generic.h" -int32_t -glusterd3_1_brick_op_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe); - -void -glusterd_rebalance_cmd_attempted_log (int cmd, char *volname) -{ - switch (cmd) { - case GF_DEFRAG_CMD_START_LAYOUT_FIX: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start fix layout , attempted", - volname); - gf_log ("glusterd", GF_LOG_INFO, "Received rebalance " - "volume start layout fix on %s", volname); - break; - case GF_DEFRAG_CMD_START_FORCE: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start data force attempted", - volname); - gf_log ("glusterd", GF_LOG_INFO, "Received rebalance " - "volume start migrate data on %s", volname); - break; - case GF_DEFRAG_CMD_START: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start, attempted", volname); - gf_log ("glusterd", GF_LOG_INFO, "Received rebalance " - "volume start on %s", volname); - break; - case GF_DEFRAG_CMD_STOP: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: stop, attempted", volname); - gf_log ("glusterd", GF_LOG_INFO, "Received rebalance " - "volume stop on %s", volname); - break; - default: - break; - } -} - -void 
-glusterd_rebalance_cmd_log (int cmd, char *volname, int status) -{ - if (cmd != GF_DEFRAG_CMD_STATUS) { - gf_cmd_log ("volume rebalance"," on volname: %s %d %s", - volname, cmd, ((status)?"FAILED":"SUCCESS")); - } -} +#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo) \ + do { \ + int32_t _defrag_sockfile_len; \ + char tmppath[PATH_MAX] = { \ + 0, \ + }; \ + _defrag_sockfile_len = snprintf( \ + tmppath, PATH_MAX, \ + DEFAULT_VAR_RUN_DIRECTORY "/gluster-%s-%s-%s.sock", "rebalance", \ + volinfo->volname, uuid_utoa(MY_UUID)); \ + if ((_defrag_sockfile_len < 0) || \ + (_defrag_sockfile_len >= PATH_MAX)) { \ + path[0] = 0; \ + } else { \ + glusterd_set_socket_filepath(tmppath, path, sizeof(path)); \ + } \ + } while (0) +int32_t +glusterd_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe); int -glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len) +glusterd_defrag_start_validate(glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, glusterd_op_t op) { - int ret = -1; - - if (glusterd_is_defrag_on (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, - "rebalance on volume %s already started", - volinfo->volname); - snprintf (op_errstr, len, "Rebalance on %s is already started", - volinfo->volname); - goto out; - } - - if (glusterd_is_rb_started (volinfo) || - glusterd_is_rb_paused (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, - "Rebalance failed as replace brick is in progress on volume %s", - volinfo->volname); - snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on " - "volume %s", volinfo->volname); - goto out; - } - ret = 0; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + /* Check only if operation is not remove-brick */ + if ((GD_OP_REMOVE_BRICK != op) && !gd_is_remove_brick_committed(volinfo)) { + gf_msg_debug(this->name, 0, + "A remove-brick task on " + "volume %s is not yet committed", + volinfo->volname); + snprintf(op_errstr, len, 
+ "A remove-brick task on volume %s is" + " not yet committed. Either commit or stop the " + "remove-brick task.", + volinfo->volname); + goto out; + } + + if (glusterd_is_defrag_on(volinfo)) { + gf_msg_debug(this->name, 0, "rebalance on volume %s already started", + volinfo->volname); + snprintf(op_errstr, len, "Rebalance on %s is already started", + volinfo->volname); + goto out; + } + + ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } int32_t -glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, void *data) +__glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { - glusterd_volinfo_t *volinfo = NULL; - glusterd_defrag_info_t *defrag = NULL; - int ret = 0; - char pidfile[PATH_MAX]; - glusterd_conf_t *priv = NULL; - - priv = THIS->private; - if (!priv) - return 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_defrag_info_t *defrag = NULL; + int ret = 0; + char pidfile[PATH_MAX]; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int pid = -1; + + this = THIS; + if (!this) + return 0; - volinfo = mydata; - if (!volinfo) - return 0; + priv = this->private; + if (!priv) + return 0; - defrag = volinfo->defrag; - if (!defrag) - return 0; + volinfo = mydata; + if (!volinfo) + return 0; - if ((event == RPC_CLNT_DISCONNECT) && defrag->connected) - volinfo->defrag = NULL; + defrag = volinfo->rebal.defrag; + if (!defrag) + return 0; - GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); + if ((event == RPC_CLNT_DISCONNECT) && defrag->connected) + volinfo->rebal.defrag = NULL; - switch (event) { - case RPC_CLNT_CONNECT: - { - if (defrag->connected) - return 0; + GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); - LOCK (&defrag->lock); - { - defrag->connected = 1; - } - UNLOCK (&defrag->lock); + switch (event) { + case RPC_CLNT_CONNECT: { + if (defrag->connected) + return 0; - 
gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT", - rpc->conn.trans->name); - break; + LOCK(&defrag->lock); + { + defrag->connected = 1; + } + UNLOCK(&defrag->lock); + + gf_msg_debug(this->name, 0, "%s got RPC_CLNT_CONNECT", + rpc->conn.name); + break; } - case RPC_CLNT_DISCONNECT: - { - if (!defrag->connected) - return 0; + case RPC_CLNT_DISCONNECT: { + if (!defrag->connected) + return 0; - LOCK (&defrag->lock); - { - defrag->connected = 0; - } - UNLOCK (&defrag->lock); - - if (!glusterd_is_service_running (pidfile, NULL)) { - if (volinfo->defrag_status == - GF_DEFRAG_STATUS_STARTED) { - volinfo->defrag_status = - GF_DEFRAG_STATUS_FAILED; - } else { - volinfo->defrag_cmd = 0; - } - } - - glusterd_store_perform_node_state_store (volinfo); - - if (defrag->rpc) { - rpc_clnt_unref (defrag->rpc); - defrag->rpc = NULL; + LOCK(&defrag->lock); + { + defrag->connected = 0; + } + UNLOCK(&defrag->lock); + + if (!gf_is_service_running(pidfile, &pid)) { + if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED) { + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED; } - if (defrag->cbk_fn) - defrag->cbk_fn (volinfo, volinfo->defrag_status); + } - if (defrag) - GF_FREE (defrag); - gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", - rpc->conn.trans->name); - break; + glusterd_store_perform_node_state_store(volinfo); + + rpc_clnt_disable(defrag->rpc); + glusterd_defrag_rpc_put(defrag); + if (defrag->cbk_fn) + defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status); + + GF_FREE(defrag); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED, + "Rebalance process for volume %s has disconnected.", + volinfo->volname); + break; } + case RPC_CLNT_DESTROY: + glusterd_volinfo_unref(volinfo); + break; default: - gf_log ("", GF_LOG_TRACE, - "got some other RPC event %d", event); - ret = 0; - break; - } + gf_msg_trace(this->name, 0, "got some other RPC event %d", event); + ret = 0; + break; + } - return ret; + return ret; } -int -glusterd_handle_defrag_start 
(glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len, int cmd, defrag_cbk_fn_t cbk) +int32_t +glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { - int ret = -1; - glusterd_defrag_info_t *defrag = NULL; - runner_t runner = {0,}; - glusterd_conf_t *priv = NULL; - char defrag_path[PATH_MAX]; - char sockfile[PATH_MAX] = {0,}; - char pidfile[PATH_MAX] = {0,}; - char logfile[PATH_MAX] = {0,}; - dict_t *options = NULL; -#ifdef DEBUG - char valgrind_logfile[PATH_MAX] = {0,}; -#endif - priv = THIS->private; - - GF_ASSERT (volinfo); - GF_ASSERT (op_errstr); - - ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); - if (ret) - goto out; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) - goto out; - - defrag = volinfo->defrag; - - defrag->cmd = cmd; + return glusterd_big_locked_notify(rpc, mydata, event, data, + __glusterd_defrag_notify); +} - LOCK_INIT (&defrag->lock); +int +glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op) +{ + xlator_t *this = NULL; + int ret = -1; + glusterd_defrag_info_t *defrag = NULL; + runner_t runner = { + 0, + }; + glusterd_conf_t *priv = NULL; + char defrag_path[PATH_MAX]; + char sockfile[PATH_MAX] = { + 0, + }; + char pidfile[PATH_MAX] = { + 0, + }; + char logfile[PATH_MAX] = { + 0, + }; + char volname[PATH_MAX] = { + 0, + }; + char valgrind_logfile[PATH_MAX] = { + 0, + }; + char msg[1024] = { + 0, + }; + char *volfileserver = NULL; + char *localtime_logging = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO("glusterd", priv, out); + + GF_ASSERT(volinfo); + GF_ASSERT(op_errstr); + + ret = glusterd_defrag_start_validate(volinfo, op_errstr, len, op); + if (ret) + goto out; + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = GF_CALLOC(1, 
sizeof(*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + if (!volinfo->rebal.defrag) + goto out; + + defrag = volinfo->rebal.defrag; + + defrag->cmd = cmd; + + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.op = op; + + LOCK_INIT(&defrag->lock); + + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; + + glusterd_volinfo_reset_defrag_stats(volinfo); + glusterd_store_perform_node_state_store(volinfo); + + GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); + ret = mkdir_p(defrag_path, 0755, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Failed to create " + "directory %s", + defrag_path); + goto out; + } + + GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo); + GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); + snprintf(logfile, PATH_MAX, "%s/%s-%s.log", priv->logdir, volinfo->volname, + "rebalance"); + runinit(&runner); + + if (this->ctx->cmd_args.vgtool != _gf_none) { + snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log", + priv->logdir, volinfo->volname); + + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); + } + + snprintf(volname, sizeof(volname), "rebalance/%s", volinfo->volname); + + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), + &volfileserver) != 0) { + volfileserver = "localhost"; + } + + runner_add_args( + &runner, SBIN_DIR "/glusterfs", "-s", volfileserver, "--volfile-id", + volname, "--xlator-option", "*dht.use-readdirp=yes", "--xlator-option", + "*dht.lookup-unhashed=yes", "--xlator-option", + "*dht.assert-no-child-down=yes", "--xlator-option", + "*dht.readdir-optimize=on", "--process-name", "rebalance", NULL); + + runner_add_arg(&runner, "--xlator-option"); + 
runner_argprintf(&runner, "*dht.rebalance-cmd=%d", cmd); + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID)); + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "*dht.commit-hash=%u", + volinfo->rebal.commit_hash); + runner_add_arg(&runner, "--socket-file"); + runner_argprintf(&runner, "%s", sockfile); + runner_add_arg(&runner, "--pid-file"); + runner_argprintf(&runner, "%s", pidfile); + runner_add_arg(&runner, "-l"); + runner_argprintf(&runner, "%s", logfile); + if (volinfo->memory_accounting) + runner_add_arg(&runner, "--mem-accounting"); + if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, + SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), + &localtime_logging) == 0) { + if (strcmp(localtime_logging, "enable") == 0) + runner_add_arg(&runner, "--localtime-logging"); + } + + snprintf(msg, sizeof(msg), "Starting the rebalance service for volume %s", + volinfo->volname); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + + ret = runner_run_nowait(&runner); + if (ret) { + gf_msg_debug("glusterd", 0, "rebalance command failed"); + goto out; + } + + sleep(5); + + ret = glusterd_rebalance_rpc_create(volinfo); + + // FIXME: this cbk is passed as NULL in all occurrences. Maybe + // we never needed it.
+ if (cbk) + defrag->cbk_fn = cbk; - volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - volinfo->rebalance_files = 0; - volinfo->rebalance_data = 0; - volinfo->lookedup_files = 0; - volinfo->rebalance_failures = 0; - volinfo->rebalance_time = 0; +int +glusterd_rebalance_defrag_init(glusterd_volinfo_t *volinfo, defrag_cbk_fn_t cbk) - volinfo->defrag_cmd = cmd; - glusterd_store_perform_node_state_store (volinfo); +{ + glusterd_defrag_info_t *defrag = NULL; + int ret = -1; + + if (!volinfo->rebal.defrag) { + volinfo->rebal.defrag = GF_CALLOC(1, sizeof(*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + } else { + /* + * if defrag variable is already initialized, + * we skip the initialization. + */ + ret = 0; + goto out; + } + + if (!volinfo->rebal.defrag) + goto out; + defrag = volinfo->rebal.defrag; + + defrag->cmd = volinfo->rebal.defrag_cmd; + LOCK_INIT(&defrag->lock); + if (cbk) + defrag->cbk_fn = cbk; + ret = 0; +out: + return ret; +} - GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv); - ret = mkdir_p (defrag_path, 0777, _gf_true); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Failed to create " - "directory %s", defrag_path); - goto out; - } +int +glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) +{ + dict_t *options = NULL; + char sockfile[PATH_MAX] = { + 0, + }; + int ret = -1; + glusterd_defrag_info_t *defrag = volinfo->rebal.defrag; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + // rebalance process is not started + if (!defrag) + goto out; + + options = dict_new(); + if (!options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo); + + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensure that the connection is reliable.
The + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. + */ + ret = rpc_transport_unix_options_build(options, sockfile, 600); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNIX_OP_BUILD_FAIL, + "Unix options build failed"); + goto out; + } + + glusterd_volinfo_ref(volinfo); + ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify, + volinfo, _gf_true); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL, + "Glusterd RPC creation failed"); + goto out; + } + ret = 0; +out: + if (options) + dict_unref(options); + return ret; +} - GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); - GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv); - snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log", - DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname); - runinit (&runner); -#ifdef DEBUG - if (priv->valgrind) { - snprintf (valgrind_logfile, PATH_MAX, - "%s/valgrind-%s-rebalance.log", - DEFAULT_LOG_FILE_DIRECTORY, - volinfo->volname); - - runner_add_args (&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", NULL); - runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); - } -#endif - - runner_add_args (&runner, SBIN_DIR"/glusterfs", - "-s", "localhost", "--volfile-id", volinfo->volname, - "--xlator-option", "*dht.use-readdirp=yes", - "--xlator-option", "*dht.lookup-unhashed=yes", - "--xlator-option", "*dht.assert-no-child-down=yes", - "--xlator-option", "*replicate*.data-self-heal=off", - "--xlator-option", - "*replicate*.metadata-self-heal=off", - "--xlator-option", "*replicate*.entry-self-heal=off", - NULL); - runner_add_arg (&runner, "--xlator-option"); - runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); - runner_add_arg (&runner, "--xlator-option"); - runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(priv->uuid)); - runner_add_arg (&runner, "--socket-file"); - runner_argprintf (&runner, "%s",sockfile); - 
runner_add_arg (&runner, "--pid-file"); - runner_argprintf (&runner, "%s",pidfile); - runner_add_arg (&runner, "-l"); - runner_argprintf (&runner, logfile); - if (volinfo->memory_accounting) - runner_add_arg (&runner, "--mem-accounting"); - - ret = runner_run_reuse (&runner); - if (ret) { - runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed"); - runner_end (&runner); - goto out; - } +int +glusterd_rebalance_cmd_validate(int cmd, char *volname, + glusterd_volinfo_t **volinfo, char *op_errstr, + size_t len) +{ + int ret = -1; + + if (glusterd_volinfo_find(volname, volinfo)) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Received rebalance on invalid" + " volname %s", + volname); + snprintf(op_errstr, len, "Volume %s does not exist", volname); + goto out; + } + if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_DISTRIBUTE, + "Volume %s is not a " + "distribute type or contains only 1 brick", + volname); + snprintf(op_errstr, len, + "Volume %s is not a distribute " + "volume or contains only 1 brick.\n" + "Not performing rebalance", + volname); + goto out; + } + + if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_STOPPED, + "Received rebalance on stopped" + " volname %s", + volname); + snprintf(op_errstr, len, + "Volume %s needs to " + "be started to perform rebalance", + volname); + goto out; + } + + ret = 0; - sleep (5); - ret = rpc_clnt_transport_unix_options_build (&options, sockfile); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); - goto out; - } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - ret = glusterd_rpc_create (&defrag->rpc, options, - glusterd_defrag_notify, volinfo); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); - goto out; +int +__glusterd_handle_defrag_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req 
cli_req = {{ + 0, + }}; + glusterd_conf_t *priv = NULL; + int32_t op = GD_OP_NONE; + dict_t *dict = NULL; + char *volname = NULL; + gf_cli_defrag_type cmd = 0; + char msg[2048] = { + 0, + }; + xlator_t *this = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(msg, sizeof(msg), + "Unable to decode the " + "command"); + goto out; } - - if (cbk) - defrag->cbk_fn = cbk; - + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get volume name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + goto out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + (int32_t *)&cmd); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get command"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + goto out; + } + + ret = dict_set_static_bin(dict, "node-uuid", MY_UUID, 16); + if (ret) + goto out; + + if ((cmd == GF_DEFRAG_CMD_STATUS) || (cmd == GF_DEFRAG_CMD_STOP)) { + op = GD_OP_DEFRAG_BRICK_VOLUME; + } else + op = GD_OP_REBALANCE; + + if (priv->op_version < GD_OP_VERSION_6_0) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. 
Falling back " + "to op-sm framework.", + GD_OP_VERSION_6_0); + ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg)); + glusterd_friend_sm(); + glusterd_op_sm(); + } else { + ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(req, op, + dict); + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret) { + if (msg[0] == '\0') + snprintf(msg, sizeof(msg), "Operation failed"); + ret = glusterd_op_send_cli_response(GD_OP_REBALANCE, ret, 0, req, dict, + msg); + } + + free(cli_req.dict.dict_val); // malloced by xdr + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } - int -glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, - glusterd_conf_t *priv, int cmd) +glusterd_handle_defrag_volume(rpcsvc_request_t *req) { - dict_t *options = NULL; - char sockfile[PATH_MAX] = {0,}; - int ret = -1; - glusterd_defrag_info_t *defrag = NULL; - - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) - goto out; - - defrag = volinfo->defrag; + return glusterd_big_locked_handler(req, __glusterd_handle_defrag_volume); +} - defrag->cmd = cmd; +static int +glusterd_brick_validation(dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = data; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_volume_brickinfo_get_by_brick(value->data, volinfo, + &brickinfo, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND, + "Incorrect brick %s for " + "volume %s", + value->data, volinfo->volname); + return ret; + } + + if (!brickinfo->decommissioned) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND, + "Incorrect brick %s for " + "volume %s", + value->data, volinfo->volname); + ret = -1; + return ret; + } - LOCK_INIT (&defrag->lock); + return ret; +} - GLUSTERD_GET_DEFRAG_SOCK_FILE 
(sockfile, volinfo, priv); - ret = rpc_clnt_transport_unix_options_build (&options, sockfile); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); - goto out; +int +glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict) +{ + int ret = -1; + int32_t cmd = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048] = {0}; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(rsp_dict); + GF_ASSERT(req_dict); + + ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not found"); + goto out; + } + + ret = dict_get_int32n(rsp_dict, "rebalance-command", + SLEN("rebalance-command"), &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "cmd not found"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "failed to validate"); + goto out; + } + + /* rebalance id is generated in glusterd_mgmt_v3_op_stage_rebalance(), but + * rsp_dict is unavailable there. So copying it to rsp_dict from req_dict + * here.
So that cli can display the rebalance id.*/ + if ((cmd == GF_DEFRAG_CMD_START) || + (cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX) || + (cmd == GF_DEFRAG_CMD_START_FORCE)) { + if (is_origin_glusterd(rsp_dict)) { + ret = dict_get_strn(req_dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + snprintf(msg, sizeof(msg), "Missing rebalance-id"); + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_REBALANCE_ID_MISSING, "%s", msg); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + rsp_dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + snprintf(msg, sizeof(msg), + "Failed to set rebalance id for volume %s", + volname); + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_DICT_SET_FAILED, "%s", msg); + } + } } - - ret = glusterd_rpc_create (&defrag->rpc, options, - glusterd_defrag_notify, volinfo); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); + } + + /* Set task-id, if available, in rsp_dict for operations other than + * start. 
This is needed when we want rebalance id in xml output + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) { + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict( + volinfo->rebal.rebalance_id, rsp_dict, + GF_REMOVE_BRICK_TID_KEY, SLEN(GF_REMOVE_BRICK_TID_KEY)); + else + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + rsp_dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set task-id for volume %s", volname); goto out; + } } - ret = 0; + } out: - return ret; + return ret; } int -glusterd_rebalance_cmd_validate (int cmd, char *volname, - glusterd_volinfo_t **volinfo, - char *op_errstr, size_t len) +glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr) { - int ret = -1; - - if (glusterd_volinfo_find(volname, volinfo)) { - gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on invalid" - " volname %s", volname); - snprintf (op_errstr, len, "Volume %s does not exist", - volname); + char *volname = NULL; + char *cmd_str = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not found"); + goto out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "cmd not found"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "failed to validate"); + goto out; + } + switch (cmd) { + case GF_DEFRAG_CMD_START: + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + /* Check if the connected clients are all of version + * glusterfs-3.6 and 
higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. This check can be bypassed by using + * 'force' + */ + ret = glusterd_check_client_op_version_support( + volname, GD_OP_VERSION_3_6_0, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " + "more connected clients of a version" + " lower than GlusterFS-v3.6.0. " + "Starting rebalance in this state " + "could lead to data loss.\nPlease " + "disconnect those clients before " + "attempting this command again.", + volname); goto out; - } - if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) { - gf_log ("glusterd", GF_LOG_ERROR, "Volume %s is not a " - "distribute type or contains only 1 brick", volname); - snprintf (op_errstr, len, "Volume %s is not a distribute " - "volume or contains only 1 brick.\n" - "Not performing rebalance", volname); + } + /* Fall through */ + case GF_DEFRAG_CMD_START_FORCE: + if (is_origin_glusterd(dict)) { + ret = glusterd_generate_and_set_task_id( + dict, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + snprintf(msg, sizeof(msg), "Missing rebalance-id"); + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_REBALANCE_ID_MISSING, "%s", msg); + ret = 0; + } + } + ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg), + GD_OP_REBALANCE); + if (ret) { + gf_msg_debug(this->name, 0, + "defrag start validate " + "failed for volume %s.", + volinfo->volname); goto out; - } + } + break; + case GF_DEFRAG_CMD_STATUS: + case GF_DEFRAG_CMD_STOP: - if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) { - gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on stopped" - " volname %s", volname); - snprintf (op_errstr, len, "Volume %s needs to " - 
"be started to perform rebalance", volname); + ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "command string"); + ret = -1; goto out; - } + } + if ((strstr(cmd_str, "rebalance") != NULL) && + (volinfo->rebal.op != GD_OP_REBALANCE)) { + snprintf(msg, sizeof(msg), + "Rebalance not started " + "for volume %s.", + volinfo->volname); + ret = -1; + goto out; + } + + if (strstr(cmd_str, "remove-brick") != NULL) { + if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) { + snprintf(msg, sizeof(msg), + "remove-brick not " + "started for volume %s.", + volinfo->volname); + ret = -1; + goto out; + } - ret = 0; + /* For remove-brick status/stop command check whether + * given input brick is part of volume or not.*/ + + ret = dict_foreach_fnmatch(dict, "brick*", + glusterd_brick_validation, volinfo); + if (ret == -1) { + snprintf(msg, sizeof(msg), + "Incorrect brick" + " for volume %s", + volinfo->volname); + goto out; + } + } + break; + + default: + break; + } + ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup(msg); + + return ret; } int -glusterd_handle_defrag_volume (rpcsvc_request_t *req) +glusterd_mgmt_v3_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - int32_t ret = -1; - gf_cli_req cli_req = {{0,}}; - glusterd_conf_t *priv = NULL; - dict_t *dict = NULL; - char *volname = NULL; - gf_cli_defrag_type cmd = 0; - - GF_ASSERT (req); - - priv = THIS->private; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + xlator_t *this = NULL; + uint32_t commit_hash; + int32_t is_force = 0; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, 
"volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not given"); + goto out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "command not given"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "cmd validate failed"); + goto out; + } + + switch (cmd) { + case GF_DEFRAG_CMD_START: + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + case GF_DEFRAG_CMD_START_FORCE: - if (!xdr_to_generic (req->msg[0], &cli_req, - (xdrproc_t)xdr_gf_cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - if (cli_req.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); - - ret = dict_unserialize (cli_req.dict.dict_val, - cli_req.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - goto out; + ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force); + if (ret) + is_force = 0; + if (!is_force) { + /* Reset defrag status to 'NOT STARTED' whenever a + * remove-brick/rebalance command is issued to remove + * stale information from previous run. + */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED; + + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + gf_msg_debug(this->name, 0, + "Missing rebalance" + " id"); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_REBALANCE; } - } + if (!gd_should_i_start_rebalance(volinfo)) { + /* Store the rebalance-id and rebalance command + * even if the peer isn't starting a rebalance + * process. On peers where a rebalance process + * is started, glusterd_handle_defrag_start + * performs the storing. + * Storing this is needed for having + * 'volume status' work correctly. 
+ */ + glusterd_store_perform_node_state_store(volinfo); + break; + } + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->rebal.commit_hash = commit_hash; + } + ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg), + cmd, NULL, GD_OP_REBALANCE); + break; + } else { + /* Reset defrag status to 'STARTED' so that the + * pid is checked and restarted accordingly. + * If the pid is not running it executes the + * "NOT_STARTED" case and restarts the process + */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.op = GD_OP_REBALANCE; + + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + gf_msg_debug(this->name, 0, + "Missing rebalance" + " id"); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_REBALANCE; + } + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->rebal.commit_hash = commit_hash; + } + ret = glusterd_restart_rebalance_for_volume(volinfo); + break; + } + case GF_DEFRAG_CMD_STOP: + /* Clear task-id only on explicitly stopping rebalance. 
+ * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts + */ + gf_uuid_clear(volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + + /* Fall back to the old volume file in case of decommission*/ + cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks, + brick_list) + { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + volfile_update = _gf_true; + } + + if (volfile_update == _gf_false) { + ret = 0; + break; + } - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, - "Failed to get volname"); + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles"); goto out; - } + } - ret = dict_get_int32 (dict, "rebalance-command", (int32_t*)&cmd); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, - "Failed to get command"); + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL, + "failed to store volinfo"); goto out; - } - - glusterd_rebalance_cmd_attempted_log (cmd, volname); + } - ret = dict_set_static_bin (dict, "node-uuid", priv->uuid, 16); - if (ret) - goto out; + ret = 0; + break; - if ((cmd == GF_DEFRAG_CMD_STATUS) || - (cmd == GF_DEFRAG_CMD_STOP)) { - ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, - dict); - } else - ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict); + case GF_DEFRAG_CMD_STATUS: + break; + default: + break; + } out: + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup(msg); - glusterd_friend_sm (); - glusterd_op_sm (); - - if (ret) { - if (dict) - dict_unref (dict); - ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, req, - NULL, "operation failed"); - } - - if (cli_req.dict.dict_val) - free (cli_req.dict.dict_val);//malloced by xdr - - return 0; + return ret; } - 
int -glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) +glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not found"); - goto out; - } - ret = dict_get_int32 (dict, "rebalance-command", &cmd); - if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd not found"); - goto out; - } - - ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, - msg, sizeof (msg)); - if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "failed to validate"); - goto out; - } - switch (cmd) { + char *volname = NULL; + char *cmd_str = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + dict_t *op_ctx = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not found"); + goto out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "cmd not found"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "failed to validate"); + goto out; + } + switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: + /* Check if the connected clients are all of version + * glusterfs-3.6 and higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. 
This check can be bypassed by using + * 'force' + */ + ret = glusterd_check_client_op_version_support( + volname, GD_OP_VERSION_3_6_0, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " + "more connected clients of a version" + " lower than GlusterFS-v3.6.0. " + "Starting rebalance in this state " + "could lead to data loss.\nPlease " + "disconnect those clients before " + "attempting this command again.", + volname); + goto out; + } + /* Fall through */ case GF_DEFRAG_CMD_START_FORCE: - ret = glusterd_defrag_start_validate (volinfo, - msg, sizeof (msg)); + if (is_origin_glusterd(dict)) { + op_ctx = glusterd_op_get_ctx(); + if (!op_ctx) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id( + op_ctx, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, - "start validate failed"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL, + "Failed to generate task-id"); + goto out; } - break; + } else { + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + snprintf(msg, sizeof(msg), "Missing rebalance-id"); + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_REBALANCE_ID_MISSING, "%s", msg); + ret = 0; + } + } + ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg), + GD_OP_REBALANCE); + if (ret) { + gf_msg_debug(this->name, 0, + "defrag start validate " + "failed for volume %s.", + volinfo->volname); + goto out; + } + break; case GF_DEFRAG_CMD_STATUS: case GF_DEFRAG_CMD_STOP: - break; + + ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "command string"); + ret = -1; + goto out; + } + if ((strstr(cmd_str, "rebalance") != NULL) && + (volinfo->rebal.op != GD_OP_REBALANCE)) { + snprintf(msg, sizeof(msg), + 
"Rebalance not started " + "for volume %s.", + volinfo->volname); + ret = -1; + goto out; + } + + if (strstr(cmd_str, "remove-brick") != NULL) { + if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) { + snprintf(msg, sizeof(msg), + "remove-brick not " + "started for volume %s.", + volinfo->volname); + ret = -1; + goto out; + } + + /* For remove-brick status/stop command check whether + * given input brick is part of volume or not.*/ + + ret = dict_foreach_fnmatch(dict, "brick*", + glusterd_brick_validation, volinfo); + if (ret == -1) { + snprintf(msg, sizeof(msg), + "Incorrect brick" + " for volume %s", + volinfo->volname); + goto out; + } + } + break; + default: - break; - } + break; + } - ret = 0; + ret = 0; out: - if (ret && op_errstr && msg[0]) - *op_errstr = gf_strdup (msg); + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup(msg); - return ret; + return ret; } - int -glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +glusterd_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; - gf_boolean_t volfile_update = _gf_false; - - priv = THIS->private; - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not given"); + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + dict_t *ctx = NULL; + xlator_t *this = NULL; + uint32_t commit_hash; + int32_t is_force = 0; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not given"); + goto 
out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "command not given"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "cmd validate failed"); + goto out; + } + + /* Set task-id, if available, in op_ctx dict for operations other than + * start + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) { + ctx = glusterd_op_get_ctx(); + if (!ctx) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL, + "Failed to get op_ctx"); + ret = -1; goto out; - } - - ret = dict_get_int32 (dict, "rebalance-command", &cmd); - if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "command not given"); - goto out; - } - - ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, - msg, sizeof (msg)); - if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd validate failed"); + } + + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + ctx, GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY)); + else + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + ctx, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL, + "Failed to set task-id"); goto out; + } } + } - switch (cmd) { + switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: - ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), - cmd, NULL); - break; - case GF_DEFRAG_CMD_STOP: - /* Fall back to the old volume file in case of decommission*/ - list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, - brick_list) { - if (!brickinfo->decommissioned) - continue; - brickinfo->decommissioned = 0; - volfile_update = _gf_true; - } - if (volfile_update == _gf_false) 
{ - ret = 0; - break; - } - - ret = glusterd_create_volfiles_and_notify_services (volinfo); + ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force); + if (ret) + is_force = 0; + if (!is_force) { + /* Reset defrag status to 'NOT STARTED' whenever a + * remove-brick/rebalance command is issued to remove + * stale information from previous run. + */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED; + + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, - "failed to create volfiles"); - goto out; + gf_msg_debug(this->name, 0, + "Missing rebalance" + " id"); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_REBALANCE; } - - ret = glusterd_store_volinfo (volinfo, - GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (!gd_should_i_start_rebalance(volinfo)) { + /* Store the rebalance-id and rebalance command + * even if the peer isn't starting a rebalance + * process. On peers where a rebalance process + * is started, glusterd_handle_defrag_start + * performs the storing. + * Storing this is needed for having + * 'volume status' work correctly. + */ + glusterd_store_perform_node_state_store(volinfo); + break; + } + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->rebal.commit_hash = commit_hash; + } + ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg), + cmd, NULL, GD_OP_REBALANCE); + break; + } else { + /* Reset defrag status to 'STARTED' so that the + * pid is checked and restarted accordingly. 
+ * If the pid is not running it executes the + * "NOT_STARTED" case and restarts the process + */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.op = GD_OP_REBALANCE; + + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, - "failed to store volinfo"); - goto out; + gf_msg_debug(this->name, 0, + "Missing rebalance" + " id"); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_REBALANCE; } - + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->rebal.commit_hash = commit_hash; + } + ret = glusterd_restart_rebalance_for_volume(volinfo); + break; + } + case GF_DEFRAG_CMD_STOP: + /* Clear task-id only on explicitly stopping rebalance. + * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts + */ + gf_uuid_clear(volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + + /* Fall back to the old volume file in case of decommission*/ + cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks, + brick_list) + { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + volfile_update = _gf_true; + } + + if (volfile_update == _gf_false) { ret = 0; break; + } + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles"); + goto out; + } + + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL, + "failed to store volinfo"); + goto out; + } + + ret = 0; + break; case GF_DEFRAG_CMD_STATUS: - break; + break; default: - break; - } - - glusterd_rebalance_cmd_log (cmd, volname, ret); + break; + } out: - if (ret && op_errstr && msg[0]) - *op_errstr = 
gf_strdup (msg); + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup(msg); - return ret; + return ret; } int32_t -glusterd_defrag_event_notify_handle (dict_t *dict) +glusterd_defrag_event_notify_handle(dict_t *dict) { - glusterd_volinfo_t *volinfo = NULL; - char *volname = NULL; - int32_t ret = -1; - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Failed to get volname"); - return ret; - } + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + char *volname_ptr = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get volname"); + return ret; + } + + volname_ptr = strstr(volname, "rebalance/"); + if (volname_ptr) { + volname_ptr = strchr(volname_ptr, '/'); + volname = volname_ptr + 1; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME, + "volname received (%s) is not prefixed with rebalance.", + volname); + ret = -1; + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo for %s", volname); + return ret; + } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Failed to get volinfo for %s" - , volname); - return ret; - } + ret = glusterd_defrag_volume_status_update(volinfo, dict, 0); - ret = glusterd_defrag_volume_status_update (volinfo, dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DEFRAG_STATUS_UPDATE_FAIL, + "Failed to update status"); + gf_event(EVENT_REBALANCE_STATUS_UPDATE_FAILED, "volume=%s", + volinfo->volname); + } - if (ret) - gf_log ("", GF_LOG_ERROR, "Failed to update status"); - return ret; +out: + return ret; } |
