/*
   Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
   This file is part of GlusterFS.

   This file is licensed to you under your choice of the GNU Lesser
   General Public License, version 3 or any later version (LGPLv3 or
   later), or the GNU General Public License, version 2 (GPLv2), in all
   cases as published by the Free Software Foundation.
 */

#include "common-utils.h"
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
#include "glusterd-op-sm.h"
#include "glusterd-store.h"
#include "glusterd-geo-rep.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
#include "run.h"
#include "syscall.h"
#include "byte-order.h"
#include "glusterd-svc-helper.h"
#include "compat-errno.h"
#include "glusterd-tierd-svc.h"
#include "glusterd-tierd-svc-helper.h"
#include "glusterd-messages.h"
#include "glusterd-mgmt.h"
#include "glusterd-syncop.h"

#include <sys/wait.h>
#include <dlfcn.h>

extern struct rpc_clnt_program gd_brick_prog;

const char *gd_tier_op_list[GF_DEFRAG_CMD_TYPE_MAX] = {
        [GF_DEFRAG_CMD_START_TIER]            = "start",
        [GF_DEFRAG_CMD_STOP_TIER]             = "stop",
};

int
__glusterd_handle_tier (rpcsvc_request_t *req)
{
        int32_t                         ret             = -1;
        gf_cli_req                      cli_req         = { {0,} };
        dict_t                         *dict            = NULL;
        glusterd_op_t                   cli_op          = GD_OP_TIER_START_STOP;
        char                           *volname         = NULL;
        int32_t                         cmd             = 0;
        char                            msg[2048]       = {0,};
        xlator_t                       *this            = NULL;
        glusterd_conf_t                *conf            = NULL;
        glusterd_volinfo_t              *volinfo        = NULL;
        char                            err_str[2048]   = {0};


        this = THIS;
        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        GF_VALIDATE_OR_GOTO (this->name, req, out);

        conf = this->private;
        GF_VALIDATE_OR_GOTO (this->name, conf, out);

        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
        if (ret < 0) {
                req->rpc_err = GARBAGE_ARGS;
                goto out;
        }

        if (cli_req.dict.dict_len) {
                /* Unserialize the dictionary */
                dict  = dict_new ();

                ret = dict_unserialize (cli_req.dict.dict_val,
                                        cli_req.dict.dict_len,
                                        &dict);
                if (ret < 0) {
                        gf_msg (this->name, GF_LOG_ERROR, 0,
                                GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
                                "unserialize req-buffer to dictionary");
                        snprintf (msg, sizeof (msg), "Unable to decode the "
                                  "command");
                        goto out;
                } else {
                        dict->extra_stdfree = cli_req.dict.dict_val;
                }
        }

        ret = dict_get_str (dict, "volname", &volname);
        if (ret) {
                snprintf (msg, sizeof (msg), "Unable to get volume name");
                gf_msg (this->name, GF_LOG_ERROR, errno,
                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name, "
                        "while handling tier command");
                goto out;
        }

        ret = dict_get_int32 (dict, "rebalance-command", &cmd);
        if (ret) {
                snprintf (msg, sizeof (msg), "Unable to get the command");
                gf_msg (this->name, GF_LOG_ERROR, errno,
                        GD_MSG_DICT_GET_FAILED, "Unable to get the cmd");
                goto out;
        }

        if (conf->op_version < GD_OP_VERSION_3_7_0) {
                snprintf (msg, sizeof (msg), "Cannot execute command. The "
                          "cluster is operating at version %d. Tier command "
                          "%s is unavailable in this version", conf->op_version,
                          gd_tier_op_list[cmd]);
                ret = -1;
                goto out;
        }

        if (conf->op_version < GD_OP_VERSION_3_10_0) {
                gf_msg_debug (this->name, 0, "The cluster is operating at "
                              "version less than or equal to %d. Falling back "
                              "to syncop framework.",
                              GD_OP_VERSION_3_7_5);
                switch (cmd) {
                case GF_DEFRAG_CMD_DETACH_STOP:
                        ret = dict_set_int32 (dict, "rebalance-command",
                                              GF_DEFRAG_CMD_STOP_DETACH_TIER);
                        break;

                case GF_DEFRAG_CMD_DETACH_COMMIT:
                        ret = glusterd_volinfo_find (volname, &volinfo);
                        if (ret) {
                                snprintf (err_str, sizeof (err_str), "Volume "
                                          "%s does not exist", volname);
                                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
                                        GD_MSG_VOL_NOT_FOUND, "%s", err_str);
                                goto out;
                        }
                        ret = glusterd_set_detach_bricks (dict, volinfo);
                        ret = dict_set_int32 (dict, "command",
                                              GF_OP_CMD_DETACH_COMMIT);
                        break;
                case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE:
                        ret = glusterd_volinfo_find (volname, &volinfo);
                        if (ret) {
                                snprintf (err_str, sizeof (err_str), "Volume "
                                          "%s does not exist", volname);
                                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
                                        GD_MSG_VOL_NOT_FOUND, "%s", err_str);
                                goto out;
                        }
                        ret = glusterd_set_detach_bricks (dict, volinfo);
                        ret = dict_set_int32 (dict, "command",
                                              GF_OP_CMD_DETACH_COMMIT_FORCE);
                        break;
                case GF_DEFRAG_CMD_DETACH_START:
                        ret = glusterd_volinfo_find (volname, &volinfo);
                        if (ret) {
                                snprintf (err_str, sizeof (err_str), "Volume "
                                          "%s does not exist", volname);
                                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
                                        GD_MSG_VOL_NOT_FOUND, "%s", err_str);
                                goto out;
                        }
                        ret = glusterd_set_detach_bricks (dict, volinfo);
                        ret = dict_set_int32 (dict, "command",
                                              GF_OP_CMD_DETACH_START);
                        break;

                default:
                        break;

                }
                if (ret) {
                        gf_log (this->name, GF_LOG_ERROR,
                                "Failed to set dict");
                        goto out;
                }
                if ((cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
                    (cmd == GF_DEFRAG_CMD_DETACH_STATUS) ||
                    (cmd == GF_DEFRAG_CMD_START_TIER) ||
                    (cmd == GF_DEFRAG_CMD_DETACH_STOP)) {
                        ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
                                                 dict, msg, sizeof (msg));
                } else
                        ret = glusterd_op_begin (req, GD_OP_REMOVE_BRICK, dict,
                                                 msg, sizeof (msg));

                glusterd_friend_sm ();
                glusterd_op_sm ();

        } else {
                switch (cmd) {
                case GF_DEFRAG_CMD_STATUS_TIER:
                        cli_op = GD_OP_TIER_STATUS;
                        break;

                case GF_DEFRAG_CMD_DETACH_STATUS:
                        cli_op = GD_OP_DETACH_TIER_STATUS;
                        break;

                case GF_DEFRAG_CMD_DETACH_STOP:
                        cli_op = GD_OP_REMOVE_TIER_BRICK;
                        break;

                case GF_DEFRAG_CMD_DETACH_COMMIT:
                case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE:
                case GF_DEFRAG_CMD_DETACH_START:
                        cli_op = GD_OP_REMOVE_TIER_BRICK;
                        ret = glusterd_volinfo_find (volname, &volinfo);
                        if (ret) {
                                snprintf (err_str, sizeof (err_str), "Volume "
                                          "%s does not exist", volname);
                                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
                                        GD_MSG_VOL_NOT_FOUND, "%s", err_str);
                                goto out;
                        }
                        ret = glusterd_set_detach_bricks (dict, volinfo);
                        break;

                default:
                        break;
                }
                if (ret < 0) {
                        gf_msg (this->name, GF_LOG_ERROR, 0,
                                GD_MSG_DICT_SET_FAILED, "dict set failed");
                        goto out;
                }
                ret = glusterd_mgmt_v3_initiate_all_phases (req,
                                                            cli_op,
                                                            dict);
        }

out:
        if (ret) {
                if (msg[0] == '\0')
                        snprintf (msg, sizeof (msg), "Tier operation failed");
                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
                                                     dict, msg);
        }

        return ret;
}

int
glusterd_handle_tier (rpcsvc_request_t *req)
{
        return glusterd_big_locked_handler (req, __glusterd_handle_tier);
}


static int
glusterd_manage_tier (glusterd_volinfo_t *volinfo, int opcode)
{
        int              ret   = -1;
        xlator_t         *this = NULL;
        glusterd_conf_t  *priv = NULL;

        this = THIS;
        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
        priv = this->private;
        GF_VALIDATE_OR_GOTO (this->name, priv, out);

        switch (opcode) {
        case GF_DEFRAG_CMD_START_TIER:
        case GF_DEFRAG_CMD_STOP_TIER:
                ret = volinfo->tierd.svc.manager (&(volinfo->tierd.svc),
                                                volinfo, PROC_START_NO_WAIT);
                break;
        default:
                ret = 0;
                break;
        }

out:
        return ret;

}

static int
glusterd_tier_enable (glusterd_volinfo_t *volinfo, char **op_errstr)
{
        int32_t                 ret                     = -1;
        xlator_t                *this                   = NULL;
        int32_t                 tier_online             = -1;
        char                    pidfile[PATH_MAX]       = {0};
        int32_t                 pid                     = -1;
        glusterd_conf_t         *priv                   = NULL;

        this = THIS;

        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
        priv = this->private;
        GF_VALIDATE_OR_GOTO (this->name, priv, out);

        if (glusterd_is_volume_started (volinfo) == 0) {
                *op_errstr = gf_strdup ("Volume is stopped, start volume "
                                        "to enable tier.");
                ret = -1;
                goto out;
        }

        GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
        tier_online = gf_is_service_running (pidfile, &pid);

        if (tier_online) {
                *op_errstr = gf_strdup ("tier is already enabled");
                ret = -1;
                goto out;
        }

        volinfo->is_tier_enabled = _gf_true;

        ret = 0;
out:
        if (ret && op_errstr && !*op_errstr)
                gf_asprintf (op_errstr, "Enabling tier on volume %s has been "
                             "unsuccessful", volinfo->volname);
        return ret;
}

static int
glusterd_tier_disable (glusterd_volinfo_t *volinfo, char **op_errstr)
{
        int32_t                 ret                     = -1;
        xlator_t                *this                   = NULL;
        int32_t                 tier_online             = -1;
        char                    pidfile[PATH_MAX]       = {0};
        int32_t                 pid                     = -1;
        glusterd_conf_t         *priv                   = NULL;

        this = THIS;

        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
        priv = this->private;

        GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
        tier_online = gf_is_service_running (pidfile, &pid);

        if (!tier_online) {
                *op_errstr = gf_strdup ("tier is already disabled");
                ret = -1;
                goto out;
        }

        volinfo->is_tier_enabled = _gf_false;

        ret = 0;
out:
        if (ret && op_errstr && !*op_errstr)
                gf_asprintf (op_errstr, "Disabling tier volume %s has "
                             "been unsuccessful", volinfo->volname);
        return ret;
}

int
glusterd_op_remove_tier_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
        glusterd_conf_t        *priv           = NULL;
        xlator_t               *this           = NULL;
        int                     ret            = -1;
        char                    *volname       = NULL;
        glusterd_volinfo_t      *volinfo       = NULL;
        char                    *brick         = NULL;
        int32_t                 count          = 0;
        int32_t                 i              = 1;
        char                    key[256]       = {0,};
        int32_t                 flag           = 0;
        char                    err_str[4096]  = {0,};
        int                     need_rebalance = 0;
        int                     force          = 0;
        int32_t                 cmd            = 0;
        int32_t                 replica_count  = 0;
        glusterd_brickinfo_t    *brickinfo     = NULL;
        glusterd_brickinfo_t    *tmp           = NULL;
        char                    *task_id_str   = NULL;
        dict_t                  *bricks_dict   = NULL;
        char                    *brick_tmpstr  = NULL;
        uint32_t                 commit_hash   = 0;
        int                      detach_commit = 0;
        void                    *tier_info     = NULL;
        char                    *cold_shd_key  = NULL;
        char                    *hot_shd_key   = NULL;
        int                      delete_key    = 1;
        glusterd_svc_t          *svc           = NULL;

        this = THIS;
        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        GF_VALIDATE_OR_GOTO (this->name, dict, out);
        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);

        priv = this->private;
        GF_VALIDATE_OR_GOTO (this->name, priv, out);

        ret = dict_get_str (dict, "volname", &volname);
        if (ret) {
                gf_msg (this->name, GF_LOG_ERROR, errno,
                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
                goto out;
        }

        ret = glusterd_volinfo_find (volname, &volinfo);
        if (ret) {
                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
                        GD_MSG_VOL_NOT_FOUND, "Unable to get volinfo");
                goto out;
        }

        ret = dict_get_int32 (dict, "rebalance-command", &cmd);
        if (ret) {
                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
                        "cmd not found");
                goto out;
        }

        if (is_origin_glusterd (dict) &&
                        (cmd != GF_DEFRAG_CMD_DETACH_START)) {
                if (!gf_uuid_is_null (volinfo->rebal.rebalance_id)) {
                        ret = glusterd_copy_uuid_to_dict
                                (volinfo->rebal.rebalance_id, dict,
                                 GF_REMOVE_BRICK_TID_KEY);
                        if (ret) {
                                gf_msg (this->name, GF_LOG_ERROR, 0,
                                        GD_MSG_REMOVE_BRICK_ID_SET_FAIL,
                                        "Failed to set remove-brick-id");
                                goto out;
                        }
                }
        }
        /*check only if a tierd is supposed to be running
         * if no brick in the tierd volume is a local brick
         * skip it */
        cds_list_for_each_entry (brickinfo, &volinfo->bricks,
                                 brick_list) {
                if (glusterd_is_local_brick (this, volinfo,
                                             brickinfo)) {
                        flag = _gf_true;
                        break;
                }
        }
        if (!flag)
                goto out;


        ret = -1;

        switch (cmd) {
        case GF_DEFRAG_CMD_DETACH_STOP:
                        /* Fall back to the old volume file */
                        cds_list_for_each_entry_safe (brickinfo, tmp,
                                        &volinfo->bricks,
                                        brick_list) {
                                if (!brickinfo->decommissioned)
                                        continue;
                                brickinfo->decommissioned = 0;
                        }
                        ret = glusterd_create_volfiles_and_notify_services
                                (volinfo);

                        if (ret) {
                                gf_msg (this->name, GF_LOG_WARNING, 0,
                                        GD_MSG_VOLFILE_CREATE_FAIL,
                                        "failed to create volfiles");
                                goto out;
                        }

                        ret = glusterd_store_volinfo (volinfo,
                                        GLUSTERD_VOLINFO_VER_AC_INCREMENT);
                        if (ret) {
                                gf_msg (this->name, GF_LOG_WARNING, 0,
                                        GD_MSG_VOLINFO_SET_FAIL,
                                        "failed to store volinfo");
                                goto out;
                        }
                        ret = glusterd_tierdsvc_restart ();
                        if (ret) {
                                gf_msg (this->name, GF_LOG_ERROR, 0,
                                        GD_MSG_TIERD_START_FAIL,
                                        "Couldn't restart tierd for "
                                        "vol: %s", volinfo->volname);
                                goto out;
                        }

                        volinfo->tier.op = GD_OP_DETACH_NOT_STARTED;
                        ret = 0;
                        goto out;


        case GF_DEFRAG_CMD_DETACH_START:
                        ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY,
                                            &task_id_str);
                        if (ret) {
                                gf_msg_debug (this->name, errno,
                                              "Missing remove-brick-id");
                                ret = 0;
                        } else {
                                ret = dict_set_str (rsp_dict,
                                                    GF_REMOVE_BRICK_TID_KEY,
                                                    task_id_str);
                                gf_uuid_parse (task_id_str,
                                               volinfo->tier.rebalance_id);
                        }
                        force = 0;

                        volinfo->tier.op = GD_OP_DETACH_TIER;
                        volinfo->tier.defrag_status = GF_DEFRAG_STATUS_STARTED;
                        break;

        case GF_DEFRAG_CMD_DETACH_COMMIT:
                        if (volinfo->decommission_in_progress) {
                                gf_asprintf (op_errstr, "use 'force' option as "
                                             "migration is in progress");
                                goto out;
                        }
                        if (volinfo->rebal.defrag_status ==
                                        GF_DEFRAG_STATUS_FAILED) {
                                gf_asprintf (op_errstr, "use 'force' option as "
                                             "migration has failed");
                                goto out;
                        }

        case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE:
                        glusterd_op_perform_detach_tier (volinfo);
                        detach_commit = 1;

                        /* Disabling ctr when detaching a tier, since
                         * currently tier is the only consumer of ctr.
                         * Revisit this code when this constraint no
                         * longer exist.
                         */
                        dict_del (volinfo->dict, "features.ctr-enabled");
                        dict_del (volinfo->dict, "cluster.tier-mode");

                        hot_shd_key = gd_get_shd_key
                                (volinfo->tier_info.hot_type);
                        cold_shd_key = gd_get_shd_key
                                (volinfo->tier_info.cold_type);
                        if (hot_shd_key) {
                                /*
                                 * Since post detach, shd graph will not
                                 * contain hot tier. So we need to clear
                                 * option set for hot tier. For a tiered
                                 * volume there can be different key
                                 * for both hot and cold. If hot tier is
                                 * shd compatible then we need to remove
                                 * the configured value when detaching a tier,
                                 * only if the key's are different or
                                 * cold key is NULL. So we will set
                                 * delete_key first, and if cold key is not
                                 * null and they are equal then we will clear
                                 * the flag. Otherwise we will delete the
                                 * key.
                                 */

                                if (cold_shd_key)
                                        delete_key = strcmp (hot_shd_key,
                                                        cold_shd_key);
                                if (delete_key)
                                        dict_del (volinfo->dict, hot_shd_key);
                        }
                        /* fall through */

                        if (volinfo->decommission_in_progress) {
                                if (volinfo->tier.defrag) {
                                        LOCK (&volinfo->rebal.defrag->lock);
                                        /* Fake 'rebalance-complete' so the
                                         * graph change
                                         * happens right away */
                                        volinfo->tier.defrag_status =
                                                GF_DEFRAG_STATUS_COMPLETE;

                                        UNLOCK (&volinfo->tier.defrag->lock);
                                }
                        }

                        volinfo->tier.op = GD_OP_DETACH_NOT_STARTED;
                        ret = 0;
                        force = 1;
                        break;
        default:
                gf_asprintf (op_errstr, "tier command failed. Invalid "
                             "opcode");
                ret = -1;
                goto out;
        }

        count = glusterd_set_detach_bricks(dict, volinfo);

        if (cmd == GF_DEFRAG_CMD_DETACH_START) {
                bricks_dict = dict_new ();
                if (!bricks_dict) {
                        ret = -1;
                        goto out;
                }
                ret = dict_set_int32 (bricks_dict, "count", count);
                if (ret) {
                        gf_msg (this->name, GF_LOG_ERROR, errno,
                                GD_MSG_DICT_SET_FAILED,
                                "Failed to save remove-brick count");
                        goto out;
                }
        }

        while (i <= count) {
                snprintf (key, 256, "brick%d", i);
                ret = dict_get_str (dict, key, &brick);
                if (ret) {
                        gf_msg (this->name, GF_LOG_ERROR, errno,
                                GD_MSG_DICT_GET_FAILED, "Unable to get %s",
                                key);
                        goto out;
                }

                if (cmd == GF_DEFRAG_CMD_DETACH_START) {
                        brick_tmpstr = gf_strdup (brick);
                        if (!brick_tmpstr) {
                                ret = -1;
                                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
                                        GD_MSG_NO_MEMORY,
                                        "Failed to duplicate brick name");
                                goto out;
                        }
                        ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr);
                        if (ret) {
                                gf_msg (this->name, GF_LOG_ERROR, errno,
                                        GD_MSG_DICT_SET_FAILED,
                                        "Failed to add brick to dict");
                                goto out;
                        }
                        brick_tmpstr = NULL;
                }

                ret = glusterd_op_perform_remove_brick (volinfo, brick, force,
                                                        &need_rebalance);
                if (ret)
                        goto out;
                i++;
        }

        if (detach_commit) {
                /* Clear related information from volinfo */
                tier_info = ((void *)(&volinfo->tier_info));
                memset (tier_info, 0, sizeof (volinfo->tier_info));
        }

        if (cmd == GF_DEFRAG_CMD_DETACH_START)
                volinfo->tier.dict = dict_ref (bricks_dict);

        ret = dict_get_int32 (dict, "replica-count", &replica_count);
        if (!ret) {
                gf_msg (this->name, GF_LOG_INFO, errno,
                        GD_MSG_DICT_GET_FAILED,
                        "changing replica count %d to %d on volume %s",
                        volinfo->replica_count, replica_count,
                        volinfo->volname);
                volinfo->replica_count = replica_count;
                volinfo->sub_count = replica_count;
                volinfo->dist_leaf_count = glusterd_get_dist_leaf_count
                        (volinfo);

                /*
                 * volinfo->type and sub_count have already been set for
                 * volumes undergoing a detach operation, they should not
                 * be modified here.
                 */
                if ((replica_count == 1) && (cmd != GF_DEFRAG_CMD_DETACH_COMMIT)
                                && (cmd != GF_DEFRAG_CMD_DETACH_COMMIT_FORCE)) {
                        if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
                                volinfo->type = GF_CLUSTER_TYPE_NONE;
                                /* backward compatibility */
                                volinfo->sub_count = 0;
                        } else {
                                volinfo->type = GF_CLUSTER_TYPE_STRIPE;
                                /* backward compatibility */
                                volinfo->sub_count = volinfo->dist_leaf_count;
                        }
                }
        }
        volinfo->subvol_count = (volinfo->brick_count /
                        volinfo->dist_leaf_count);

        ret = glusterd_create_volfiles_and_notify_services (volinfo);
        if (ret) {
                gf_msg (this->name, GF_LOG_WARNING, 0,
                        GD_MSG_VOLFILE_CREATE_FAIL, "failed to create"
                        "volfiles");
                goto out;
        }

        ret = glusterd_store_volinfo (volinfo,
                        GLUSTERD_VOLINFO_VER_AC_INCREMENT);
        if (ret) {
                gf_msg (this->name, GF_LOG_WARNING, 0,
                        GD_MSG_VOLINFO_STORE_FAIL, "failed to store volinfo");
                goto out;
        }

        if (cmd == GF_DEFRAG_CMD_DETACH_START &&
                        volinfo->status == GLUSTERD_STATUS_STARTED) {

                svc = &(volinfo->tierd.svc);
                ret = svc->reconfigure (volinfo);
                if (ret)
                        goto out;

                ret = glusterd_svcs_reconfigure ();
                if (ret) {
                        gf_msg (this->name, GF_LOG_WARNING, 0,
                                GD_MSG_NFS_RECONF_FAIL,
                                "Unable to reconfigure NFS-Server");
                        goto out;
                }
        }
        /* Need to reset the defrag/rebalance status accordingly */
        switch (volinfo->tier.defrag_status) {
        case GF_DEFRAG_STATUS_FAILED:
        case GF_DEFRAG_STATUS_COMPLETE:
                volinfo->tier.defrag_status = 0;
        /* FALLTHROUGH */
        default:
                break;
        }
        if (!force && need_rebalance) {
                if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
                        volinfo->tier.commit_hash = commit_hash;
                }
                /* perform the rebalance operations */
                ret = glusterd_handle_defrag_start
                        (volinfo, err_str, sizeof (err_str),
                         GF_DEFRAG_CMD_START_DETACH_TIER,
                         /*change this label to GF_DEFRAG_CMD_DETACH_START
                          * while removing old code
                          */
                         glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);

                if (!ret)
                        volinfo->decommission_in_progress = 1;

                else if (ret) {
                        gf_msg (this->name, GF_LOG_ERROR, 0,
                                GD_MSG_REBALANCE_START_FAIL,
                                "failed to start the rebalance");
                }
        } else {
                if (GLUSTERD_STATUS_STARTED == volinfo->status)
                        ret = glusterd_svcs_manager (volinfo);
        }

out:
        if (ret && err_str[0] && op_errstr)
                *op_errstr = gf_strdup (err_str);

        GF_FREE (brick_tmpstr);
        if (bricks_dict)
                dict_unref (bricks_dict);

        return ret;

}

int
glusterd_op_tier_start_stop (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
        glusterd_volinfo_t      *volinfo                = NULL;
        int32_t                 ret                     = -1;
        char                    *volname                = NULL;
        int                     cmd                     = -1;
        xlator_t                *this                   = NULL;
        glusterd_brickinfo_t    *brick                  = NULL;
        gf_boolean_t            retval                  = _gf_false;
        glusterd_conf_t         *priv                   = NULL;
        int32_t                 pid                     = -1;
        char                    pidfile[PATH_MAX]       = {0};

        this = THIS;
        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        GF_VALIDATE_OR_GOTO (this->name, dict, out);
        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);

        priv = this->private;
        GF_VALIDATE_OR_GOTO (this->name, priv, out);

        ret = dict_get_str (dict, "volname", &volname);
        if (ret) {
                gf_msg (this->name, GF_LOG_ERROR, errno,
                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
                goto out;
        }

        ret = glusterd_volinfo_find (volname, &volinfo);
        if (ret) {
                gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
                goto out;
        }

        ret = dict_get_int32 (dict, "rebalance-command", &cmd);
        if (ret) {
                gf_msg (this->name, GF_LOG_ERROR, errno,
                        GD_MSG_DICT_GET_FAILED, "Unable to get cmd from "
                        "dict");
                goto out;
        }

        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
                if (gf_uuid_compare (MY_UUID, brick->uuid) == 0) {
                        retval = _gf_true;
                        break;
                }
        }
                /*check if this node needs tierd*/

        if (!retval)
                goto out;

        switch (cmd) {
        case GF_DEFRAG_CMD_START_TIER:
                GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
                /* we check if its running and skip so that we dont get a
                 * failure during force start
                 */
                if (gf_is_service_running (pidfile, &pid))
                        goto out;
                ret = glusterd_tier_enable (volinfo, op_errstr);
                if (ret < 0)
                        goto out;
                glusterd_store_perform_node_state_store (volinfo);
                break;

        case GF_DEFRAG_CMD_STOP_TIER:
                ret = glusterd_tier_disable (volinfo, op_errstr);
                if (ret < 0)
                        goto out;
                break;
        default:
                gf_asprintf (op_errstr, "tier command failed. Invalid "
                             "opcode");
                ret = -1;
                goto out;
        }

        ret = glusterd_manage_tier (volinfo, cmd);
        if (ret)
                goto out;

        ret = glusterd_store_volinfo (volinfo,
                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
        if (ret) {
                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
                        "Failed to store volinfo for tier");
                goto out;
        }

out:
        return ret;
}

int
glusterd_op_stage_tier (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
        char                    *volname             = NULL;
        int                     ret                  = -1;
        int32_t                 cmd                  = 0;
        char                    msg[2048]            = {0};
        glusterd_volinfo_t      *volinfo             = NULL;
        char                    *task_id_str         = NULL;
        xlator_t                *this                = 0;
        int32_t                 is_force             = 0;
        char                    pidfile[PATH_MAX]    = {0};
        int32_t                 tier_online          = -1;
        int32_t                 pid                  = -1;
        int32_t                 brick_count          = 0;
        gsync_status_param_t    param                = {0,};
        glusterd_conf_t         *priv                = NULL;
        gf_boolean_t            flag                 = _gf_false;
        glusterd_brickinfo_t    *brickinfo           = NULL;

        this = THIS;
        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        GF_VALIDATE_OR_GOTO (this->name, dict, out);
        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);

        priv = this->private;
        GF_VALIDATE_OR_GOTO (this->name, priv, out);

        ret = dict_get_str (dict, "volname", &volname);
        if (ret) {
                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
                        "volname not found");
                goto out;
        }

        ret = dict_get_int32 (dict, "rebalance-command", &cmd);
        if (ret) {
                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
                        "cmd not found");
                goto out;
        }

        ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo,
                                               msg, sizeof (msg));
        if (ret) {
                gf_msg_debug (this->name, 0, "cmd validate failed");
                goto out;
        }

        if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
                snprintf (msg, sizeof(msg), "volume %s is not a tier "
                          "volume", volinfo->volname);
                gf_msg (this->name, GF_LOG_ERROR, 0,
                        GD_MSG_VOL_NOT_TIER, "volume: %s is not a tier "
                        "volume", volinfo->volname);
                ret = -1;
                goto out;
        }
        /* Check if the connected clients are all of version
         * glusterfs-3.6 and higher. This is needed to prevent some data
         * loss issues that could occur when older clients are connected
         * when rebalance is run. This check can be bypassed by using
         * 'force'
         */
        ret = glusterd_check_client_op_version_support
                (volname, GD_OP_VERSION_3_6_0, NULL);
        if (ret) {
                ret = gf_asprintf (op_errstr, "Volume %s has one or "
                                   "more connected clients of a version"
                                   " lower than GlusterFS-v3.6.0. "
                                   "Tier operations not supported in"
                                   " below this version", volname);
                goto out;
        }
        /*check only if a tierd is supposed to be running
         * if no brick in the tierd volume is a local brick
         * skip it */
        cds_list_for_each_entry (brickinfo, &volinfo->bricks,
                                 brick_list) {
                if (glusterd_is_local_brick (this, volinfo,
                                             brickinfo)) {
                        flag = _gf_true;
                        break;
                }
        }
        if (!flag)
                goto out;

        GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
        tier_online = gf_is_service_running (pidfile, &pid);

        switch (cmd) {
        case GF_DEFRAG_CMD_START_TIER:
                ret = dict_get_int32 (dict, "force", &is_force);
                if (ret)
                        is_force = 0;

                if (brickinfo->status != GF_BRICK_STARTED) {
                        gf_asprintf (op_errstr, "Received"
                                     " tier start on volume "
                                     "with  stopped brick %s",
                                     brickinfo->path);
                        ret = -1;
                        goto out;
                }
                if ((!is_force) && tier_online) {
                        ret = gf_asprintf (op_errstr, "Tier daemon is "
                                           "already running on volume %s",
                                           volname);
                        ret = -1;
                        goto out;
                }
                ret = glusterd_defrag_start_validate (volinfo, msg,
                                                      sizeof (msg),
                                                      GD_OP_REBALANCE);
                if (ret) {
                        gf_msg (this->name, 0, GF_LOG_ERROR,
                                GD_MSG_REBALANCE_START_FAIL,
                                "start validate failed");
                        goto out;
                }
                break;

        case GF_DEFRAG_CMD_STOP_TIER:

                if (!tier_online) {
                        ret = gf_asprintf (op_errstr, "Tier daemon is "
                                           "not running on volume %s",
                                           volname);
                        ret = -1;
                        goto out;
                }
                break;

        case GF_DEFRAG_CMD_DETACH_START:


                ret = dict_get_int32 (dict, "count", &brick_count);
                if (ret) {
                        gf_msg (this->name, GF_LOG_ERROR, errno,
                                GD_MSG_DICT_GET_FAILED,
                                "Unable to get brick count");
                        goto out;
                }

                if (!tier_online) {
                        ret = gf_asprintf (op_errstr, "Tier daemon is "
                                           "not running on volume %s",
                                           volname);
                        ret = -1;
                        goto out;
                }
                if (volinfo->tier.op == GD_OP_DETACH_TIER) {
                        snprintf (msg, sizeof (msg), "An earlier detach tier "
                                  "task exists for volume %s. Either commit it"
                                  " or stop it before starting a new task.",
                                  volinfo->volname);
                        gf_msg (this->name, GF_LOG_ERROR, 0,
                                GD_MSG_OLD_REMOVE_BRICK_EXISTS,
                                "Earlier remove-brick"
                                " task exists for volume %s.",
                                volinfo->volname);
                        ret = -1;
                        goto out;
                }
                if (glusterd_is_defrag_on(volinfo)) {
                        snprintf (msg, sizeof (msg), "Migration is in progress."
                                  " Please retry after completion");
                        gf_msg (this->name, GF_LOG_WARNING, 0,
                                GD_MSG_OIP_RETRY_LATER, "Migration is"
                                "in progress");
                        goto out;
                }

                ret = glusterd_remove_brick_validate_bricks (GF_OP_CMD_NONE,
                                                             brick_count,
                                                             dict, volinfo,
                                                             op_errstr, cmd);
                if (ret)
                        goto out;

                if (is_origin_glusterd (dict)) {
                        ret = glusterd_generate_and_set_task_id
                                (dict, GF_REMOVE_BRICK_TID_KEY);
                        if (ret) {
                                gf_msg (this->name, GF_LOG_ERROR, 0,
                                        GD_MSG_TASKID_GEN_FAIL,
                                        "Failed to generate task-id");
                                goto out;
                        }
                } else {
                        ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY,
                                            &task_id_str);
                        if (ret) {
                                gf_msg (this->name, GF_LOG_WARNING, errno,
                                        GD_MSG_DICT_GET_FAILED,
                                        "Missing remove-brick-id");
                                ret = 0;
                        }
                }
                break;

        case GF_DEFRAG_CMD_DETACH_STOP:
                if (volinfo->tier.op != GD_OP_DETACH_TIER) {
                        snprintf (msg, sizeof(msg), "Detach-tier "
                                  "not started");
                        ret = -1;
                        goto out;
                }
                ret = 0;
                break;

        case GF_DEFRAG_CMD_STATUS_TIER:

                if (!tier_online) {
                        ret = gf_asprintf (op_errstr, "Tier daemon is "
                                           "not running on volume %s",
                                           volname);
                        ret = -1;
                        goto out;
                }
                break;

        case GF_DEFRAG_CMD_DETACH_COMMIT:

                if (volinfo->tier.op != GD_OP_DETACH_TIER) {
                        snprintf (msg, sizeof(msg), "Detach-tier "
                                  "not started");
                        ret = -1;
                        goto out;
                }
                if ((volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED)
                                && (volinfo->tier.op == GD_OP_DETACH_TIER)) {
                        ret = -1;
                        snprintf (msg, sizeof (msg), "Detach is in progress. "
                                  "Please retry after completion");
                        gf_msg (this->name, GF_LOG_WARNING, 0,
                                GD_MSG_OIP_RETRY_LATER, "Detach is in "
                                "progress");
                        goto out;
                }

                ret = dict_get_int32 (dict, "count", &brick_count);
                if (ret) {
                        gf_msg (this->name, GF_LOG_ERROR, errno,
                                GD_MSG_DICT_GET_FAILED,
                                "Unable to get brick count");
                        goto out;
                }

                ret = glusterd_remove_brick_validate_bricks (GF_OP_CMD_NONE,
                                                             brick_count,
                                                             dict, volinfo,
                                                             op_errstr, cmd);
                if (ret)
                        goto out;

                /* If geo-rep is configured, for this volume, it should be
                 * stopped.
                 */
                param.volinfo = volinfo;
                ret = glusterd_check_geo_rep_running (&param, op_errstr);
                if (ret || param.is_active) {
                        ret = -1;
                        goto out;
                }

                break;
        case GF_DEFRAG_CMD_DETACH_STATUS:
                if (volinfo->tier.op != GD_OP_DETACH_TIER) {
                        snprintf (msg, sizeof(msg), "Detach-tier "
                                  "not started");
                        ret = -1;
                        goto out;
                }
                break;

        case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE:
        default:
                break;

        }

        ret = 0;
out:
        if (ret && op_errstr && msg[0])
                *op_errstr = gf_strdup (msg);

        return ret;
}

int32_t
glusterd_add_tierd_to_dict (glusterd_volinfo_t *volinfo,
                            dict_t  *dict, int32_t count)
{

        int             ret                   = -1;
        int32_t         pid                   = -1;
        int32_t         brick_online          = -1;
        char            key[1024]             = {0};
        char            base_key[1024]        = {0};
        char            pidfile[PATH_MAX]     = {0};
        xlator_t        *this                 = NULL;

        this = THIS;
        GF_VALIDATE_OR_GOTO (THIS->name, this, out);

        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
        GF_VALIDATE_OR_GOTO (this->name, dict, out);

        snprintf (base_key, sizeof (base_key), "brick%d", count);
        snprintf (key, sizeof (key), "%s.hostname", base_key);
        ret = dict_set_str (dict, key, "Tier Daemon");
        if (ret)
                goto out;

        snprintf (key, sizeof (key), "%s.path", base_key);
        ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa (MY_UUID)));
        if (ret)
                goto out;

        glusterd_svc_build_tierd_pidfile (volinfo, pidfile, sizeof (pidfile));

        brick_online = gf_is_service_running (pidfile, &pid);

        memset (key, 0, sizeof (key));
        snprintf (key, sizeof (key), "%s.pid", base_key);
        ret = dict_set_int32 (dict, key, pid);
        if (ret)
                goto out;

        memset (key, 0, sizeof (key));
        snprintf (key, sizeof (key), "%s.status", base_key);
        ret = dict_set_int32 (dict, key, brick_online);

out:
        if (ret)
                gf_msg (this ? this->name : "glusterd",
                        GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
                        "Returning %d. adding values to dict failed", ret);

        return ret;
}

int32_t
__glusterd_tier_status_cbk (struct rpc_req *req, struct iovec *iov,
                            int count, void *myframe)
{
        gd1_mgmt_brick_op_rsp         rsp       = {0};
        int                           ret       = -1;
        call_frame_t                  *frame    = NULL;
        xlator_t                      *this     = NULL;
        glusterd_conf_t               *priv     = NULL;
        struct syncargs               *args     = NULL;

        this = THIS;
        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        GF_VALIDATE_OR_GOTO (this->name, req, out);

        priv = this->private;
        GF_VALIDATE_OR_GOTO (this->name, priv, out);

        frame = myframe;
        args = frame->local;

        if (-1 == req->rpc_status) {
                args->op_errno = ENOTCONN;
                goto out;
        }

        ret =  xdr_to_generic (*iov, &rsp,
                        (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
        if (ret < 0) {
                gf_msg (this->name, GF_LOG_ERROR, 0,
                        GD_MSG_RES_DECODE_FAIL,
                        "Failed to decode brick op "
                        "response received");
                goto out;
        }

        if (rsp.output.output_len) {
                args->dict  = dict_new ();
                if (!args->dict) {
                        ret = -1;
                        args->op_errno = ENOMEM;
                        goto out;
                }

                ret = dict_unserialize (rsp.output.output_val,
                                        rsp.output.output_len,
                                        &args->dict);
                if (ret < 0)
                        goto out;
        }
        args->op_ret = rsp.op_ret;
        args->op_errno = rsp.op_errno;
        args->errstr = gf_strdup (rsp.op_errstr);

out:
        if ((rsp.op_errstr) && (strcmp (rsp.op_errstr, "") != 0))
                free (rsp.op_errstr);
        free (rsp.output.output_val);
        if (req->rpc_status != -1)
                GLUSTERD_STACK_DESTROY(frame);
        __wake (args);

        return ret;

}

int32_t
glusterd_tier_status_cbk (struct rpc_req *req, struct iovec *iov,
                          int count, void *myframe)
{
        return glusterd_big_locked_cbk (req, iov, count, myframe,
                                        __glusterd_tier_status_cbk);
}

int
glusterd_op_tier_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict,
                         glusterd_op_t op)
{
        int                             ret       = -1;
        xlator_t                        *this     = NULL;
        struct syncargs                 args = {0, };
        glusterd_req_ctx_t              *data   = NULL;
        gd1_mgmt_brick_op_req           *req = NULL;
        glusterd_conf_t                 *priv = NULL;
        int                             pending_bricks = 0;
        glusterd_pending_node_t         *pending_node;
        glusterd_req_ctx_t              *req_ctx = NULL;
        struct rpc_clnt                 *rpc = NULL;
        uuid_t                          *txn_id = NULL;
        extern                          glusterd_op_info_t opinfo;

        this = THIS;
        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
        GF_VALIDATE_OR_GOTO (this->name, dict, out);
        GF_VALIDATE_OR_GOTO (this->name, rsp_dict, out);

        priv = this->private;
        GF_VALIDATE_OR_GOTO (this->name, priv, out);
        args.op_ret = -1;
        args.op_errno = ENOTCONN;
        data = GF_CALLOC (1, sizeof (*data),
                        gf_gld_mt_op_allack_ctx_t);

        gf_uuid_copy (data->uuid, MY_UUID);

        /* we are printing the detach status for issue of detach start
         * by then we need the op to be GD_OP_DETACH_TIER_STATUS for it to
         * get the status. ad for the rest of the condition it can go as such.
         */

        if (op == GD_OP_REMOVE_TIER_BRICK)
                data->op = GD_OP_DETACH_TIER_STATUS;
        else
                data->op = op;
        data->dict = dict;

        txn_id = &priv->global_txn_id;

        req_ctx = data;
        GF_VALIDATE_OR_GOTO (this->name, req_ctx, out);
        CDS_INIT_LIST_HEAD (&opinfo.pending_bricks);

        ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id);
        gf_msg_debug (this->name, 0, "transaction ID = %s",
                      uuid_utoa (*txn_id));

        ret = glusterd_op_bricks_select (req_ctx->op, req_ctx->dict, op_errstr,
                                         &opinfo.pending_bricks, NULL);

        if (ret) {
                gf_msg (this->name, GF_LOG_ERROR, 0,
                        GD_MSG_BRICK_SELECT_FAIL, "Failed to select bricks");
                opinfo.op_errstr = *op_errstr;
                goto out;
        }

        cds_list_for_each_entry (pending_node, &opinfo.pending_bricks, list) {
                ret = glusterd_brick_op_build_payload
                        (req_ctx->op, pending_node->node,
                         (gd1_mgmt_brick_op_req **)&req,
                         req_ctx->dict);

                if (ret) {
                        gf_msg (this->name, GF_LOG_ERROR, 0,
                                GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
                                "Failed to build brick op payload during "
                                "'Volume %s'", gd_op_list[req_ctx->op]);
                        goto out;
                }


                rpc = glusterd_pending_node_get_rpc (pending_node);
                if (!rpc) {
                        opinfo.brick_pending_count = 0;
                        ret = 0;
                        if (req) {
                                GF_FREE (req);
                                req = NULL;
                        }
                        glusterd_defrag_volume_node_rsp (req_ctx->dict,
                                                         NULL, rsp_dict);

                        goto out;
                }

                GD_SYNCOP (rpc, (&args), NULL, glusterd_tier_status_cbk, req,
                           &gd_brick_prog, req->op, xdr_gd1_mgmt_brick_op_req);

                if (req) {
                        GF_FREE (req);
                        req = NULL;
                }
                if (!ret)
                        pending_bricks++;

                glusterd_pending_node_put_rpc (pending_node);
        }
        glusterd_handle_node_rsp (req_ctx->dict, pending_node->node,
                                  req_ctx->op, args.dict, rsp_dict, op_errstr,
                                  pending_node->type);
        gf_msg_trace (this->name, 0, "Sent commit op req for operation "
                      "'Volume %s' to %d bricks", gd_op_list[req_ctx->op],
                      pending_bricks);
        opinfo.brick_pending_count = pending_bricks;

out:

        if (ret)
                opinfo.op_ret = ret;

        ret = glusterd_set_txn_opinfo (txn_id, &opinfo);
        if (ret)
                gf_msg (THIS->name, GF_LOG_ERROR, 0,
                        GD_MSG_TRANS_OPINFO_SET_FAIL,
                        "Unable to set transaction's opinfo");

        gf_msg_debug (this ? this->name : "glusterd", 0,
                      "Returning %d. Failed to get tier status", ret);
        return ret;

}