20 files changed, 1608 insertions(+), 329 deletions(-)
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index e772137ed..5b8abcdbc 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -1140,6 +1140,7 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, uint64_t failures = 0; double elapsed = 0; char *size_str = NULL; + char *task_id_str = NULL; if (-1 == req->rpc_status) { goto out; @@ -1189,15 +1190,24 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, } } - if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS))) { - /* All other possibility is about starting a volume */ - if (rsp.op_ret && strcmp (rsp.op_errstr, "")) + if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) && + !(global_state->mode & GLUSTER_MODE_XML)) { + /* All other possibilites are about starting a rebalance */ + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (rsp.op_ret && strcmp (rsp.op_errstr, "")) { snprintf (msg, sizeof (msg), "%s", rsp.op_errstr); - else - snprintf (msg, sizeof (msg), - "Starting rebalance on volume %s has been %s", - volname, (rsp.op_ret) ? "unsuccessful": - "successful"); + } else { + if (!rsp.op_ret) { + snprintf (msg, sizeof (msg), + "Starting rebalance on volume %s has " + "been successful.\nID: %s", volname, + task_id_str); + } else { + snprintf (msg, sizeof (msg), + "Starting rebalance on volume %s has " + "been unsuccessful.", volname); + } + } goto done; } @@ -1739,6 +1749,8 @@ gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, char *cmd_str = "unknown"; cli_local_t *local = NULL; call_frame_t *frame = NULL; + char *task_id_str = NULL; + dict_t *rsp_dict = NULL; if (-1 == req->rpc_status) { goto out; @@ -1759,10 +1771,32 @@ gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - switch (cmd) { + if (rsp.dict.dict_len) { + rsp_dict = dict_new (); + if (!rsp_dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, + &rsp_dict); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "Failed to unserialize rsp_dict"); + goto out; + } + } + switch (cmd) { case GF_OP_CMD_START: cmd_str = "start"; + + ret = dict_get_str (rsp_dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "remove-brick-id is not present in dict"); + goto out; + } break; case GF_OP_CMD_COMMIT: cmd_str = "commit"; @@ -1784,7 +1818,7 @@ gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, (rsp.op_ret) ? 
"unsuccessful": "successful"); if (global_state->mode & GLUSTER_MODE_XML) { - ret = cli_xml_output_vol_remove_brick (_gf_false, NULL, + ret = cli_xml_output_vol_remove_brick (_gf_false, rsp_dict, rsp.op_ret, rsp.op_errno, rsp.op_errstr); if (ret) @@ -1793,10 +1827,14 @@ gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - if (rsp.op_ret) - cli_err ("volume remove-brick: failed: %s", rsp.op_errstr); - else - cli_out ("volume remove-brick: success"); + if (rsp.op_ret) { + cli_err ("volume remove-brick %s: failed: %s", cmd_str, + rsp.op_errstr); + } else { + cli_out ("volume remove-brick %s: success", cmd_str); + if (GF_OP_CMD_START == cmd) + cli_out ("ID: %s", task_id_str); + } ret = rsp.op_ret; @@ -1825,7 +1863,8 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, gf1_cli_replace_op replace_op = 0; char *rb_operation_str = NULL; dict_t *rsp_dict = NULL; - char msg[1024] = {0,}; + char msg[1024] = {0,}; + char *task_id_str = NULL; if (-1 == req->rpc_status) { goto out; @@ -1850,33 +1889,48 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, goto out; } + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, + "failed to " + "unserialize rsp buffer to dictionary"); + goto out; + } + } + switch (replace_op) { case GF_REPLACE_OP_START: - if (rsp.op_ret) - rb_operation_str = "replace-brick failed to start"; - else - rb_operation_str = "replace-brick started successfully"; + if (rsp.op_ret) { + rb_operation_str = gf_strdup ("replace-brick failed to" + " start"); + } else { + ret = dict_get_str (rsp_dict, GF_REPLACE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, "Failed to get " + "\"replace-brick-id\" from dict"); + goto out; + } + ret = gf_asprintf (&rb_operation_str, + "replace-brick started successfully" + "\nID: %s", task_id_str); + if (ret < 0) + goto out; + } break; case GF_REPLACE_OP_STATUS: - if (rsp.op_ret || ret) - rb_operation_str = "replace-brick status unknown"; - else { - if (rsp.dict.dict_len) { - /* Unserialize the dictionary */ - rsp_dict = dict_new (); - - ret = dict_unserialize (rsp.dict.dict_val, - rsp.dict.dict_len, - &rsp_dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } - } + if (rsp.op_ret || ret) { + rb_operation_str = gf_strdup ("replace-brick status " + "unknown"); + } else { ret = dict_get_str (rsp_dict, "status-reply", &status_reply); if (ret) { @@ -1885,23 +1939,27 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - rb_operation_str = status_reply; + rb_operation_str = gf_strdup (status_reply); } break; case GF_REPLACE_OP_PAUSE: if (rsp.op_ret) - rb_operation_str = "replace-brick pause failed"; + rb_operation_str = gf_strdup ("replace-brick pause " + "failed"); else - rb_operation_str = "replace-brick paused successfully"; + rb_operation_str = gf_strdup ("replace-brick paused " + "successfully"); break; case GF_REPLACE_OP_ABORT: if (rsp.op_ret) - rb_operation_str = "replace-brick abort failed"; + rb_operation_str = gf_strdup ("replace-brick abort " + "failed"); else - rb_operation_str = "replace-brick aborted successfully"; + rb_operation_str = gf_strdup ("replace-brick aborted " + "successfully"); break; case GF_REPLACE_OP_COMMIT: @@ -1922,9 +1980,11 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, 
struct iovec *iov, if (rsp.op_ret || ret) - rb_operation_str = "replace-brick commit failed"; + rb_operation_str = gf_strdup ("replace-brick commit " + "failed"); else - rb_operation_str = "replace-brick commit successful"; + rb_operation_str = gf_strdup ("replace-brick commit " + "successful"); break; @@ -1935,7 +1995,7 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, } if (rsp.op_ret && (strcmp (rsp.op_errstr, ""))) { - rb_operation_str = rsp.op_errstr; + rb_operation_str = gf_strdup (rsp.op_errstr); } gf_log ("cli", GF_LOG_INFO, "Received resp to replace brick"); @@ -1959,6 +2019,17 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, ret = rsp.op_ret; out: + if (frame) + frame->local = NULL; + + if (local) { + dict_unref (local->dict); + cli_local_wipe (local); + } + + if (rb_operation_str) + GF_FREE (rb_operation_str); + cli_cmd_broadcast_response (ret); free (rsp.dict.dict_val); if (rsp_dict) @@ -3030,7 +3101,7 @@ gf_cli_remove_brick (call_frame_t *frame, xlator_t *this, GLUSTER_CLI_REMOVE_BRICK, this, cli_rpc_prog, NULL); } else { - /* Need rebalance status to e sent :-) */ + /* Need rebalance status to be sent :-) */ req_dict = dict_new (); if (!req_dict) { ret = -1; @@ -5272,6 +5343,55 @@ out: return; } + +static void +cli_print_volume_tasks (dict_t *dict) { + int ret = -1; + int tasks = 0; + char *op = 0; + char *task_id_str = NULL; + int status = 0; + char key[1024] = {0,}; + int i = 0; + + ret = dict_get_int32 (dict, "tasks", &tasks); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "Failed to get tasks count"); + return; + } + + if (tasks == 0) { + cli_out ("There are no active volume tasks"); + return; + } + + cli_out ("%15s%40s%15s", "Task", "ID", "Status"); + cli_out ("%15s%40s%15s", "----", "--", "------"); + for (i = 0; i < tasks; i++) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.type", i); + ret = dict_get_str(dict, key, &op); + if (ret) + return; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.id", i); + ret = dict_get_str (dict, key, &task_id_str); + if (ret) + return; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.status", i); + ret = dict_get_int32 (dict, key, &status); + if (ret) + return; + + cli_out ("%15s%40s%15d", op, task_id_str, status); + } + +} + static int gf_cli_status_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) @@ -5518,6 +5638,9 @@ gf_cli_status_cbk (struct rpc_req *req, struct iovec *iov, } } cli_out (" "); + + if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) + cli_print_volume_tasks (dict); cont: ret = rsp.op_ret; diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c index ffe32cacc..0765dc26a 100644 --- a/cli/src/cli-xml-output.c +++ b/cli/src/cli-xml-output.c @@ -1381,6 +1381,74 @@ out: } int +cli_xml_output_vol_status_tasks (cli_local_t *local, dict_t *dict) { + int ret = -1; + char *task_type = NULL; + char *task_id_str = NULL; + int status = 0; + int tasks = 0; + char key[1024] = {0,}; + int i = 0; + + /* <tasks> */ + ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"tasks"); + XML_RET_CHECK_AND_GOTO (ret, out); + + ret = dict_get_int32 (dict, "tasks", &tasks); + if (ret) + goto out; + + for (i = 0; i < tasks; i++) { + /* <task> */ + ret = xmlTextWriterStartElement (local->writer, + (xmlChar *)"task"); + XML_RET_CHECK_AND_GOTO (ret, out); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.type", i); + ret = dict_get_str (dict, key, &task_type); + if (ret) + 
goto out; + ret = xmlTextWriterWriteFormatElement (local->writer, + (xmlChar *)"type", + "%s", task_type); + XML_RET_CHECK_AND_GOTO (ret, out); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.id", i); + ret = dict_get_str (dict, key, &task_id_str); + if (ret) + goto out; + ret = xmlTextWriterWriteFormatElement (local->writer, + (xmlChar *)"id", + "%s", task_id_str); + XML_RET_CHECK_AND_GOTO (ret, out); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.status", i); + ret = dict_get_int32 (dict, key, &status); + if (ret) + goto out; + ret = xmlTextWriterWriteFormatElement (local->writer, + (xmlChar *)"status", + "%d", status); + XML_RET_CHECK_AND_GOTO (ret, out); + + /* </task> */ + ret = xmlTextWriterEndElement (local->writer); + XML_RET_CHECK_AND_GOTO (ret, out); + } + + /* </tasks> */ + ret = xmlTextWriterEndElement (local->writer); + + +out: + gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int cli_xml_output_vol_status (cli_local_t *local, dict_t *dict) { #if (HAVE_LIB_XML) @@ -1507,6 +1575,12 @@ cli_xml_output_vol_status (cli_local_t *local, dict_t *dict) XML_RET_CHECK_AND_GOTO (ret, out); } + if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) { + ret = cli_xml_output_vol_status_tasks (local, dict); + if (ret) + goto out; + } + /* </volume> */ ret = xmlTextWriterEndElement (local->writer); XML_RET_CHECK_AND_GOTO (ret, out); @@ -2913,6 +2987,7 @@ cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret, int ret = -1; xmlTextWriterPtr writer = NULL; xmlBufferPtr buf = NULL; + char *task_id_str = NULL; ret = cli_begin_xml_output (&writer, &buf); if (ret) @@ -2926,6 +3001,14 @@ cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret, ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRebalance"); XML_RET_CHECK_AND_GOTO (ret, out); + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (ret == 0) { + ret = xmlTextWriterWriteFormatElement (writer, + (xmlChar *)"task-id", + "%s", task_id_str); + XML_RET_CHECK_AND_GOTO (ret, out); + } + ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op", "%d", op); XML_RET_CHECK_AND_GOTO (ret, out); @@ -2959,6 +3042,7 @@ cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict, int ret = -1; xmlTextWriterPtr writer = NULL; xmlBufferPtr buf = NULL; + char *task_id_str = NULL; ret = cli_begin_xml_output (&writer, &buf); if (ret) @@ -2972,6 +3056,14 @@ cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict, ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRemoveBrick"); XML_RET_CHECK_AND_GOTO (ret, out); + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret == 0) { + ret = xmlTextWriterWriteFormatElement (writer, + (xmlChar *)"task-id", + "%s", task_id_str); + XML_RET_CHECK_AND_GOTO (ret, out); + } + if (status_op) { ret = cli_xml_output_vol_rebalance_status (writer, dict); if (ret) @@ -3002,6 +3094,7 @@ cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict, int status = 0; uint64_t files = 0; char *current_file = 0; + char *task_id_str = NULL; xmlTextWriterPtr writer = NULL; xmlBufferPtr buf = NULL; @@ -3017,6 +3110,14 @@ cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict, ret = xmlTextWriterStartElement (writer, (xmlChar *)"volReplaceBrick"); XML_RET_CHECK_AND_GOTO (ret, out); + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &task_id_str); + if (ret == 0) { + ret = xmlTextWriterWriteFormatElement (writer, + (xmlChar 
*)"task-id", + "%s", task_id_str); + XML_RET_CHECK_AND_GOTO (ret, out); + } + ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op", "%d", op); XML_RET_CHECK_AND_GOTO (ret, out); diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 61edd5ec7..dae539841 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -142,6 +142,10 @@ #define GF_UUID_BUF_SIZE 50 +#define GF_REBALANCE_TID_KEY "rebalance-id" +#define GF_REMOVE_BRICK_TID_KEY "remove-brick-id" +#define GF_REPLACE_BRICK_TID_KEY "replace-brick-id" + /* NOTE: add members ONLY at the end (just before _MAXVALUE) */ typedef enum { GF_FOP_NULL = 0, diff --git a/tests/bugs/bug-857330/common.rc b/tests/bugs/bug-857330/common.rc new file mode 100644 index 000000000..f2327a862 --- /dev/null +++ b/tests/bugs/bug-857330/common.rc @@ -0,0 +1,70 @@ +. $(dirname $0)/../../include.rc + +UUID_REGEX='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}' + +TASK_ID="" +COMMAND="" +PATTERN="" + +function check-and-store-task-id() +{ + TASK_ID="" + + local task_id=$($CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX") + + if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then + return 1 + fi + + TASK_ID=$task_id + return 0; +} + +function check-with-stored-task-id() +{ + local task_id=$($CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX") + + if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then + return 1 + fi + + if [ "$TASK_ID" != "$task_id" ]; then + return 1 + fi + + return 0 +} + +function check-and-store-task-id-xml() +{ + TASK_ID="" + + local task_id=$($CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX") + + if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then + return 1 + fi + + TASK_ID=$task_id + return 0; +} + +function check-with-stored-task-id-xml() +{ + local task_id=$($CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX") + + if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then + return 1 + fi + + if [ "$TASK_ID" != "$task_id" ]; then + return 1 + fi + + return 0 +} + +function get-task-status() +{ + $CLI $COMMAND | grep -o $PATTERN +} diff --git a/tests/bugs/bug-857330/normal.t b/tests/bugs/bug-857330/normal.t new file mode 100755 index 000000000..abf8e2ac5 --- /dev/null +++ b/tests/bugs/bug-857330/normal.t @@ -0,0 +1,78 @@ +#!/bin/bash + +. 
$(dirname $0)/common.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 $H0:$B0/${V0}1; +TEST $CLI volume info $V0; +TEST $CLI volume start $V0; + +TEST glusterfs -s $H0 --volfile-id=$V0 $M0; + +TEST python2 $(dirname $0)/../../utils/create-files.py --multi -b 10 -d 10 -n 10 $M0; + +TEST umount $M0; + +############### +## Rebalance ## +############### +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2; + +COMMAND="volume rebalance $V0 start" +PATTERN="ID:" +TEST check-and-store-task-id + +COMMAND="volume status $V0" +PATTERN="Rebalance" +TEST check-with-stored-task-id + +COMMAND="volume rebalance $V0 status" +PATTERN="completed" +EXPECT_WITHIN 300 $PATTERN get-task-status + +################### +## Replace-brick ## +################### +REP_BRICK_PAIR="$H0:$B0/${V0}2 $H0:$B0/${V0}3" + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR start" +PATTERN="ID:" +TEST check-and-store-task-id + +COMMAND="volume status $V0" +PATTERN="Replace" +TEST check-with-stored-task-id + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR status" +PATTERN="complete" +EXPECT_WITHIN 300 $PATTERN get-task-status + +TEST $CLI volume replace-brick $V0 $REP_BRICK_PAIR commit; + +################## +## Remove-brick ## +################## +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 start" +PATTERN="ID:" +TEST check-and-store-task-id + +COMMAND="volume status $V0" +PATTERN="Remove" +TEST check-with-stored-task-id + +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 status" +PATTERN="completed" +EXPECT_WITHIN 300 $PATTERN get-task-status + +TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3 commit + +TEST $CLI volume stop $V0; +TEST $CLI volume delete $V0; +TEST ! $CLI volume info $V0; + +cleanup; diff --git a/tests/bugs/bug-857330/xml.t b/tests/bugs/bug-857330/xml.t new file mode 100755 index 000000000..a6e0b34ca --- /dev/null +++ b/tests/bugs/bug-857330/xml.t @@ -0,0 +1,101 @@ +#!/bin/bash + +. 
$(dirname $0)/common.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 $H0:$B0/${V0}1; +TEST $CLI volume info $V0; +TEST $CLI volume start $V0; + +TEST glusterfs -s $H0 --volfile-id=$V0 $M0; + +TEST python2 $(dirname $0)/../../utils/create-files.py --multi -b 10 -d 10 -n 10 $M0; + +TEST umount $M0; + + +############### +## Rebalance ## +############### +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2; + +COMMAND="volume rebalance $V0 start" +PATTERN="task-id" +TEST check-and-store-task-id-xml + +COMMAND="volume status $V0" +PATTERN="id" +TEST check-with-stored-task-id-xml + +COMMAND="volume rebalance $V0 status" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +## TODO: Add tests for rebalance stop + +COMMAND="volume rebalance $V0 status" +PATTERN="completed" +EXPECT_WITHIN 300 $PATTERN get-task-status + +################### +## Replace-brick ## +################### +REP_BRICK_PAIR="$H0:$B0/${V0}2 $H0:$B0/${V0}3" + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR start" +PATTERN="task-id" +TEST check-and-store-task-id-xml + +COMMAND="volume status $V0" +PATTERN="id" +TEST check-with-stored-task-id-xml + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR status" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +## TODO: Add more tests for replace-brick pause|abort + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR status" +PATTERN="complete" +EXPECT_WITHIN 300 $PATTERN get-task-status + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR commit" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +################## +## Remove-brick ## +################## +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 start" +PATTERN="task-id" +TEST check-and-store-task-id-xml + +COMMAND="volume status $V0" +PATTERN="id" +TEST check-with-stored-task-id-xml + +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 status" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 status" +PATTERN="completed" +EXPECT_WITHIN 300 $PATTERN get-task-status + +## TODO: Add tests for remove-brick stop + +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 commit" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +TEST $CLI volume stop $V0; +TEST $CLI volume delete $V0; +TEST ! 
$CLI volume info $V0; + +cleanup; diff --git a/tests/utils/create-files.py b/tests/utils/create-files.py new file mode 100755 index 000000000..0d937eff9 --- /dev/null +++ b/tests/utils/create-files.py @@ -0,0 +1,207 @@ +#!/usr/bin/python + +# This script was developed by Vijaykumar Koppad (vkoppad@redhat.com) +# The latest version of this script can found at +# http://github.com/vijaykumar-koppad/crefi + +from __future__ import with_statement +import sys +import os +import re +import random +from optparse import OptionParser +import time +import string +import errno + +def os_rd(src, size): + fd = os.open(src,os.O_RDONLY) + data = os.read(fd, size) + os.close(fd) + return data + +def os_wr(dest, data): + fd = os.open(dest,os.O_WRONLY|os.O_CREAT|os.O_EXCL, 0644) + os.write(fd, data) + os.close(fd) + return + +def create_sparse_file(fil): + if option.size: + option.random = False + size = option.size + else: + size = random.randint(option.min, option.max) + data = os_rd("/dev/zero", size) + os_wr(fil, data) + return + +def create_binary_file(fil): + if option.size: + option.random = False + size = option.size + else: + size = random.randint(option.min, option.max) + data = os_rd("/dev/urandom", size) + os_wr(fil, data) + return + +def create_txt_file(fil): + if option.size: + option.random = False + size = option.size + else: + size = random.randint(option.min, option.max) + if size < 500*1024: + data = os_rd("/etc/services", size) + os_wr(fil, data) + else: + data = os_rd("/etc/services", 500*1024) + file_size = 0 + fd = os.open(fil,os.O_WRONLY|os.O_CREAT|os.O_EXCL, 0644) + while file_size < size: + os.write(fd, data) + file_size += 500*1024 + os.close(fd) + return + +def get_filename(): + size = option.flen + char = string.uppercase+string.digits + st = ''.join(random.choice(char) for i in range(size)) + ti = str((hex(int(str(time.time()).split('.')[0])))[2:]) + return ti+"~~"+st + +def text_files(files, file_count): + for k in range(files): + if not file_count%option.inter: + print file_count + fil = get_filename() + create_txt_file(fil) + file_count += 1 + return file_count + +def sparse_files(files, file_count): + for k in range(files): + if not file_count%option.inter: + print file_count + fil = get_filename() + create_sparse_file(fil) + file_count += 1 + return file_count + +def binary_files(files, file_count): + for k in range(files): + if not file_count%option.inter: + print file_count + fil = get_filename() + create_binary_file(fil) + file_count += 1 + return file_count + +def human2bytes(size): + size_short = { + 1024 : ['K','KB','KiB','k','kB','kiB'], + 1024*1024 : ['M','MB','MiB'], + 1024*1024*1024 : ['G','GB','GiB'] +} + num = re.search('(\d+)',size).group() + ext = size[len(num):] + num = int(num) + if ext == '': + return num + for value, keys in size_short.items(): + if ext in keys: + size = num*value + return size + +def multipledir(mnt_pnt,brdth,depth,files): + files_count = 1 + for i in range(brdth): + breadth = mnt_pnt+"/"+str(i) + try: + os.makedirs(breadth) + except OSError as ex: + if not ex.errno is errno.EEXIST: + raise + os.chdir(breadth) + dir_depth = breadth + print breadth + for j in range(depth): + dir_depth = dir_depth+"/"+str(j) + try: + os.makedirs(dir_depth) + except OSError as ex: + if not ex.errno is errno.EEXIST: + raise + os.chdir(dir_depth) + if option.file_type == "text": + files_count = text_files(files, files_count) + elif option.file_type == "sparse": + files_count = sparse_files(files, files_count) + elif option.file_type == "binary": + files_count 
= binary_files(files, files_count) + else: + print "Not a valid file type" + sys.exit(1) + +def singledir(mnt_pnt, files): + files_count = 1 + os.chdir(mnt_pnt) + if option.file_type == "text": + files_count = text_files(files, files_count) + elif option.file_type == "sparse": + files_count = sparse_files(files, files_count) + elif option.file_type == "binary": + files_count = binary_files(files, files_count) + else: + print "Not a valid file type" + sys.exit(1) + +if __name__ == '__main__': + usage = "usage: %prog [option] <MNT_PT>" + parser = OptionParser(usage=usage) + parser.add_option("-n", dest="files",type="int" ,default=100, + help="number of files in each level [default: %default]") + parser.add_option("--size", action = "store",type="string", + help="size of the files to be used") + parser.add_option("--random", action="store_true", default=True, + help="random size of the file between --min and --max " + "[default: %default]") + parser.add_option("--max", action = "store",type="string", default="500K", + help="maximum size of the files, if random is True " + "[default: %default]") + parser.add_option("--min", action = "store",type="string", default="10K", + help="minimum size of the files, if random is True " + "[default: %default]" ) + parser.add_option("--single", action="store_true", dest="dir",default=True, + help="create files in single directory [default: %default]" ) + parser.add_option("--multi", action="store_false", dest="dir", + help="create files in multiple directories") + parser.add_option("-b", dest="brdth",type="int",default=5, + help="number of directories in one level(works with --multi)[default: %default]") + parser.add_option("-d", dest="depth",type="int",default=5, + help="number of levels of directories(works with --multi)[default: %default]") + parser.add_option("-l", dest="flen",type="int" ,default=10, + help="number of bytes for filename " + "[default: %default]") + parser.add_option("-t","--type", action="store", type="string" , dest="file_type",default="text", + help="type of the file to be created (text, sparse, binary) [default: %default]" ) + parser.add_option("-I", dest="inter", type="int", default=100, + help="print number files created of interval [defailt: %dafault]") + (option,args) = parser.parse_args() + if not args: + print "usage: <script> [option] <MNT_PT>" + print "" + sys.exit(1) + args[0] = os.path.abspath(args[0]) + if option.size: + option.size = human2bytes(option.size) + else: + option.max = human2bytes(option.max) + option.min = human2bytes(option.min) + if option.dir: + singledir(args[0], option.files) + else: + multipledir(args[0], option.brdth, option.depth, option.files) + print "creation of files completed.\n" diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index a14828e98..6ab859a10 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -822,6 +822,7 @@ out: ret = 0; //sent error to cli, prevent second reply } + GF_FREE (brick_list); free (cli_req.dict.dict_val); //its malloced by xdr @@ -1157,17 +1158,22 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) char msg[2048] = {0,}; int32_t flag = 0; gf1_op_commands cmd = GF_OP_CMD_NONE; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get 
volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname); goto out; } @@ -1179,7 +1185,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) snprintf (msg, sizeof (msg), "Replace brick is in progress on " "volume %s. Please retry after replace-brick " "operation is committed or aborted", volname); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; goto out; @@ -1187,7 +1193,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } cmd = flag; @@ -1205,20 +1211,38 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_START: { if (GLUSTERD_STATUS_STARTED != volinfo->status) { - snprintf (msg, sizeof (msg), "Volume %s needs to be started " - "before remove-brick (you can use 'force' or " - "'commit' to override this behavior)", - volinfo->volname); + snprintf (msg, sizeof (msg), "Volume %s needs to be " + "started before remove-brick (you can use " + "'force' or 'commit' to override this " + "behavior)", volinfo->volname); errstr = gf_strdup (msg); - gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } if (glusterd_is_defrag_on(volinfo)) { - errstr = gf_strdup("Rebalance is in progress. Please retry" - " after completion"); - gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr); + errstr = gf_strdup("Rebalance is in progress. 
Please " + "retry after completion"); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } + + if (is_origin_glusterd ()) { + ret = glusterd_generate_and_set_task_id + (dict, GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing remove-brick-id"); + ret = 0; + } + } break; } @@ -1240,7 +1264,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "count", &brick_count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } @@ -1253,7 +1277,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); if (ret && errstr) { if (op_errstr) *op_errstr = errstr; @@ -1390,10 +1414,10 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) } /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { + switch (volinfo->rebal.defrag_status) { case GF_DEFRAG_STATUS_FAILED: case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; default: break; } @@ -1412,42 +1436,67 @@ out: int glusterd_op_remove_brick (dict_t *dict, char **op_errstr) { - int ret = -1; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *brick = NULL; - int32_t count = 0; - int32_t i = 1; - char key[256] = {0,}; - int32_t flag = 0; - char err_str[4096] = {0,}; - int need_rebalance = 0; - int force = 0; - gf1_op_commands cmd = 0; - int32_t replica_count = 0; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char key[256] = {0,}; + int32_t flag = 0; + char err_str[4096] = {0,}; + int need_rebalance = 0; + int force = 0; + gf1_op_commands cmd = 0; + int32_t replica_count = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); goto out; } ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get command"); goto out; } cmd = flag; + /* Set task-id, if available, in ctx dict for operations other than + * start + */ + if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, dict, + GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set remove-brick-id"); + goto out; + } + } + } + + /* Clear task-id on completion/stopping of remove-brick operation */ + if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) + uuid_clear (volinfo->rebal.rebalance_id); + ret = 
-1; switch (cmd) { case GF_OP_CMD_NONE: @@ -1468,7 +1517,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } @@ -1476,7 +1525,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } @@ -1486,6 +1535,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } case GF_OP_CMD_START: + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Missing remove-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + } force = 0; break; @@ -1496,13 +1553,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_COMMIT_FORCE: if (volinfo->decommission_in_progress) { - if (volinfo->defrag) { - LOCK (&volinfo->defrag->lock); + if (volinfo->rebal.defrag) { + LOCK (&volinfo->rebal.defrag->lock); /* Fake 'rebalance-complete' so the graph change happens right away */ - volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_COMPLETE; - UNLOCK (&volinfo->defrag->lock); + UNLOCK (&volinfo->rebal.defrag->lock); } /* Graph change happens in rebalance _cbk function, no need to do anything here */ @@ -1525,7 +1583,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) snprintf (key, 256, "brick%d", i); ret = dict_get_str (dict, key, &brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); + gf_log (this->name, GF_LOG_ERROR, "Unable to get %s", + key); goto out; } @@ -1537,7 +1596,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = dict_get_int32 (dict, "replica-count", &replica_count); if (!ret) { - gf_log (THIS->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "changing replica count %d to %d on volume %s", volinfo->replica_count, replica_count, volinfo->volname); @@ -1559,34 +1618,36 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles"); + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo"); + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { + switch (volinfo->rebal.defrag_status) { case GF_DEFRAG_STATUS_FAILED: case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; default: break; } if (!force && need_rebalance) { /* perform the rebalance operations */ - ret = glusterd_handle_defrag_start (volinfo, err_str, 4096, - GF_DEFRAG_CMD_START_FORCE, - glusterd_remove_brick_migrate_cbk); + ret = glusterd_handle_defrag_start + (volinfo, err_str, sizeof (err_str), + GF_DEFRAG_CMD_START_FORCE, + glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); + if (!ret) volinfo->decommission_in_progress = 1; if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to start the rebalance"); } 
} else { diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 9bbfdba8b..7708139fc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -370,7 +370,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, goto out; snprintf (key, 256, "volume%d.rebalance", count); - ret = dict_set_int32 (volumes, key, volinfo->defrag_cmd); + ret = dict_set_int32 (volumes, key, volinfo->rebal.defrag_cmd); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 428051d4a..209dd736e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -434,6 +434,8 @@ out: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + ret = 0; + if (clnt_dict) dict_unref (clnt_dict); diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 66b58eca4..54faa8fa5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -1827,6 +1827,79 @@ out: } static int +_add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) +{ + + int ret = -1; + char key[128] = {0,}; + char *uuid_str = NULL; + int status = 0; + xlator_t *this = NULL; + + GF_ASSERT (dict); + GF_ASSERT (volinfo); + + this = THIS; + GF_ASSERT (this); + + switch (op) { + case GD_OP_REBALANCE: + case GD_OP_REMOVE_BRICK: + uuid_str = gf_strdup (uuid_utoa (volinfo->rebal.rebalance_id)); + status = volinfo->rebal.defrag_status; + break; + + case GD_OP_REPLACE_BRICK: + uuid_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); + status = volinfo->rep_brick.rb_status; + break; + + default: + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "%s operation doesn't have a" + " task_id", gd_op_list[op]); + goto out; + } + + snprintf (key, sizeof (key), "task%d.type", index); + ret = dict_set_str (dict, key, + (char *)gd_op_list[op]); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task type in dict"); + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.id", index); + + + if (!uuid_str) + goto out; + ret = dict_set_dynstr (dict, key, uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task id in dict"); + goto out; + } + uuid_str = NULL; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.status", index); + ret = dict_set_int32 (dict, key, status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task status in dict"); + goto out; + } + +out: + if (uuid_str) + GF_FREE (uuid_str); + return ret; +} + +static int glusterd_op_status_volume (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { @@ -1845,6 +1918,8 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, dict_t *vol_opts = NULL; gf_boolean_t nfs_disabled = _gf_false; gf_boolean_t shd_enabled = _gf_true; + gf_boolean_t origin_glusterd = _gf_false; + int tasks = 0; this = THIS; GF_ASSERT (this); @@ -1854,6 +1929,8 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, GF_ASSERT (dict); + origin_glusterd = is_origin_glusterd (); + ret = dict_get_uint32 (dict, "cmd", &cmd); if (ret) goto out; @@ -1866,11 +1943,10 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, if ((cmd & GF_CLI_STATUS_ALL)) { ret = glusterd_get_all_volnames (rsp_dict); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, 
+ gf_log (this->name, GF_LOG_ERROR, "failed to get all volume " "names for status"); } - } ret = dict_set_uint32 (rsp_dict, "cmd", cmd); @@ -1886,7 +1962,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Volume with name: %s " + gf_log (this->name, GF_LOG_ERROR, "Volume with name: %s " "does not exist", volname); goto out; } @@ -1984,23 +2060,56 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, ret = dict_set_int32 (rsp_dict, "brick-index-max", brick_index); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Error setting brick-index-max to dict"); goto out; } ret = dict_set_int32 (rsp_dict, "other-count", other_count); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Error setting other-count to dict"); goto out; } ret = dict_set_int32 (rsp_dict, "count", node_count); - if (ret) - gf_log (THIS->name, GF_LOG_ERROR, + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Error setting node count to dict"); + goto out; + } + + /* Active tasks */ + if (((cmd & GF_CLI_STATUS_MASK) != GF_CLI_STATUS_NONE) || + !origin_glusterd) + goto out; + + if (glusterd_is_defrag_on (volinfo)) { + ret = _add_task_to_dict (rsp_dict, volinfo, volinfo->rebal.op, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } + if (glusterd_is_rb_ongoing (volinfo)) { + ret = _add_task_to_dict (rsp_dict, volinfo, GD_OP_REPLACE_BRICK, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } + + ret = dict_set_int32 (rsp_dict, "tasks", tasks); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Error setting tasks count in dict"); out: - gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -2257,9 +2366,13 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) char *volname = NULL; uint32_t status_cmd = GF_CLI_STATUS_NONE; char *errstr = NULL; + xlator_t *this = THIS; GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + req_dict = dict_new (); if (!req_dict) goto out; @@ -2268,7 +2381,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) op = glusterd_op_get_op (); ctx = (void*)glusterd_op_get_ctx (); if (!ctx) { - gf_log ("", GF_LOG_ERROR, "Null Context for " + gf_log (this->name, GF_LOG_ERROR, "Null Context for " "op %d", op); ret = -1; goto out; @@ -2280,8 +2393,8 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) if (ret) goto out; ctx = op_ctx; - } #undef GD_SYNC_OPCODE_KEY + } dict = ctx; switch (op) { @@ -2316,7 +2429,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) { ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "volname is not present in " "operation ctx"); goto out; @@ -2329,6 +2442,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) if (ret) goto out; } + dict_destroy (req_dict); req_dict = dict_ref (dict); } break; @@ -2339,12 +2453,33 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) break; } + case GD_OP_REMOVE_BRICK: + { + dict_t *dict = ctx; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "volname is 
not present in " + "operation ctx"); + goto out; + } + + ret = glusterd_dict_set_volid (dict, volname, + op_errstr); + if (ret) + goto out; + + dict_destroy (req_dict); + req_dict = dict_ref (dict); + } + break; + case GD_OP_STATUS_VOLUME: { ret = dict_get_uint32 (dict, "cmd", &status_cmd); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Status command not present " "in op ctx"); goto out; @@ -2361,7 +2496,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) case GD_OP_ADD_BRICK: case GD_OP_REPLACE_BRICK: case GD_OP_RESET_VOLUME: - case GD_OP_REMOVE_BRICK: case GD_OP_LOG_ROTATE: case GD_OP_QUOTA: case GD_OP_PROFILE_VOLUME: @@ -2377,7 +2511,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) { ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "volname is not present in " "operation ctx"); goto out; @@ -3862,6 +3996,7 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, char buf[1024] = {0,}; char *node_str = NULL; glusterd_conf_t *priv = NULL; + glusterd_rebalance_t *rebal = NULL; priv = THIS->private; GF_ASSERT (req_dict); @@ -3877,6 +4012,8 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, if (ret) goto out; + rebal = &volinfo->rebal; + if (rsp_dict) { ret = glusterd_defrag_volume_status_update (volinfo, rsp_dict); @@ -3905,42 +4042,42 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, memset (key, 0 , 256); snprintf (key, 256, "files-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_files); + ret = dict_set_uint64 (op_ctx, key, rebal->rebalance_files); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set file count"); memset (key, 0 , 256); snprintf (key, 256, "size-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_data); + ret = dict_set_uint64 (op_ctx, key, rebal->rebalance_data); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set size of xfer"); memset (key, 0 , 256); snprintf (key, 256, "lookups-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->lookedup_files); + ret = dict_set_uint64 (op_ctx, key, rebal->lookedup_files); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set lookedup file count"); memset (key, 0 , 256); snprintf (key, 256, "status-%d", i); - ret = dict_set_int32 (op_ctx, key, volinfo->defrag_status); + ret = dict_set_int32 (op_ctx, key, rebal->defrag_status); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set status"); memset (key, 0 , 256); snprintf (key, 256, "failures-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_failures); + ret = dict_set_uint64 (op_ctx, key, rebal->rebalance_failures); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set failure count"); memset (key, 0, 256); snprintf (key, 256, "run-time-%d", i); - ret = dict_set_double (op_ctx, key, volinfo->rebalance_time); + ret = dict_set_double (op_ctx, key, rebal->rebalance_time); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set run-time"); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index fa1af7d29..4c7282e82 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -88,12 +88,12 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, if (!volinfo) return 0; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; if (!defrag) return 0; if 
((event == RPC_CLNT_DISCONNECT) && defrag->connected) - volinfo->defrag = NULL; + volinfo->rebal.defrag = NULL; GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); @@ -126,12 +126,12 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, UNLOCK (&defrag->lock); if (!glusterd_is_service_running (pidfile, NULL)) { - if (volinfo->defrag_status == - GF_DEFRAG_STATUS_STARTED) { - volinfo->defrag_status = - GF_DEFRAG_STATUS_FAILED; + if (volinfo->rebal.defrag_status == + GF_DEFRAG_STATUS_STARTED) { + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_FAILED; } else { - volinfo->defrag_cmd = 0; + volinfo->rebal.defrag_cmd = 0; } } @@ -142,7 +142,8 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, defrag->rpc = NULL; } if (defrag->cbk_fn) - defrag->cbk_fn (volinfo, volinfo->defrag_status); + defrag->cbk_fn (volinfo, + volinfo->rebal.defrag_status); GF_FREE (defrag); gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", @@ -161,7 +162,8 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len, int cmd, defrag_cbk_fn_t cbk) + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op) { int ret = -1; glusterd_defrag_info_t *defrag = NULL; @@ -183,22 +185,24 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); if (ret) goto out; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + if (!volinfo->rebal.defrag) goto out; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; defrag->cmd = cmd; + volinfo->rebal.op = op; + LOCK_INIT (&defrag->lock); - volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED; + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; glusterd_volinfo_reset_defrag_stats (volinfo); - volinfo->defrag_cmd = cmd; glusterd_store_perform_node_state_store (volinfo); GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv); @@ -294,13 +298,15 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, int ret = -1; glusterd_defrag_info_t *defrag = NULL; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + + if (!volinfo->rebal.defrag) goto out; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; defrag->cmd = cmd; @@ -434,10 +440,12 @@ out: glusterd_op_sm (); if (ret) { - ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, req, - dict, "operation failed"); + ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, + req, dict, + "operation failed"); if (dict) dict_unref (dict); + } free (cli_req.dict.dict_val);//malloced by xdr @@ -449,38 +457,71 @@ out: int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + dict_t *op_ctx = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); 
if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not found"); + gf_log (this->name, GF_LOG_DEBUG, "volname not found"); goto out; } + ret = dict_get_int32 (dict, "rebalance-command", &cmd); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd not found"); + gf_log (this->name, GF_LOG_DEBUG, "cmd not found"); goto out; } ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "failed to validate"); + gf_log (this->name, GF_LOG_DEBUG, "failed to validate"); goto out; } switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + if (is_origin_glusterd ()) { + op_ctx = glusterd_op_get_ctx (); + if (!op_ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (op_ctx, GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, + &task_id_str); + if (ret) { + snprintf (msg, sizeof (msg), + "Missing rebalance-id"); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); + ret = 0; + } + } ret = glusterd_defrag_start_validate (volinfo, - msg, sizeof (msg)); + msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, - "start validate failed"); + gf_log (this->name, GF_LOG_DEBUG, + "start validate failed"); goto out; } break; @@ -503,43 +544,86 @@ out: int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; - gf_boolean_t volfile_update = _gf_false; - - priv = THIS->private; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + dict_t *ctx = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not given"); + gf_log (this->name, GF_LOG_DEBUG, "volname not given"); goto out; } ret = dict_get_int32 (dict, "rebalance-command", &cmd); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "command not given"); + gf_log (this->name, GF_LOG_DEBUG, "command not given"); goto out; } + ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd validate failed"); + gf_log (this->name, GF_LOG_DEBUG, "cmd validate failed"); goto out; } + /* Set task-id, if available, in op_ctx dict for operations other than + * start + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + ret = -1; + goto out; + } + + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REMOVE_BRICK_TID_KEY); + else + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + 
"Failed to set task-id"); + goto out; + } + } + } + switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Missing rebalance " + "id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + } ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), - cmd, NULL); + cmd, NULL, GD_OP_REBALANCE); break; case GF_DEFRAG_CMD_STOP: /* Fall back to the old volume file in case of decommission*/ @@ -558,7 +642,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } @@ -566,7 +650,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index bca306bd8..a6c5a9aaf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -168,7 +168,7 @@ glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo, if (!volinfo || !brickinfo) goto out; - *brickinfo = volinfo->dst_brick; + *brickinfo = volinfo->rep_brick.dst_brick; ret = 0; @@ -199,38 +199,43 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, char *savetok = NULL; char voldir[PATH_MAX] = {0}; char pidfile[PATH_MAX] = {0}; + char *task_id_str = NULL; + xlator_t *this = NULL; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); + + priv = this->private; GF_ASSERT (priv); ret = dict_get_str (dict, "src-brick", &src_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick"); goto out; } - gf_log ("", GF_LOG_DEBUG, "src brick=%s", src_brick); + gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick); ret = dict_get_str (dict, "dst-brick", &dst_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dest brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get dest brick"); goto out; } - gf_log ("", GF_LOG_DEBUG, "dst brick=%s", dst_brick); + gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op); if (ret) { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "dict get on replace-brick operation failed"); goto out; } @@ -262,7 +267,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, !glusterd_is_valid_volfpath (volname, dst_brick)) { snprintf (msg, sizeof (msg), "brick path %s is too " "long.", dst_brick); - gf_log ("", GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; @@ -271,10 +276,10 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, ret = glusterd_check_gsync_running (volinfo, &is_run); if (ret && (is_run == _gf_false)) - gf_log ("", GF_LOG_WARNING, "Unable to get 
the status" + gf_log (this->name, GF_LOG_WARNING, "Unable to get the status" " of active "GEOREP" session"); if (is_run) { - gf_log ("", GF_LOG_WARNING, GEOREP" sessions active" + gf_log (this->name, GF_LOG_WARNING, GEOREP" sessions active" "for the volume %s ", volname); snprintf (msg, sizeof(msg), GEOREP" sessions are active " "for the volume %s.\nStop "GEOREP " sessions " @@ -289,29 +294,58 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, if (glusterd_is_defrag_on(volinfo)) { snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " "progress. Please retry after completion", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; goto out; } + ctx = glusterd_op_get_ctx(); + switch (replace_op) { case GF_REPLACE_OP_START: if (glusterd_is_rb_started (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started for volume "); + snprintf (msg, sizeof (msg), "Replace brick is already " + "started for volume"); + gf_log (this->name, GF_LOG_ERROR, msg); + *op_errstr = gf_strdup (msg); ret = -1; goto out; } + if (is_origin_glusterd ()) { + if (!ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (ctx, GF_REPLACE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + + } else { + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing replace-brick-id"); + ret = 0; + } + } break; case GF_REPLACE_OP_PAUSE: if (glusterd_is_rb_paused (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "paused for volume "); + gf_log (this->name, GF_LOG_ERROR, "Replace brick is " + "already paused for volume "); ret = -1; goto out; } else if (!glusterd_is_rb_started(volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not" + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not" " started for volume "); ret = -1; goto out; @@ -320,7 +354,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, case GF_REPLACE_OP_ABORT: if (!glusterd_is_rb_ongoing (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not" + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not" " started or paused for volume "); ret = -1; goto out; @@ -329,7 +363,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, case GF_REPLACE_OP_COMMIT: if (!glusterd_is_rb_ongoing (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not " + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not " "started for volume "); ret = -1; goto out; @@ -349,7 +383,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, goto out; } - gf_log (THIS->name, GF_LOG_ERROR, "%s", *op_errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); ret = -1; goto out; } @@ -369,10 +403,9 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, goto out; } - ctx = glusterd_op_get_ctx(); if (ctx) { if (!glusterd_is_fuse_available ()) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to open /dev/" + gf_log (this->name, GF_LOG_ERROR, "Unable to open /dev/" "fuse (%s), replace-brick command failed", strerror (errno)); snprintf (msg, sizeof(msg), "Fuse unavailable\n " @@ -384,7 +417,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - gf_log ("", GF_LOG_DEBUG, + gf_log 
(this->name, GF_LOG_DEBUG, "I AM THE SOURCE HOST"); if (src_brickinfo->port && rsp_dict) { ret = dict_set_int32 (rsp_dict, "src-brick-port", @@ -416,14 +449,14 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, dup_dstbrick = gf_strdup (dst_brick); if (!dup_dstbrick) { ret = -1; - gf_log ("", GF_LOG_ERROR, "Memory allocation failed"); + gf_log (this->name, GF_LOG_ERROR, "Memory allocation failed"); goto out; } host = strtok_r (dup_dstbrick, ":", &savetok); path = strtok_r (NULL, ":", &savetok); if (!host || !path) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "dst brick %s is not of form <HOSTNAME>:<export-dir>", dst_brick); ret = -1; @@ -439,7 +472,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, if (ret) { *op_errstr = gf_strdup (msg); ret = -1; - gf_log (THIS->name, GF_LOG_ERROR, *op_errstr); + gf_log (this->name, GF_LOG_ERROR, *op_errstr); goto out; } @@ -447,8 +480,8 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, (replace_op == GF_REPLACE_OP_START || replace_op == GF_REPLACE_OP_COMMIT_FORCE)) { - volinfo->src_brick = src_brickinfo; - volinfo->dst_brick = dst_brickinfo; + volinfo->rep_brick.src_brick = src_brickinfo; + volinfo->rep_brick.dst_brick = dst_brickinfo; } if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) { @@ -457,7 +490,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, *op_errstr = gf_strdup ("Incorrect source or " "destination brick"); if (*op_errstr) - gf_log (THIS->name, GF_LOG_ERROR, "%s", *op_errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); goto out; } @@ -499,7 +532,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, out: GF_FREE (dup_dstbrick); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -1487,6 +1520,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) char *dst_brick = NULL; glusterd_brickinfo_t *src_brickinfo = NULL; glusterd_brickinfo_t *dst_brickinfo = NULL; + char *task_id_str = NULL; this = THIS; GF_ASSERT (this); @@ -1496,26 +1530,23 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = dict_get_str (dict, "src-brick", &src_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick"); goto out; } - gf_log (this->name, GF_LOG_DEBUG, - "src brick=%s", src_brick); + gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick); ret = dict_get_str (dict, "dst-brick", &dst_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dst brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get dst brick"); goto out; } - gf_log (this->name, GF_LOG_DEBUG, - "dst brick=%s", dst_brick); + gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick); ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } @@ -1528,28 +1559,30 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); goto out; } ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, &src_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get src-brickinfo"); 
goto out; } ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get " + gf_log (this->name, GF_LOG_ERROR, "Unable to get " "replace brick destination brickinfo"); goto out; } ret = glusterd_resolve_brick (dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to resolve dst-brickinfo"); goto out; } @@ -1558,29 +1591,62 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) if (ret) goto out; + ctx = glusterd_op_get_ctx(); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get op_ctx"); + ret = -1; + goto out; + } + if ((GF_REPLACE_OP_START != replace_op)) { ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, dict, replace_op); if (ret) goto out; + + /* Set task-id, if available, in op_ctx dict for operations + * other than start + */ + if (!uuid_is_null (volinfo->rep_brick.rb_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rep_brick.rb_id, ctx, + GF_REPLACE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set replace-brick-id"); + goto out; + } + } } switch (replace_op) { case GF_REPLACE_OP_START: { + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Missing replace-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rep_brick.rb_id); + } + if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "I AM THE DESTINATION HOST"); if (!glusterd_is_rb_paused (volinfo)) { - ret = rb_spawn_destination_brick (volinfo, dst_brickinfo); + ret = rb_spawn_destination_brick + (volinfo, dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to spawn destination brick"); + gf_log (this->name, GF_LOG_DEBUG, + "Failed to spawn destination " + "brick"); goto out; } } else { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started=> no need to restart dst brick "); + gf_log (this->name, GF_LOG_ERROR, + "Replace brick is already started=> no " + "need to restart dst brick "); } } @@ -1589,14 +1655,14 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = rb_src_brick_restart (volinfo, src_brickinfo, 1); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not restart src-brick"); + gf_log (this->name, GF_LOG_DEBUG, + "Could not restart src-brick"); goto out; } } if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "adding dst-brick port no"); ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, @@ -1617,7 +1683,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) dst_brickinfo, GF_REPLACE_OP_COMMIT); if (ret) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Commit operation failed"); goto out; } @@ -1627,16 +1693,17 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) case GF_REPLACE_OP_COMMIT_FORCE: { ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); - gf_log (THIS->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Received commit - will be adding dst brick and " "removing src brick"); if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log (THIS->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "I AM THE DESTINATION HOST"); - ret = rb_kill_destination_brick (volinfo, dst_brickinfo); + ret = rb_kill_destination_brick (volinfo, + dst_brickinfo); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log 
(this->name, GF_LOG_CRITICAL, "Unable to cleanup dst brick"); goto out; } @@ -1644,39 +1711,41 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_nodesvcs_stop (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Unable to stop nfs server, ret: %d", ret); } ret = glusterd_op_perform_replace_brick (volinfo, src_brick, dst_brick); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to add " - "dst-brick: %s to volume: %s", - dst_brick, volinfo->volname); + gf_log (this->name, GF_LOG_CRITICAL, "Unable to add " + "dst-brick: %s to volume: %s", dst_brick, + volinfo->volname); (void) glusterd_nodesvcs_handle_graph_change (volinfo); goto out; } - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; ret = glusterd_nodesvcs_handle_graph_change (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "Failed to generate nfs volume file"); } ret = glusterd_fetchspec_notify (THIS); glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - glusterd_brickinfo_delete (volinfo->dst_brick); - volinfo->src_brick = volinfo->dst_brick = NULL; + glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; + uuid_clear (volinfo->rep_brick.rb_id); } break; case GF_REPLACE_OP_PAUSE: { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Received pause - doing nothing"); ctx = glusterd_op_get_ctx (); if (ctx) { @@ -1684,7 +1753,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) dst_brickinfo, GF_REPLACE_OP_PAUSE); if (ret) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Pause operation failed"); goto out; } @@ -1703,7 +1772,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) dst_brickinfo, GF_REPLACE_OP_ABORT); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Abort operation failed"); goto out; } @@ -1711,7 +1780,8 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to disable pump"); + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to disable pump"); } if (!glusterd_is_local_addr (src_brickinfo->hostname)) { @@ -1726,7 +1796,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log (THIS->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "I AM THE DESTINATION HOST"); ret = rb_kill_destination_brick (volinfo, dst_brickinfo); if (ret) { @@ -1736,14 +1806,15 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } } glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - glusterd_brickinfo_delete (volinfo->dst_brick); - volinfo->src_brick = volinfo->dst_brick = NULL; + glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; } break; case GF_REPLACE_OP_STATUS: { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "received status - doing nothing"); ctx = glusterd_op_get_ctx (); if (ctx) { @@ -1751,7 +1822,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = dict_set_str (ctx, "status-reply", "replace brick has been paused"); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to set pump status" " in ctx"); goto out; @@ -1775,7 +1846,7 @@ glusterd_op_replace_brick (dict_t *dict, 
dict_t *rsp_dict) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, "Couldn't store" + gf_log (this->name, GF_LOG_ERROR, "Couldn't store" " replace brick operation's state"); out: diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 413c8a39a..bb7c0a76c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -891,29 +891,33 @@ glusterd_store_rbstate_write (int fd, glusterd_volinfo_t *volinfo) GF_ASSERT (fd > 0); GF_ASSERT (volinfo); - snprintf (buf, sizeof (buf), "%d", volinfo->rb_status); + snprintf (buf, sizeof (buf), "%d", volinfo->rep_brick.rb_status); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_STATUS, buf); if (ret) goto out; - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { snprintf (buf, sizeof (buf), "%s:%s", - volinfo->src_brick->hostname, - volinfo->src_brick->path); + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_SRC_BRICK, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%s:%s", - volinfo->dst_brick->hostname, - volinfo->dst_brick->path); + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_BRICK, buf); if (ret) goto out; + + uuid_unparse (volinfo->rep_brick.rb_id, buf); + ret = glusterd_store_save_value (fd, GF_REPLACE_BRICK_TID_KEY, + buf); } out: @@ -960,17 +964,29 @@ glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo) GF_ASSERT (fd > 0); GF_ASSERT (volinfo); - if (volinfo->defrag_cmd == GF_DEFRAG_CMD_STATUS) { + if (volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_STATUS) { ret = 0; goto out; } - snprintf (buf, sizeof (buf), "%d", volinfo->defrag_cmd); + snprintf (buf, sizeof (buf), "%d", volinfo->rebal.defrag_cmd); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, buf); if (ret) goto out; + snprintf (buf, sizeof (buf), "%d", volinfo->rebal.op); + ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_DEFRAG_OP, + buf); + if (ret) + goto out; + + if (volinfo->rebal.defrag_cmd) { + uuid_unparse (volinfo->rebal.rebalance_id, buf); + ret = glusterd_store_save_value (fd, + GF_REBALANCE_TID_KEY, + buf); + } out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -2145,22 +2161,26 @@ glusterd_store_retrieve_rbstate (char *volname) while (!ret) { if (!strncmp (key, GLUSTERD_STORE_KEY_RB_STATUS, strlen (GLUSTERD_STORE_KEY_RB_STATUS))) { - volinfo->rb_status = atoi (value); + volinfo->rep_brick.rb_status = atoi (value); } - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { if (!strncmp (key, GLUSTERD_STORE_KEY_RB_SRC_BRICK, strlen (GLUSTERD_STORE_KEY_RB_SRC_BRICK))) { ret = glusterd_brickinfo_new_from_brick (value, - &volinfo->src_brick); + &volinfo->rep_brick.src_brick); if (ret) goto out; } else if (!strncmp (key, GLUSTERD_STORE_KEY_RB_DST_BRICK, strlen (GLUSTERD_STORE_KEY_RB_DST_BRICK))) { ret = glusterd_brickinfo_new_from_brick (value, - &volinfo->dst_brick); + &volinfo->rep_brick.dst_brick); if (ret) goto out; + } else if (!strncmp (key, GF_REPLACE_BRICK_TID_KEY, + strlen (GF_REPLACE_BRICK_TID_KEY))) { + uuid_parse (value, + volinfo->rep_brick.rb_id); } } @@ -2227,13 +2247,30 @@ glusterd_store_retrieve_node_state (char *volname) if (ret) goto 
out; - if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, - strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { - volinfo->defrag_cmd = atoi (value); - } + while (ret == 0) { + if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, + strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { + volinfo->rebal.defrag_cmd = atoi (value); + } - GF_FREE (key); - GF_FREE (value); + if (volinfo->rebal.defrag_cmd) { + if (!strncmp (key, GF_REBALANCE_TID_KEY, + strlen (GF_REBALANCE_TID_KEY))) + uuid_parse (value, volinfo->rebal.rebalance_id); + + if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP, + strlen (GLUSTERD_STORE_KEY_DEFRAG_OP))) + volinfo->rebal.op = atoi (value); + } + + GF_FREE (key); + GF_FREE (value); + key = NULL; + value = NULL; + + ret = glusterd_store_iter_get_next (iter, &key, &value, + &op_errno); + } if (op_errno != GD_STORE_EOF) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 1ab398c0b..03ef00059 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -52,6 +52,7 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" #define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" #define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" +#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" #define GLUSTERD_STORE_KEY_USERNAME "username" #define GLUSTERD_STORE_KEY_PASSWORD "password" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 879bb126a..4d2d5f8ab 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1714,16 +1714,18 @@ int32_t glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count) { - int32_t ret = -1; - char prefix[512] = {0,}; - char key[512] = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t i = 1; - char *volume_id_str = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - char *str = NULL; - glusterd_dict_ctx_t ctx = {0}; + int32_t ret = -1; + char prefix[512] = {0,}; + char key[512] = {0,}; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t i = 1; + char *volume_id_str = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + char *str = NULL; + glusterd_dict_ctx_t ctx = {0}; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1794,14 +1796,16 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); - if (!volume_id_str) + if (!volume_id_str) { + ret = -1; goto out; - + } memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.volume_id", count); ret = dict_set_dynstr (dict, key, volume_id_str); if (ret) goto out; + volume_id_str = NULL; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.username", count); @@ -1822,24 +1826,46 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, } memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); - ret = dict_set_int32 (dict, key, volinfo->rb_status); + snprintf (key, 256, "volume%d.rebalance", count); + ret = dict_set_int32 (dict, key, volinfo->rebal.defrag_cmd); if (ret) goto out; + if (volinfo->rebal.defrag_cmd) { + rebalance_id_str = gf_strdup (uuid_utoa + (volinfo->rebal.rebalance_id)); + if (!rebalance_id_str) { + ret = -1; + goto out; + } + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d.rebalance-id", count); + ret = dict_set_dynstr 
(dict, key, rebalance_id_str); + if (ret) + goto out; + rebalance_id_str = NULL; + } + memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d.rebalance", count); - ret = dict_set_int32 (dict, key, volinfo->defrag_cmd); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_set_uint32 (dict, key, volinfo->rebal.op); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); + ret = dict_set_int32 (dict, key, volinfo->rep_brick.rb_status); if (ret) goto out; - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, count); gf_asprintf (&src_brick, "%s:%s", - volinfo->src_brick->hostname, - volinfo->src_brick->path); + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); ret = dict_set_dynstr (dict, key, src_brick); if (ret) goto out; @@ -1848,11 +1874,24 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, count); gf_asprintf (&dst_brick, "%s:%s", - volinfo->dst_brick->hostname, - volinfo->dst_brick->path); + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); ret = dict_set_dynstr (dict, key, dst_brick); if (ret) goto out; + + rb_id_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); + if (!rb_id_str) { + ret = -1; + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_set_dynstr (dict, key, rb_id_str); + if (ret) + goto out; + rb_id_str = NULL; } snprintf (prefix, sizeof (prefix), "volume%d", count); @@ -1907,6 +1946,10 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, out: + GF_FREE (volume_id_str); + GF_FREE (rebalance_id_str); + GF_FREE (rb_id_str); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; @@ -2473,6 +2516,8 @@ glusterd_import_volinfo (dict_t *vols, int count, char *dst_brick = NULL; char *str = NULL; int rb_status = 0; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; GF_ASSERT (vols); GF_ASSERT (volinfo); @@ -2580,6 +2625,8 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + uuid_parse (volume_id_str, new_volinfo->volume_id); + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.username", count); ret = dict_get_str (vols, key, &str); @@ -2609,23 +2656,46 @@ glusterd_import_volinfo (dict_t *vols, int count, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.rebalance", count); - ret = dict_get_uint32 (vols, key, &new_volinfo->defrag_cmd); + ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); goto out; } - uuid_parse (volume_id_str, new_volinfo->volume_id); + if (new_volinfo->rebal.defrag_cmd) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-id", count); + ret = dict_get_str (vols, key, &rebalance_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rebalance_id_str, + new_volinfo->rebal.rebalance_id); + } + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_get_uint32 (vols, key,(uint32_t *) &new_volinfo->rebal.op); + if (ret) { + /* This is not present in older glusterfs 
versions, + * so don't error out + */ + ret = 0; + } memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); ret = dict_get_int32 (vols, key, &rb_status); if (ret) goto out; - new_volinfo->rb_status = rb_status; + new_volinfo->rep_brick.rb_status = rb_status; - if (new_volinfo->rb_status > GF_RB_STATUS_NONE) { + if (new_volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, @@ -2635,7 +2705,7 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; ret = glusterd_brickinfo_new_from_brick (src_brick, - &new_volinfo->src_brick); + &new_volinfo->rep_brick.src_brick); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to create" " src brickinfo"); @@ -2650,12 +2720,24 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; ret = glusterd_brickinfo_new_from_brick (dst_brick, - &new_volinfo->dst_brick); + &new_volinfo->rep_brick.dst_brick); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to create" " dst brickinfo"); goto out; } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_get_str (vols, key, &rb_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rb_id_str, new_volinfo->rep_brick.rb_id); + } } @@ -3161,8 +3243,8 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node) } else if (pending_node->type == GD_NODE_REBALANCE) { volinfo = pending_node->node; - if (volinfo->defrag) - rpc = volinfo->defrag->rpc; + if (volinfo->rebal.defrag) + rpc = volinfo->rebal.defrag->rpc; } else if (pending_node->type == GD_NODE_NFS) { nfs = pending_node->node; @@ -4691,7 +4773,7 @@ out: int glusterd_is_defrag_on (glusterd_volinfo_t *volinfo) { - return (volinfo->defrag != NULL); + return (volinfo->rebal.defrag != NULL); } gf_boolean_t @@ -4793,8 +4875,8 @@ int glusterd_is_rb_started(glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_started:status=%d", volinfo->rb_status); - return (volinfo->rb_status == GF_RB_STATUS_STARTED); + "is_rb_started:status=%d", volinfo->rep_brick.rb_status); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_STARTED); } @@ -4802,9 +4884,9 @@ int glusterd_is_rb_paused ( glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_paused:status=%d", volinfo->rb_status); + "is_rb_paused:status=%d", volinfo->rep_brick.rb_status); - return (volinfo->rb_status == GF_RB_STATUS_PAUSED); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_PAUSED); } inline int @@ -4812,10 +4894,10 @@ glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status) { gf_log ("", GF_LOG_DEBUG, "setting status from %d to %d", - volinfo->rb_status, + volinfo->rep_brick.rb_status, status); - volinfo->rb_status = status; + volinfo->rep_brick.rb_status = status; return 0; } @@ -4823,19 +4905,27 @@ inline int glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src, glusterd_brickinfo_t *dst) { - if (!volinfo->src_brick || !volinfo->dst_brick) + glusterd_replace_brick_t *rb = NULL; + + GF_ASSERT (volinfo); + + rb = &volinfo->rep_brick; + + if (!rb->src_brick || !rb->dst_brick) return -1; - if (strcmp (volinfo->src_brick->hostname, src->hostname) || - strcmp (volinfo->src_brick->path, src->path)) { + if (strcmp (rb->src_brick->hostname, src->hostname) || + strcmp (rb->src_brick->path, src->path)) { gf_log("", GF_LOG_ERROR, "Replace brick src bricks differ"); 
return -1; } - if (strcmp (volinfo->dst_brick->hostname, dst->hostname) || - strcmp (volinfo->dst_brick->path, dst->path)) { + + if (strcmp (rb->dst_brick->hostname, dst->hostname) || + strcmp (rb->dst_brick->path, dst->path)) { gf_log ("", GF_LOG_ERROR, "Replace brick dst bricks differ"); return -1; } + return 0; } @@ -5831,7 +5921,7 @@ glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr, if (!glusterd_is_service_running (pidfile, &pid)) { glusterd_handle_defrag_start (volinfo, op_errstr, len, cmd, - cbk); + cbk, volinfo->rebal.op); } else { glusterd_rebalance_rpc_create (volinfo, priv, cmd); } @@ -5847,10 +5937,10 @@ glusterd_restart_rebalance (glusterd_conf_t *conf) char op_errstr[256]; list_for_each_entry (volinfo, &conf->volumes, vol_list) { - if (!volinfo->defrag_cmd) + if (!volinfo->rebal.defrag_cmd) continue; glusterd_volume_defrag_restart (volinfo, op_errstr, 256, - volinfo->defrag_cmd, NULL); + volinfo->rebal.defrag_cmd, NULL); } return ret; } @@ -5859,13 +5949,15 @@ glusterd_restart_rebalance (glusterd_conf_t *conf) void glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo) { + glusterd_rebalance_t *rebal = NULL; GF_ASSERT (volinfo); - volinfo->rebalance_files = 0; - volinfo->rebalance_data = 0; - volinfo->lookedup_files = 0; - volinfo->rebalance_failures = 0; - volinfo->rebalance_time = 0; + rebal = &volinfo->rebal; + rebal->rebalance_files = 0; + rebal->rebalance_data = 0; + rebal->lookedup_files = 0; + rebal->rebalance_failures = 0; + rebal->rebalance_time = 0; } @@ -5990,17 +6082,17 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, "failed to get run-time"); if (files) - volinfo->rebalance_files = files; + volinfo->rebal.rebalance_files = files; if (size) - volinfo->rebalance_data = size; + volinfo->rebal.rebalance_data = size; if (lookup) - volinfo->lookedup_files = lookup; + volinfo->rebal.lookedup_files = lookup; if (status) - volinfo->defrag_status = status; + volinfo->rebal.defrag_status = status; if (failures) - volinfo->rebalance_failures = failures; + volinfo->rebal.rebalance_failures = failures; if (run_time) - volinfo->rebalance_time = run_time; + volinfo->rebal.rebalance_time = run_time; return ret; } @@ -6715,7 +6807,8 @@ out: * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd () { +is_origin_glusterd () +{ int ret = 0; uuid_t lock_owner = {0,}; @@ -6725,3 +6818,63 @@ is_origin_glusterd () { return (uuid_compare (MY_UUID, lock_owner) == 0); } + +int +glusterd_generate_and_set_task_id (dict_t *dict, char *key) +{ + int ret = -1; + uuid_t task_id = {0,}; + char *uuid_str = NULL; + xlator_t *this = NULL; + + GF_ASSERT (dict); + + this = THIS; + GF_ASSERT (this); + + uuid_generate (task_id); + uuid_str = gf_strdup (uuid_utoa (task_id)); + if (!uuid_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, key, uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s in dict", + key); + goto out; + } + gf_log (this->name, GF_LOG_INFO, "Generated task-id %s for key %s", + uuid_str, key); + +out: + if (ret) + GF_FREE (uuid_str); + return ret; +} + +int +glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key) +{ + int ret = -1; + char tmp_str[40] = {0,}; + char *task_id_str = NULL; + + GF_ASSERT (dict); + GF_ASSERT (key); + + uuid_unparse (uuid, tmp_str); + task_id_str = gf_strdup (tmp_str); + if (!task_id_str) + return -1; + + ret = dict_set_dynstr (dict, key, task_id_str); + if (ret) { + GF_FREE (task_id_str); + gf_log (THIS->name, GF_LOG_ERROR, + "Error 
setting uuid in dict with key %s", key); + } + + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index c90c9d918..e5e6123cb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -486,4 +486,10 @@ gf_boolean_t glusterd_is_any_volume_in_server_quorum (xlator_t *this); gf_boolean_t does_gd_meet_server_quorum (xlator_t *this); + +int +glusterd_generate_and_set_task_id (dict_t *dict, char *key); + +int +glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 9ddeedb10..83e98f78c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -955,7 +955,7 @@ glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr) ret = -1; goto out; } - if (volinfo->rb_status != GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status != GF_RB_STATUS_NONE) { snprintf (msg, sizeof(msg), "replace-brick session is " "in progress for the volume '%s'", volname); gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); @@ -1595,7 +1595,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) goto out; } - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; list_add_tail (&volinfo->vol_list, &priv->volumes); vol_added = _gf_true; out: diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 204a6adfc..4ef2ab696 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -68,6 +68,33 @@ struct rpcsvc_program *all_programs[] = { }; int rpcsvc_programs_count = (sizeof (all_programs) / sizeof (all_programs[0])); +const char *gd_op_list[GD_OP_MAX + 1] = { + [GD_OP_NONE] = "Invalid op", + [GD_OP_CREATE_VOLUME] = "Create", + [GD_OP_DELETE_VOLUME] = "Delete", + [GD_OP_START_VOLUME] = "Start", + [GD_OP_STOP_VOLUME] = "Stop", + [GD_OP_DEFRAG_VOLUME] = "Rebalance", + [GD_OP_ADD_BRICK] = "Add brick", + [GD_OP_REMOVE_BRICK] = "Remove brick", + [GD_OP_REPLACE_BRICK] = "Replace brick", + [GD_OP_SET_VOLUME] = "Set", + [GD_OP_RESET_VOLUME] = "Reset", + [GD_OP_SYNC_VOLUME] = "Sync", + [GD_OP_LOG_ROTATE] = "Log rotate", + [GD_OP_GSYNC_SET] = "Geo-replication", + [GD_OP_PROFILE_VOLUME] = "Profile", + [GD_OP_QUOTA] = "Quota", + [GD_OP_STATUS_VOLUME] = "Status", + [GD_OP_REBALANCE] = "Rebalance", + [GD_OP_HEAL_VOLUME] = "Heal", + [GD_OP_STATEDUMP_VOLUME] = "Statedump", + [GD_OP_LIST_VOLUME] = "Lists", + [GD_OP_CLEARLOCKS_VOLUME] = "Clear locks", + [GD_OP_DEFRAG_BRICK_VOLUME] = "Rebalance", + [GD_OP_MAX] = "Invalid op" +}; + static int glusterd_opinfo_init () { diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 7152bd6a2..aaf4df9b9 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -83,6 +83,7 @@ typedef enum glusterd_op_ { GD_OP_MAX, } glusterd_op_t; +extern const char * gd_op_list[]; struct glusterd_store_iter_ { int fd; FILE *file; @@ -236,6 +237,29 @@ typedef enum glusterd_vol_backend_ { GD_VOL_BK_BD = 1, } glusterd_vol_backend_t; +struct glusterd_rebalance_ { + gf_defrag_status_t defrag_status; + uint64_t rebalance_files; + uint64_t rebalance_data; + uint64_t lookedup_files; + glusterd_defrag_info_t *defrag; + gf_cli_defrag_type defrag_cmd; + uint64_t rebalance_failures; + uuid_t rebalance_id; + double rebalance_time; + glusterd_op_t op; +}; + +typedef 
struct glusterd_rebalance_ glusterd_rebalance_t; + +struct glusterd_replace_brick_ { + gf_rb_status_t rb_status; + glusterd_brickinfo_t *src_brick; + glusterd_brickinfo_t *dst_brick; + uuid_t rb_id; +}; + +typedef struct glusterd_replace_brick_ glusterd_replace_brick_t; struct glusterd_volinfo_ { char volname[GLUSTERD_MAX_VOLUME_NAME]; @@ -255,19 +279,10 @@ struct glusterd_volinfo_ { glusterd_store_handle_t *node_state_shandle; /* Defrag/rebalance related */ - gf_defrag_status_t defrag_status; - uint64_t rebalance_files; - uint64_t rebalance_data; - uint64_t lookedup_files; - glusterd_defrag_info_t *defrag; - gf_cli_defrag_type defrag_cmd; - uint64_t rebalance_failures; - double rebalance_time; + glusterd_rebalance_t rebal; /* Replace brick status */ - gf_rb_status_t rb_status; - glusterd_brickinfo_t *src_brick; - glusterd_brickinfo_t *dst_brick; + glusterd_replace_brick_t rep_brick; int version; uint32_t cksum; @@ -644,7 +659,8 @@ int glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req); int glusterd_handle_cli_label_volume (rpcsvc_request_t *req); int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len, int cmd, defrag_cbk_fn_t cbk); + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op); int glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, glusterd_conf_t *priv, int cmd); |
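The recurring pattern across this patch is that the node originating an operation (rebalance start, remove-brick start, replace-brick start) generates a UUID task-id, stores its string form in the op dict under a well-known key (GF_REBALANCE_TID_KEY, GF_REMOVE_BRICK_TID_KEY, GF_REPLACE_BRICK_TID_KEY), persists it in volinfo, and the CLI callback later reads it back and prints "ID: <uuid>". The sketch below illustrates only that generate/stringify/store flow in isolation, under the assumption of libuuid; kv_set and generate_and_set_task_id are hypothetical stand-ins for glusterd's dict_set_dynstr and glusterd_generate_and_set_task_id, not real glusterd APIs.

/* Standalone sketch of the task-id flow introduced by this patch:
 * generate a uuid, unparse it to a string, and record it under a
 * well-known key so a later status/stop command can report it.
 * Build with:  cc task_id_sketch.c -luuid
 * The fixed-size kv slot is a toy stand-in for glusterd's dict_t. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <uuid/uuid.h>

#define REBALANCE_TID_KEY "rebalance-id"   /* mirrors GF_REBALANCE_TID_KEY */

struct kv {
        char key[64];
        char val[64];
};

/* Store one key/value pair; returns 0 on success, -1 if either
 * string does not fit. */
static int
kv_set (struct kv *slot, const char *key, const char *val)
{
        if (strlen (key) >= sizeof (slot->key) ||
            strlen (val) >= sizeof (slot->val))
                return -1;
        strcpy (slot->key, key);
        strcpy (slot->val, val);
        return 0;
}

/* Same shape as the patch's glusterd_generate_and_set_task_id:
 * create the id, turn it into its canonical string form, and
 * record it under 'key'. */
static int
generate_and_set_task_id (struct kv *ctx, const char *key)
{
        uuid_t task_id;
        char   uuid_str[37] = {0,};   /* 36 chars + NUL */

        uuid_generate (task_id);
        uuid_unparse (task_id, uuid_str);

        return kv_set (ctx, key, uuid_str);
}

int
main (void)
{
        struct kv ctx = {{0,}, {0,}};

        if (generate_and_set_task_id (&ctx, REBALANCE_TID_KEY)) {
                fprintf (stderr, "failed to set task-id\n");
                return EXIT_FAILURE;
        }

        /* The CLI callback side of the patch reads the same key back
         * and reports it to the user. */
        printf ("Starting rebalance on volume <vol> has been successful.\n"
                "ID: %s\n", ctx.val);
        return EXIT_SUCCESS;
}

In glusterd proper the string is heap-allocated with gf_strdup and handed to dict_set_dynstr, which takes ownership on success and must be freed by the caller on failure; the fixed-size copy above exists only to keep the sketch self-contained.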