diff options
author | Kaushal M <kaushal@redhat.com> | 2012-09-14 11:45:34 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2012-12-19 13:32:49 -0800 |
commit | 5eb8bac561b7374589bd72d597ed7eec95aa7de6 (patch) | |
tree | 028a5aa6582b0497cf0ffda62bf023bc88b391b3 | |
parent | da7ca1efcf3a621c27f05d621715e57fdc5aa397 (diff) |
glusterd, cli: Task id's for async tasks
This patch introduces task-id's for async tasks like rebalance, remove-brick and
replace-brick. An id is generated for each task when it is started and displayed
to the user in cli output. The status of running tasks is also included in the
output of "volume status" along with its id, so that a user can easily track the
progress of an async task.
Also,
* added tests for this feature into the regression test suite.
* added a python script for creating files, 'create-files.py', courtesy
Vijaykumar Koppad (vkoppad@redhat.com) into the test suite.
This patch reverts the revert commit 698deb33d731df6de84da8ae8ee4045e1543a168.
BUG: 857330
Change-Id: Id43d7cb629a38f47f733fbc18cb4c5f2f0327c7a
Signed-off-by: Kaushal M <kaushal@redhat.com>
Reviewed-on: http://review.gluster.org/4294
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
20 files changed, 1607 insertions, 329 deletions
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index c0909cd0dcf..22a69da730f 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -1144,6 +1144,7 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, uint64_t failures = 0; double elapsed = 0; char *size_str = NULL; + char *task_id_str = NULL; if (-1 == req->rpc_status) { goto out; @@ -1193,15 +1194,24 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, } } - if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS))) { - /* All other possibility is about starting a volume */ - if (rsp.op_ret && strcmp (rsp.op_errstr, "")) + if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) && + !(global_state->mode & GLUSTER_MODE_XML)) { + /* All other possibilites are about starting a rebalance */ + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (rsp.op_ret && strcmp (rsp.op_errstr, "")) { snprintf (msg, sizeof (msg), "%s", rsp.op_errstr); - else - snprintf (msg, sizeof (msg), - "Starting rebalance on volume %s has been %s", - volname, (rsp.op_ret) ? "unsuccessful": - "successful"); + } else { + if (!rsp.op_ret) { + snprintf (msg, sizeof (msg), + "Starting rebalance on volume %s has " + "been successful.\nID: %s", volname, + task_id_str); + } else { + snprintf (msg, sizeof (msg), + "Starting rebalance on volume %s has " + "been unsuccessful.", volname); + } + } goto done; } @@ -1743,6 +1753,8 @@ gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, char *cmd_str = "unknown"; cli_local_t *local = NULL; call_frame_t *frame = NULL; + char *task_id_str = NULL; + dict_t *rsp_dict = NULL; if (-1 == req->rpc_status) { goto out; @@ -1763,10 +1775,32 @@ gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - switch (cmd) { + if (rsp.dict.dict_len) { + rsp_dict = dict_new (); + if (!rsp_dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, + &rsp_dict); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "Failed to unserialize rsp_dict"); + goto out; + } + } + switch (cmd) { case GF_OP_CMD_START: cmd_str = "start"; + + ret = dict_get_str (rsp_dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "remove-brick-id is not present in dict"); + goto out; + } break; case GF_OP_CMD_COMMIT: cmd_str = "commit"; @@ -1788,7 +1822,7 @@ gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, (rsp.op_ret) ? "unsuccessful": "successful"); if (global_state->mode & GLUSTER_MODE_XML) { - ret = cli_xml_output_vol_remove_brick (_gf_false, NULL, + ret = cli_xml_output_vol_remove_brick (_gf_false, rsp_dict, rsp.op_ret, rsp.op_errno, rsp.op_errstr); if (ret) @@ -1797,10 +1831,14 @@ gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - if (rsp.op_ret) - cli_err ("volume remove-brick: failed: %s", rsp.op_errstr); - else - cli_out ("volume remove-brick: success"); + if (rsp.op_ret) { + cli_err ("volume remove-brick %s: failed: %s", cmd_str, + rsp.op_errstr); + } else { + cli_out ("volume remove-brick %s: success", cmd_str); + if (GF_OP_CMD_START == cmd) + cli_out ("ID: %s", task_id_str); + } ret = rsp.op_ret; @@ -1829,7 +1867,8 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, gf1_cli_replace_op replace_op = 0; char *rb_operation_str = NULL; dict_t *rsp_dict = NULL; - char msg[1024] = {0,}; + char msg[1024] = {0,}; + char *task_id_str = NULL; if (-1 == req->rpc_status) { goto out; @@ -1854,33 +1893,48 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, goto out; } + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, + "failed to " + "unserialize rsp buffer to dictionary"); + goto out; + } + } + switch (replace_op) { case GF_REPLACE_OP_START: - if (rsp.op_ret) - rb_operation_str = "replace-brick failed to start"; - else - rb_operation_str = "replace-brick started successfully"; + if (rsp.op_ret) { + rb_operation_str = gf_strdup ("replace-brick failed to" + " start"); + } else { + ret = dict_get_str (rsp_dict, GF_REPLACE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, "Failed to get " + "\"replace-brick-id\" from dict"); + goto out; + } + ret = gf_asprintf (&rb_operation_str, + "replace-brick started successfully" + "\nID: %s", task_id_str); + if (ret < 0) + goto out; + } break; case GF_REPLACE_OP_STATUS: - if (rsp.op_ret || ret) - rb_operation_str = "replace-brick status unknown"; - else { - if (rsp.dict.dict_len) { - /* Unserialize the dictionary */ - rsp_dict = dict_new (); - - ret = dict_unserialize (rsp.dict.dict_val, - rsp.dict.dict_len, - &rsp_dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } - } + if (rsp.op_ret || ret) { + rb_operation_str = gf_strdup ("replace-brick status " + "unknown"); + } else { ret = dict_get_str (rsp_dict, "status-reply", &status_reply); if (ret) { @@ -1889,23 +1943,27 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - rb_operation_str = status_reply; + rb_operation_str = gf_strdup (status_reply); } break; case GF_REPLACE_OP_PAUSE: if (rsp.op_ret) - rb_operation_str = "replace-brick pause failed"; + rb_operation_str = gf_strdup ("replace-brick pause " + "failed"); else - rb_operation_str = "replace-brick paused successfully"; + rb_operation_str = gf_strdup ("replace-brick paused " + "successfully"); break; case GF_REPLACE_OP_ABORT: if (rsp.op_ret) - rb_operation_str = "replace-brick abort failed"; + rb_operation_str = gf_strdup ("replace-brick abort " + "failed"); else - rb_operation_str = "replace-brick aborted successfully"; + rb_operation_str = gf_strdup ("replace-brick aborted " + "successfully"); break; case GF_REPLACE_OP_COMMIT: @@ -1926,9 +1984,11 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, if (rsp.op_ret || ret) - rb_operation_str = "replace-brick commit failed"; + rb_operation_str = gf_strdup ("replace-brick commit " + "failed"); else - rb_operation_str = "replace-brick commit successful"; + rb_operation_str = gf_strdup ("replace-brick commit " + "successful"); break; @@ -1939,7 +1999,7 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, } if (rsp.op_ret && (strcmp (rsp.op_errstr, ""))) { - rb_operation_str = rsp.op_errstr; + rb_operation_str = gf_strdup (rsp.op_errstr); } gf_log ("cli", GF_LOG_INFO, "Received resp to replace brick"); @@ -1963,6 +2023,17 @@ gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, ret = rsp.op_ret; out: + if (frame) + frame->local = NULL; + + if (local) { + dict_unref (local->dict); + cli_local_wipe (local); + } + + if (rb_operation_str) + GF_FREE (rb_operation_str); + cli_cmd_broadcast_response (ret); free (rsp.dict.dict_val); if (rsp_dict) @@ -3034,7 +3105,7 @@ gf_cli_remove_brick (call_frame_t *frame, xlator_t *this, GLUSTER_CLI_REMOVE_BRICK, this, cli_rpc_prog, NULL); } else { - /* Need rebalance status to e sent :-) */ + /* Need rebalance status to be sent :-) */ req_dict = dict_new (); if (!req_dict) { ret = -1; @@ -5276,6 +5347,55 @@ out: return; } + +static void +cli_print_volume_tasks (dict_t *dict) { + int ret = -1; + int tasks = 0; + char *op = 0; + char *task_id_str = NULL; + int status = 0; + char key[1024] = {0,}; + int i = 0; + + ret = dict_get_int32 (dict, "tasks", &tasks); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "Failed to get tasks count"); + return; + } + + if (tasks == 0) { + cli_out ("There are no active volume tasks"); + return; + } + + cli_out ("%15s%40s%15s", "Task", "ID", "Status"); + cli_out ("%15s%40s%15s", "----", "--", "------"); + for (i = 0; i < tasks; i++) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.type", i); + ret = dict_get_str(dict, key, &op); + if (ret) + return; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.id", i); + ret = dict_get_str (dict, key, &task_id_str); + if (ret) + return; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.status", i); + ret = dict_get_int32 (dict, key, &status); + if (ret) + return; + + cli_out ("%15s%40s%15d", op, task_id_str, status); + } + +} + static int gf_cli_status_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) @@ -5522,6 +5642,9 @@ gf_cli_status_cbk (struct rpc_req *req, struct iovec *iov, } } cli_out (" "); + + if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) + cli_print_volume_tasks (dict); cont: ret = rsp.op_ret; diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c index e4393266238..88410ef5767 100644 --- a/cli/src/cli-xml-output.c +++ b/cli/src/cli-xml-output.c @@ -1387,6 +1387,74 @@ out: } int +cli_xml_output_vol_status_tasks (cli_local_t *local, dict_t *dict) { + int ret = -1; + char *task_type = NULL; + char *task_id_str = NULL; + int status = 0; + int tasks = 0; + char key[1024] = {0,}; + int i = 0; + + /* <tasks> */ + ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"tasks"); + XML_RET_CHECK_AND_GOTO (ret, out); + + ret = dict_get_int32 (dict, "tasks", &tasks); + if (ret) + goto out; + + for (i = 0; i < tasks; i++) { + /* <task> */ + ret = xmlTextWriterStartElement (local->writer, + (xmlChar *)"task"); + XML_RET_CHECK_AND_GOTO (ret, out); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.type", i); + ret = dict_get_str (dict, key, &task_type); + if (ret) + goto out; + ret = xmlTextWriterWriteFormatElement (local->writer, + (xmlChar *)"type", + "%s", task_type); + XML_RET_CHECK_AND_GOTO (ret, out); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.id", i); + ret = dict_get_str (dict, key, &task_id_str); + if (ret) + goto out; + ret = xmlTextWriterWriteFormatElement (local->writer, + (xmlChar *)"id", + "%s", task_id_str); + XML_RET_CHECK_AND_GOTO (ret, out); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.status", i); + ret = dict_get_int32 (dict, key, &status); + if (ret) + goto out; + ret = xmlTextWriterWriteFormatElement (local->writer, + (xmlChar *)"status", + "%d", status); + XML_RET_CHECK_AND_GOTO (ret, out); + + /* </task> */ + ret = xmlTextWriterEndElement (local->writer); + XML_RET_CHECK_AND_GOTO (ret, out); + } + + /* </tasks> */ + ret = xmlTextWriterEndElement (local->writer); + + +out: + gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int cli_xml_output_vol_status (cli_local_t *local, dict_t *dict) { #if (HAVE_LIB_XML) @@ -1513,6 +1581,12 @@ cli_xml_output_vol_status (cli_local_t *local, dict_t *dict) XML_RET_CHECK_AND_GOTO (ret, out); } + if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) { + ret = cli_xml_output_vol_status_tasks (local, dict); + if (ret) + goto out; + } + /* </volume> */ ret = xmlTextWriterEndElement (local->writer); XML_RET_CHECK_AND_GOTO (ret, out); @@ -2919,6 +2993,7 @@ cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret, int ret = -1; xmlTextWriterPtr writer = NULL; xmlBufferPtr buf = NULL; + char *task_id_str = NULL; ret = cli_begin_xml_output (&writer, &buf); if (ret) @@ -2932,6 +3007,14 @@ cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret, ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRebalance"); XML_RET_CHECK_AND_GOTO (ret, out); + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (ret == 0) { + ret = xmlTextWriterWriteFormatElement (writer, + (xmlChar *)"task-id", + "%s", task_id_str); + XML_RET_CHECK_AND_GOTO (ret, out); + } + ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op", "%d", op); XML_RET_CHECK_AND_GOTO (ret, out); @@ -2965,6 +3048,7 @@ cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict, int ret = -1; xmlTextWriterPtr writer = NULL; xmlBufferPtr buf = NULL; + char *task_id_str = NULL; ret = cli_begin_xml_output (&writer, &buf); if (ret) @@ -2978,6 +3062,14 @@ cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict, ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRemoveBrick"); XML_RET_CHECK_AND_GOTO (ret, out); + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret == 0) { + ret = xmlTextWriterWriteFormatElement (writer, + (xmlChar *)"task-id", + "%s", task_id_str); + XML_RET_CHECK_AND_GOTO (ret, out); + } + if (status_op) { ret = cli_xml_output_vol_rebalance_status (writer, dict); if (ret) @@ -3008,6 +3100,7 @@ cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict, int status = 0; uint64_t files = 0; char *current_file = 0; + char *task_id_str = NULL; xmlTextWriterPtr writer = NULL; xmlBufferPtr buf = NULL; @@ -3023,6 +3116,14 @@ cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict, ret = xmlTextWriterStartElement (writer, (xmlChar *)"volReplaceBrick"); XML_RET_CHECK_AND_GOTO (ret, out); + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &task_id_str); + if (ret == 0) { + ret = xmlTextWriterWriteFormatElement (writer, + (xmlChar *)"task-id", + "%s", task_id_str); + XML_RET_CHECK_AND_GOTO (ret, out); + } + ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op", "%d", op); XML_RET_CHECK_AND_GOTO (ret, out); diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 61edd5ec7c9..dae5398415e 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -142,6 +142,10 @@ #define GF_UUID_BUF_SIZE 50 +#define GF_REBALANCE_TID_KEY "rebalance-id" +#define GF_REMOVE_BRICK_TID_KEY "remove-brick-id" +#define GF_REPLACE_BRICK_TID_KEY "replace-brick-id" + /* NOTE: add members ONLY at the end (just before _MAXVALUE) */ typedef enum { GF_FOP_NULL = 0, diff --git a/tests/bugs/bug-857330/common.rc b/tests/bugs/bug-857330/common.rc new file mode 100644 index 00000000000..f2327a86217 --- /dev/null +++ b/tests/bugs/bug-857330/common.rc @@ -0,0 +1,70 @@ +. $(dirname $0)/../../include.rc + +UUID_REGEX='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}' + +TASK_ID="" +COMMAND="" +PATTERN="" + +function check-and-store-task-id() +{ + TASK_ID="" + + local task_id=$($CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX") + + if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then + return 1 + fi + + TASK_ID=$task_id + return 0; +} + +function check-with-stored-task-id() +{ + local task_id=$($CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX") + + if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then + return 1 + fi + + if [ "$TASK_ID" != "$task_id" ]; then + return 1 + fi + + return 0 +} + +function check-and-store-task-id-xml() +{ + TASK_ID="" + + local task_id=$($CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX") + + if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then + return 1 + fi + + TASK_ID=$task_id + return 0; +} + +function check-with-stored-task-id-xml() +{ + local task_id=$($CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX") + + if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then + return 1 + fi + + if [ "$TASK_ID" != "$task_id" ]; then + return 1 + fi + + return 0 +} + +function get-task-status() +{ + $CLI $COMMAND | grep -o $PATTERN +} diff --git a/tests/bugs/bug-857330/normal.t b/tests/bugs/bug-857330/normal.t new file mode 100755 index 00000000000..abf8e2ac503 --- /dev/null +++ b/tests/bugs/bug-857330/normal.t @@ -0,0 +1,78 @@ +#!/bin/bash + +. $(dirname $0)/common.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 $H0:$B0/${V0}1; +TEST $CLI volume info $V0; +TEST $CLI volume start $V0; + +TEST glusterfs -s $H0 --volfile-id=$V0 $M0; + +TEST python2 $(dirname $0)/../../utils/create-files.py --multi -b 10 -d 10 -n 10 $M0; + +TEST umount $M0; + +############### +## Rebalance ## +############### +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2; + +COMMAND="volume rebalance $V0 start" +PATTERN="ID:" +TEST check-and-store-task-id + +COMMAND="volume status $V0" +PATTERN="Rebalance" +TEST check-with-stored-task-id + +COMMAND="volume rebalance $V0 status" +PATTERN="completed" +EXPECT_WITHIN 300 $PATTERN get-task-status + +################### +## Replace-brick ## +################### +REP_BRICK_PAIR="$H0:$B0/${V0}2 $H0:$B0/${V0}3" + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR start" +PATTERN="ID:" +TEST check-and-store-task-id + +COMMAND="volume status $V0" +PATTERN="Replace" +TEST check-with-stored-task-id + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR status" +PATTERN="complete" +EXPECT_WITHIN 300 $PATTERN get-task-status + +TEST $CLI volume replace-brick $V0 $REP_BRICK_PAIR commit; + +################## +## Remove-brick ## +################## +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 start" +PATTERN="ID:" +TEST check-and-store-task-id + +COMMAND="volume status $V0" +PATTERN="Remove" +TEST check-with-stored-task-id + +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 status" +PATTERN="completed" +EXPECT_WITHIN 300 $PATTERN get-task-status + +TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3 commit + +TEST $CLI volume stop $V0; +TEST $CLI volume delete $V0; +TEST ! $CLI volume info $V0; + +cleanup; diff --git a/tests/bugs/bug-857330/xml.t b/tests/bugs/bug-857330/xml.t new file mode 100755 index 00000000000..a6e0b34cace --- /dev/null +++ b/tests/bugs/bug-857330/xml.t @@ -0,0 +1,101 @@ +#!/bin/bash + +. $(dirname $0)/common.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 $H0:$B0/${V0}1; +TEST $CLI volume info $V0; +TEST $CLI volume start $V0; + +TEST glusterfs -s $H0 --volfile-id=$V0 $M0; + +TEST python2 $(dirname $0)/../../utils/create-files.py --multi -b 10 -d 10 -n 10 $M0; + +TEST umount $M0; + + +############### +## Rebalance ## +############### +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2; + +COMMAND="volume rebalance $V0 start" +PATTERN="task-id" +TEST check-and-store-task-id-xml + +COMMAND="volume status $V0" +PATTERN="id" +TEST check-with-stored-task-id-xml + +COMMAND="volume rebalance $V0 status" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +## TODO: Add tests for rebalance stop + +COMMAND="volume rebalance $V0 status" +PATTERN="completed" +EXPECT_WITHIN 300 $PATTERN get-task-status + +################### +## Replace-brick ## +################### +REP_BRICK_PAIR="$H0:$B0/${V0}2 $H0:$B0/${V0}3" + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR start" +PATTERN="task-id" +TEST check-and-store-task-id-xml + +COMMAND="volume status $V0" +PATTERN="id" +TEST check-with-stored-task-id-xml + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR status" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +## TODO: Add more tests for replace-brick pause|abort + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR status" +PATTERN="complete" +EXPECT_WITHIN 300 $PATTERN get-task-status + +COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR commit" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +################## +## Remove-brick ## +################## +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 start" +PATTERN="task-id" +TEST check-and-store-task-id-xml + +COMMAND="volume status $V0" +PATTERN="id" +TEST check-with-stored-task-id-xml + +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 status" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 status" +PATTERN="completed" +EXPECT_WITHIN 300 $PATTERN get-task-status + +## TODO: Add tests for remove-brick stop + +COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 commit" +PATTERN="task-id" +TEST check-with-stored-task-id-xml + +TEST $CLI volume stop $V0; +TEST $CLI volume delete $V0; +TEST ! $CLI volume info $V0; + +cleanup; diff --git a/tests/utils/create-files.py b/tests/utils/create-files.py new file mode 100755 index 00000000000..0d937eff978 --- /dev/null +++ b/tests/utils/create-files.py @@ -0,0 +1,207 @@ +#!/usr/bin/python + +# This script was developed by Vijaykumar Koppad (vkoppad@redhat.com) +# The latest version of this script can found at +# http://github.com/vijaykumar-koppad/crefi + +from __future__ import with_statement +import sys +import os +import re +import random +from optparse import OptionParser +import time +import string +import errno + +def os_rd(src, size): + fd = os.open(src,os.O_RDONLY) + data = os.read(fd, size) + os.close(fd) + return data + +def os_wr(dest, data): + fd = os.open(dest,os.O_WRONLY|os.O_CREAT|os.O_EXCL, 0644) + os.write(fd, data) + os.close(fd) + return + +def create_sparse_file(fil): + if option.size: + option.random = False + size = option.size + else: + size = random.randint(option.min, option.max) + data = os_rd("/dev/zero", size) + os_wr(fil, data) + return + +def create_binary_file(fil): + if option.size: + option.random = False + size = option.size + else: + size = random.randint(option.min, option.max) + data = os_rd("/dev/urandom", size) + os_wr(fil, data) + return + +def create_txt_file(fil): + if option.size: + option.random = False + size = option.size + else: + size = random.randint(option.min, option.max) + if size < 500*1024: + data = os_rd("/etc/services", size) + os_wr(fil, data) + else: + data = os_rd("/etc/services", 500*1024) + file_size = 0 + fd = os.open(fil,os.O_WRONLY|os.O_CREAT|os.O_EXCL, 0644) + while file_size < size: + os.write(fd, data) + file_size += 500*1024 + os.close(fd) + return + +def get_filename(): + size = option.flen + char = string.uppercase+string.digits + st = ''.join(random.choice(char) for i in range(size)) + ti = str((hex(int(str(time.time()).split('.')[0])))[2:]) + return ti+"~~"+st + +def text_files(files, file_count): + for k in range(files): + if not file_count%option.inter: + print file_count + fil = get_filename() + create_txt_file(fil) + file_count += 1 + return file_count + +def sparse_files(files, file_count): + for k in range(files): + if not file_count%option.inter: + print file_count + fil = get_filename() + create_sparse_file(fil) + file_count += 1 + return file_count + +def binary_files(files, file_count): + for k in range(files): + if not file_count%option.inter: + print file_count + fil = get_filename() + create_binary_file(fil) + file_count += 1 + return file_count + +def human2bytes(size): + size_short = { + 1024 : ['K','KB','KiB','k','kB','kiB'], + 1024*1024 : ['M','MB','MiB'], + 1024*1024*1024 : ['G','GB','GiB'] +} + num = re.search('(\d+)',size).group() + ext = size[len(num):] + num = int(num) + if ext == '': + return num + for value, keys in size_short.items(): + if ext in keys: + size = num*value + return size + +def multipledir(mnt_pnt,brdth,depth,files): + files_count = 1 + for i in range(brdth): + breadth = mnt_pnt+"/"+str(i) + try: + os.makedirs(breadth) + except OSError as ex: + if not ex.errno is errno.EEXIST: + raise + os.chdir(breadth) + dir_depth = breadth + print breadth + for j in range(depth): + dir_depth = dir_depth+"/"+str(j) + try: + os.makedirs(dir_depth) + except OSError as ex: + if not ex.errno is errno.EEXIST: + raise + os.chdir(dir_depth) + if option.file_type == "text": + files_count = text_files(files, files_count) + elif option.file_type == "sparse": + files_count = sparse_files(files, files_count) + elif option.file_type == "binary": + files_count = binary_files(files, files_count) + else: + print "Not a valid file type" + sys.exit(1) + +def singledir(mnt_pnt, files): + files_count = 1 + os.chdir(mnt_pnt) + if option.file_type == "text": + files_count = text_files(files, files_count) + elif option.file_type == "sparse": + files_count = sparse_files(files, files_count) + elif option.file_type == "binary": + files_count = binary_files(files, files_count) + else: + print "Not a valid file type" + sys.exit(1) + +if __name__ == '__main__': + usage = "usage: %prog [option] <MNT_PT>" + parser = OptionParser(usage=usage) + parser.add_option("-n", dest="files",type="int" ,default=100, + help="number of files in each level [default: %default]") + parser.add_option("--size", action = "store",type="string", + help="size of the files to be used") + parser.add_option("--random", action="store_true", default=True, + help="random size of the file between --min and --max " + "[default: %default]") + parser.add_option("--max", action = "store",type="string", default="500K", + help="maximum size of the files, if random is True " + "[default: %default]") + parser.add_option("--min", action = "store",type="string", default="10K", + help="minimum size of the files, if random is True " + "[default: %default]" ) + parser.add_option("--single", action="store_true", dest="dir",default=True, + help="create files in single directory [default: %default]" ) + parser.add_option("--multi", action="store_false", dest="dir", + help="create files in multiple directories") + parser.add_option("-b", dest="brdth",type="int",default=5, + help="number of directories in one level(works with --multi)[default: %default]") + parser.add_option("-d", dest="depth",type="int",default=5, + help="number of levels of directories(works with --multi)[default: %default]") + parser.add_option("-l", dest="flen",type="int" ,default=10, + help="number of bytes for filename " + "[default: %default]") + parser.add_option("-t","--type", action="store", type="string" , dest="file_type",default="text", + help="type of the file to be created (text, sparse, binary) [default: %default]" ) + parser.add_option("-I", dest="inter", type="int", default=100, + help="print number files created of interval [defailt: %dafault]") + (option,args) = parser.parse_args() + if not args: + print "usage: <script> [option] <MNT_PT>" + print "" + sys.exit(1) + args[0] = os.path.abspath(args[0]) + if option.size: + option.size = human2bytes(option.size) + else: + option.max = human2bytes(option.max) + option.min = human2bytes(option.min) + if option.dir: + singledir(args[0], option.files) + else: + multipledir(args[0], option.brdth, option.depth, option.files) + print "creation of files completed.\n" diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 1ca52bc5fc4..105e74079cf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -838,6 +838,7 @@ out: ret = 0; //sent error to cli, prevent second reply } + GF_FREE (brick_list); free (cli_req.dict.dict_val); //its malloced by xdr @@ -1173,17 +1174,22 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) char msg[2048] = {0,}; int32_t flag = 0; gf1_op_commands cmd = GF_OP_CMD_NONE; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname); goto out; } @@ -1195,7 +1201,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) snprintf (msg, sizeof (msg), "Replace brick is in progress on " "volume %s. Please retry after replace-brick " "operation is committed or aborted", volname); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; goto out; @@ -1203,7 +1209,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } cmd = flag; @@ -1221,20 +1227,38 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_START: { if (GLUSTERD_STATUS_STARTED != volinfo->status) { - snprintf (msg, sizeof (msg), "Volume %s needs to be started " - "before remove-brick (you can use 'force' or " - "'commit' to override this behavior)", - volinfo->volname); + snprintf (msg, sizeof (msg), "Volume %s needs to be " + "started before remove-brick (you can use " + "'force' or 'commit' to override this " + "behavior)", volinfo->volname); errstr = gf_strdup (msg); - gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } if (glusterd_is_defrag_on(volinfo)) { - errstr = gf_strdup("Rebalance is in progress. Please retry" - " after completion"); - gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr); + errstr = gf_strdup("Rebalance is in progress. Please " + "retry after completion"); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } + + if (is_origin_glusterd ()) { + ret = glusterd_generate_and_set_task_id + (dict, GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing remove-brick-id"); + ret = 0; + } + } break; } @@ -1256,7 +1280,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "count", &brick_count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } @@ -1269,7 +1293,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); if (ret && errstr) { if (op_errstr) *op_errstr = errstr; @@ -1406,10 +1430,10 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) } /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { + switch (volinfo->rebal.defrag_status) { case GF_DEFRAG_STATUS_FAILED: case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; default: break; } @@ -1428,42 +1452,67 @@ out: int glusterd_op_remove_brick (dict_t *dict, char **op_errstr) { - int ret = -1; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *brick = NULL; - int32_t count = 0; - int32_t i = 1; - char key[256] = {0,}; - int32_t flag = 0; - char err_str[4096] = {0,}; - int need_rebalance = 0; - int force = 0; - gf1_op_commands cmd = 0; - int32_t replica_count = 0; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char key[256] = {0,}; + int32_t flag = 0; + char err_str[4096] = {0,}; + int need_rebalance = 0; + int force = 0; + gf1_op_commands cmd = 0; + int32_t replica_count = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); goto out; } ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get command"); goto out; } cmd = flag; + /* Set task-id, if available, in ctx dict for operations other than + * start + */ + if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, dict, + GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set remove-brick-id"); + goto out; + } + } + } + + /* Clear task-id on completion/stopping of remove-brick operation */ + if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) + uuid_clear (volinfo->rebal.rebalance_id); + ret = -1; switch (cmd) { case GF_OP_CMD_NONE: @@ -1484,7 +1533,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } @@ -1492,7 +1541,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } @@ -1502,6 +1551,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } case GF_OP_CMD_START: + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Missing remove-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + } force = 0; break; @@ -1512,13 +1569,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_COMMIT_FORCE: if (volinfo->decommission_in_progress) { - if (volinfo->defrag) { - LOCK (&volinfo->defrag->lock); + if (volinfo->rebal.defrag) { + LOCK (&volinfo->rebal.defrag->lock); /* Fake 'rebalance-complete' so the graph change happens right away */ - volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_COMPLETE; - UNLOCK (&volinfo->defrag->lock); + UNLOCK (&volinfo->rebal.defrag->lock); } /* Graph change happens in rebalance _cbk function, no need to do anything here */ @@ -1541,7 +1599,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) snprintf (key, 256, "brick%d", i); ret = dict_get_str (dict, key, &brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); + gf_log (this->name, GF_LOG_ERROR, "Unable to get %s", + key); goto out; } @@ -1553,7 +1612,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = dict_get_int32 (dict, "replica-count", &replica_count); if (!ret) { - gf_log (THIS->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "changing replica count %d to %d on volume %s", volinfo->replica_count, replica_count, volinfo->volname); @@ -1575,34 +1634,36 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles"); + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo"); + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { + switch (volinfo->rebal.defrag_status) { case GF_DEFRAG_STATUS_FAILED: case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; default: break; } if (!force && need_rebalance) { /* perform the rebalance operations */ - ret = glusterd_handle_defrag_start (volinfo, err_str, 4096, - GF_DEFRAG_CMD_START_FORCE, - glusterd_remove_brick_migrate_cbk); + ret = glusterd_handle_defrag_start + (volinfo, err_str, sizeof (err_str), + GF_DEFRAG_CMD_START_FORCE, + glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); + if (!ret) volinfo->decommission_in_progress = 1; if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to start the rebalance"); } } else { diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 9e5cae44963..b4ac33cce3b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -370,7 +370,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, goto out; snprintf (key, 256, "volume%d.rebalance", count); - ret = dict_set_int32 (volumes, key, volinfo->defrag_cmd); + ret = dict_set_int32 (volumes, key, volinfo->rebal.defrag_cmd); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 7a2075974e0..adbb40c4fed 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -438,6 +438,8 @@ out: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + ret = 0; + if (clnt_dict) dict_unref (clnt_dict); diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index b38b7ff5a59..e0683d5ba6a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -1827,6 +1827,79 @@ out: } static int +_add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) +{ + + int ret = -1; + char key[128] = {0,}; + char *uuid_str = NULL; + int status = 0; + xlator_t *this = NULL; + + GF_ASSERT (dict); + GF_ASSERT (volinfo); + + this = THIS; + GF_ASSERT (this); + + switch (op) { + case GD_OP_REBALANCE: + case GD_OP_REMOVE_BRICK: + uuid_str = gf_strdup (uuid_utoa (volinfo->rebal.rebalance_id)); + status = volinfo->rebal.defrag_status; + break; + + case GD_OP_REPLACE_BRICK: + uuid_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); + status = volinfo->rep_brick.rb_status; + break; + + default: + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "%s operation doesn't have a" + " task_id", gd_op_list[op]); + goto out; + } + + snprintf (key, sizeof (key), "task%d.type", index); + ret = dict_set_str (dict, key, + (char *)gd_op_list[op]); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task type in dict"); + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.id", index); + + + if (!uuid_str) + goto out; + ret = dict_set_dynstr (dict, key, uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task id in dict"); + goto out; + } + uuid_str = NULL; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.status", index); + ret = dict_set_int32 (dict, key, status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task status in dict"); + goto out; + } + +out: + if (uuid_str) + GF_FREE (uuid_str); + return ret; +} + +static int glusterd_op_status_volume (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { @@ -1845,6 +1918,8 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, dict_t *vol_opts = NULL; gf_boolean_t nfs_disabled = _gf_false; gf_boolean_t shd_enabled = _gf_true; + gf_boolean_t origin_glusterd = _gf_false; + int tasks = 0; this = THIS; GF_ASSERT (this); @@ -1854,6 +1929,8 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, GF_ASSERT (dict); + origin_glusterd = is_origin_glusterd (); + ret = dict_get_uint32 (dict, "cmd", &cmd); if (ret) goto out; @@ -1866,11 +1943,10 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, if ((cmd & GF_CLI_STATUS_ALL)) { ret = glusterd_get_all_volnames (rsp_dict); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to get all volume " "names for status"); } - } ret = dict_set_uint32 (rsp_dict, "cmd", cmd); @@ -1886,7 +1962,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Volume with name: %s " + gf_log (this->name, GF_LOG_ERROR, "Volume with name: %s " "does not exist", volname); goto out; } @@ -1984,23 +2060,56 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, ret = dict_set_int32 (rsp_dict, "brick-index-max", brick_index); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Error setting brick-index-max to dict"); goto out; } ret = dict_set_int32 (rsp_dict, "other-count", other_count); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Error setting other-count to dict"); goto out; } ret = dict_set_int32 (rsp_dict, "count", node_count); - if (ret) - gf_log (THIS->name, GF_LOG_ERROR, + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Error setting node count to dict"); + goto out; + } + + /* Active tasks */ + if (((cmd & GF_CLI_STATUS_MASK) != GF_CLI_STATUS_NONE) || + !origin_glusterd) + goto out; + + if (glusterd_is_defrag_on (volinfo)) { + ret = _add_task_to_dict (rsp_dict, volinfo, volinfo->rebal.op, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } + if (glusterd_is_rb_ongoing (volinfo)) { + ret = _add_task_to_dict (rsp_dict, volinfo, GD_OP_REPLACE_BRICK, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } + + ret = dict_set_int32 (rsp_dict, "tasks", tasks); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Error setting tasks count in dict"); out: - gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -2257,9 +2366,13 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) char *volname = NULL; uint32_t status_cmd = GF_CLI_STATUS_NONE; char *errstr = NULL; + xlator_t *this = THIS; GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + req_dict = dict_new (); if (!req_dict) goto out; @@ -2268,7 +2381,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) op = glusterd_op_get_op (); ctx = (void*)glusterd_op_get_ctx (); if (!ctx) { - gf_log ("", GF_LOG_ERROR, "Null Context for " + gf_log (this->name, GF_LOG_ERROR, "Null Context for " "op %d", op); ret = -1; goto out; @@ -2280,8 +2393,8 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) if (ret) goto out; ctx = op_ctx; - } #undef GD_SYNC_OPCODE_KEY + } dict = ctx; switch (op) { @@ -2316,7 +2429,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) { ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "volname is not present in " "operation ctx"); goto out; @@ -2329,6 +2442,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) if (ret) goto out; } + dict_destroy (req_dict); req_dict = dict_ref (dict); } break; @@ -2339,12 +2453,33 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) break; } + case GD_OP_REMOVE_BRICK: + { + dict_t *dict = ctx; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "volname is not present in " + "operation ctx"); + goto out; + } + + ret = glusterd_dict_set_volid (dict, volname, + op_errstr); + if (ret) + goto out; + + dict_destroy (req_dict); + req_dict = dict_ref (dict); + } + break; + case GD_OP_STATUS_VOLUME: { ret = dict_get_uint32 (dict, "cmd", &status_cmd); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Status command not present " "in op ctx"); goto out; @@ -2361,7 +2496,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) case GD_OP_ADD_BRICK: case GD_OP_REPLACE_BRICK: case GD_OP_RESET_VOLUME: - case GD_OP_REMOVE_BRICK: case GD_OP_LOG_ROTATE: case GD_OP_QUOTA: case GD_OP_PROFILE_VOLUME: @@ -2376,7 +2510,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) { ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "volname is not present in " "operation ctx"); goto out; @@ -3842,6 +3976,7 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, char buf[1024] = {0,}; char *node_str = NULL; glusterd_conf_t *priv = NULL; + glusterd_rebalance_t *rebal = NULL; priv = THIS->private; GF_ASSERT (req_dict); @@ -3857,6 +3992,8 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, if (ret) goto out; + rebal = &volinfo->rebal; + if (rsp_dict) { ret = glusterd_defrag_volume_status_update (volinfo, rsp_dict); @@ -3885,42 +4022,42 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, memset (key, 0 , 256); snprintf (key, 256, "files-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_files); + ret = dict_set_uint64 (op_ctx, key, rebal->rebalance_files); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set file count"); memset (key, 0 , 256); snprintf (key, 256, "size-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_data); + ret = dict_set_uint64 (op_ctx, key, rebal->rebalance_data); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set size of xfer"); memset (key, 0 , 256); snprintf (key, 256, "lookups-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->lookedup_files); + ret = dict_set_uint64 (op_ctx, key, rebal->lookedup_files); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set lookedup file count"); memset (key, 0 , 256); snprintf (key, 256, "status-%d", i); - ret = dict_set_int32 (op_ctx, key, volinfo->defrag_status); + ret = dict_set_int32 (op_ctx, key, rebal->defrag_status); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set status"); memset (key, 0 , 256); snprintf (key, 256, "failures-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_failures); + ret = dict_set_uint64 (op_ctx, key, rebal->rebalance_failures); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set failure count"); memset (key, 0, 256); snprintf (key, 256, "run-time-%d", i); - ret = dict_set_double (op_ctx, key, volinfo->rebalance_time); + ret = dict_set_double (op_ctx, key, rebal->rebalance_time); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set run-time"); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index cab7bde0a9a..3fb00b6de5b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -88,12 +88,12 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, if (!volinfo) return 0; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; if (!defrag) return 0; if ((event == RPC_CLNT_DISCONNECT) && defrag->connected) - volinfo->defrag = NULL; + volinfo->rebal.defrag = NULL; GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); @@ -126,12 +126,12 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, UNLOCK (&defrag->lock); if (!glusterd_is_service_running (pidfile, NULL)) { - if (volinfo->defrag_status == - GF_DEFRAG_STATUS_STARTED) { - volinfo->defrag_status = - GF_DEFRAG_STATUS_FAILED; + if (volinfo->rebal.defrag_status == + GF_DEFRAG_STATUS_STARTED) { + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_FAILED; } else { - volinfo->defrag_cmd = 0; + volinfo->rebal.defrag_cmd = 0; } } @@ -142,7 +142,8 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, defrag->rpc = NULL; } if (defrag->cbk_fn) - defrag->cbk_fn (volinfo, volinfo->defrag_status); + defrag->cbk_fn (volinfo, + volinfo->rebal.defrag_status); GF_FREE (defrag); gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", @@ -161,7 +162,8 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len, int cmd, defrag_cbk_fn_t cbk) + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op) { int ret = -1; glusterd_defrag_info_t *defrag = NULL; @@ -183,22 +185,24 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); if (ret) goto out; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + if (!volinfo->rebal.defrag) goto out; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; defrag->cmd = cmd; + volinfo->rebal.op = op; + LOCK_INIT (&defrag->lock); - volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED; + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; glusterd_volinfo_reset_defrag_stats (volinfo); - volinfo->defrag_cmd = cmd; glusterd_store_perform_node_state_store (volinfo); GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv); @@ -295,13 +299,15 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, int ret = -1; glusterd_defrag_info_t *defrag = NULL; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + + if (!volinfo->rebal.defrag) goto out; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; defrag->cmd = cmd; @@ -392,7 +398,8 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req) req->rpc_err = GARBAGE_ARGS; goto out; } - if (cli_req.dict.dict_len) { + + if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ dict = dict_new (); @@ -400,8 +407,7 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req) cli_req.dict.dict_len, &dict); if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to " + gf_log (this->name, GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); snprintf (msg, sizeof (msg), "Unable to decode the " "command"); @@ -447,6 +453,7 @@ out: req, dict, msg); if (dict) dict_unref (dict); + } free (cli_req.dict.dict_val);//malloced by xdr @@ -458,38 +465,71 @@ out: int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + dict_t *op_ctx = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not found"); + gf_log (this->name, GF_LOG_DEBUG, "volname not found"); goto out; } + ret = dict_get_int32 (dict, "rebalance-command", &cmd); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd not found"); + gf_log (this->name, GF_LOG_DEBUG, "cmd not found"); goto out; } ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "failed to validate"); + gf_log (this->name, GF_LOG_DEBUG, "failed to validate"); goto out; } switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + if (is_origin_glusterd ()) { + op_ctx = glusterd_op_get_ctx (); + if (!op_ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (op_ctx, GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, + &task_id_str); + if (ret) { + snprintf (msg, sizeof (msg), + "Missing rebalance-id"); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); + ret = 0; + } + } ret = glusterd_defrag_start_validate (volinfo, - msg, sizeof (msg)); + msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, - "start validate failed"); + gf_log (this->name, GF_LOG_DEBUG, + "start validate failed"); goto out; } break; @@ -512,43 +552,86 @@ out: int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; - gf_boolean_t volfile_update = _gf_false; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + dict_t *ctx = NULL; + xlator_t *this = NULL; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); + priv = this->private; ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not given"); + gf_log (this->name, GF_LOG_DEBUG, "volname not given"); goto out; } ret = dict_get_int32 (dict, "rebalance-command", &cmd); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "command not given"); + gf_log (this->name, GF_LOG_DEBUG, "command not given"); goto out; } + ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd validate failed"); + gf_log (this->name, GF_LOG_DEBUG, "cmd validate failed"); goto out; } + /* Set task-id, if available, in op_ctx dict for operations other than + * start + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + ret = -1; + goto out; + } + + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REMOVE_BRICK_TID_KEY); + else + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set task-id"); + goto out; + } + } + } + switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Missing rebalance " + "id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + } ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), - cmd, NULL); + cmd, NULL, GD_OP_REBALANCE); break; case GF_DEFRAG_CMD_STOP: /* Fall back to the old volume file in case of decommission*/ @@ -567,7 +650,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } @@ -575,7 +658,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 73e94781717..94f498d33bc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -180,7 +180,7 @@ glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo, if (!volinfo || !brickinfo) goto out; - *brickinfo = volinfo->dst_brick; + *brickinfo = volinfo->rep_brick.dst_brick; ret = 0; @@ -211,38 +211,43 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, char *savetok = NULL; char voldir[PATH_MAX] = {0}; char pidfile[PATH_MAX] = {0}; + char *task_id_str = NULL; + xlator_t *this = NULL; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); + + priv = this->private; GF_ASSERT (priv); ret = dict_get_str (dict, "src-brick", &src_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick"); goto out; } - gf_log ("", GF_LOG_DEBUG, "src brick=%s", src_brick); + gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick); ret = dict_get_str (dict, "dst-brick", &dst_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dest brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get dest brick"); goto out; } - gf_log ("", GF_LOG_DEBUG, "dst brick=%s", dst_brick); + gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op); if (ret) { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "dict get on replace-brick operation failed"); goto out; } @@ -274,7 +279,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, !glusterd_is_valid_volfpath (volname, dst_brick)) { snprintf (msg, sizeof (msg), "brick path %s is too " "long.", dst_brick); - gf_log ("", GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; @@ -283,10 +288,10 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, ret = glusterd_check_gsync_running (volinfo, &is_run); if (ret && (is_run == _gf_false)) - gf_log ("", GF_LOG_WARNING, "Unable to get the status" + gf_log (this->name, GF_LOG_WARNING, "Unable to get the status" " of active "GEOREP" session"); if (is_run) { - gf_log ("", GF_LOG_WARNING, GEOREP" sessions active" + gf_log (this->name, GF_LOG_WARNING, GEOREP" sessions active" "for the volume %s ", volname); snprintf (msg, sizeof(msg), GEOREP" sessions are active " "for the volume %s.\nStop "GEOREP " sessions " @@ -301,29 +306,58 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, if (glusterd_is_defrag_on(volinfo)) { snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " "progress. Please retry after completion", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; goto out; } + ctx = glusterd_op_get_ctx(); + switch (replace_op) { case GF_REPLACE_OP_START: if (glusterd_is_rb_started (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started for volume "); + snprintf (msg, sizeof (msg), "Replace brick is already " + "started for volume"); + gf_log (this->name, GF_LOG_ERROR, msg); + *op_errstr = gf_strdup (msg); ret = -1; goto out; } + if (is_origin_glusterd ()) { + if (!ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (ctx, GF_REPLACE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + + } else { + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing replace-brick-id"); + ret = 0; + } + } break; case GF_REPLACE_OP_PAUSE: if (glusterd_is_rb_paused (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "paused for volume "); + gf_log (this->name, GF_LOG_ERROR, "Replace brick is " + "already paused for volume "); ret = -1; goto out; } else if (!glusterd_is_rb_started(volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not" + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not" " started for volume "); ret = -1; goto out; @@ -332,7 +366,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, case GF_REPLACE_OP_ABORT: if (!glusterd_is_rb_ongoing (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not" + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not" " started or paused for volume "); ret = -1; goto out; @@ -341,7 +375,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, case GF_REPLACE_OP_COMMIT: if (!glusterd_is_rb_ongoing (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not " + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not " "started for volume "); ret = -1; goto out; @@ -361,7 +395,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, goto out; } - gf_log (THIS->name, GF_LOG_ERROR, "%s", *op_errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); ret = -1; goto out; } @@ -381,10 +415,9 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, goto out; } - ctx = glusterd_op_get_ctx(); if (ctx) { if (!glusterd_is_fuse_available ()) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to open /dev/" + gf_log (this->name, GF_LOG_ERROR, "Unable to open /dev/" "fuse (%s), replace-brick command failed", strerror (errno)); snprintf (msg, sizeof(msg), "Fuse unavailable\n " @@ -396,7 +429,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "I AM THE SOURCE HOST"); if (src_brickinfo->port && rsp_dict) { ret = dict_set_int32 (rsp_dict, "src-brick-port", @@ -428,14 +461,14 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, dup_dstbrick = gf_strdup (dst_brick); if (!dup_dstbrick) { ret = -1; - gf_log ("", GF_LOG_ERROR, "Memory allocation failed"); + gf_log (this->name, GF_LOG_ERROR, "Memory allocation failed"); goto out; } host = strtok_r (dup_dstbrick, ":", &savetok); path = strtok_r (NULL, ":", &savetok); if (!host || !path) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "dst brick %s is not of form <HOSTNAME>:<export-dir>", dst_brick); ret = -1; @@ -451,7 +484,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, if (ret) { *op_errstr = gf_strdup (msg); ret = -1; - gf_log (THIS->name, GF_LOG_ERROR, *op_errstr); + gf_log (this->name, GF_LOG_ERROR, *op_errstr); goto out; } @@ -459,8 +492,8 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, (replace_op == GF_REPLACE_OP_START || replace_op == GF_REPLACE_OP_COMMIT_FORCE)) { - volinfo->src_brick = src_brickinfo; - volinfo->dst_brick = dst_brickinfo; + volinfo->rep_brick.src_brick = src_brickinfo; + volinfo->rep_brick.dst_brick = dst_brickinfo; } if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) { @@ -469,7 +502,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, *op_errstr = gf_strdup ("Incorrect source or " "destination brick"); if (*op_errstr) - gf_log (THIS->name, GF_LOG_ERROR, "%s", *op_errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); goto out; } @@ -511,7 +544,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, out: GF_FREE (dup_dstbrick); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -1499,6 +1532,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) char *dst_brick = NULL; glusterd_brickinfo_t *src_brickinfo = NULL; glusterd_brickinfo_t *dst_brickinfo = NULL; + char *task_id_str = NULL; this = THIS; GF_ASSERT (this); @@ -1508,26 +1542,23 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = dict_get_str (dict, "src-brick", &src_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick"); goto out; } - gf_log (this->name, GF_LOG_DEBUG, - "src brick=%s", src_brick); + gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick); ret = dict_get_str (dict, "dst-brick", &dst_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dst brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get dst brick"); goto out; } - gf_log (this->name, GF_LOG_DEBUG, - "dst brick=%s", dst_brick); + gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick); ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } @@ -1540,28 +1571,30 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); goto out; } ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, &src_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get src-brickinfo"); goto out; } ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get " + gf_log (this->name, GF_LOG_ERROR, "Unable to get " "replace brick destination brickinfo"); goto out; } ret = glusterd_resolve_brick (dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to resolve dst-brickinfo"); goto out; } @@ -1570,29 +1603,62 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) if (ret) goto out; + ctx = glusterd_op_get_ctx(); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get op_ctx"); + ret = -1; + goto out; + } + if ((GF_REPLACE_OP_START != replace_op)) { ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, dict, replace_op); if (ret) goto out; + + /* Set task-id, if available, in op_ctx dict for operations + * other than start + */ + if (!uuid_is_null (volinfo->rep_brick.rb_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rep_brick.rb_id, ctx, + GF_REPLACE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set replace-brick-id"); + goto out; + } + } } switch (replace_op) { case GF_REPLACE_OP_START: { + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Missing replace-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rep_brick.rb_id); + } + if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "I AM THE DESTINATION HOST"); if (!glusterd_is_rb_paused (volinfo)) { - ret = rb_spawn_destination_brick (volinfo, dst_brickinfo); + ret = rb_spawn_destination_brick + (volinfo, dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to spawn destination brick"); + gf_log (this->name, GF_LOG_DEBUG, + "Failed to spawn destination " + "brick"); goto out; } } else { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started=> no need to restart dst brick "); + gf_log (this->name, GF_LOG_ERROR, + "Replace brick is already started=> no " + "need to restart dst brick "); } } @@ -1601,14 +1667,14 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = rb_src_brick_restart (volinfo, src_brickinfo, 1); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not restart src-brick"); + gf_log (this->name, GF_LOG_DEBUG, + "Could not restart src-brick"); goto out; } } if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "adding dst-brick port no"); ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, @@ -1629,7 +1695,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) dst_brickinfo, GF_REPLACE_OP_COMMIT); if (ret) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Commit operation failed"); goto out; } @@ -1639,16 +1705,17 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) case GF_REPLACE_OP_COMMIT_FORCE: { ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); - gf_log (THIS->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Received commit - will be adding dst brick and " "removing src brick"); if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log (THIS->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "I AM THE DESTINATION HOST"); - ret = rb_kill_destination_brick (volinfo, dst_brickinfo); + ret = rb_kill_destination_brick (volinfo, + dst_brickinfo); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "Unable to cleanup dst brick"); goto out; } @@ -1656,39 +1723,41 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_nodesvcs_stop (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Unable to stop nfs server, ret: %d", ret); } ret = glusterd_op_perform_replace_brick (volinfo, src_brick, dst_brick); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to add " - "dst-brick: %s to volume: %s", - dst_brick, volinfo->volname); + gf_log (this->name, GF_LOG_CRITICAL, "Unable to add " + "dst-brick: %s to volume: %s", dst_brick, + volinfo->volname); (void) glusterd_nodesvcs_handle_graph_change (volinfo); goto out; } - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; ret = glusterd_nodesvcs_handle_graph_change (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "Failed to generate nfs volume file"); } ret = glusterd_fetchspec_notify (THIS); glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - glusterd_brickinfo_delete (volinfo->dst_brick); - volinfo->src_brick = volinfo->dst_brick = NULL; + glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; + uuid_clear (volinfo->rep_brick.rb_id); } break; case GF_REPLACE_OP_PAUSE: { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Received pause - doing nothing"); ctx = glusterd_op_get_ctx (); if (ctx) { @@ -1696,7 +1765,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) dst_brickinfo, GF_REPLACE_OP_PAUSE); if (ret) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Pause operation failed"); goto out; } @@ -1715,7 +1784,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) dst_brickinfo, GF_REPLACE_OP_ABORT); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Abort operation failed"); goto out; } @@ -1723,7 +1792,8 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to disable pump"); + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to disable pump"); } if (!glusterd_is_local_addr (src_brickinfo->hostname)) { @@ -1738,7 +1808,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log (THIS->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "I AM THE DESTINATION HOST"); ret = rb_kill_destination_brick (volinfo, dst_brickinfo); if (ret) { @@ -1748,14 +1818,15 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } } glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - glusterd_brickinfo_delete (volinfo->dst_brick); - volinfo->src_brick = volinfo->dst_brick = NULL; + glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; } break; case GF_REPLACE_OP_STATUS: { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "received status - doing nothing"); ctx = glusterd_op_get_ctx (); if (ctx) { @@ -1763,7 +1834,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = dict_set_str (ctx, "status-reply", "replace brick has been paused"); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to set pump status" " in ctx"); goto out; @@ -1787,7 +1858,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, "Couldn't store" + gf_log (this->name, GF_LOG_ERROR, "Couldn't store" " replace brick operation's state"); out: diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 413c8a39abb..bb7c0a76c4a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -891,29 +891,33 @@ glusterd_store_rbstate_write (int fd, glusterd_volinfo_t *volinfo) GF_ASSERT (fd > 0); GF_ASSERT (volinfo); - snprintf (buf, sizeof (buf), "%d", volinfo->rb_status); + snprintf (buf, sizeof (buf), "%d", volinfo->rep_brick.rb_status); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_STATUS, buf); if (ret) goto out; - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { snprintf (buf, sizeof (buf), "%s:%s", - volinfo->src_brick->hostname, - volinfo->src_brick->path); + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_SRC_BRICK, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%s:%s", - volinfo->dst_brick->hostname, - volinfo->dst_brick->path); + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_BRICK, buf); if (ret) goto out; + + uuid_unparse (volinfo->rep_brick.rb_id, buf); + ret = glusterd_store_save_value (fd, GF_REPLACE_BRICK_TID_KEY, + buf); } out: @@ -960,17 +964,29 @@ glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo) GF_ASSERT (fd > 0); GF_ASSERT (volinfo); - if (volinfo->defrag_cmd == GF_DEFRAG_CMD_STATUS) { + if (volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_STATUS) { ret = 0; goto out; } - snprintf (buf, sizeof (buf), "%d", volinfo->defrag_cmd); + snprintf (buf, sizeof (buf), "%d", volinfo->rebal.defrag_cmd); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, buf); if (ret) goto out; + snprintf (buf, sizeof (buf), "%d", volinfo->rebal.op); + ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_DEFRAG_OP, + buf); + if (ret) + goto out; + + if (volinfo->rebal.defrag_cmd) { + uuid_unparse (volinfo->rebal.rebalance_id, buf); + ret = glusterd_store_save_value (fd, + GF_REBALANCE_TID_KEY, + buf); + } out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -2145,22 +2161,26 @@ glusterd_store_retrieve_rbstate (char *volname) while (!ret) { if (!strncmp (key, GLUSTERD_STORE_KEY_RB_STATUS, strlen (GLUSTERD_STORE_KEY_RB_STATUS))) { - volinfo->rb_status = atoi (value); + volinfo->rep_brick.rb_status = atoi (value); } - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { if (!strncmp (key, GLUSTERD_STORE_KEY_RB_SRC_BRICK, strlen (GLUSTERD_STORE_KEY_RB_SRC_BRICK))) { ret = glusterd_brickinfo_new_from_brick (value, - &volinfo->src_brick); + &volinfo->rep_brick.src_brick); if (ret) goto out; } else if (!strncmp (key, GLUSTERD_STORE_KEY_RB_DST_BRICK, strlen (GLUSTERD_STORE_KEY_RB_DST_BRICK))) { ret = glusterd_brickinfo_new_from_brick (value, - &volinfo->dst_brick); + &volinfo->rep_brick.dst_brick); if (ret) goto out; + } else if (!strncmp (key, GF_REPLACE_BRICK_TID_KEY, + strlen (GF_REPLACE_BRICK_TID_KEY))) { + uuid_parse (value, + volinfo->rep_brick.rb_id); } } @@ -2227,13 +2247,30 @@ glusterd_store_retrieve_node_state (char *volname) if (ret) goto out; - if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, - strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { - volinfo->defrag_cmd = atoi (value); - } + while (ret == 0) { + if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, + strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { + volinfo->rebal.defrag_cmd = atoi (value); + } - GF_FREE (key); - GF_FREE (value); + if (volinfo->rebal.defrag_cmd) { + if (!strncmp (key, GF_REBALANCE_TID_KEY, + strlen (GF_REBALANCE_TID_KEY))) + uuid_parse (value, volinfo->rebal.rebalance_id); + + if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP, + strlen (GLUSTERD_STORE_KEY_DEFRAG_OP))) + volinfo->rebal.op = atoi (value); + } + + GF_FREE (key); + GF_FREE (value); + key = NULL; + value = NULL; + + ret = glusterd_store_iter_get_next (iter, &key, &value, + &op_errno); + } if (op_errno != GD_STORE_EOF) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 1ab398c0b77..03ef0005918 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -52,6 +52,7 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" #define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" #define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" +#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" #define GLUSTERD_STORE_KEY_USERNAME "username" #define GLUSTERD_STORE_KEY_PASSWORD "password" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 879bb126a30..4d2d5f8ab85 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1714,16 +1714,18 @@ int32_t glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count) { - int32_t ret = -1; - char prefix[512] = {0,}; - char key[512] = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t i = 1; - char *volume_id_str = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - char *str = NULL; - glusterd_dict_ctx_t ctx = {0}; + int32_t ret = -1; + char prefix[512] = {0,}; + char key[512] = {0,}; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t i = 1; + char *volume_id_str = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + char *str = NULL; + glusterd_dict_ctx_t ctx = {0}; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1794,14 +1796,16 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); - if (!volume_id_str) + if (!volume_id_str) { + ret = -1; goto out; - + } memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.volume_id", count); ret = dict_set_dynstr (dict, key, volume_id_str); if (ret) goto out; + volume_id_str = NULL; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.username", count); @@ -1822,24 +1826,46 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, } memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); - ret = dict_set_int32 (dict, key, volinfo->rb_status); + snprintf (key, 256, "volume%d.rebalance", count); + ret = dict_set_int32 (dict, key, volinfo->rebal.defrag_cmd); if (ret) goto out; + if (volinfo->rebal.defrag_cmd) { + rebalance_id_str = gf_strdup (uuid_utoa + (volinfo->rebal.rebalance_id)); + if (!rebalance_id_str) { + ret = -1; + goto out; + } + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d.rebalance-id", count); + ret = dict_set_dynstr (dict, key, rebalance_id_str); + if (ret) + goto out; + rebalance_id_str = NULL; + } + memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d.rebalance", count); - ret = dict_set_int32 (dict, key, volinfo->defrag_cmd); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_set_uint32 (dict, key, volinfo->rebal.op); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); + ret = dict_set_int32 (dict, key, volinfo->rep_brick.rb_status); if (ret) goto out; - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, count); gf_asprintf (&src_brick, "%s:%s", - volinfo->src_brick->hostname, - volinfo->src_brick->path); + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); ret = dict_set_dynstr (dict, key, src_brick); if (ret) goto out; @@ -1848,11 +1874,24 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, count); gf_asprintf (&dst_brick, "%s:%s", - volinfo->dst_brick->hostname, - volinfo->dst_brick->path); + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); ret = dict_set_dynstr (dict, key, dst_brick); if (ret) goto out; + + rb_id_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); + if (!rb_id_str) { + ret = -1; + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_set_dynstr (dict, key, rb_id_str); + if (ret) + goto out; + rb_id_str = NULL; } snprintf (prefix, sizeof (prefix), "volume%d", count); @@ -1907,6 +1946,10 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, out: + GF_FREE (volume_id_str); + GF_FREE (rebalance_id_str); + GF_FREE (rb_id_str); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; @@ -2473,6 +2516,8 @@ glusterd_import_volinfo (dict_t *vols, int count, char *dst_brick = NULL; char *str = NULL; int rb_status = 0; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; GF_ASSERT (vols); GF_ASSERT (volinfo); @@ -2580,6 +2625,8 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + uuid_parse (volume_id_str, new_volinfo->volume_id); + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.username", count); ret = dict_get_str (vols, key, &str); @@ -2609,23 +2656,46 @@ glusterd_import_volinfo (dict_t *vols, int count, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.rebalance", count); - ret = dict_get_uint32 (vols, key, &new_volinfo->defrag_cmd); + ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); goto out; } - uuid_parse (volume_id_str, new_volinfo->volume_id); + if (new_volinfo->rebal.defrag_cmd) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-id", count); + ret = dict_get_str (vols, key, &rebalance_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rebalance_id_str, + new_volinfo->rebal.rebalance_id); + } + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_get_uint32 (vols, key,(uint32_t *) &new_volinfo->rebal.op); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); ret = dict_get_int32 (vols, key, &rb_status); if (ret) goto out; - new_volinfo->rb_status = rb_status; + new_volinfo->rep_brick.rb_status = rb_status; - if (new_volinfo->rb_status > GF_RB_STATUS_NONE) { + if (new_volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, @@ -2635,7 +2705,7 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; ret = glusterd_brickinfo_new_from_brick (src_brick, - &new_volinfo->src_brick); + &new_volinfo->rep_brick.src_brick); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to create" " src brickinfo"); @@ -2650,12 +2720,24 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; ret = glusterd_brickinfo_new_from_brick (dst_brick, - &new_volinfo->dst_brick); + &new_volinfo->rep_brick.dst_brick); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to create" " dst brickinfo"); goto out; } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_get_str (vols, key, &rb_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rb_id_str, new_volinfo->rep_brick.rb_id); + } } @@ -3161,8 +3243,8 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node) } else if (pending_node->type == GD_NODE_REBALANCE) { volinfo = pending_node->node; - if (volinfo->defrag) - rpc = volinfo->defrag->rpc; + if (volinfo->rebal.defrag) + rpc = volinfo->rebal.defrag->rpc; } else if (pending_node->type == GD_NODE_NFS) { nfs = pending_node->node; @@ -4691,7 +4773,7 @@ out: int glusterd_is_defrag_on (glusterd_volinfo_t *volinfo) { - return (volinfo->defrag != NULL); + return (volinfo->rebal.defrag != NULL); } gf_boolean_t @@ -4793,8 +4875,8 @@ int glusterd_is_rb_started(glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_started:status=%d", volinfo->rb_status); - return (volinfo->rb_status == GF_RB_STATUS_STARTED); + "is_rb_started:status=%d", volinfo->rep_brick.rb_status); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_STARTED); } @@ -4802,9 +4884,9 @@ int glusterd_is_rb_paused ( glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_paused:status=%d", volinfo->rb_status); + "is_rb_paused:status=%d", volinfo->rep_brick.rb_status); - return (volinfo->rb_status == GF_RB_STATUS_PAUSED); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_PAUSED); } inline int @@ -4812,10 +4894,10 @@ glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status) { gf_log ("", GF_LOG_DEBUG, "setting status from %d to %d", - volinfo->rb_status, + volinfo->rep_brick.rb_status, status); - volinfo->rb_status = status; + volinfo->rep_brick.rb_status = status; return 0; } @@ -4823,19 +4905,27 @@ inline int glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src, glusterd_brickinfo_t *dst) { - if (!volinfo->src_brick || !volinfo->dst_brick) + glusterd_replace_brick_t *rb = NULL; + + GF_ASSERT (volinfo); + + rb = &volinfo->rep_brick; + + if (!rb->src_brick || !rb->dst_brick) return -1; - if (strcmp (volinfo->src_brick->hostname, src->hostname) || - strcmp (volinfo->src_brick->path, src->path)) { + if (strcmp (rb->src_brick->hostname, src->hostname) || + strcmp (rb->src_brick->path, src->path)) { gf_log("", GF_LOG_ERROR, "Replace brick src bricks differ"); return -1; } - if (strcmp (volinfo->dst_brick->hostname, dst->hostname) || - strcmp (volinfo->dst_brick->path, dst->path)) { + + if (strcmp (rb->dst_brick->hostname, dst->hostname) || + strcmp (rb->dst_brick->path, dst->path)) { gf_log ("", GF_LOG_ERROR, "Replace brick dst bricks differ"); return -1; } + return 0; } @@ -5831,7 +5921,7 @@ glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr, if (!glusterd_is_service_running (pidfile, &pid)) { glusterd_handle_defrag_start (volinfo, op_errstr, len, cmd, - cbk); + cbk, volinfo->rebal.op); } else { glusterd_rebalance_rpc_create (volinfo, priv, cmd); } @@ -5847,10 +5937,10 @@ glusterd_restart_rebalance (glusterd_conf_t *conf) char op_errstr[256]; list_for_each_entry (volinfo, &conf->volumes, vol_list) { - if (!volinfo->defrag_cmd) + if (!volinfo->rebal.defrag_cmd) continue; glusterd_volume_defrag_restart (volinfo, op_errstr, 256, - volinfo->defrag_cmd, NULL); + volinfo->rebal.defrag_cmd, NULL); } return ret; } @@ -5859,13 +5949,15 @@ glusterd_restart_rebalance (glusterd_conf_t *conf) void glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo) { + glusterd_rebalance_t *rebal = NULL; GF_ASSERT (volinfo); - volinfo->rebalance_files = 0; - volinfo->rebalance_data = 0; - volinfo->lookedup_files = 0; - volinfo->rebalance_failures = 0; - volinfo->rebalance_time = 0; + rebal = &volinfo->rebal; + rebal->rebalance_files = 0; + rebal->rebalance_data = 0; + rebal->lookedup_files = 0; + rebal->rebalance_failures = 0; + rebal->rebalance_time = 0; } @@ -5990,17 +6082,17 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, "failed to get run-time"); if (files) - volinfo->rebalance_files = files; + volinfo->rebal.rebalance_files = files; if (size) - volinfo->rebalance_data = size; + volinfo->rebal.rebalance_data = size; if (lookup) - volinfo->lookedup_files = lookup; + volinfo->rebal.lookedup_files = lookup; if (status) - volinfo->defrag_status = status; + volinfo->rebal.defrag_status = status; if (failures) - volinfo->rebalance_failures = failures; + volinfo->rebal.rebalance_failures = failures; if (run_time) - volinfo->rebalance_time = run_time; + volinfo->rebal.rebalance_time = run_time; return ret; } @@ -6715,7 +6807,8 @@ out: * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd () { +is_origin_glusterd () +{ int ret = 0; uuid_t lock_owner = {0,}; @@ -6725,3 +6818,63 @@ is_origin_glusterd () { return (uuid_compare (MY_UUID, lock_owner) == 0); } + +int +glusterd_generate_and_set_task_id (dict_t *dict, char *key) +{ + int ret = -1; + uuid_t task_id = {0,}; + char *uuid_str = NULL; + xlator_t *this = NULL; + + GF_ASSERT (dict); + + this = THIS; + GF_ASSERT (this); + + uuid_generate (task_id); + uuid_str = gf_strdup (uuid_utoa (task_id)); + if (!uuid_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, key, uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s in dict", + key); + goto out; + } + gf_log (this->name, GF_LOG_INFO, "Generated task-id %s for key %s", + uuid_str, key); + +out: + if (ret) + GF_FREE (uuid_str); + return ret; +} + +int +glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key) +{ + int ret = -1; + char tmp_str[40] = {0,}; + char *task_id_str = NULL; + + GF_ASSERT (dict); + GF_ASSERT (key); + + uuid_unparse (uuid, tmp_str); + task_id_str = gf_strdup (tmp_str); + if (!task_id_str) + return -1; + + ret = dict_set_dynstr (dict, key, task_id_str); + if (ret) { + GF_FREE (task_id_str); + gf_log (THIS->name, GF_LOG_ERROR, + "Error setting uuid in dict with key %s", key); + } + + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index c90c9d9182e..e5e6123cb71 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -486,4 +486,10 @@ gf_boolean_t glusterd_is_any_volume_in_server_quorum (xlator_t *this); gf_boolean_t does_gd_meet_server_quorum (xlator_t *this); + +int +glusterd_generate_and_set_task_id (dict_t *dict, char *key); + +int +glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 6ef50342e6c..8f3ae9ae4df 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1007,7 +1007,7 @@ glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr) ret = -1; goto out; } - if (volinfo->rb_status != GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status != GF_RB_STATUS_NONE) { snprintf (msg, sizeof(msg), "replace-brick session is " "in progress for the volume '%s'", volname); gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); @@ -1601,7 +1601,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) goto out; } - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; list_add_tail (&volinfo->vol_list, &priv->volumes); vol_added = _gf_true; out: diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 204a6adfce5..4ef2ab696a9 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -68,6 +68,33 @@ struct rpcsvc_program *all_programs[] = { }; int rpcsvc_programs_count = (sizeof (all_programs) / sizeof (all_programs[0])); +const char *gd_op_list[GD_OP_MAX + 1] = { + [GD_OP_NONE] = "Invalid op", + [GD_OP_CREATE_VOLUME] = "Create", + [GD_OP_DELETE_VOLUME] = "Delete", + [GD_OP_START_VOLUME] = "Start", + [GD_OP_STOP_VOLUME] = "Stop", + [GD_OP_DEFRAG_VOLUME] = "Rebalance", + [GD_OP_ADD_BRICK] = "Add brick", + [GD_OP_REMOVE_BRICK] = "Remove brick", + [GD_OP_REPLACE_BRICK] = "Replace brick", + [GD_OP_SET_VOLUME] = "Set", + [GD_OP_RESET_VOLUME] = "Reset", + [GD_OP_SYNC_VOLUME] = "Sync", + [GD_OP_LOG_ROTATE] = "Log rotate", + [GD_OP_GSYNC_SET] = "Geo-replication", + [GD_OP_PROFILE_VOLUME] = "Profile", + [GD_OP_QUOTA] = "Quota", + [GD_OP_STATUS_VOLUME] = "Status", + [GD_OP_REBALANCE] = "Rebalance", + [GD_OP_HEAL_VOLUME] = "Heal", + [GD_OP_STATEDUMP_VOLUME] = "Statedump", + [GD_OP_LIST_VOLUME] = "Lists", + [GD_OP_CLEARLOCKS_VOLUME] = "Clear locks", + [GD_OP_DEFRAG_BRICK_VOLUME] = "Rebalance", + [GD_OP_MAX] = "Invalid op" +}; + static int glusterd_opinfo_init () { diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index b80964346b8..8368d680102 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -82,6 +82,7 @@ typedef enum glusterd_op_ { GD_OP_MAX, } glusterd_op_t; +extern const char * gd_op_list[]; struct glusterd_store_iter_ { int fd; FILE *file; @@ -235,6 +236,29 @@ typedef enum glusterd_vol_backend_ { GD_VOL_BK_BD = 1, } glusterd_vol_backend_t; +struct glusterd_rebalance_ { + gf_defrag_status_t defrag_status; + uint64_t rebalance_files; + uint64_t rebalance_data; + uint64_t lookedup_files; + glusterd_defrag_info_t *defrag; + gf_cli_defrag_type defrag_cmd; + uint64_t rebalance_failures; + uuid_t rebalance_id; + double rebalance_time; + glusterd_op_t op; +}; + +typedef struct glusterd_rebalance_ glusterd_rebalance_t; + +struct glusterd_replace_brick_ { + gf_rb_status_t rb_status; + glusterd_brickinfo_t *src_brick; + glusterd_brickinfo_t *dst_brick; + uuid_t rb_id; +}; + +typedef struct glusterd_replace_brick_ glusterd_replace_brick_t; struct glusterd_volinfo_ { char volname[GLUSTERD_MAX_VOLUME_NAME]; @@ -254,19 +278,10 @@ struct glusterd_volinfo_ { glusterd_store_handle_t *node_state_shandle; /* Defrag/rebalance related */ - gf_defrag_status_t defrag_status; - uint64_t rebalance_files; - uint64_t rebalance_data; - uint64_t lookedup_files; - glusterd_defrag_info_t *defrag; - gf_cli_defrag_type defrag_cmd; - uint64_t rebalance_failures; - double rebalance_time; + glusterd_rebalance_t rebal; /* Replace brick status */ - gf_rb_status_t rb_status; - glusterd_brickinfo_t *src_brick; - glusterd_brickinfo_t *dst_brick; + glusterd_replace_brick_t rep_brick; int version; uint32_t cksum; @@ -643,7 +658,8 @@ int glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req); int glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req); int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len, int cmd, defrag_cbk_fn_t cbk); + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op); int glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, glusterd_conf_t *priv, int cmd); |