diff options
author | Kaushal M <kaushal@redhat.com> | 2012-09-14 11:45:34 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2012-12-04 14:44:36 -0800 |
commit | 7b5a21707edbbee1940f7cd3d05043bec998e51a (patch) | |
tree | 0e5fdbc08d6461ec30d8f5cf829d61e187a58c27 /xlators/mgmt/glusterd/src/glusterd-brick-ops.c | |
parent | 6a1b89c0c4221b13c21e66a048e08e4eb95de7bd (diff) |
glusterd, cli: Task IDs for async tasks
This patch introduces task IDs for async tasks like rebalance, remove-brick and
replace-brick. An ID is generated for each task when it is started and is displayed
to the user in the cli output. The status of each running task is also included in the
output of "volume status" along with its ID, so that a user can easily track the
progress of an async task.
Also,
* added tests for this feature to the regression test suite.
* added a Python script for creating files, 'create-files.py', courtesy of
Vijaykumar Koppad (vkoppad@redhat.com), to the test suite.
Change-Id: Ib0c0d12e0d6c8f72ace48d303d7ff3102157e876
BUG: 857330
Signed-off-by: Kaushal M <kaushal@redhat.com>
Reviewed-on: http://review.gluster.org/3942
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-brick-ops.c')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 161 |
1 files changed, 111 insertions, 50 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index a14828e980b..6ab859a10c6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -822,6 +822,7 @@ out: ret = 0; //sent error to cli, prevent second reply } + GF_FREE (brick_list); free (cli_req.dict.dict_val); //its malloced by xdr @@ -1157,17 +1158,22 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) char msg[2048] = {0,}; int32_t flag = 0; gf1_op_commands cmd = GF_OP_CMD_NONE; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname); goto out; } @@ -1179,7 +1185,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) snprintf (msg, sizeof (msg), "Replace brick is in progress on " "volume %s. 
Please retry after replace-brick " "operation is committed or aborted", volname); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; goto out; @@ -1187,7 +1193,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } cmd = flag; @@ -1205,20 +1211,38 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_START: { if (GLUSTERD_STATUS_STARTED != volinfo->status) { - snprintf (msg, sizeof (msg), "Volume %s needs to be started " - "before remove-brick (you can use 'force' or " - "'commit' to override this behavior)", - volinfo->volname); + snprintf (msg, sizeof (msg), "Volume %s needs to be " + "started before remove-brick (you can use " + "'force' or 'commit' to override this " + "behavior)", volinfo->volname); errstr = gf_strdup (msg); - gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } if (glusterd_is_defrag_on(volinfo)) { - errstr = gf_strdup("Rebalance is in progress. Please retry" - " after completion"); - gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr); + errstr = gf_strdup("Rebalance is in progress. 
Please " + "retry after completion"); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } + + if (is_origin_glusterd ()) { + ret = glusterd_generate_and_set_task_id + (dict, GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing remove-brick-id"); + ret = 0; + } + } break; } @@ -1240,7 +1264,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "count", &brick_count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } @@ -1253,7 +1277,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); if (ret && errstr) { if (op_errstr) *op_errstr = errstr; @@ -1390,10 +1414,10 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) } /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { + switch (volinfo->rebal.defrag_status) { case GF_DEFRAG_STATUS_FAILED: case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; default: break; } @@ -1412,42 +1436,67 @@ out: int glusterd_op_remove_brick (dict_t *dict, char **op_errstr) { - int ret = -1; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *brick = NULL; - int32_t count = 0; - int32_t i = 1; - char key[256] = {0,}; - int32_t flag = 0; - char err_str[4096] = {0,}; - int need_rebalance = 0; - int force = 0; - gf1_op_commands cmd = 0; - int32_t replica_count = 0; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *brick = NULL; + int32_t 
count = 0; + int32_t i = 1; + char key[256] = {0,}; + int32_t flag = 0; + char err_str[4096] = {0,}; + int need_rebalance = 0; + int force = 0; + gf1_op_commands cmd = 0; + int32_t replica_count = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); goto out; } ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get command"); goto out; } cmd = flag; + /* Set task-id, if available, in ctx dict for operations other than + * start + */ + if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, dict, + GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set remove-brick-id"); + goto out; + } + } + } + + /* Clear task-id on completion/stopping of remove-brick operation */ + if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) + uuid_clear (volinfo->rebal.rebalance_id); + ret = -1; switch (cmd) { case GF_OP_CMD_NONE: @@ -1468,7 +1517,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } @@ -1476,7 +1525,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = glusterd_store_volinfo (volinfo, 
GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } @@ -1486,6 +1535,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } case GF_OP_CMD_START: + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Missing remove-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + } force = 0; break; @@ -1496,13 +1553,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_COMMIT_FORCE: if (volinfo->decommission_in_progress) { - if (volinfo->defrag) { - LOCK (&volinfo->defrag->lock); + if (volinfo->rebal.defrag) { + LOCK (&volinfo->rebal.defrag->lock); /* Fake 'rebalance-complete' so the graph change happens right away */ - volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_COMPLETE; - UNLOCK (&volinfo->defrag->lock); + UNLOCK (&volinfo->rebal.defrag->lock); } /* Graph change happens in rebalance _cbk function, no need to do anything here */ @@ -1525,7 +1583,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) snprintf (key, 256, "brick%d", i); ret = dict_get_str (dict, key, &brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); + gf_log (this->name, GF_LOG_ERROR, "Unable to get %s", + key); goto out; } @@ -1537,7 +1596,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = dict_get_int32 (dict, "replica-count", &replica_count); if (!ret) { - gf_log (THIS->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "changing replica count %d to %d on volume %s", volinfo->replica_count, replica_count, volinfo->volname); @@ -1559,34 +1618,36 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles"); + 
gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo"); + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { + switch (volinfo->rebal.defrag_status) { case GF_DEFRAG_STATUS_FAILED: case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; default: break; } if (!force && need_rebalance) { /* perform the rebalance operations */ - ret = glusterd_handle_defrag_start (volinfo, err_str, 4096, - GF_DEFRAG_CMD_START_FORCE, - glusterd_remove_brick_migrate_cbk); + ret = glusterd_handle_defrag_start + (volinfo, err_str, sizeof (err_str), + GF_DEFRAG_CMD_START_FORCE, + glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); + if (!ret) volinfo->decommission_in_progress = 1; if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to start the rebalance"); } } else { |