diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-brick-ops.c')
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 709 |
1 files changed, 530 insertions, 179 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index a14828e98..596503c21 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -335,7 +335,7 @@ out: /* Handler functions */ int -glusterd_handle_add_brick (rpcsvc_request_t *req) +__glusterd_handle_add_brick (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -358,15 +358,16 @@ glusterd_handle_add_brick (rpcsvc_request_t *req) GF_ASSERT (req); - if (!xdr_to_generic (req->msg[0], &cli_req, - (xdrproc_t)xdr_gf_cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; snprintf (err_str, sizeof (err_str), "Garbage args received"); goto out; } - gf_log ("glusterd", GF_LOG_INFO, "Received add brick req"); + gf_log (this->name, GF_LOG_INFO, "Received add brick req"); if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ @@ -376,11 +377,11 @@ glusterd_handle_add_brick (rpcsvc_request_t *req) cli_req.dict.dict_len, &dict); if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); snprintf (err_str, sizeof (err_str), "Unable to decode " - "the buffer"); + "the command"); goto out; } } @@ -388,45 +389,50 @@ glusterd_handle_add_brick (rpcsvc_request_t *req) ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); snprintf (err_str, sizeof (err_str), "Unable to get volume " "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } if (!(ret = glusterd_check_volume_exists (volname))) { ret = -1; - snprintf(err_str, 2048, "Volume %s does not exist", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); + snprintf (err_str, sizeof (err_str), "Volume %s does not exist", + volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } ret = dict_get_int32 (dict, "count", &brick_count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); snprintf (err_str, sizeof (err_str), "Unable to get volume " "brick count"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } ret = dict_get_int32 (dict, "replica-count", &replica_count); if (!ret) { - gf_log (THIS->name, GF_LOG_INFO, "replica-count is %d", + gf_log (this->name, GF_LOG_INFO, "replica-count is %d", replica_count); } ret = dict_get_int32 (dict, "stripe-count", &stripe_count); if (!ret) { - gf_log (THIS->name, GF_LOG_INFO, "stripe-count is %d", + gf_log (this->name, GF_LOG_INFO, "stripe-count is %d", stripe_count); } + if (!dict_get (dict, "force")) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get flag"); + goto out; + } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { snprintf (err_str, sizeof (err_str), "Unable to get volinfo " "for volume name %s", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } @@ -442,10 +448,10 @@ glusterd_handle_add_brick (rpcsvc_request_t *req) goto brick_val; if ((brick_count % volinfo->dist_leaf_count) != 0) { - snprintf(err_str, 2048, "Incorrect number of bricks" - " supplied %d with count %d", + snprintf (err_str, sizeof (err_str), "Incorrect number " + "of bricks supplied %d with count %d", brick_count, volinfo->dist_leaf_count); - gf_log("glusterd", GF_LOG_ERROR, "%s", err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); ret = -1; goto out; } @@ -461,7 +467,7 @@ glusterd_handle_add_brick (rpcsvc_request_t *req) err_str, sizeof (err_str)); if (ret == -1) { - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } @@ -471,7 +477,7 @@ glusterd_handle_add_brick (rpcsvc_request_t *req) ret = dict_set_int32 (dict, "stripe-count", stripe_count); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to set the stripe-count in dict"); goto out; } @@ -483,7 +489,7 @@ glusterd_handle_add_brick (rpcsvc_request_t *req) &type, err_str, sizeof (err_str)); if (ret == -1) { - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } @@ -493,7 +499,7 @@ glusterd_handle_add_brick (rpcsvc_request_t *req) ret = dict_set_int32 (dict, "replica-count", replica_count); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to set the replica-count in dict"); goto out; } @@ -503,14 +509,14 @@ brick_val: if (ret) { snprintf (err_str, sizeof (err_str), "Unable to get volume " "bricks"); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } if (type != volinfo->type) { ret = dict_set_int32 (dict, "type", type); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to set the new type in dict"); } @@ -526,8 +532,6 @@ out: cli_rsp = &rsp; glusterd_to_cli (req, cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict); - if (dict) - dict_unref (dict); ret = 0; //sent error to cli, prevent second reply } @@ -536,9 +540,80 @@ out: return ret; } +int +glusterd_handle_add_brick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_add_brick); +} + +static int +subvol_matcher_init (int **subvols, int count) +{ + int ret = -1; + + *subvols = GF_CALLOC (count, sizeof(int), gf_gld_mt_int); + if (*subvols) + ret = 0; + + return ret; +} + +static void +subvol_matcher_update (int *subvols, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + glusterd_brickinfo_t *tmp = NULL; + int32_t sub_volume = 0; + int pos = 0; + + list_for_each_entry (tmp, &volinfo->bricks, brick_list) { + + if (strcmp (tmp->hostname, brickinfo->hostname) || + strcmp (tmp->path, brickinfo->path)) { + pos++; + continue; + } + gf_log (THIS->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK, + brickinfo->hostname, brickinfo->path, + volinfo->volname); + sub_volume = (pos / volinfo->dist_leaf_count); + subvols[sub_volume]++; + break; + } + +} + +static int +subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str, + size_t err_len, char *vol_type) +{ + int i = 0; + int ret = 0; + + do { + + if (subvols[i] % volinfo->dist_leaf_count == 0) { + continue; + } else { + ret = -1; + snprintf (err_str, err_len, + "Bricks not from same subvol for %s", vol_type); + gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + break; + } + } while (++i < volinfo->subvol_count); + + return ret; +} + +static void +subvol_matcher_destroy (int *subvols) +{ + GF_FREE (subvols); +} int -glusterd_handle_remove_brick (rpcsvc_request_t *req) +__glusterd_handle_remove_brick (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -550,10 +625,7 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) int i = 1; glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; - int32_t pos = 0; - int32_t sub_volume = 0; - int32_t sub_volume_start = 0; - int32_t sub_volume_end = 0; + int *subvols = NULL; glusterd_brickinfo_t *tmp = NULL; char err_str[2048] = {0}; gf_cli_rsp rsp = {0,}; @@ -565,18 +637,23 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) int found = 0; int diff_count = 0; char *volname = 0; + xlator_t *this = NULL; GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); - if (!xdr_to_generic (req->msg[0], &cli_req, - (xdrproc_t)xdr_gf_cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; + snprintf (err_str, sizeof (err_str), "Received garbage args"); goto out; } - gf_log ("glusterd", GF_LOG_INFO, "Received rem brick req"); + gf_log (this->name, GF_LOG_INFO, "Received rem brick req"); if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ @@ -586,36 +663,42 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) cli_req.dict.dict_len, &dict); if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); goto out; } } ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Unable to get volname"); + snprintf (err_str, sizeof (err_str), "Unable to get volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } ret = dict_get_int32 (dict, "count", &count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); + snprintf (err_str, sizeof (err_str), "Unable to get brick " + "count"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - snprintf (err_str, 2048, "Volume %s does not exist", - volname); - gf_log ("", GF_LOG_ERROR, "%s", err_str); + snprintf (err_str, sizeof (err_str),"Volume %s does not exist", + volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } ret = dict_get_int32 (dict, "replica-count", &replica_count); if (!ret) { - gf_log (THIS->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "request to change replica-count to %d", replica_count); ret = gd_rmbr_validate_replica_count (volinfo, replica_count, count, err_str, @@ -632,7 +715,7 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) ret = dict_set_int32 (dict, "replica-count", replica_count); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to set the replica_count " "in dict"); goto out; @@ -651,12 +734,12 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) strcpy (vol_type, "distribute"); } - /* Do not allow remove-brick if the volume is plain stripe */ + /* Do not allow remove-brick if the volume is a stripe volume*/ if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) && (volinfo->brick_count == volinfo->stripe_count)) { - snprintf (err_str, 2048, - "Removing brick from a plain stripe is not allowed"); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); + snprintf (err_str, sizeof (err_str), + "Removing brick from a stripe volume is not allowed"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); ret = -1; goto out; } @@ -664,11 +747,11 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) if (!replica_count && (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) && (volinfo->brick_count == volinfo->dist_leaf_count)) { - snprintf (err_str, 2048, + snprintf (err_str, sizeof(err_str), "Removing bricks from stripe-replicate" " configuration is not allowed without reducing " "replica or stripe count explicitly."); - gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); ret = -1; goto out; } @@ -676,11 +759,11 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) if (!replica_count && (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) && (volinfo->brick_count == volinfo->dist_leaf_count)) { - snprintf (err_str, 2048, + snprintf (err_str, sizeof (err_str), "Removing bricks from replicate configuration " "is not allowed without reducing replica count " "explicitly."); - gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); ret = -1; goto out; } @@ -690,10 +773,10 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) { if (volinfo->dist_leaf_count && (count % volinfo->dist_leaf_count)) { - snprintf (err_str, 2048, "Remove brick incorrect" - " brick count of %d for %s %d", + snprintf (err_str, sizeof (err_str), "Remove brick " + "incorrect brick count of %d for %s %d", count, vol_type, volinfo->dist_leaf_count); - gf_log ("", GF_LOG_ERROR, "%s", err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); ret = -1; goto out; } @@ -707,22 +790,32 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) } strcpy (brick_list, " "); + + if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->subvol_count > 1)) { + ret = subvol_matcher_init (&subvols, volinfo->subvol_count); + if (ret) + goto out; + } + while ( i <= count) { - snprintf (key, 256, "brick%d", i); + snprintf (key, sizeof (key), "brick%d", i); ret = dict_get_str (dict, key, &brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); + snprintf (err_str, sizeof (err_str), "Unable to get %s", + key); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } - gf_log ("", GF_LOG_DEBUG, "Remove brick count %d brick: %s", - i, brick); + gf_log (this->name, GF_LOG_DEBUG, "Remove brick count %d brick:" + " %s", i, brick); ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo); if (ret) { - snprintf(err_str, 2048,"Incorrect brick %s for volume" - " %s", brick, volname); - gf_log ("", GF_LOG_ERROR, "%s", err_str); + snprintf (err_str, sizeof (err_str), "Incorrect brick " + "%s for volume %s", brick, volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); goto out; } strcat(brick_list, brick); @@ -743,7 +836,7 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) found = 0; list_for_each_entry (tmp, &volinfo->bricks, brick_list) { tmp_brick_idx++; - gf_log (THIS->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_TRACE, "validate brick %s:%s (%d %d %d)", tmp->hostname, tmp->path, tmp_brick_idx, brick_index, volinfo->replica_count); @@ -761,8 +854,9 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) if (found) continue; - snprintf(err_str, 2048,"Bricks are from same subvol"); - gf_log (THIS->name, GF_LOG_INFO, + snprintf (err_str, sizeof (err_str), "Bricks are from " + "same subvol"); + gf_log (this->name, GF_LOG_INFO, "failed to validate brick %s:%s (%d %d %d)", tmp->hostname, tmp->path, tmp_brick_idx, brick_index, volinfo->replica_count); @@ -771,36 +865,18 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req) goto out; } - pos = 0; - list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - - if (strcmp (tmp->hostname,brickinfo->hostname) || - strcmp (tmp->path, brickinfo->path)) { - pos++; - continue; - } + /* Find which subvolume the brick belongs to */ + subvol_matcher_update (subvols, volinfo, brickinfo); + } - gf_log ("", GF_LOG_INFO, "Found brick"); - if (!sub_volume && (volinfo->dist_leaf_count > 1)) { - sub_volume = (pos / volinfo->dist_leaf_count) + 1; - sub_volume_start = (volinfo->dist_leaf_count * - (sub_volume - 1)); - sub_volume_end = (volinfo->dist_leaf_count * - sub_volume) - 1; - } else { - if (pos < sub_volume_start || - pos >sub_volume_end) { - ret = -1; - snprintf(err_str, 2048,"Bricks not from" - " same subvol for %s", - vol_type); - gf_log ("", GF_LOG_ERROR, - "%s", err_str); - goto out; - } - } - break; - } + /* Check if the bricks belong to the same subvolumes.*/ + if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->subvol_count > 1)) { + ret = subvol_matcher_verify (subvols, volinfo, + err_str, sizeof(err_str), + vol_type); + if (ret) + goto out; } ret = glusterd_op_begin_synctask (req, GD_OP_REMOVE_BRICK, dict); @@ -810,24 +886,125 @@ out: rsp.op_ret = -1; rsp.op_errno = 0; if (err_str[0] == '\0') - snprintf (err_str, sizeof (err_str), "Operation failed"); - gf_log ("", GF_LOG_ERROR, "%s", err_str); + snprintf (err_str, sizeof (err_str), + "Operation failed"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); rsp.op_errstr = err_str; cli_rsp = &rsp; glusterd_to_cli (req, cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict); - if (dict) - dict_unref (dict); ret = 0; //sent error to cli, prevent second reply } + GF_FREE (brick_list); + subvol_matcher_destroy (subvols); free (cli_req.dict.dict_val); //its malloced by xdr return ret; } +int +glusterd_handle_remove_brick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_remove_brick); +} + +static int +_glusterd_restart_gsync_session (dict_t *this, char *key, + data_t *value, void *data) +{ + char *slave = NULL; + char *slave_buf = NULL; + char *path_list = NULL; + char *slave_vol = NULL; + char *slave_ip = NULL; + char *conf_path = NULL; + char **errmsg = NULL; + int ret = -1; + glusterd_gsync_status_temp_t *param = NULL; + gf_boolean_t is_running = _gf_false; + + param = (glusterd_gsync_status_temp_t *)data; + + GF_ASSERT (param); + GF_ASSERT (param->volinfo); + + slave = strchr(value->data, ':'); + if (slave) { + slave++; + slave_buf = gf_strdup (slave); + if (!slave_buf) { + gf_log ("", GF_LOG_ERROR, + "Failed to gf_strdup"); + ret = -1; + goto out; + } + } + else + return 0; + + ret = dict_set_dynstr (param->rsp_dict, "slave", slave_buf); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store slave"); + if (slave_buf) + GF_FREE(slave_buf); + goto out; + } + + ret = glusterd_get_slave_details_confpath (param->volinfo, + param->rsp_dict, + &slave_ip, &slave_vol, + &conf_path, errmsg); + if (ret) { + if (*errmsg) + gf_log ("", GF_LOG_ERROR, "%s", *errmsg); + else + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave or confpath details."); + goto out; + } + + /* In cases that gsyncd is not running, we will not invoke it + * because of add-brick. */ + ret = glusterd_check_gsync_running_local (param->volinfo->volname, + slave, conf_path, + &is_running); + if (ret) { + gf_log ("", GF_LOG_ERROR, "gsync running validation failed."); + goto out; + } + if (_gf_false == is_running) { + gf_log ("", GF_LOG_DEBUG, "gsync session for %s and %s is" + " not running on this node. Hence not restarting.", + param->volinfo->volname, slave); + ret = 0; + goto out; + } + + ret = glusterd_get_local_brickpaths (param->volinfo, &path_list); + if (!path_list) { + gf_log ("", GF_LOG_DEBUG, "This node not being part of" + " volume should not be running gsyncd. Hence" + " no gsyncd process to restart."); + ret = 0; + goto out; + } + + ret = glusterd_check_restart_gsync_session (param->volinfo, slave, + param->rsp_dict, path_list, + conf_path, 0); + if (ret) + gf_log ("", GF_LOG_ERROR, + "Unable to restart gsync session."); + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret); + return ret; +} /* op-sm */ @@ -835,17 +1012,21 @@ int glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, char *bricks, dict_t *dict) { - glusterd_brickinfo_t *brickinfo = NULL; - char *brick = NULL; - int32_t i = 1; - char *brick_list = NULL; - char *free_ptr1 = NULL; - char *free_ptr2 = NULL; - char *saveptr = NULL; - int32_t ret = -1; - int32_t stripe_count = 0; - int32_t replica_count = 0; - int32_t type = 0; + char *brick = NULL; + int32_t i = 1; + char *brick_list = NULL; + char *free_ptr1 = NULL; + char *free_ptr2 = NULL; + char *saveptr = NULL; + int32_t ret = -1; + int32_t stripe_count = 0; + int32_t replica_count = 0; + int32_t type = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_gsync_status_temp_t param = {0, }; + gf_boolean_t restart_needed = 0; + char msg[1024] __attribute__((unused)) = {0, }; + int caps = 0; GF_ASSERT (volinfo); @@ -903,13 +1084,15 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, if (stripe_count) { volinfo->stripe_count = stripe_count; } - volinfo->dist_leaf_count = (volinfo->stripe_count * - volinfo->replica_count); + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); /* backward compatibility */ volinfo->sub_count = ((volinfo->dist_leaf_count == 1) ? 0: volinfo->dist_leaf_count); + volinfo->subvol_count = (volinfo->brick_count / + volinfo->dist_leaf_count); + ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) goto out; @@ -924,13 +1107,40 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, if (count) brick = strtok_r (brick_list+1, " \n", &saveptr); +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) + caps = CAPS_BD | CAPS_THIN | + CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; +#endif while (i <= count) { - ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, &brickinfo); if (ret) goto out; +#ifdef HAVE_BD_XLATOR + /* Check for VG/thin pool if its BD volume */ + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 0, msg); + if (ret) { + gf_log (THIS->name, GF_LOG_CRITICAL, "%s", msg); + goto out; + } + /* if anyone of the brick does not have thin support, + disable it for entire volume */ + caps &= brickinfo->caps; + } else + caps = 0; +#endif + + if (uuid_is_null (brickinfo->uuid)) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, + brickinfo->hostname, brickinfo->path); + goto out; + } + } ret = glusterd_brick_start (volinfo, brickinfo, _gf_true); @@ -938,8 +1148,27 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, goto out; i++; brick = strtok_r (NULL, " \n", &saveptr); + + /* Check if the brick is added in this node, and set + * the restart_needed flag. */ + if ((!uuid_compare (brickinfo->uuid, MY_UUID)) && + !restart_needed) { + restart_needed = 1; + gf_log ("", GF_LOG_DEBUG, + "Restart gsyncd session, if it's already " + "running."); + } } + /* If the restart_needed flag is set, restart gsyncd sessions for that + * particular master with all the slaves. */ + if (restart_needed) { + param.rsp_dict = dict; + param.volinfo = volinfo; + dict_foreach (volinfo->gsync_slaves, + _glusterd_restart_gsync_session, ¶m); + } + volinfo->caps = caps; out: GF_FREE (free_ptr1); GF_FREE (free_ptr2); @@ -1016,6 +1245,7 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) gf_boolean_t brick_alloc = _gf_false; char *all_bricks = NULL; char *str_ret = NULL; + gf_boolean_t is_force = _gf_false; priv = THIS->private; if (!priv) @@ -1035,15 +1265,6 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) goto out; } - if (volinfo->backend == GD_VOL_BK_BD) { - snprintf (msg, sizeof (msg), "Add brick is not supported for " - "Block backend volume %s.", volname); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - ret = glusterd_validate_volume_id (dict, volinfo); if (ret) goto out; @@ -1078,6 +1299,8 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) goto out; } + is_force = dict_get_str_boolean (dict, "force", _gf_false); + if (bricks) { brick_list = gf_strdup (bricks); all_bricks = gf_strdup (bricks); @@ -1119,10 +1342,21 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) } if (!uuid_compare (brickinfo->uuid, MY_UUID)) { - ret = glusterd_brick_create_path (brickinfo->hostname, - brickinfo->path, +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 1, msg); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "%s", + msg); + *op_errstr = gf_strdup (msg); + goto out; + } + } +#endif + + ret = glusterd_validate_and_create_brickpath (brickinfo, volinfo->volume_id, - op_errstr); + op_errstr, is_force); if (ret) goto out; } @@ -1157,17 +1391,22 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) char msg[2048] = {0,}; int32_t flag = 0; gf1_op_commands cmd = GF_OP_CMD_NONE; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname); goto out; } @@ -1179,7 +1418,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) snprintf (msg, sizeof (msg), "Replace brick is in progress on " "volume %s. Please retry after replace-brick " "operation is committed or aborted", volname); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; goto out; @@ -1187,7 +1426,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } cmd = flag; @@ -1204,21 +1443,60 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_START: { + if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) && + dict_get (dict, "replica-count")) { + snprintf (msg, sizeof(msg), "Migration of data is not " + "needed when reducing replica count. Use the" + " 'force' option"); + errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + goto out; + } + if (GLUSTERD_STATUS_STARTED != volinfo->status) { - snprintf (msg, sizeof (msg), "Volume %s needs to be started " - "before remove-brick (you can use 'force' or " - "'commit' to override this behavior)", + snprintf (msg, sizeof (msg), "Volume %s needs to be " + "started before remove-brick (you can use " + "'force' or 'commit' to override this " + "behavior)", volinfo->volname); + errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + goto out; + } + if (!gd_is_remove_brick_committed (volinfo)) { + snprintf (msg, sizeof (msg), "An earlier remove-brick " + "task exists for volume %s. Either commit it" + " or stop it before starting a new task.", volinfo->volname); errstr = gf_strdup (msg); - gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr); + gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick" + " task exists for volume %s.", + volinfo->volname); goto out; } if (glusterd_is_defrag_on(volinfo)) { - errstr = gf_strdup("Rebalance is in progress. Please retry" - " after completion"); - gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr); + errstr = gf_strdup("Rebalance is in progress. Please " + "retry after completion"); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } + + if (is_origin_glusterd (dict)) { + ret = glusterd_generate_and_set_task_id + (dict, GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing remove-brick-id"); + ret = 0; + } + } break; } @@ -1240,7 +1518,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "count", &brick_count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } @@ -1253,7 +1531,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); if (ret && errstr) { if (op_errstr) *op_errstr = errstr; @@ -1389,15 +1667,6 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) goto out; } - /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { - case GF_DEFRAG_STATUS_FAILED: - case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; - default: - break; - } - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) goto out; @@ -1412,42 +1681,74 @@ out: int glusterd_op_remove_brick (dict_t *dict, char **op_errstr) { - int ret = -1; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *brick = NULL; - int32_t count = 0; - int32_t i = 1; - char key[256] = {0,}; - int32_t flag = 0; - char err_str[4096] = {0,}; - int need_rebalance = 0; - int force = 0; - gf1_op_commands cmd = 0; - int32_t replica_count = 0; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char key[256] = {0,}; + int32_t flag = 0; + char err_str[4096] = {0,}; + int need_rebalance = 0; + int force = 0; + gf1_op_commands cmd = 0; + int32_t replica_count = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + char *task_id_str = NULL; + xlator_t *this = NULL; + dict_t *bricks_dict = NULL; + char *brick_tmpstr = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); goto out; } ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get command"); goto out; } cmd = flag; + /* Set task-id, if available, in ctx dict for operations other than + * start + */ + if (is_origin_glusterd (dict) && (cmd != GF_OP_CMD_START)) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, dict, + GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set remove-brick-id"); + goto out; + } + } + } + + /* Clear task-id, rebal.op and stored bricks on commmitting/stopping + * remove-brick */ + if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) { + uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + dict_unref (volinfo->rebal.dict); + volinfo->rebal.dict = NULL; + } + ret = -1; switch (cmd) { case GF_OP_CMD_NONE: @@ -1468,7 +1769,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } @@ -1476,7 +1777,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } @@ -1486,6 +1787,15 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } case GF_OP_CMD_START: + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Missing remove-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + volinfo->rebal.op = GD_OP_REMOVE_BRICK; + } force = 0; break; @@ -1496,13 +1806,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_COMMIT_FORCE: if (volinfo->decommission_in_progress) { - if (volinfo->defrag) { - LOCK (&volinfo->defrag->lock); + if (volinfo->rebal.defrag) { + LOCK (&volinfo->rebal.defrag->lock); /* Fake 'rebalance-complete' so the graph change happens right away */ - volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_COMPLETE; - UNLOCK (&volinfo->defrag->lock); + UNLOCK (&volinfo->rebal.defrag->lock); } /* Graph change happens in rebalance _cbk function, no need to do anything here */ @@ -1520,15 +1831,44 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) goto out; } - + /* Save the list of bricks for later usage. Right now this is required + * for displaying the task parameters with task status in volume status. + */ + bricks_dict = dict_new (); + if (!bricks_dict) { + ret = -1; + goto out; + } + ret = dict_set_int32 (bricks_dict, "count", count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to save remove-brick count"); + goto out; + } while ( i <= count) { snprintf (key, 256, "brick%d", i); ret = dict_get_str (dict, key, &brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); + gf_log (this->name, GF_LOG_ERROR, "Unable to get %s", + key); goto out; } + brick_tmpstr = gf_strdup (brick); + if (!brick_tmpstr) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to duplicate brick name"); + goto out; + } + ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add brick to dict"); + goto out; + } + brick_tmpstr = NULL; + ret = glusterd_op_perform_remove_brick (volinfo, brick, force, &need_rebalance); if (ret) @@ -1537,13 +1877,16 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = dict_get_int32 (dict, "replica-count", &replica_count); if (!ret) { - gf_log (THIS->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "changing replica count %d to %d on volume %s", volinfo->replica_count, replica_count, volinfo->volname); volinfo->replica_count = replica_count; - volinfo->dist_leaf_count = (volinfo->stripe_count * - replica_count); + volinfo->sub_count = replica_count; + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); + volinfo->subvol_count = (volinfo->brick_count / + volinfo->dist_leaf_count); + if (replica_count == 1) { if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { volinfo->type = GF_CLUSTER_TYPE_NONE; @@ -1556,37 +1899,41 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } } } + volinfo->rebal.dict = bricks_dict; + bricks_dict = NULL; ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles"); + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo"); + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { + switch (volinfo->rebal.defrag_status) { case GF_DEFRAG_STATUS_FAILED: case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; default: break; } if (!force && need_rebalance) { /* perform the rebalance operations */ - ret = glusterd_handle_defrag_start (volinfo, err_str, 4096, - GF_DEFRAG_CMD_START_FORCE, - glusterd_remove_brick_migrate_cbk); + ret = glusterd_handle_defrag_start + (volinfo, err_str, sizeof (err_str), + GF_DEFRAG_CMD_START_FORCE, + glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); + if (!ret) volinfo->decommission_in_progress = 1; if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to start the rebalance"); } } else { @@ -1598,5 +1945,9 @@ out: if (ret && err_str[0] && op_errstr) *op_errstr = gf_strdup (err_str); + GF_FREE (brick_tmpstr); + if (bricks_dict) + dict_unref (bricks_dict); + return ret; } |
