diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 477 |
1 files changed, 469 insertions, 8 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 82239dd5270..ef13665be50 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -10286,6 +10286,100 @@ out: return ret; } +int +glusterd_merge_brick_status (dict_t *dst, dict_t *src) +{ + int64_t volume_count = 0; + int64_t index = 0; + int64_t j = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + char key[PATH_MAX] = {0, }; + char snapbrckcnt[PATH_MAX] = {0, }; + char snapbrckord[PATH_MAX] = {0, }; + int ret = -1; + int32_t brick_online = 0; + xlator_t *this = NULL; + int32_t snap_command = 0; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + if (snap_command == GF_SNAP_OPTION_TYPE_DELETE) { + gf_log (this->name, GF_LOG_DEBUG, "snapshot delete command." + " Need not merge the status of the bricks"); + ret = 0; + goto out; + } + + ret = dict_get_int64 (src, "volcount", &volume_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + + for (index = 0; index < volume_count; index++) { + ret = snprintf (snapbrckcnt, sizeof(snapbrckcnt) - 1, + "snap-vol%ld_brickcount", index+1); + ret = dict_get_int64 (src, snapbrckcnt, &brick_count); + if (ret) { + gf_log (this->name, GF_LOG_TRACE, + "No bricks for this volume in this dict"); + continue; + } + + for (j = 0; j < brick_count; j++) { + /* Fetching data from source dict */ + snprintf (snapbrckord, sizeof(snapbrckord) - 1, + "snap-vol%ld.brick%ld.order", index+1, j); + + ret = dict_get_int64 (src, snapbrckord, &brick_order); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get brick order"); + goto out; + } + + snprintf (key, sizeof (key) - 1, + "snap-vol%ld.brick%ld.status", index+1, + brick_order); + ret = dict_get_int32 (src, key, &brick_online); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the brick status"); + goto out; + } + + ret = dict_set_int32 (dst, key, brick_online); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "set the brick status"); + goto out; + } + brick_online = 0; + } + } + + ret = 0; + +out: + return ret; +} + /* Aggregate missed_snap_counts from different nodes and save it * * in the req_dict of the originator node */ int @@ -10309,6 +10403,13 @@ glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src) goto out; } + ret = glusterd_merge_brick_status (dst, src); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to merge brick " + "status"); + goto out; + } + ret = dict_get_int32 (src, "missed_snap_count", &src_missed_snap_count); if (ret) { @@ -10325,7 +10426,7 @@ glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src) } for (i = 0; i < src_missed_snap_count; i++) { - snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", i); ret = dict_get_str (src, name_buf, &buf); if (ret) { @@ -10351,15 +10452,8 @@ glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src) } tmp_str = NULL; - dst_missed_snap_count++; } - ret = dict_set_int32 (dst, "missed_snap_count", dst_missed_snap_count); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to set dst_missed_snap_count"); - goto out; - } out: if (ret && tmp_str) @@ -12240,6 +12334,373 @@ glusterd_recursive_rmdir (const char *delete_path) gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", delete_path, strerror (errno)); } + +out: + return ret; +} + +gf_boolean_t +glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict, + int down_count, gf_boolean_t first_brick_on, + int8_t snap_force, int quorum_count, + char *quorum_type, char **op_errstr) +{ + gf_boolean_t quorum_met = _gf_false; + char err_str[PATH_MAX] = {0, }; + xlator_t *this = NULL; + int up_count = 0; + + this = THIS; + GF_ASSERT (this); + + if (!volinfo || !dict) { + gf_log (this->name, GF_LOG_WARNING, "input parameters NULL"); + goto out; + } + + if (!snap_force && down_count) { + snprintf (err_str, sizeof (err_str), "One or more bricks may " + "be down. Use the force option "); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + *op_errstr = gf_strdup (err_str); + goto out; + } + + up_count = volinfo->replica_count - down_count; + + if (quorum_type && !strcmp (quorum_type, "fixed")) { + if (up_count >= quorum_count) { + quorum_met = _gf_true; + goto out; + } + } else { + if (volinfo->replica_count % 2 == 0) { + if ((up_count > quorum_count) || + ((up_count == quorum_count) && first_brick_on)) { + quorum_met = _gf_true; + goto out; + } + } else { + if (up_count >= quorum_count) { + quorum_met = _gf_true; + goto out; + } + } + } + + if (!quorum_met) { + snprintf (err_str, sizeof (err_str), "quorum is not met"); + gf_log (this->name, GF_LOG_WARNING, "%s", err_str); + *op_errstr = gf_strdup (err_str); + } + +out: + return quorum_met; +} + +int32_t +glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index, + dict_t *dict, char *key_prefix, + int8_t snap_force, int quorum_count, + char *quorum_type, char **op_errstr) +{ + int ret = 0; + xlator_t *this = NULL; + int64_t i = 0; + int64_t j = 0; + char key[1024] = {0, }; + int down_count = 0; + gf_boolean_t first_brick_on = _gf_true; + glusterd_conf_t *priv = NULL; + gf_boolean_t quorum_met = _gf_false; + int distribute_subvols = 0; + int32_t brick_online = 0; + char err_str[PATH_MAX] = {0, }; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + if (!volinfo || !dict) { + gf_log (this->name, GF_LOG_WARNING, "input parameters NULL"); + goto out; + } + + if (!glusterd_is_volume_replicate (volinfo) || + volinfo->replica_count < 3) { + for (i = 0; i < volinfo->brick_count ; i++) { + /* for a pure distribute volume, and replica volume + with replica count 2, quorum is not met if even + one of its subvolumes is down + */ + snprintf (key, sizeof (key), "%s%ld.brick%ld.status", + key_prefix, index, i); + ret = dict_get_int32 (dict, key, &brick_online); + if (ret || !brick_online) { + ret = 1; + snprintf (err_str, sizeof (err_str), "quorum " + "is not met"); + gf_log (this->name, GF_LOG_ERROR, "%s", + err_str); + *op_errstr = gf_strdup (err_str); + goto out; + } + } + ret = 0; + quorum_met = _gf_true; + } else { + distribute_subvols = volinfo->brick_count / volinfo->replica_count; + for (j = 0; j < distribute_subvols; j++) { + // by default assume quorum is not met + /* TODO: Handle distributed striped replicate volumes + Currently only distributed replicate volumes are + handled. + */ + ret = 1; + quorum_met = _gf_false; + for (i = 0; i < volinfo->replica_count; i++) { + snprintf (key, sizeof (key), + "%s%ld.brick%ld.status", key_prefix, + index, + (j * volinfo->replica_count) + i); + ret = dict_get_int32 (dict, key, &brick_online); + if (ret || !brick_online) { + if (i == 0) + first_brick_on = _gf_false; + down_count++; + } + } + + quorum_met = glusterd_volume_quorum_calculate (volinfo, + dict, + down_count, + first_brick_on, + snap_force, + quorum_count, + quorum_type, + op_errstr); + // goto out if quorum is not met + if (!quorum_met) { + ret = -1; + goto out; + } + + down_count = 0; + first_brick_on = _gf_true; + } + } + + if (quorum_met) { + gf_log (this->name, GF_LOG_DEBUG, "volume %s is in quorum", + volinfo->volname); + ret = 0; + } + +out: + return ret; +} + +int32_t +glusterd_snap_quorum_check (dict_t *dict, gf_boolean_t snap_volume, char **op_errstr) +{ + int64_t i = 0; + int32_t ret = 0; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + int64_t volcount = 0; + char key[PATH_MAX] = {0, }; + xlator_t *this = NULL; + int32_t snap_command = 0; + int8_t snap_force = 0; + int32_t force = 0; + char err_str[PATH_MAX] = {0, }; + int quorum_count = 0; + char *quorum_type = NULL; + int32_t tmp = 0; + char key_prefix[PATH_MAX] = {0, }; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "dict is NULL"); + goto out; + } + + if (snap_volume) { + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the " + "snapshot %s", snapname); + goto out; + } + } + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = dict_get_int32 (dict, "flags", &force); + if (!ret && (force & GF_CLI_FLAG_OP_FORCE)) + snap_force = 1; + if (!snap_force) { + /* Do a quorum check of glusterds also. Because, + the missed snapshot information will be saved + by glusterd and if glusterds are not in + quorum, then better fail the snapshot + */ + if (!does_gd_meet_server_quorum (this)) { + snprintf (err_str, sizeof (err_str), + "glusterds are not in quorum"); + gf_log (this->name, GF_LOG_WARNING, "%s", + err_str); + *op_errstr = gf_strdup (err_str); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "glusterds are in " + "quorum"); + } + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get " + "volcount"); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "%s%ld", + snap_volume?"snap-volname":"volname", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get volname"); + goto out; + } + + if (snap_volume) { + ret = glusterd_snap_volinfo_find (volname, snap, + &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get snap volume %s " + "for snap %s", volname, + snapname); + goto out; + } + } else { + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to find the volume %s", + volname); + goto out; + } + } + + /* for replicate volumes with replica count equal to or + greater than 3, do quorum check by getting what type + of quorum rule has been set by getting the volume + option set. If getting the option fails, then assume + default. + AFR does this: + if quorum type is "auto": + - for odd numner of bricks (n), n/2 + 1 + bricks should be present + - for even number of bricks n, n/2 bricks + should be present along with the 1st + subvolume + if quorum type is not "auto": + - get the quorum count from dict with the + help of the option "cluster.quorum-count" + if the option is not there in the dict, + then assume quorum type is auto and follow + the above method. + For non replicate volumes quorum is met only if all + the bricks of the volume are online + */ + + if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) { + if (volinfo->replica_count %2 == 0) + quorum_count = volinfo->replica_count/2; + else + quorum_count = + volinfo->replica_count/2 + 1; + } else { + quorum_count = volinfo->brick_count; + } + + ret = dict_get_str (volinfo->dict, + "cluster.quorum-type", + &quorum_type); + if (!ret && !strcmp (quorum_type, "fixed")) { + ret = dict_get_int32 (volinfo->dict, + "cluster.quorum-count", + &tmp); + /* if quorum-type option is not found in the + dict assume auto quorum type. i.e n/2 + 1. + The same assumption is made when quorum-count + option cannot be obtained from the dict (even + if the quorum-type option is not set to auto, + the behavior is set to the default behavior) + */ + if (!ret) + quorum_count = tmp; + else + quorum_type = NULL; + } + + snprintf (key_prefix, sizeof (key_prefix), + "%s", snap_volume?"snap-vol":"vol"); + + ret = glusterd_volume_quorum_check (volinfo, i, dict, + key_prefix, + snap_force, + quorum_count, + quorum_type, + op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "volume %s " + "is not in quorum", volinfo->volname); + goto out; + } + } + break; + case GF_SNAP_OPTION_TYPE_DELETE: + case GF_SNAP_OPTION_TYPE_RESTORE: + if (!does_gd_meet_server_quorum (this)) { + snprintf (err_str, sizeof (err_str), + "glusterds are not in quorum"); + gf_log (this->name, GF_LOG_WARNING, "%s", + err_str); + *op_errstr = gf_strdup (err_str); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "glusterds are in " + "quorum"); + break; + default: + break; + } + + ret = 0; + out: return ret; } |