diff options
author | Pranith Kumar K <pranithk@gluster.com> | 2012-11-06 12:18:24 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2012-11-23 23:14:50 -0800 |
commit | 7c23a94516c5dd21536c259f323a3cc113fdfa0d (patch) | |
tree | 36c0efc8a3bbd4976ae56d8e5fff1cccbcef9c93 /xlators/mgmt | |
parent | 76a4afec6e03d15cb442e819f6fe7b94d6f9f487 (diff) |
mgmt/glusterd: Implementation of server-side quorum
Feature-page:
http://www.gluster.org/community/documentation/index.php/Features/Server-quorum
Change-Id: I747b222519e71022462343d2c1bcd3626e1f9c86
BUG: 839595
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.org/3811
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 170 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 492 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.c | 35 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.h | 16 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 118 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 5 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 429 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 31 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 41 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 27 |
11 files changed, 1206 insertions, 161 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 7cdad10e219..f33d2caeb56 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -280,6 +280,8 @@ _build_option_key (dict_t *d, char *k, data_t *v, void *tmp) int ret = -1; pack = tmp; + if (strcmp (k, GLUSTERD_GLOBAL_OPT_VERSION) == 0) + return 0; snprintf (reconfig_key, 256, "volume%d.option.%s", pack->vol_count, k); ret = dict_set_str (pack->dict, reconfig_key, v->data); @@ -303,12 +305,14 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, glusterd_conf_t *priv = NULL; char *volume_id_str = NULL; struct args_pack pack = {0,}; + xlator_t *this = NULL; GF_ASSERT (volinfo); GF_ASSERT (volumes); - priv = THIS->private; + this = THIS; + priv = this->private; GF_ASSERT (priv); @@ -388,6 +392,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, pack.vol_count = count; pack.opt_count = 0; dict_foreach (dict, _build_option_key, (void *) &pack); + dict_foreach (priv->opts, _build_option_key, &pack); snprintf (key, 256, "volume%d.opt_count", pack.vol_count); ret = dict_set_int32 (volumes, key, pack.opt_count); @@ -445,7 +450,6 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx) GF_ASSERT (priv); ret = glusterd_lock (MY_UUID); - if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to acquire local lock, ret: %d", ret); @@ -660,15 +664,30 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req) gf1_cli_probe_req cli_req = {0,}; glusterd_peerinfo_t *peerinfo = NULL; gf_boolean_t run_fsm = _gf_true; + xlator_t *this = NULL; + GF_ASSERT (req); + this = THIS; - if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf1_cli_probe_req)) { + if (!xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf1_cli_probe_req)) { //failed to decode msg; gf_log ("", GF_LOG_ERROR, "xdr decoding error"); req->rpc_err = GARBAGE_ARGS; goto out; } + if (glusterd_is_any_volume_in_server_quorum (this) && + !does_gd_meet_server_quorum (this)) { + glusterd_xfer_cli_probe_resp (req, -1, GF_PROBE_QUORUM_NOT_MET, + NULL, + cli_req.hostname, cli_req.port); + gf_log (this->name, GF_LOG_ERROR, "Quorum does not meet, " + "rejecting operation"); + ret = 0; + goto out; + } + gf_cmd_log ("peer probe", " on host %s:%d", cli_req.hostname, cli_req.port); gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d", @@ -684,8 +703,9 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req) &peerinfo))) { if (strcmp (peerinfo->hostname, cli_req.hostname) == 0) { - gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port %d" - " already a peer", cli_req.hostname, cli_req.port); + gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port " + "%d already a peer", cli_req.hostname, + cli_req.port); glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, NULL, cli_req.hostname, cli_req.port); @@ -694,8 +714,8 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req) } ret = glusterd_probe_begin (req, cli_req.hostname, cli_req.port); - gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, cli_req.port, - (ret) ? "FAILED" : "SUCCESS"); + gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, + cli_req.port, (ret) ? "FAILED" : "SUCCESS"); if (ret == GLUSTERD_CONNECTION_AWAITED) { //fsm should be run after connection establishes @@ -717,7 +737,7 @@ int glusterd_handle_cli_deprobe (rpcsvc_request_t *req) { int32_t ret = -1; - gf1_cli_deprobe_req cli_req = {0,}; + gf1_cli_deprobe_req cli_req = {0,}; uuid_t uuid = {0}; int op_errno = 0; xlator_t *this = NULL; @@ -750,18 +770,29 @@ glusterd_handle_cli_deprobe (rpcsvc_request_t *req) goto out; } - if (!uuid_is_null (uuid) && !(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) { - /* Check if peers are connected, except peer being detached*/ - if (!glusterd_chk_peers_connected_befriended (uuid)) { - ret = -1; - op_errno = GF_DEPROBE_FRIEND_DOWN; - goto out; + if (!(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) { + if (!uuid_is_null (uuid)) { + /* Check if peers are connected, except peer being detached*/ + if (!glusterd_chk_peers_connected_befriended (uuid)) { + ret = -1; + op_errno = GF_DEPROBE_FRIEND_DOWN; + goto out; + } + ret = glusterd_all_volume_cond_check ( + glusterd_friend_brick_belongs, + -1, &uuid); + if (ret) { + op_errno = GF_DEPROBE_BRICK_EXIST; + goto out; + } } - ret = glusterd_all_volume_cond_check ( - glusterd_friend_brick_belongs, - -1, &uuid); - if (ret) { - op_errno = GF_DEPROBE_BRICK_EXIST; + + if (glusterd_is_any_volume_in_server_quorum (this) && + !does_gd_meet_server_quorum (this)) { + gf_log (this->name, GF_LOG_ERROR, "Quorum does not " + "meet, rejecting operation"); + ret = -1; + op_errno = GF_DEPROBE_QUORUM_NOT_MET; goto out; } } @@ -2146,6 +2177,43 @@ out: } int +glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + glusterd_peerctx_args_t *args) +{ + dict_t *options = NULL; + int ret = -1; + glusterd_peerctx_t *peerctx = NULL; + + peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); + if (!peerctx) + goto out; + + if (args) + peerctx->args = *args; + + peerctx->peerinfo = peerinfo; + + ret = glusterd_transport_inet_options_build (&options, + peerinfo->hostname, + peerinfo->port); + if (ret) + goto out; + + ret = glusterd_rpc_create (&peerinfo->rpc, options, + glusterd_peer_rpc_notify, peerctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for" + " peer %s", peerinfo->hostname); + goto out; + } + peerctx = NULL; + ret = 0; +out: + GF_FREE (peerctx); + return ret; +} + +int glusterd_friend_add (const char *hoststr, int port, glusterd_friend_sm_state_t state, uuid_t *uuid, @@ -2156,8 +2224,6 @@ glusterd_friend_add (const char *hoststr, int port, int ret = 0; xlator_t *this = NULL; glusterd_conf_t *conf = NULL; - glusterd_peerctx_t *peerctx = NULL; - dict_t *options = NULL; gf_boolean_t handover = _gf_false; this = THIS; @@ -2165,49 +2231,35 @@ glusterd_friend_add (const char *hoststr, int port, GF_ASSERT (conf); GF_ASSERT (hoststr); - peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); - if (!peerctx) { - ret = -1; - goto out; - } - - if (args) - peerctx->args = *args; - - ret = glusterd_peerinfo_new (friend, state, uuid, hoststr); - if (ret) - goto out; - - peerctx->peerinfo = *friend; - - ret = glusterd_transport_inet_options_build (&options, hoststr, port); + ret = glusterd_peerinfo_new (friend, state, uuid, hoststr, port); if (ret) goto out; - if (!restore) { - ret = glusterd_store_peerinfo (*friend); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to store " - "peerinfo"); + //restore needs to first create the list of peers, then create rpcs + //to keep track of quorum in race-free manner. In restore for each peer + //rpc-create calls rpc_notify when the friend-list is partially + //constructed, leading to wrong quorum calculations. + if (restore) + goto done; - goto out; - } - } - list_add_tail (&(*friend)->uuid_list, &conf->peers); - ret = glusterd_rpc_create (&(*friend)->rpc, options, - glusterd_peer_rpc_notify, - peerctx); + ret = glusterd_store_peerinfo (*friend); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for" - " peer %s", (char*)hoststr); + gf_log (this->name, GF_LOG_ERROR, "Failed to store " + "peerinfo"); + goto out; } + ret = glusterd_friend_rpc_create (this, *friend, args); + if (ret) + goto out; +done: + list_add_tail (&(*friend)->uuid_list, &conf->peers); handover = _gf_true; out: if (ret && !handover) { - (void) glusterd_friend_cleanup (*friend); - *friend = NULL; + (void) glusterd_friend_cleanup (*friend); + *friend = NULL; } gf_log (this->name, GF_LOG_INFO, "connect returned %d", ret); @@ -2866,6 +2918,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_peerctx_t *peerctx = NULL; uuid_t owner = {0,}; uuid_t *peer_uuid = NULL; + gf_boolean_t quorum_action = _gf_false; peerctx = mydata; if (!peerctx) @@ -2880,6 +2933,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, { gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT"); peerinfo->connected = 1; + peerinfo->quorum_action = _gf_true; ret = glusterd_peer_dump_version (this, rpc, peerctx); if (ret) @@ -2892,6 +2946,14 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d", peerinfo->state.state); + if ((peerinfo->quorum_contrib != QUORUM_DOWN) && + (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { + peerinfo->quorum_contrib = QUORUM_DOWN; + quorum_action = _gf_true; + peerinfo->quorum_action = _gf_false; + } + peerinfo->connected = 0; + /* local glusterd (thinks that it) is the owner of the cluster lock and 'fails' the operation on the first disconnect from @@ -2944,6 +3006,8 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_friend_sm (); glusterd_op_sm (); + if (quorum_action) + glusterd_do_quorum_action (); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 40f38f5bbba..d5686bbab28 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -46,6 +46,24 @@ #include <signal.h> #include <sys/wait.h> +#define ALL_VOLUME_OPTION_CHECK(volname, key, ret, op_errstr, label) \ + do { \ + gf_boolean_t _all = !strcmp ("all", volname); \ + gf_boolean_t _ratio = !strcmp (key, \ + GLUSTERD_QUORUM_RATIO_KEY); \ + if (_all && !_ratio) { \ + ret = -1; \ + *op_errstr = gf_strdup ("Not a valid option for all " \ + "volumes"); \ + goto label; \ + } else if (!_all && _ratio) { \ + ret = -1; \ + *op_errstr = gf_strdup ("Not a valid option for " \ + "single volume"); \ + goto label; \ + } \ + } while (0) + static struct list_head gd_op_sm_queue; pthread_mutex_t gd_op_sm_lock; glusterd_op_info_t opinfo = {{0},}; @@ -291,6 +309,24 @@ out: } static int +glusterd_validate_quorum_options (xlator_t *this, char *fullkey, char *value, + char **op_errstr) +{ + int ret = 0; + char *key = NULL; + volume_option_t *opt = NULL; + + if (!glusterd_is_quorum_option (fullkey)) + goto out; + key = strchr (fullkey, '.'); + key++; + opt = xlator_volume_option_get (this, key); + ret = xlator_option_validate (this, key, value, opt, op_errstr); +out: + return ret; +} + +static int glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) { int ret = -1; @@ -315,6 +351,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) uint32_t local_key_op_version = 0; gf_boolean_t origin_glusterd = _gf_true; gf_boolean_t check_op_version = _gf_true; + gf_boolean_t all_vol = _gf_false; GF_ASSERT (dict); this = THIS; @@ -399,26 +436,30 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) goto out; } - exists = glusterd_check_volume_exists (volname); - if (!exists) { - snprintf (errstr, sizeof (errstr), "Volume %s does not exist", - volname); - gf_log (this->name, GF_LOG_ERROR, "%s", errstr); - *op_errstr = gf_strdup (errstr); - ret = -1; - goto out; - } + if (strcasecmp (volname, "all") != 0) { + exists = glusterd_check_volume_exists (volname); + if (!exists) { + snprintf (errstr, sizeof (errstr), "Volume %s does " + "not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + *op_errstr = gf_strdup (errstr); + ret = -1; + goto out; + } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to allocate memory"); - goto out; - } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to allocate memory"); + goto out; + } - ret = glusterd_validate_volume_id (dict, volinfo); - if (ret) - goto out; + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + } else { + all_vol = _gf_true; + } local_new_op_version = priv->op_version; @@ -473,6 +514,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) ret = -1; goto out; } + if (!exists) { gf_log (this->name, GF_LOG_ERROR, "Option with name: %s " @@ -490,6 +532,11 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) if (key_fixed) key = key_fixed; + ALL_VOLUME_OPTION_CHECK (volname, key, ret, op_errstr, out); + ret = glusterd_validate_quorum_options (this, key, value, + op_errstr); + if (ret) + goto out; local_key_op_version = glusterd_get_op_version_for_key (key); if (local_key_op_version > local_new_op_version) @@ -539,9 +586,9 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) } *op_errstr = NULL; - if (!global_opt) + if (!global_opt && !all_vol) ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr); - else { + else if (!all_vol) { voliter = NULL; list_for_each_entry (voliter, &priv->volumes, vol_list) { ret = glusterd_validate_globalopts (voliter, val_dict, op_errstr); @@ -626,23 +673,24 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) goto out; } - exists = glusterd_check_volume_exists (volname); + if (strcasecmp (volname, "all") != 0) { + exists = glusterd_check_volume_exists (volname); + if (!exists) { + snprintf (msg, sizeof (msg), "Volume %s does not " + "exist", volname); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; - if (!exists) { - snprintf (msg, sizeof (msg), "Volume %s does not " - "exist", volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) - goto out; - - ret = glusterd_validate_volume_id (dict, volinfo); - if (ret) - goto out; ret = dict_get_str (dict, "key", &key); if (ret) { @@ -666,6 +714,11 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) *op_errstr = gf_strdup (msg); ret = -1; goto out; + } else if (exists > 0) { + if (key_fixed) + key = key_fixed; + ALL_VOLUME_OPTION_CHECK (volname, key, ret, + op_errstr, out); } } @@ -993,6 +1046,22 @@ out: } static int +_delete_reconfig_global_opt (dict_t *this, char *key, data_t *value, void *data) +{ + int32_t *is_force = 0; + + GF_ASSERT (data); + is_force = (int32_t*)data; + + if (strcmp (GLUSTERD_GLOBAL_OPT_VERSION, key) == 0) + goto out; + + _delete_reconfig_opt (this, key, value, data); +out: + return 0; +} + +static int glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key, int32_t *is_force) { @@ -1008,15 +1077,6 @@ glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key, if (!strncmp(key, "all", 3)) dict_foreach (volinfo->dict, _delete_reconfig_opt, is_force); else { - if (glusterd_check_option_exists (key, &key_fixed) != 1) { - gf_log ("glusterd", GF_LOG_ERROR, - "volinfo dict inconsistency: option %s not found", - key); - ret = -1; - goto out; - } - if (key_fixed) - key = key_fixed; value = dict_get (volinfo->dict, key); if (!value) { gf_log ("glusterd", GF_LOG_DEBUG, @@ -1053,6 +1113,91 @@ out: return ret; } +static int +glusterd_op_reset_all_volume_options (xlator_t *this, dict_t *dict) +{ + char *key = NULL; + char *key_fixed = NULL; + int ret = -1; + int32_t is_force = 0; + glusterd_conf_t *conf = NULL; + dict_t *dup_opt = NULL; + gf_boolean_t all = _gf_false; + char *next_version = NULL; + gf_boolean_t quorum_action = _gf_false; + + conf = this->private; + ret = dict_get_str (dict, "key", &key); + if (ret) + goto out; + + ret = dict_get_int32 (dict, "force", &is_force); + if (ret) + is_force = 0; + + if (strcmp (key, "all")) { + ret = glusterd_check_option_exists (key, &key_fixed); + if (ret <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", + key); + ret = -1; + goto out; + } + } else { + all = _gf_true; + } + + if (key_fixed) + key = key_fixed; + + ret = -1; + dup_opt = dict_new (); + if (!dup_opt) + goto out; + if (!all) { + dict_copy (conf->opts, dup_opt); + dict_del (dup_opt, key); + } + ret = glusterd_get_next_global_opt_version_str (conf->opts, + &next_version); + if (ret) + goto out; + + ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version); + if (ret) + goto out; + + ret = glusterd_store_options (this, dup_opt); + if (ret) + goto out; + + if (glusterd_is_quorum_changed (conf->opts, key, NULL)) + quorum_action = _gf_true; + + ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + next_version); + if (ret) + goto out; + else + next_version = NULL; + + if (!all) { + dict_del (conf->opts, key); + } else { + dict_foreach (conf->opts, _delete_reconfig_global_opt, + &is_force); + } +out: + GF_FREE (key_fixed); + if (dup_opt) + dict_unref (dup_opt); + + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action (); + GF_FREE (next_version); + return ret; +} static int glusterd_op_reset_volume (dict_t *dict, char **op_errstr) @@ -1061,14 +1206,23 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr) int ret = -1; char *volname = NULL; char *key = NULL; + char *key_fixed = NULL; int32_t is_force = 0; + gf_boolean_t quorum_action = _gf_false; + xlator_t *this = NULL; + this = THIS; ret = dict_get_str (dict, "volname", &volname); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to get volume name " ); goto out; } + if (strcasecmp (volname, "all") == 0) { + ret = glusterd_op_reset_all_volume_options (this, dict); + goto out; + } + ret = dict_get_int32 (dict, "force", &is_force); if (ret) is_force = 0; @@ -1085,6 +1239,20 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr) goto out; } + if (strcmp (key, "all") && + glusterd_check_option_exists (key, &key_fixed) != 1) { + gf_log ("glusterd", GF_LOG_ERROR, + "volinfo dict inconsistency: option %s not found", + key); + ret = -1; + goto out; + } + if (key_fixed) + key = key_fixed; + + if (glusterd_is_quorum_changed (volinfo->dict, key, NULL)) + quorum_action = _gf_true; + ret = glusterd_options_reset (volinfo, key, &is_force); if (is_force == -1) { ret = -1; @@ -1093,12 +1261,14 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr) } out: + GF_FREE (key_fixed); + if (quorum_action) + glusterd_do_quorum_action (); + gf_log ("", GF_LOG_DEBUG, "'volume reset' returning %d", ret); return ret; - } - int glusterd_stop_bricks (glusterd_volinfo_t *volinfo) { @@ -1128,6 +1298,91 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo) } static int +glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict) +{ + char *key = NULL; + char *key_fixed = NULL; + char *value = NULL; + char *dup_value = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + dict_t *dup_opt = NULL; + char *next_version = NULL; + gf_boolean_t quorum_action = _gf_false; + + conf = this->private; + ret = dict_get_str (dict, "key1", &key); + if (ret) + goto out; + + ret = dict_get_str (dict, "value1", &value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "invalid key,value pair in 'volume set'"); + goto out; + } + ret = glusterd_check_option_exists (key, &key_fixed); + if (ret <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", key); + ret = -1; + goto out; + } + + if (key_fixed) + key = key_fixed; + + ret = -1; + dup_opt = dict_new (); + if (!dup_opt) + goto out; + dict_copy (conf->opts, dup_opt); + ret = dict_set_str (dup_opt, key, value); + if (ret) + goto out; + + ret = glusterd_get_next_global_opt_version_str (conf->opts, + &next_version); + if (ret) + goto out; + + ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version); + if (ret) + goto out; + + dup_value = gf_strdup (value); + if (!dup_value) + goto out; + + ret = glusterd_store_options (this, dup_opt); + if (ret) + goto out; + + if (glusterd_is_quorum_changed (conf->opts, key, value)) + quorum_action = _gf_true; + + ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + next_version); + if (ret) + goto out; + else + next_version = NULL; + + ret = dict_set_dynstr (conf->opts, key, dup_value); + if (ret) + goto out; +out: + GF_FREE (key_fixed); + if (dup_opt) + dict_unref (dup_opt); + + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action (); + GF_FREE (next_version); + return ret; +} + +static int glusterd_op_set_volume (dict_t *dict) { int ret = 0; @@ -1146,7 +1401,7 @@ glusterd_op_set_volume (dict_t *dict) int32_t dict_count = 0; gf_boolean_t check_op_version = _gf_false; uint32_t new_op_version = 0; - + gf_boolean_t quorum_action = _gf_false; this = THIS; GF_ASSERT (this); @@ -1176,6 +1431,11 @@ glusterd_op_set_volume (dict_t *dict) goto out; } + if (strcasecmp (volname, "all") == 0) { + ret = glusterd_op_set_all_volume_options (this, dict); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); @@ -1242,7 +1502,6 @@ glusterd_op_set_volume (dict_t *dict) key_fixed = NULL; goto out; } - } if (glusterd_check_globaloption (key)) @@ -1261,6 +1520,9 @@ glusterd_op_set_volume (dict_t *dict) if (key_fixed) key = key_fixed; + if (glusterd_is_quorum_changed (volinfo->dict, key, value)) + quorum_action = _gf_true; + if (global_opt) { list_for_each_entry (voliter, &priv->volumes, vol_list) { value = gf_strdup (value); @@ -1350,6 +1612,8 @@ glusterd_op_set_volume (dict_t *dict) out: GF_FREE (key_fixed); gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action (); return ret; } @@ -1875,20 +2139,26 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) static int glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; + int ret = 0; glusterd_op_lock_ctx_t *lock_ctx = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; GF_ASSERT (event); GF_ASSERT (ctx); + this = THIS; + priv = this->private; lock_ctx = (glusterd_op_lock_ctx_t *)ctx; ret = glusterd_unlock (lock_ctx->uuid); - gf_log ("", GF_LOG_DEBUG, "Unlock Returned %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); glusterd_op_unlock_send_resp (lock_ctx->req, ret); + if (priv->pending_quorum_action) + glusterd_do_quorum_action (); return ret; } @@ -2042,7 +2312,8 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) goto out; } if (strcmp (volname, "help") && - strcmp (volname, "help-xml")) { + strcmp (volname, "help-xml") && + strcasecmp (volname, "all")) { ret = glusterd_dict_set_volid (dict, volname, op_errstr); if (ret) @@ -2095,10 +2366,13 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) goto out; } - ret = glusterd_dict_set_volid (dict, volname, - op_errstr); - if (ret) - goto out; + if (strcasecmp (volname, "all")) { + ret = glusterd_dict_set_volid (dict, + volname, + op_errstr); + if (ret) + goto out; + } dict_copy (dict, req_dict); } break; @@ -2114,6 +2388,105 @@ out: return ret; } +gf_boolean_t +glusterd_is_get_op (xlator_t *this, glusterd_op_t op, dict_t *dict) +{ + char *key = NULL; + char *volname = NULL; + int ret = 0; + + if (op == GD_OP_STATUS_VOLUME) + return _gf_true; + + if ((op == GD_OP_SET_VOLUME)) { + //check for set volume help + ret = dict_get_str (dict, "volname", &volname); + if (volname && + ((strcmp (volname, "help") == 0) || + (strcmp (volname, "help-xml") == 0))) { + ret = dict_get_str (dict, "key1", &key); + if (ret < 0) + return _gf_true; + } + } + + return _gf_false; +} + +gf_boolean_t +glusterd_is_op_quorum_validation_required (xlator_t *this, glusterd_op_t op, + dict_t *dict) +{ + gf_boolean_t required = _gf_true; + char *key = NULL; + char *key_fixed = NULL; + int ret = -1; + + if (glusterd_is_get_op (this, op, dict)) { + required = _gf_false; + goto out; + } + if ((op != GD_OP_SET_VOLUME) && (op != GD_OP_RESET_VOLUME)) + goto out; + if (op == GD_OP_SET_VOLUME) + ret = dict_get_str (dict, "key1", &key); + else if (op == GD_OP_RESET_VOLUME) + ret = dict_get_str (dict, "key", &key); + if (ret) + goto out; + ret = glusterd_check_option_exists (key, &key_fixed); + if (ret <= 0) + goto out; + if (key_fixed) + key = key_fixed; + if (glusterd_is_quorum_option (key)) + required = _gf_false; +out: + GF_FREE (key_fixed); + return required; +} + +static int +glusterd_op_validate_quorum (xlator_t *this, glusterd_op_t op, + dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *errstr = NULL; + + + errstr = "Quorum not met. Volume operation not allowed."; + if (!glusterd_is_op_quorum_validation_required (this, op, dict)) + goto out; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + ret = 0; + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = 0; + goto out; + } + + if (does_gd_meet_server_quorum (this)) { + ret = 0; + goto out; + } + + if (glusterd_is_volume_in_server_quorum (volinfo)) { + ret = -1; + *op_errstr = gf_strdup (errstr); + goto out; + } + ret = 0; +out: + return ret; +} + static int glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) { @@ -2141,6 +2514,13 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) goto out; } + ret = glusterd_op_validate_quorum (this, op, dict, &op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, op_errstr); + opinfo.op_errstr = op_errstr; + goto out; + } + /* rsp_dict NULL from source */ ret = glusterd_op_stage_validate (op, dict, &op_errstr, NULL); if (ret) { @@ -2817,6 +3197,8 @@ glusterd_op_txn_complete () GF_FREE (op_errstr); + if (priv->pending_quorum_action) + glusterd_do_quorum_action (); gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index a60eb53e554..5a38fdfecb5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -959,6 +959,27 @@ glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event) } } +gf_boolean_t +gd_does_peer_affect_quorum (glusterd_friend_sm_state_t old_state, + glusterd_friend_sm_event_type_t event_type, + glusterd_peerinfo_t *peerinfo) +{ + gf_boolean_t affects = _gf_false; + + //When glusterd comes up with friends in BEFRIENDED state in store, + //wait until compare-data happens. + if ((old_state == GD_FRIEND_STATE_BEFRIENDED) && + (event_type != GD_FRIEND_EVENT_RCVD_ACC) && + (event_type != GD_FRIEND_EVENT_LOCAL_ACC)) + goto out; + if ((peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED) + && peerinfo->connected) { + affects = _gf_true; + } +out: + return affects; +} + int glusterd_friend_sm () { @@ -970,6 +991,8 @@ glusterd_friend_sm () glusterd_peerinfo_t *peerinfo = NULL; glusterd_friend_sm_event_type_t event_type = 0; gf_boolean_t is_await_conn = _gf_false; + gf_boolean_t quorum_action = _gf_false; + glusterd_friend_sm_state_t old_state = GD_FRIEND_STATE_DEFAULT; while (!list_empty (&gd_friend_sm_queue)) { list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue, list) { @@ -989,6 +1012,7 @@ glusterd_friend_sm () glusterd_friend_sm_event_name_get (event_type)); + old_state = peerinfo->state.state; state = glusterd_friend_state_table[peerinfo->state.state]; GF_ASSERT (state); @@ -1029,6 +1053,15 @@ glusterd_friend_sm () goto out; } + if (gd_does_peer_affect_quorum (old_state, event_type, + peerinfo)) { + peerinfo->quorum_contrib = QUORUM_UP; + if (peerinfo->quorum_action) { + peerinfo->quorum_action = _gf_false; + quorum_action = _gf_true; + } + } + ret = glusterd_store_peerinfo (peerinfo); glusterd_destroy_friend_event_context (event); @@ -1042,6 +1075,8 @@ glusterd_friend_sm () ret = 0; out: + if (quorum_action) + glusterd_do_quorum_action (); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index 011578da3d0..0af45deb651 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -36,6 +36,20 @@ struct glusterd_store_handle_ { typedef struct glusterd_store_handle_ glusterd_store_handle_t; +typedef enum gd_quorum_contribution_ { + QUORUM_NONE, + QUORUM_WAITING, + QUORUM_DOWN, + QUORUM_UP +} gd_quorum_contrib_t; + +typedef enum gd_quorum_status_ { + QUORUM_UNKNOWN, + QUORUM_NOT_APPLICABLE, + QUORUM_MEETS, + QUORUM_DOES_NOT_MEET +} gd_quorum_status_t; + typedef enum glusterd_friend_sm_state_ { GD_FRIEND_STATE_DEFAULT = 0, GD_FRIEND_STATE_REQ_SENT, @@ -91,6 +105,8 @@ struct glusterd_peerinfo_ { int connected; glusterd_store_handle_t *shandle; glusterd_sm_tr_log_t sm_log; + gf_boolean_t quorum_action; + gd_quorum_contrib_t quorum_contrib; }; typedef struct glusterd_peerinfo_ glusterd_peerinfo_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 6fa490e47d5..471a24e6622 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -1908,10 +1908,13 @@ glusterd_store_iter_destroy (glusterd_store_iter_t *iter) { int32_t ret = -1; - GF_ASSERT (iter); - GF_ASSERT (iter->fd > 0); + if (!iter) + return 0; - ret = fclose (iter->file); + if (iter->file) + ret = fclose (iter->file); + else + ret = 0; if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to close fd: %d, ret: %d, " @@ -2203,7 +2206,6 @@ glusterd_store_retrieve_node_state (char *volname) ret = glusterd_store_handle_retrieve (path, &volinfo->node_state_shandle); - if (ret) goto out; @@ -2215,6 +2217,7 @@ glusterd_store_retrieve_node_state (char *volname) ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); if (ret) goto out; + if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { volinfo->defrag_cmd = atoi (value); @@ -2440,6 +2443,102 @@ out: return ret; } +inline void +glusterd_store_set_options_path (glusterd_conf_t *conf, char *path, size_t len) +{ + snprintf (path, len, "%s/options", conf->workdir); +} + +int +_store_global_opts (dict_t *this, char *key, data_t *value, void *data) +{ + glusterd_store_handle_t *shandle = data; + + glusterd_store_save_value (shandle->fd, key, (char*)value->data); + return 0; +} + +int32_t +glusterd_store_options (xlator_t *this, dict_t *opts) +{ + glusterd_store_handle_t *shandle = NULL; + glusterd_conf_t *conf = NULL; + char path[PATH_MAX] = {0}; + int fd = -1; + int32_t ret = -1; + + conf = this->private; + glusterd_store_set_options_path (conf, path, sizeof (path)); + + ret = glusterd_store_handle_new (path, &shandle); + if (ret) + goto out; + + fd = glusterd_store_mkstemp (shandle); + if (fd <= 0) { + ret = -1; + goto out; + } + + shandle->fd = fd; + dict_foreach (opts, _store_global_opts, shandle); + shandle->fd = 0; + ret = glusterd_store_rename_tmppath (shandle); + if (ret) + goto out; +out: + glusterd_store_handle_destroy (shandle); + if (fd >=0 ) + close (fd); + return ret; +} + +int32_t +glusterd_store_retrieve_options (xlator_t *this) +{ + char path[PATH_MAX] = {0}; + glusterd_conf_t *conf = NULL; + glusterd_store_handle_t *shandle = NULL; + glusterd_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + glusterd_store_op_errno_t op_errno = 0; + int ret = -1; + + conf = this->private; + glusterd_store_set_options_path (conf, path, sizeof (path)); + + ret = glusterd_store_handle_retrieve (path, &shandle); + if (ret) + goto out; + + ret = glusterd_store_iter_new (shandle, &iter); + if (ret) + goto out; + + ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); + while (!ret) { + ret = dict_set_dynstr (conf->opts, key, value); + if (ret) { + GF_FREE (key); + GF_FREE (value); + goto out; + } + GF_FREE (key); + key = NULL; + value = NULL; + + ret = glusterd_store_iter_get_next (iter, &key, &value, + &op_errno); + } + if (op_errno != GD_STORE_EOF) + goto out; + ret = 0; +out: + glusterd_store_iter_destroy (iter); + glusterd_store_handle_destroy (shandle); + return ret; +} int32_t glusterd_store_retrieve_volumes (xlator_t *this) @@ -2840,9 +2939,8 @@ glusterd_store_retrieve_peers (xlator_t *this) (void) glusterd_store_iter_destroy (iter); - args.mode = GD_MODE_SWITCH_ON; ret = glusterd_friend_add (hostname, 0, state, &uuid, - &peerinfo, 1, &args); + &peerinfo, 1, NULL); GF_FREE (hostname); if (ret) @@ -2852,6 +2950,13 @@ glusterd_store_retrieve_peers (xlator_t *this) glusterd_for_each_entry (entry, dir); } + args.mode = GD_MODE_ON; + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + ret = glusterd_friend_rpc_create (this, peerinfo, &args); + if (ret) + goto out; + } + out: if (dir) closedir (dir); @@ -2905,7 +3010,6 @@ glusterd_restore () goto out; } - ret = glusterd_store_retrieve_volumes (this); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 1e9e599a429..d8c1567a661 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -138,4 +138,9 @@ glusterd_retrieve_op_version (xlator_t *this, int *op_version); int glusterd_store_global_info (xlator_t *this); +int32_t +glusterd_store_retrieve_options (xlator_t *this); + +int32_t +glusterd_store_options (xlator_t *this, dict_t *opts); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 726aa3a1bb3..6c2870238bd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -68,6 +68,8 @@ #define NLMV4_VERSION 4 #define NLMV1_VERSION 1 +#define CEILING_POS(X) (((X)-(int)(X)) > 0 ? (int)((X)+1) : (int)(X)) + char *glusterd_sock_dir = "/var/run"; static glusterd_lock_t lock; @@ -939,7 +941,10 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) { GF_ASSERT (peerinfo); glusterd_peerctx_t *peerctx = NULL; + gf_boolean_t quorum_action = _gf_false; + if (peerinfo->quorum_contrib != QUORUM_NONE) + quorum_action = _gf_true; if (peerinfo->rpc) { /* cleanup the saved-frames before last unref */ rpc_clnt_connection_cleanup (&peerinfo->rpc->conn); @@ -955,6 +960,8 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) } glusterd_peer_destroy (peerinfo); + if (quorum_action) + glusterd_do_quorum_action (); return 0; } @@ -982,12 +989,10 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) } } - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } - int32_t glusterd_service_stop (const char *service, char *pidfile, int sig, gf_boolean_t force_kill) @@ -1655,25 +1660,25 @@ out: } int -_add_volinfo_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data) +_add_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data) { - glusterd_voldict_ctx_t *ctx = NULL; + glusterd_dict_ctx_t *ctx = NULL; char optkey[512] = {0,}; int ret = -1; ctx = data; - snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count, + snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix, ctx->key_name, ctx->opt_count); ret = dict_set_str (ctx->dict, optkey, key); if (ret) gf_log ("", GF_LOG_ERROR, "option add for %s%d %s", - ctx->key_name, ctx->count, key); - snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count, + ctx->key_name, ctx->opt_count, key); + snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix, ctx->val_name, ctx->opt_count); ret = dict_set_str (ctx->dict, optkey, value->data); if (ret) gf_log ("", GF_LOG_ERROR, "option add for %s%d %s", - ctx->val_name, ctx->count, value->data); + ctx->val_name, ctx->opt_count, value->data); ctx->opt_count++; return ret; @@ -1711,6 +1716,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count) { int32_t ret = -1; + char prefix[512] = {0,}; char key[512] = {0,}; glusterd_brickinfo_t *brickinfo = NULL; int32_t i = 1; @@ -1718,7 +1724,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, char *src_brick = NULL; char *dst_brick = NULL; char *str = NULL; - glusterd_voldict_ctx_t ctx = {0}; + glusterd_dict_ctx_t ctx = {0}; GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1850,14 +1856,15 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; } + snprintf (prefix, sizeof (prefix), "volume%d", count); ctx.dict = dict; - ctx.count = count; + ctx.prefix = prefix; ctx.opt_count = 1; ctx.key_name = "key"; ctx.val_name = "value"; GF_ASSERT (volinfo->dict); - dict_foreach (volinfo->dict, _add_volinfo_dict_to_prdict, &ctx); + dict_foreach (volinfo->dict, _add_dict_to_prdict, &ctx); ctx.opt_count--; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.opt-count", count); @@ -1866,13 +1873,13 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; ctx.dict = dict; - ctx.count = count; + ctx.prefix = prefix; ctx.opt_count = 1; ctx.key_name = "slave-num"; ctx.val_name = "slave-val"; GF_ASSERT (volinfo->gsync_slaves); - dict_foreach (volinfo->gsync_slaves, _add_volinfo_dict_to_prdict, &ctx); + dict_foreach (volinfo->gsync_slaves, _add_dict_to_prdict, &ctx); ctx.opt_count--; memset (key, 0, sizeof (key)); @@ -1914,6 +1921,7 @@ glusterd_build_volume_dict (dict_t **vols) glusterd_conf_t *priv = NULL; glusterd_volinfo_t *volinfo = NULL; int32_t count = 0; + glusterd_dict_ctx_t ctx = {0}; priv = THIS->private; @@ -1934,6 +1942,17 @@ glusterd_build_volume_dict (dict_t **vols) if (ret) goto out; + ctx.dict = dict; + ctx.prefix = "global"; + ctx.opt_count = 1; + ctx.key_name = "key"; + ctx.val_name = "val"; + dict_foreach (priv->opts, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + ret = dict_set_int32 (dict, "global-opt-count", ctx.opt_count); + if (ret) + goto out; + *vols = dict; out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -2015,8 +2034,8 @@ out: } static int32_t -import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, - char *value_prefix, int opt_count, int count) +import_prdict_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, + char *value_prefix, int opt_count, char *prefix) { char key[512] = {0,}; int32_t ret = 0; @@ -2028,8 +2047,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, while (i <= opt_count) { memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.%s%d", - count, key_prefix, i); + snprintf (key, sizeof (key), "%s.%s%d", + prefix, key_prefix, i); ret = dict_get_str (vols, key, &opt_key); if (ret) { snprintf (msg, sizeof (msg), "Volume dict key not " @@ -2038,8 +2057,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.%s%d", - count, value_prefix, i); + snprintf (key, sizeof (key), "%s.%s%d", + prefix, value_prefix, i); ret = dict_get_str (vols, key, &opt_val); if (ret) { snprintf (msg, sizeof (msg), "Volume dict value not " @@ -2068,6 +2087,250 @@ out: } +gf_boolean_t +glusterd_is_quorum_option (char *option) +{ + gf_boolean_t res = _gf_false; + int i = 0; + char *keys[] = {GLUSTERD_QUORUM_TYPE_KEY, + GLUSTERD_QUORUM_RATIO_KEY, NULL}; + + for (i = 0; keys[i]; i++) { + if (strcmp (option, keys[i]) == 0) { + res = _gf_true; + break; + } + } + return res; +} + +gf_boolean_t +glusterd_is_quorum_changed (dict_t *options, char *option, char *value) +{ + int ret = 0; + gf_boolean_t reconfigured = _gf_false; + gf_boolean_t all = _gf_false; + char *oldquorum = NULL; + char *newquorum = NULL; + char *oldratio = NULL; + char *newratio = NULL; + + if ((strcmp ("all", option) != 0) && + !glusterd_is_quorum_option (option)) + goto out; + + if (strcmp ("all", option) == 0) + all = _gf_true; + + if (all || (strcmp (GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) { + newquorum = value; + ret = dict_get_str (options, GLUSTERD_QUORUM_TYPE_KEY, + &oldquorum); + } + + if (all || (strcmp (GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) { + newratio = value; + ret = dict_get_str (options, GLUSTERD_QUORUM_RATIO_KEY, + &oldratio); + } + + reconfigured = _gf_true; + + if (oldquorum && newquorum && (strcmp (oldquorum, newquorum) == 0)) + reconfigured = _gf_false; + if (oldratio && newratio && (strcmp (oldratio, newratio) == 0)) + reconfigured = _gf_false; + + if ((oldratio == NULL) && (newratio == NULL) && (oldquorum == NULL) && + (newquorum == NULL)) + reconfigured = _gf_false; +out: + return reconfigured; +} + +static inline gf_boolean_t +_is_contributing_to_quorum (gd_quorum_contrib_t contrib) +{ + if ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN)) + return _gf_true; + return _gf_false; +} + +static inline gf_boolean_t +_does_quorum_meet (int active_count, int quorum_count) +{ + return (active_count >= quorum_count); +} + +int +glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, + int *quorum_count) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int inquorum_count = 0; + char *val = NULL; + double quorum_percentage = 0.0; + gf_boolean_t ratio = _gf_false; + int count = 0; + + conf = this->private; + //Start with counting self + inquorum_count = 1; + if (active_count) + *active_count = 1; + list_for_each_entry (peerinfo, &conf->peers, uuid_list) { + if (peerinfo->quorum_contrib == QUORUM_WAITING) + goto out; + + if (_is_contributing_to_quorum (peerinfo->quorum_contrib)) + inquorum_count = inquorum_count + 1; + + if (active_count && (peerinfo->quorum_contrib == QUORUM_UP)) + *active_count = *active_count + 1; + } + + ret = dict_get_str (conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val); + if (ret == 0) { + ratio = _gf_true; + ret = gf_string2percent (val, &quorum_percentage); + if (!ret) + ratio = _gf_true; + } + if (ratio) + count = CEILING_POS (inquorum_count * + quorum_percentage / 100.0); + else + count = (inquorum_count * 50 / 100) + 1; + + *quorum_count = count; + ret = 0; +out: + return ret; +} + +gf_boolean_t +glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo) +{ + gf_boolean_t res = _gf_false; + char *quorum_type = NULL; + int ret = 0; + + ret = dict_get_str (volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, + &quorum_type); + if (ret) + goto out; + + if (strcmp (quorum_type, GLUSTERD_SERVER_QUORUM) == 0) + res = _gf_true; +out: + return res; +} + +gf_boolean_t +glusterd_is_any_volume_in_server_quorum (xlator_t *this) +{ + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + + conf = this->private; + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (glusterd_is_volume_in_server_quorum (volinfo)) { + return _gf_true; + } + } + return _gf_false; +} + +gf_boolean_t +does_gd_meet_server_quorum (xlator_t *this) +{ + int quorum_count = 0; + int active_count = 0; + gf_boolean_t in = _gf_false; + glusterd_conf_t *conf = NULL; + int ret = -1; + + conf = this->private; + ret = glusterd_get_quorum_cluster_counts (this, &active_count, + &quorum_count); + if (ret) + goto out; + + if (!_does_quorum_meet (active_count, quorum_count)) { + goto out; + } + + in = _gf_true; +out: + return in; +} + +void +glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo, + gf_boolean_t meets_quorum) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *conf = NULL; + + conf = this->private; + if (volinfo->status != GLUSTERD_STATUS_STARTED) + goto out; + + if (!glusterd_is_volume_in_server_quorum (volinfo)) + meets_quorum = _gf_true; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (!glusterd_is_local_brick (this, volinfo, brickinfo)) + continue; + if (meets_quorum) + glusterd_brick_start (volinfo, brickinfo, _gf_false); + else + glusterd_brick_stop (volinfo, brickinfo, _gf_false); + } +out: + return; +} + +int +glusterd_do_quorum_action () +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int active_count = 0; + int quorum_count = 0; + gf_boolean_t meets = _gf_false; + + this = THIS; + conf = this->private; + + conf->pending_quorum_action = _gf_true; + ret = glusterd_lock (conf->uuid); + if (ret) + goto out; + + { + ret = glusterd_get_quorum_cluster_counts (this, &active_count, + &quorum_count); + if (ret) + goto unlock; + + if (_does_quorum_meet (active_count, quorum_count)) + meets = _gf_true; + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + glusterd_do_volume_quorum_action (this, volinfo, meets); + } + } +unlock: + (void)glusterd_unlock (conf->uuid); + conf->pending_quorum_action = _gf_false; +out: + return ret; +} + int32_t glusterd_import_friend_volume_opts (dict_t *vols, int count, glusterd_volinfo_t *volinfo) @@ -2076,6 +2339,7 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, int32_t ret = -1; int opt_count = 0; char msg[2048] = {0}; + char volume_prefix[1024] = {0}; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.opt-count", count); @@ -2086,8 +2350,9 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, goto out; } - ret = import_prdict_volinfo_dict (vols, volinfo->dict, "key", - "value", opt_count, count); + snprintf (volume_prefix, sizeof (volume_prefix), "volume%d", count); + ret = import_prdict_dict (vols, volinfo->dict, "key", "value", + opt_count, volume_prefix); if (ret) { snprintf (msg, sizeof (msg), "Unable to import options dict " "specified for %s", volinfo->volname); @@ -2103,9 +2368,8 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, goto out; } - ret = import_prdict_volinfo_dict (vols, volinfo->gsync_slaves, - "slave-num", "slave-val", opt_count, - count); + ret = import_prdict_dict (vols, volinfo->gsync_slaves, "slave-num", + "slave-val", opt_count, volume_prefix); if (ret) { snprintf (msg, sizeof (msg), "Unable to import gsync sessions " "specified for %s", volinfo->volname); @@ -2602,6 +2866,95 @@ out: return ret; } +int +glusterd_get_global_opt_version (dict_t *opts, uint32_t *version) +{ + int ret = -1; + char *version_str = NULL; + + ret = dict_get_str (opts, GLUSTERD_GLOBAL_OPT_VERSION, &version_str); + if (ret) + goto out; + + ret = gf_string2uint (version_str, version); + if (ret) + goto out; + ret = 0; +out: + return ret; +} + +int +glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str) +{ + int ret = -1; + char version_string[64] = {0}; + uint32_t version = 0; + + ret = glusterd_get_global_opt_version (opts, &version); + if (ret) + goto out; + version++; + snprintf (version_string, sizeof (version_string), "%"PRIu32, version); + *version_str = gf_strdup (version_string); + if (*version_str) + ret = 0; +out: + return ret; +} + +int32_t +glusterd_import_global_opts (dict_t *friend_data) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + dict_t *import_options = NULL; + int count = 0; + uint32_t local_version = 0; + uint32_t remote_version = 0; + + this = THIS; + conf = this->private; + + ret = dict_get_int32 (friend_data, "global-opt-count", &count); + if (ret) { + //old version peer + ret = 0; + goto out; + } + + import_options = dict_new (); + if (!import_options) + goto out; + ret = import_prdict_dict (friend_data, import_options, "key", "val", + count, "global"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to import" + " global options"); + goto out; + } + + ret = glusterd_get_global_opt_version (conf->opts, &local_version); + if (ret) + goto out; + ret = glusterd_get_global_opt_version (import_options, &remote_version); + if (ret) + goto out; + if (remote_version > local_version) { + ret = glusterd_store_options (this, import_options); + if (ret) + goto out; + dict_unref (conf->opts); + conf->opts = dict_ref (import_options); + } + ret = 0; +out: + if (import_options) + dict_unref (import_options); + return ret; +} + int32_t glusterd_compare_friend_data (dict_t *vols, int32_t *status) { @@ -2639,6 +2992,9 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status) stale_nfs = _gf_true; if (glusterd_is_nodesvc_running ("glustershd")) stale_shd = _gf_true; + ret = glusterd_import_global_opts (vols); + if (ret) + goto out; ret = glusterd_import_friend_volumes (vols); if (ret) goto out; @@ -3523,14 +3879,16 @@ glusterd_restart_bricks (glusterd_conf_t *conf) int ret = 0; list_for_each_entry (volinfo, &conf->volumes, vol_list) { - /* If volume status is not started, do not proceed */ - if (volinfo->status == GLUSTERD_STATUS_STARTED) { - list_for_each_entry (brickinfo, &volinfo->bricks, - brick_list) { - glusterd_brick_start (volinfo, brickinfo, - _gf_true); - } - start_nodesvcs = _gf_true; + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + start_nodesvcs = _gf_true; + if (glusterd_is_volume_in_server_quorum (volinfo)) { + //these bricks will be restarted once the quorum is met + continue; + } + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + glusterd_brick_start (volinfo, brickinfo, _gf_true); } } @@ -4811,8 +5169,8 @@ out: int glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, - glusterd_friend_sm_state_t state, - uuid_t *uuid, const char *hostname) + glusterd_friend_sm_state_t state, uuid_t *uuid, + const char *hostname, int port) { glusterd_peerinfo_t *new_peer = NULL; int ret = -1; @@ -4842,6 +5200,9 @@ glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, if (ret) goto out; + if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED) + new_peer->quorum_contrib = QUORUM_WAITING; + new_peer->port = port; *peerinfo = new_peer; out: if (ret && new_peer) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 545fa3385ac..3f956deee67 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -33,13 +33,13 @@ struct glusterd_lock_ { time_t timestamp; }; -typedef struct glusterd_voldict_ctx_ { +typedef struct glusterd_dict_ctx_ { dict_t *dict; - int count; int opt_count; char *key_name; char *val_name; -} glusterd_voldict_ctx_t; + char *prefix; +} glusterd_dict_ctx_t; int glusterd_compare_lines (const void *a, const void *b); @@ -323,8 +323,8 @@ glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log, int event); int glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, - glusterd_friend_sm_state_t state, - uuid_t *uuid, const char *hostname); + glusterd_friend_sm_state_t state, uuid_t *uuid, + const char *hostname, int port); int glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log, char * (*state_name_get) (int), @@ -463,4 +463,25 @@ glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); */ gf_boolean_t is_origin_glusterd (); + +gf_boolean_t +glusterd_is_quorum_changed (dict_t *options, char *option, char *value); + +int +glusterd_do_quorum_action (); + +int +glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, + int *quorum_count); + +int +glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str); +gf_boolean_t +glusterd_is_quorum_option (char *option); +gf_boolean_t +glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo); +gf_boolean_t +glusterd_is_any_volume_in_server_quorum (xlator_t *this); +gf_boolean_t +does_gd_meet_server_quorum (xlator_t *this); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 6f7b3034cbf..3629361ad56 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -184,6 +184,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"network.inode-lru-limit", "protocol/server", NULL, NULL, NO_DOC, 0, 1}, {AUTH_ALLOW_MAP_KEY, "protocol/server", "!server-auth", "*", DOC, 0, 1}, {AUTH_REJECT_MAP_KEY, "protocol/server", "!server-auth", NULL, DOC, 0}, + {"transport.keepalive", "protocol/server", "transport.socket.keepalive", NULL, NO_DOC, 0, 1}, {"server.allow-insecure", "protocol/server", "rpc-auth-allow-insecure", NULL, NO_DOC, 0, 1}, {"server.statedump-path", "protocol/server", "statedump-path", NULL, DOC, 0, 1}, @@ -252,6 +253,8 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"storage.owner-gid", "storage/posix", "brick-gid", NULL, DOC, 0, 2}, {"config.memory-accounting", "configuration", "!config", NULL, DOC, 0, 2}, {"config.transport", "configuration", "!config", NULL, DOC, 0, 2}, + {GLUSTERD_QUORUM_TYPE_KEY, "mgmt/glusterd", NULL, "off", DOC, 0}, + {GLUSTERD_QUORUM_RATIO_KEY, "mgmt/glusterd", NULL, "0", DOC, 0}, {NULL, } }; diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index cb5ac321c2f..02dd5fac78b 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -118,6 +118,36 @@ glusterd_uuid_init () } int +glusterd_options_init (xlator_t *this) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + char *initial_version = "0"; + + priv = this->private; + + priv->opts = dict_new (); + if (!priv->opts) + goto out; + + ret = glusterd_store_retrieve_options (this); + if (ret == 0) + goto out; + + ret = dict_set_str (priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, + initial_version); + if (ret) + goto out; + ret = glusterd_store_options (this, priv->opts); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to store version"); + return ret; + } +out: + + return 0; +} +int glusterd_fetchspec_notify (xlator_t *this) { int ret = -1; @@ -1028,6 +1058,10 @@ init (xlator_t *this) if (ret < 0) goto out; + ret = glusterd_options_init (this); + if (ret < 0) + goto out; + ret = glusterd_handle_upgrade_downgrade (this->options, conf); if (ret) goto out; @@ -1172,5 +1206,12 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, }, #endif + { .key = {"server-quorum-type"}, + .type = GF_OPTION_TYPE_STR, + .value = { "none", "server"}, + }, + { .key = {"server-quorum-ratio"}, + .type = GF_OPTION_TYPE_PERCENT, + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 00a6bd16343..c2041ad3194 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -43,7 +43,14 @@ #define GLUSTERD_TR_LOG_SIZE 50 #define GLUSTERD_NAME "glusterd" #define GLUSTERD_SOCKET_LISTEN_BACKLOG 128 +#define GLUSTERD_QUORUM_TYPE_KEY "cluster.server-quorum-type" +#define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio" +#define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version" +#define GLUSTERD_SERVER_QUORUM "server" + +struct glusterd_volinfo_; +typedef struct glusterd_volinfo_ glusterd_volinfo_t; typedef enum glusterd_op_ { GD_OP_NONE = 0, @@ -74,7 +81,6 @@ typedef enum glusterd_op_ { GD_OP_MAX, } glusterd_op_t; - struct glusterd_store_iter_ { int fd; FILE *file; @@ -86,6 +92,7 @@ typedef struct glusterd_store_iter_ glusterd_store_iter_t; struct glusterd_volgen { dict_t *dict; }; + typedef struct { struct rpc_clnt *rpc; gf_boolean_t running; @@ -109,6 +116,12 @@ typedef struct { versions */ typedef struct { + gf_boolean_t quorum; + double quorum_ratio; + uint64_t gl_opt_version; +} gd_global_opts_t; + +typedef struct { struct _volfile_ctx *volfile; pthread_mutex_t mutex; struct list_head peers; @@ -134,11 +147,11 @@ typedef struct { #endif pthread_t brick_thread; void *hooks_priv; - xlator_t *xl; /* Should be set to 'THIS' before creating thread */ - /* need for proper handshake_t */ int op_version; /* Starts with 1 for 3.3.0 */ - + xlator_t *xl; /* Should be set to 'THIS' before creating thread */ + gf_boolean_t pending_quorum_action; + dict_t *opts; } glusterd_conf_t; @@ -170,9 +183,6 @@ struct gf_defrag_brickinfo_ { int size; }; -struct glusterd_volinfo_; -typedef struct glusterd_volinfo_ glusterd_volinfo_t; - typedef int (*defrag_cbk_fn_t) (glusterd_volinfo_t *volinfo, gf_defrag_status_t status); @@ -411,6 +421,9 @@ glusterd_friend_add (const char *hoststr, int port, gf_boolean_t restore, glusterd_peerctx_args_t *args); int +glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + glusterd_peerctx_args_t *args); +int glusterd_friend_remove (uuid_t uuid, char *hostname); int |