diff options
| author | Pranith Kumar K <pranithk@gluster.com> | 2012-11-06 12:18:24 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2012-11-23 23:14:50 -0800 | 
| commit | 7c23a94516c5dd21536c259f323a3cc113fdfa0d (patch) | |
| tree | 36c0efc8a3bbd4976ae56d8e5fff1cccbcef9c93 | |
| parent | 76a4afec6e03d15cb442e819f6fe7b94d6f9f487 (diff) | |
mgmt/glusterd: Implementation of server-side quorum
Feature-page:
http://www.gluster.org/community/documentation/index.php/Features/Server-quorum
Change-Id: I747b222519e71022462343d2c1bcd3626e1f9c86
BUG: 839595
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.org/3811
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
| -rw-r--r-- | cli/src/cli-rpc-ops.c | 84 | ||||
| -rw-r--r-- | rpc/rpc-lib/src/protocol-common.h | 6 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 170 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 492 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.c | 35 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.h | 16 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 118 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 5 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 429 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 31 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 3 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 41 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 27 | 
13 files changed, 1253 insertions, 204 deletions
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 44202037169..10ff6e7a90e 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -83,6 +83,42 @@ rpc_clnt_prog_t cli_pmap_prog = {          .progver    = GLUSTER_PMAP_VERSION,  }; +void +gf_cli_probe_strerror (gf1_cli_probe_rsp *rsp, char *msg, size_t len) +{ +        switch (rsp->op_errno) { +        case GF_PROBE_ANOTHER_CLUSTER: +                snprintf (msg, len, "%s is already part of another cluster", +                          rsp->hostname); +                break; +        case GF_PROBE_VOLUME_CONFLICT: +                snprintf (msg, len, "Atleast one volume on %s conflicts with " +                          "existing volumes in the cluster", rsp->hostname); +                break; +        case GF_PROBE_UNKNOWN_PEER: +                snprintf (msg, len, "%s responded with 'unknown peer' error, " +                          "this could happen if %s doesn't have localhost in " +                          "its peer database", rsp->hostname, rsp->hostname); +                break; +        case GF_PROBE_ADD_FAILED: +                snprintf (msg, len, "Failed to add peer information on %s" , +                          rsp->hostname); +                break; +        case GF_PROBE_SAME_UUID: +                snprintf (msg, len, "Peer uuid (host %s) is same as local uuid", +                          rsp->hostname); +                break; +        case GF_PROBE_QUORUM_NOT_MET: +                snprintf (msg, len, "Cluster quorum is not met. Changing " +                          "peers is not allowed in this state"); +                break; +        default: +                snprintf (msg, len, "Probe returned with unknown " +                          "errno %d", rsp->op_errno); +                break; +        } +} +  int  gf_cli_probe_cbk (struct rpc_req *req, struct iovec *iov,                          int count, void *myframe) @@ -133,47 +169,7 @@ gf_cli_probe_cbk (struct rpc_req *req, struct iovec *iov,                  if (rsp.op_errstr && (strlen (rsp.op_errstr) > 0)) {                          snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);                  } else { -                        switch (rsp.op_errno) { -                                case GF_PROBE_ANOTHER_CLUSTER: -                                        snprintf (msg, sizeof (msg), -                                                  "%s is already part of " -                                                  "another cluster", -                                                  rsp.hostname); -                                        break; -                                case GF_PROBE_VOLUME_CONFLICT: -                                        snprintf (msg, sizeof (msg), -                                                  "Atleast one volume on %s " -                                                  "conflicts with existing " -                                                  "volumes in the cluster", -                                                  rsp.hostname); -                                        break; -                                case GF_PROBE_UNKNOWN_PEER: -                                        snprintf (msg, sizeof (msg), -                                                  "%s responded with 'unknown " -                                                  "peer' error, this could " -                                                  "happen if %s doesn't have " -                                                  "localhost in its peer " -                                                  "database", rsp.hostname, -                                                  rsp.hostname); -                                        break; -                                case GF_PROBE_ADD_FAILED: -                                        snprintf (msg, sizeof (msg), -                                                  "Failed to add peer " -                                                  "information on %s" , -                                                  rsp.hostname); -                                        break; -                                case GF_PROBE_SAME_UUID: -                                        snprintf (msg, sizeof (msg), -                                                  "Peer uuid (host %s) is" -                                                  "same as local uuid", -                                                  rsp.hostname); -                                break; -                                default: -                                        snprintf (msg, sizeof (msg), -                                                  "Probe returned with unknown " -                                                  "errno %d", rsp.op_errno); -                                        break; -                        } +                        gf_cli_probe_strerror (&rsp, msg, sizeof (msg));                  }                  gf_log ("cli", GF_LOG_ERROR, "%s", msg);          } @@ -248,6 +244,12 @@ gf_cli_deprobe_cbk (struct rpc_req *req, struct iovec *iov,                                                    " down. Check with 'peer "                                                    "status'.");                                          break; +                                case GF_DEPROBE_QUORUM_NOT_MET: +                                        snprintf (msg, sizeof (msg), "Cluster " +                                                  "quorum is not met. Changing " +                                                  "peers is not allowed in this" +                                                  " state"); +                                        break;                                  default:                                          snprintf (msg, sizeof (msg),                                                    "Detach returned with unknown" diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 899844cd44c..0cb7d0a148e 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -96,7 +96,8 @@ enum gf_probe_resp {          GF_PROBE_VOLUME_CONFLICT,          GF_PROBE_SAME_UUID,          GF_PROBE_UNKNOWN_PEER, -        GF_PROBE_ADD_FAILED +        GF_PROBE_ADD_FAILED, +        GF_PROBE_QUORUM_NOT_MET  };  enum gf_deprobe_resp { @@ -104,7 +105,8 @@ enum gf_deprobe_resp {          GF_DEPROBE_LOCALHOST,          GF_DEPROBE_NOT_FRIEND,          GF_DEPROBE_BRICK_EXIST, -        GF_DEPROBE_FRIEND_DOWN +        GF_DEPROBE_FRIEND_DOWN, +        GF_DEPROBE_QUORUM_NOT_MET,  };  enum gf_cbk_procnum { diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 7cdad10e219..f33d2caeb56 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -280,6 +280,8 @@ _build_option_key (dict_t *d, char *k, data_t *v, void *tmp)          int                     ret               = -1;          pack = tmp; +        if (strcmp (k, GLUSTERD_GLOBAL_OPT_VERSION) == 0) +                return 0;          snprintf (reconfig_key, 256, "volume%d.option.%s",                    pack->vol_count, k);          ret = dict_set_str (pack->dict, reconfig_key, v->data); @@ -303,12 +305,14 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,          glusterd_conf_t         *priv = NULL;          char                    *volume_id_str  = NULL;          struct args_pack        pack = {0,}; +        xlator_t                *this = NULL;          GF_ASSERT (volinfo);          GF_ASSERT (volumes); -        priv = THIS->private; +        this = THIS; +        priv = this->private;          GF_ASSERT (priv); @@ -388,6 +392,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,          pack.vol_count = count;          pack.opt_count = 0;          dict_foreach (dict, _build_option_key, (void *) &pack); +        dict_foreach (priv->opts, _build_option_key, &pack);          snprintf (key, 256, "volume%d.opt_count", pack.vol_count);          ret = dict_set_int32 (volumes, key, pack.opt_count); @@ -445,7 +450,6 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx)          GF_ASSERT (priv);          ret = glusterd_lock (MY_UUID); -          if (ret) {                  gf_log (this->name, GF_LOG_ERROR,                          "Unable to acquire local lock, ret: %d", ret); @@ -660,15 +664,30 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req)          gf1_cli_probe_req               cli_req = {0,};          glusterd_peerinfo_t             *peerinfo = NULL;          gf_boolean_t                    run_fsm = _gf_true; +        xlator_t                        *this = NULL; +          GF_ASSERT (req); +        this = THIS; -        if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf1_cli_probe_req)) { +        if (!xdr_to_generic (req->msg[0], &cli_req, +                             (xdrproc_t)xdr_gf1_cli_probe_req)) {                  //failed to decode msg;                  gf_log ("", GF_LOG_ERROR, "xdr decoding error");                  req->rpc_err = GARBAGE_ARGS;                  goto out;          } +        if (glusterd_is_any_volume_in_server_quorum (this) && +            !does_gd_meet_server_quorum (this)) { +                glusterd_xfer_cli_probe_resp (req, -1, GF_PROBE_QUORUM_NOT_MET, +                                              NULL, +                                              cli_req.hostname, cli_req.port); +                gf_log (this->name, GF_LOG_ERROR, "Quorum does not meet, " +                        "rejecting operation"); +                ret = 0; +                goto out; +        } +          gf_cmd_log ("peer probe", " on host %s:%d", cli_req.hostname,                      cli_req.port);          gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d", @@ -684,8 +703,9 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req)                                           &peerinfo))) {                  if (strcmp (peerinfo->hostname, cli_req.hostname) == 0) { -                        gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port %d" -                               " already a peer", cli_req.hostname, cli_req.port); +                        gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port " +                                "%d already a peer", cli_req.hostname, +                                cli_req.port);                          glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND,                                                        NULL, cli_req.hostname,                                                        cli_req.port); @@ -694,8 +714,8 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req)          }          ret = glusterd_probe_begin (req, cli_req.hostname, cli_req.port); -        gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, cli_req.port, -                    (ret) ? "FAILED" : "SUCCESS"); +        gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, +                    cli_req.port, (ret) ? "FAILED" : "SUCCESS");          if (ret == GLUSTERD_CONNECTION_AWAITED) {                  //fsm should be run after connection establishes @@ -717,7 +737,7 @@ int  glusterd_handle_cli_deprobe (rpcsvc_request_t *req)  {          int32_t                         ret = -1; -        gf1_cli_deprobe_req               cli_req = {0,}; +        gf1_cli_deprobe_req             cli_req = {0,};          uuid_t                          uuid = {0};          int                             op_errno = 0;          xlator_t                        *this = NULL; @@ -750,18 +770,29 @@ glusterd_handle_cli_deprobe (rpcsvc_request_t *req)                  goto out;          } -        if (!uuid_is_null (uuid) && !(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) { -                /* Check if peers are connected, except peer being detached*/ -                if (!glusterd_chk_peers_connected_befriended (uuid)) { -                        ret = -1; -                        op_errno = GF_DEPROBE_FRIEND_DOWN; -                        goto out; +        if (!(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) { +                if (!uuid_is_null (uuid)) { +                        /* Check if peers are connected, except peer being detached*/ +                        if (!glusterd_chk_peers_connected_befriended (uuid)) { +                                ret = -1; +                                op_errno = GF_DEPROBE_FRIEND_DOWN; +                                goto out; +                        } +                        ret = glusterd_all_volume_cond_check ( +                                                 glusterd_friend_brick_belongs, +                                                 -1, &uuid); +                        if (ret) { +                                op_errno = GF_DEPROBE_BRICK_EXIST; +                                goto out; +                        }                  } -                ret = glusterd_all_volume_cond_check ( -                                                glusterd_friend_brick_belongs, -                                                -1, &uuid); -                if (ret) { -                        op_errno = GF_DEPROBE_BRICK_EXIST; + +                if (glusterd_is_any_volume_in_server_quorum (this) && +                    !does_gd_meet_server_quorum (this)) { +                        gf_log (this->name, GF_LOG_ERROR, "Quorum does not " +                                "meet, rejecting operation"); +                        ret = -1; +                        op_errno = GF_DEPROBE_QUORUM_NOT_MET;                          goto out;                  }          } @@ -2146,6 +2177,43 @@ out:  }  int +glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, +                            glusterd_peerctx_args_t *args) +{ +        dict_t                 *options = NULL; +        int                    ret = -1; +        glusterd_peerctx_t     *peerctx = NULL; + +        peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); +        if (!peerctx) +                goto out; + +        if (args) +                peerctx->args = *args; + +        peerctx->peerinfo = peerinfo; + +        ret = glusterd_transport_inet_options_build (&options, +                                                     peerinfo->hostname, +                                                     peerinfo->port); +        if (ret) +                goto out; + +        ret = glusterd_rpc_create (&peerinfo->rpc, options, +                                   glusterd_peer_rpc_notify, peerctx); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for" +                        " peer %s", peerinfo->hostname); +                goto out; +        } +        peerctx = NULL; +        ret = 0; +out: +        GF_FREE (peerctx); +        return ret; +} + +int  glusterd_friend_add (const char *hoststr, int port,                       glusterd_friend_sm_state_t state,                       uuid_t *uuid, @@ -2156,8 +2224,6 @@ glusterd_friend_add (const char *hoststr, int port,          int                     ret = 0;          xlator_t               *this = NULL;          glusterd_conf_t        *conf = NULL; -        glusterd_peerctx_t     *peerctx = NULL; -        dict_t                 *options = NULL;          gf_boolean_t            handover = _gf_false;          this = THIS; @@ -2165,49 +2231,35 @@ glusterd_friend_add (const char *hoststr, int port,          GF_ASSERT (conf);          GF_ASSERT (hoststr); -        peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); -        if (!peerctx) { -                ret = -1; -                goto out; -        } - -        if (args) -                peerctx->args = *args; - -        ret = glusterd_peerinfo_new (friend, state, uuid, hoststr); -        if (ret) -                goto out; - -        peerctx->peerinfo = *friend; - -        ret = glusterd_transport_inet_options_build (&options, hoststr, port); +        ret = glusterd_peerinfo_new (friend, state, uuid, hoststr, port);          if (ret)                  goto out; -        if (!restore) { -                ret = glusterd_store_peerinfo (*friend); -                if (ret) { -                        gf_log (this->name, GF_LOG_ERROR, "Failed to store " -                                "peerinfo"); +        //restore needs to first create the list of peers, then create rpcs +        //to keep track of quorum in race-free manner. In restore for each peer +        //rpc-create calls rpc_notify when the friend-list is partially +        //constructed, leading to wrong quorum calculations. +        if (restore) +                goto done; -                        goto out; -                } -        } -        list_add_tail (&(*friend)->uuid_list, &conf->peers); -        ret = glusterd_rpc_create (&(*friend)->rpc, options, -                                   glusterd_peer_rpc_notify, -                                   peerctx); +        ret = glusterd_store_peerinfo (*friend);          if (ret) { -                gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for" -                        " peer %s", (char*)hoststr); +                gf_log (this->name, GF_LOG_ERROR, "Failed to store " +                        "peerinfo"); +                  goto out;          } +        ret = glusterd_friend_rpc_create (this, *friend, args); +        if (ret) +                goto out; +done: +        list_add_tail (&(*friend)->uuid_list, &conf->peers);          handover = _gf_true;  out:          if (ret && !handover) { -                        (void) glusterd_friend_cleanup (*friend); -                        *friend = NULL; +                (void) glusterd_friend_cleanup (*friend); +                *friend = NULL;          }          gf_log (this->name, GF_LOG_INFO, "connect returned %d", ret); @@ -2866,6 +2918,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,          glusterd_peerctx_t   *peerctx     = NULL;          uuid_t                owner       = {0,};          uuid_t               *peer_uuid   = NULL; +        gf_boolean_t         quorum_action = _gf_false;          peerctx = mydata;          if (!peerctx) @@ -2880,6 +2933,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,          {                  gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");                  peerinfo->connected = 1; +                peerinfo->quorum_action = _gf_true;                  ret = glusterd_peer_dump_version (this, rpc, peerctx);                  if (ret) @@ -2892,6 +2946,14 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,                  gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d",                          peerinfo->state.state); +                if ((peerinfo->quorum_contrib != QUORUM_DOWN) && +                    (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { +                        peerinfo->quorum_contrib = QUORUM_DOWN; +                        quorum_action = _gf_true; +                        peerinfo->quorum_action = _gf_false; +                } +                peerinfo->connected = 0; +                  /*                    local glusterd (thinks that it) is the owner of the cluster                    lock and 'fails' the operation on the first disconnect from @@ -2944,6 +3006,8 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,          glusterd_friend_sm ();          glusterd_op_sm (); +        if (quorum_action) +                glusterd_do_quorum_action ();          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 40f38f5bbba..d5686bbab28 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -46,6 +46,24 @@  #include <signal.h>  #include <sys/wait.h> +#define ALL_VOLUME_OPTION_CHECK(volname, key, ret, op_errstr, label)           \ +        do {                                                                   \ +                gf_boolean_t    _all = !strcmp ("all", volname);               \ +                gf_boolean_t    _ratio = !strcmp (key,                         \ +                                                  GLUSTERD_QUORUM_RATIO_KEY);  \ +                if (_all && !_ratio) {                                         \ +                        ret = -1;                                              \ +                        *op_errstr = gf_strdup ("Not a valid option for all "  \ +                                                "volumes");                    \ +                        goto label;                                            \ +                } else if (!_all && _ratio) {                                  \ +                        ret = -1;                                              \ +                        *op_errstr = gf_strdup ("Not a valid option for "      \ +                                                "single volume");              \ +                        goto label;                                            \ +                }                                                              \ +         } while (0) +  static struct list_head gd_op_sm_queue;  pthread_mutex_t       gd_op_sm_lock;  glusterd_op_info_t    opinfo = {{0},}; @@ -291,6 +309,24 @@ out:  }  static int +glusterd_validate_quorum_options (xlator_t *this, char *fullkey, char *value, +                                  char **op_errstr) +{ +        int             ret = 0; +        char            *key = NULL; +        volume_option_t *opt = NULL; + +        if (!glusterd_is_quorum_option (fullkey)) +                goto out; +        key = strchr (fullkey, '.'); +        key++; +        opt = xlator_volume_option_get (this, key); +        ret = xlator_option_validate (this, key, value, opt, op_errstr); +out: +        return ret; +} + +static int  glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)  {          int                             ret                     = -1; @@ -315,6 +351,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)          uint32_t                        local_key_op_version    = 0;          gf_boolean_t                    origin_glusterd         = _gf_true;          gf_boolean_t                    check_op_version        = _gf_true; +        gf_boolean_t                    all_vol        = _gf_false;          GF_ASSERT (dict);          this = THIS; @@ -399,26 +436,30 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)                  goto out;          } -        exists = glusterd_check_volume_exists (volname); -        if (!exists) { -                snprintf (errstr, sizeof (errstr), "Volume %s does not exist", -                          volname); -                gf_log (this->name, GF_LOG_ERROR, "%s", errstr); -                *op_errstr = gf_strdup (errstr); -                ret = -1; -                goto out; -        } +        if (strcasecmp (volname, "all") != 0) { +                exists = glusterd_check_volume_exists (volname); +                if (!exists) { +                        snprintf (errstr, sizeof (errstr), "Volume %s does " +                                  "not exist", volname); +                        gf_log (this->name, GF_LOG_ERROR, "%s", errstr); +                        *op_errstr = gf_strdup (errstr); +                        ret = -1; +                        goto out; +                } -        ret = glusterd_volinfo_find (volname, &volinfo); -        if (ret) { -                gf_log (this->name, GF_LOG_ERROR, -                        "Unable to allocate memory"); -                goto out; -        } +                ret = glusterd_volinfo_find (volname, &volinfo); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "Unable to allocate memory"); +                        goto out; +                } -        ret = glusterd_validate_volume_id (dict, volinfo); -        if (ret) -                goto out; +                ret = glusterd_validate_volume_id (dict, volinfo); +                if (ret) +                        goto out; +        } else { +                all_vol = _gf_true; +        }          local_new_op_version = priv->op_version; @@ -473,6 +514,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)                          ret = -1;                          goto out;                  } +                  if (!exists) {                          gf_log (this->name, GF_LOG_ERROR,                                  "Option with name: %s " @@ -490,6 +532,11 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)                  if (key_fixed)                          key = key_fixed; +                ALL_VOLUME_OPTION_CHECK (volname, key, ret, op_errstr, out); +                ret = glusterd_validate_quorum_options (this, key, value, +                                                        op_errstr); +                if (ret) +                        goto out;                  local_key_op_version = glusterd_get_op_version_for_key (key);                  if (local_key_op_version > local_new_op_version) @@ -539,9 +586,9 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)                  }                  *op_errstr = NULL; -                if (!global_opt) +                if (!global_opt && !all_vol)                          ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr); -                else { +                else if (!all_vol) {                          voliter = NULL;                          list_for_each_entry (voliter, &priv->volumes, vol_list) {                                  ret = glusterd_validate_globalopts (voliter, val_dict, op_errstr); @@ -626,23 +673,24 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr)                  goto out;          } -        exists = glusterd_check_volume_exists (volname); +        if (strcasecmp (volname, "all") != 0) { +                exists = glusterd_check_volume_exists (volname); +                if (!exists) { +                        snprintf (msg, sizeof (msg), "Volume %s does not " +                                  "exist", volname); +                        gf_log ("", GF_LOG_ERROR, "%s", msg); +                        *op_errstr = gf_strdup (msg); +                        ret = -1; +                        goto out; +                } +                ret = glusterd_volinfo_find (volname, &volinfo); +                if (ret) +                        goto out; -        if (!exists) { -                snprintf (msg, sizeof (msg), "Volume %s does not " -                          "exist", volname); -                gf_log ("", GF_LOG_ERROR, "%s", msg); -                *op_errstr = gf_strdup (msg); -                ret = -1; -                goto out; +                ret = glusterd_validate_volume_id (dict, volinfo); +                if (ret) +                        goto out;          } -        ret = glusterd_volinfo_find (volname, &volinfo); -        if (ret) -                goto out; - -        ret = glusterd_validate_volume_id (dict, volinfo); -        if (ret) -                goto out;          ret = dict_get_str (dict, "key", &key);          if (ret) { @@ -666,6 +714,11 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr)                          *op_errstr = gf_strdup (msg);                          ret = -1;                          goto out; +                } else if (exists > 0) { +                        if (key_fixed) +                                key = key_fixed; +                        ALL_VOLUME_OPTION_CHECK (volname, key, ret, +                                                 op_errstr, out);                  }          } @@ -993,6 +1046,22 @@ out:  }  static int +_delete_reconfig_global_opt (dict_t *this, char *key, data_t *value, void *data) +{ +        int32_t        *is_force = 0; + +        GF_ASSERT (data); +        is_force = (int32_t*)data; + +        if (strcmp (GLUSTERD_GLOBAL_OPT_VERSION, key) == 0) +                goto out; + +        _delete_reconfig_opt (this, key, value, data); +out: +        return 0; +} + +static int  glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key,                          int32_t *is_force)  { @@ -1008,15 +1077,6 @@ glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key,          if (!strncmp(key, "all", 3))                  dict_foreach (volinfo->dict, _delete_reconfig_opt, is_force);          else { -                if (glusterd_check_option_exists (key, &key_fixed) != 1) { -                        gf_log ("glusterd", GF_LOG_ERROR, -                                "volinfo dict inconsistency: option %s not found", -                                key); -                        ret = -1; -                        goto out; -                } -                if (key_fixed) -                        key = key_fixed;                  value = dict_get (volinfo->dict, key);                  if (!value) {                          gf_log ("glusterd", GF_LOG_DEBUG, @@ -1053,6 +1113,91 @@ out:          return ret;  } +static int +glusterd_op_reset_all_volume_options (xlator_t *this, dict_t *dict) +{ +        char            *key            = NULL; +        char            *key_fixed      = NULL; +        int             ret             = -1; +        int32_t         is_force        = 0; +        glusterd_conf_t *conf           = NULL; +        dict_t          *dup_opt        = NULL; +        gf_boolean_t    all             = _gf_false; +        char            *next_version   = NULL; +        gf_boolean_t    quorum_action   = _gf_false; + +        conf = this->private; +        ret = dict_get_str (dict, "key", &key); +        if (ret) +                goto out; + +        ret = dict_get_int32 (dict, "force", &is_force); +        if (ret) +                is_force = 0; + +        if (strcmp (key, "all")) { +                ret = glusterd_check_option_exists (key, &key_fixed); +                if (ret <= 0) { +                        gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", +                                key); +                        ret = -1; +                        goto out; +                } +        } else { +                all = _gf_true; +        } + +        if (key_fixed) +                key = key_fixed; + +        ret = -1; +        dup_opt = dict_new (); +        if (!dup_opt) +                goto out; +        if (!all) { +                dict_copy (conf->opts, dup_opt); +                dict_del (dup_opt, key); +        } +        ret = glusterd_get_next_global_opt_version_str (conf->opts, +                                                        &next_version); +        if (ret) +                goto out; + +        ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version); +        if (ret) +                goto out; + +        ret = glusterd_store_options (this, dup_opt); +        if (ret) +                goto out; + +        if (glusterd_is_quorum_changed (conf->opts, key, NULL)) +                quorum_action = _gf_true; + +        ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, +                               next_version); +        if (ret) +                goto out; +        else +                next_version = NULL; + +        if (!all) { +                dict_del (conf->opts, key); +        } else { +                dict_foreach (conf->opts, _delete_reconfig_global_opt, +                              &is_force); +        } +out: +        GF_FREE (key_fixed); +        if (dup_opt) +                dict_unref (dup_opt); + +        gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); +        if (quorum_action) +                glusterd_do_quorum_action (); +        GF_FREE (next_version); +        return ret; +}  static int  glusterd_op_reset_volume (dict_t *dict, char **op_errstr) @@ -1061,14 +1206,23 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr)          int                     ret         = -1;          char                    *volname    = NULL;          char                    *key        = NULL; +        char                    *key_fixed  = NULL;          int32_t                 is_force    = 0; +        gf_boolean_t            quorum_action = _gf_false; +        xlator_t                *this         = NULL; +        this = THIS;          ret = dict_get_str (dict, "volname", &volname);          if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to get volume name " );                  goto out;          } +        if (strcasecmp (volname, "all") == 0) { +                ret = glusterd_op_reset_all_volume_options (this, dict); +                goto out; +        } +          ret = dict_get_int32 (dict, "force", &is_force);          if (ret)                  is_force = 0; @@ -1085,6 +1239,20 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr)                  goto out;          } +        if (strcmp (key, "all") && +            glusterd_check_option_exists (key, &key_fixed) != 1) { +                gf_log ("glusterd", GF_LOG_ERROR, +                        "volinfo dict inconsistency: option %s not found", +                        key); +                ret = -1; +                goto out; +        } +        if (key_fixed) +                key = key_fixed; + +        if (glusterd_is_quorum_changed (volinfo->dict, key, NULL)) +                quorum_action = _gf_true; +          ret = glusterd_options_reset (volinfo, key, &is_force);          if (is_force == -1) {                  ret = -1; @@ -1093,12 +1261,14 @@ glusterd_op_reset_volume (dict_t *dict, char **op_errstr)          }  out: +        GF_FREE (key_fixed); +        if (quorum_action) +                glusterd_do_quorum_action (); +          gf_log ("", GF_LOG_DEBUG, "'volume reset' returning %d", ret);          return ret; -  } -  int  glusterd_stop_bricks (glusterd_volinfo_t *volinfo)  { @@ -1128,6 +1298,91 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)  }  static int +glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict) +{ +        char            *key            = NULL; +        char            *key_fixed      = NULL; +        char            *value          = NULL; +        char            *dup_value      = NULL; +        int             ret             = -1; +        glusterd_conf_t *conf           = NULL; +        dict_t          *dup_opt        = NULL; +        char            *next_version   = NULL; +        gf_boolean_t    quorum_action   = _gf_false; + +        conf = this->private; +        ret = dict_get_str (dict, "key1", &key); +        if (ret) +                goto out; + +        ret = dict_get_str (dict, "value1", &value); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "invalid key,value pair in 'volume set'"); +                goto out; +        } +        ret = glusterd_check_option_exists (key, &key_fixed); +        if (ret <= 0) { +                gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", key); +                ret = -1; +                goto out; +        } + +        if (key_fixed) +                key = key_fixed; + +        ret = -1; +        dup_opt = dict_new (); +        if (!dup_opt) +                goto out; +        dict_copy (conf->opts, dup_opt); +        ret = dict_set_str (dup_opt, key, value); +        if (ret) +                goto out; + +        ret = glusterd_get_next_global_opt_version_str (conf->opts, +                                                        &next_version); +        if (ret) +                goto out; + +        ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version); +        if (ret) +                goto out; + +        dup_value = gf_strdup (value); +        if (!dup_value) +                goto out; + +        ret = glusterd_store_options (this, dup_opt); +        if (ret) +                goto out; + +        if (glusterd_is_quorum_changed (conf->opts, key, value)) +                quorum_action = _gf_true; + +        ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, +                               next_version); +        if (ret) +                goto out; +        else +                next_version = NULL; + +        ret = dict_set_dynstr (conf->opts, key, dup_value); +        if (ret) +                goto out; +out: +        GF_FREE (key_fixed); +        if (dup_opt) +                dict_unref (dup_opt); + +        gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); +        if (quorum_action) +                glusterd_do_quorum_action (); +        GF_FREE (next_version); +        return ret; +} + +static int  glusterd_op_set_volume (dict_t *dict)  {          int                                      ret = 0; @@ -1146,7 +1401,7 @@ glusterd_op_set_volume (dict_t *dict)          int32_t                                  dict_count = 0;          gf_boolean_t                             check_op_version = _gf_false;          uint32_t                                 new_op_version = 0; - +        gf_boolean_t                            quorum_action  = _gf_false;          this = THIS;          GF_ASSERT (this); @@ -1176,6 +1431,11 @@ glusterd_op_set_volume (dict_t *dict)                  goto out;          } +        if (strcasecmp (volname, "all") == 0) { +                ret = glusterd_op_set_all_volume_options (this, dict); +                goto out; +        } +          ret = glusterd_volinfo_find (volname, &volinfo);          if (ret) {                  gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); @@ -1242,7 +1502,6 @@ glusterd_op_set_volume (dict_t *dict)                                  key_fixed = NULL;                                  goto out;                          } -                  }                  if (glusterd_check_globaloption (key)) @@ -1261,6 +1520,9 @@ glusterd_op_set_volume (dict_t *dict)                  if (key_fixed)                          key = key_fixed; +                if (glusterd_is_quorum_changed (volinfo->dict, key, value)) +                        quorum_action = _gf_true; +                  if (global_opt) {                         list_for_each_entry (voliter, &priv->volumes, vol_list) {                                 value = gf_strdup (value); @@ -1350,6 +1612,8 @@ glusterd_op_set_volume (dict_t *dict)   out:          GF_FREE (key_fixed);          gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); +        if (quorum_action) +                glusterd_do_quorum_action ();          return ret;  } @@ -1875,20 +2139,26 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx)  static int  glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx)  { -        int ret = 0; +        int                      ret = 0;          glusterd_op_lock_ctx_t   *lock_ctx = NULL; +        xlator_t                 *this = NULL; +        glusterd_conf_t          *priv = NULL;          GF_ASSERT (event);          GF_ASSERT (ctx); +        this = THIS; +        priv = this->private;          lock_ctx = (glusterd_op_lock_ctx_t *)ctx;          ret = glusterd_unlock (lock_ctx->uuid); -        gf_log ("", GF_LOG_DEBUG, "Unlock Returned %d", ret); +        gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret);          glusterd_op_unlock_send_resp (lock_ctx->req, ret); +        if (priv->pending_quorum_action) +                glusterd_do_quorum_action ();          return ret;  } @@ -2042,7 +2312,8 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)                                          goto out;                                  }                                  if (strcmp (volname, "help") && -                                    strcmp (volname, "help-xml")) { +                                    strcmp (volname, "help-xml") && +                                    strcasecmp (volname, "all")) {                                          ret = glusterd_dict_set_volid                                                   (dict, volname, op_errstr);                                          if (ret) @@ -2095,10 +2366,13 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)                                          goto out;                                  } -                                ret = glusterd_dict_set_volid (dict, volname, -                                                               op_errstr); -                                if (ret) -                                        goto out; +                                if (strcasecmp (volname, "all")) { +                                        ret = glusterd_dict_set_volid (dict, +                                                                       volname, +                                                                     op_errstr); +                                        if (ret) +                                                goto out; +                                }                                  dict_copy (dict, req_dict);                          }                          break; @@ -2114,6 +2388,105 @@ out:          return ret;  } +gf_boolean_t +glusterd_is_get_op (xlator_t *this, glusterd_op_t op, dict_t *dict) +{ +        char            *key = NULL; +        char            *volname = NULL; +        int             ret = 0; + +        if (op == GD_OP_STATUS_VOLUME) +                return _gf_true; + +        if ((op == GD_OP_SET_VOLUME)) { +                //check for set volume help +                ret = dict_get_str (dict, "volname", &volname); +                if (volname && +                    ((strcmp (volname, "help") == 0) || +                     (strcmp (volname, "help-xml") == 0))) { +                        ret = dict_get_str (dict, "key1", &key); +                        if (ret < 0) +                                return _gf_true; +                } +        } + +        return _gf_false; +} + +gf_boolean_t +glusterd_is_op_quorum_validation_required (xlator_t *this, glusterd_op_t op, +                                           dict_t *dict) +{ +        gf_boolean_t    required = _gf_true; +        char            *key = NULL; +        char            *key_fixed = NULL; +        int             ret = -1; + +        if (glusterd_is_get_op (this, op, dict)) { +                required = _gf_false; +                goto out; +        } +        if ((op != GD_OP_SET_VOLUME) && (op != GD_OP_RESET_VOLUME)) +                goto out; +        if (op == GD_OP_SET_VOLUME) +                ret = dict_get_str (dict, "key1", &key); +        else if (op == GD_OP_RESET_VOLUME) +                ret = dict_get_str (dict, "key", &key); +        if (ret) +                goto out; +        ret = glusterd_check_option_exists (key, &key_fixed); +        if (ret <= 0) +                goto out; +        if (key_fixed) +                key = key_fixed; +        if (glusterd_is_quorum_option (key)) +                required = _gf_false; +out: +        GF_FREE (key_fixed); +        return required; +} + +static int +glusterd_op_validate_quorum (xlator_t *this, glusterd_op_t op, +                             dict_t *dict, char **op_errstr) +{ +        int                     ret = 0; +        char                    *volname = NULL; +        glusterd_volinfo_t      *volinfo = NULL; +        char                    *errstr = NULL; + + +        errstr = "Quorum not met. Volume operation not allowed."; +        if (!glusterd_is_op_quorum_validation_required (this, op, dict)) +                goto out; + +        ret = dict_get_str (dict, "volname", &volname); +        if (ret) { +                ret = 0; +                goto out; +        } + +        ret = glusterd_volinfo_find (volname, &volinfo); +        if (ret) { +                ret = 0; +                goto out; +        } + +        if (does_gd_meet_server_quorum (this)) { +                ret = 0; +                goto out; +        } + +        if (glusterd_is_volume_in_server_quorum (volinfo)) { +                ret = -1; +                *op_errstr = gf_strdup (errstr); +                goto out; +        } +        ret = 0; +out: +        return ret; +} +  static int  glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)  { @@ -2141,6 +2514,13 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)                  goto out;          } +        ret = glusterd_op_validate_quorum (this, op, dict, &op_errstr); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, op_errstr); +                opinfo.op_errstr = op_errstr; +                goto out; +        } +          /* rsp_dict NULL from source */          ret = glusterd_op_stage_validate (op, dict, &op_errstr, NULL);          if (ret) { @@ -2817,6 +3197,8 @@ glusterd_op_txn_complete ()                  GF_FREE (op_errstr); +        if (priv->pending_quorum_action) +                glusterd_do_quorum_action ();          gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index a60eb53e554..5a38fdfecb5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -959,6 +959,27 @@ glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event)          }  } +gf_boolean_t +gd_does_peer_affect_quorum (glusterd_friend_sm_state_t old_state, +                            glusterd_friend_sm_event_type_t event_type, +                            glusterd_peerinfo_t *peerinfo) +{ +        gf_boolean_t    affects = _gf_false; + +        //When glusterd comes up with friends in BEFRIENDED state in store, +        //wait until compare-data happens. +        if ((old_state == GD_FRIEND_STATE_BEFRIENDED) && +            (event_type != GD_FRIEND_EVENT_RCVD_ACC) && +            (event_type != GD_FRIEND_EVENT_LOCAL_ACC)) +                goto out; +        if ((peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED) +            && peerinfo->connected) { +                affects = _gf_true; +        } +out: +        return affects; +} +  int  glusterd_friend_sm ()  { @@ -970,6 +991,8 @@ glusterd_friend_sm ()          glusterd_peerinfo_t             *peerinfo   = NULL;          glusterd_friend_sm_event_type_t  event_type = 0;          gf_boolean_t                     is_await_conn = _gf_false; +        gf_boolean_t                     quorum_action = _gf_false; +        glusterd_friend_sm_state_t       old_state = GD_FRIEND_STATE_DEFAULT;          while (!list_empty (&gd_friend_sm_queue)) {                  list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue, list) { @@ -989,6 +1012,7 @@ glusterd_friend_sm ()                                  glusterd_friend_sm_event_name_get (event_type)); +                        old_state = peerinfo->state.state;                          state = glusterd_friend_state_table[peerinfo->state.state];                          GF_ASSERT (state); @@ -1029,6 +1053,15 @@ glusterd_friend_sm ()                                  goto out;                          } +                        if (gd_does_peer_affect_quorum (old_state, event_type, +                                                        peerinfo)) { +                                peerinfo->quorum_contrib = QUORUM_UP; +                                if (peerinfo->quorum_action) { +                                        peerinfo->quorum_action = _gf_false; +                                        quorum_action = _gf_true; +                                } +                        } +                          ret = glusterd_store_peerinfo (peerinfo);                          glusterd_destroy_friend_event_context (event); @@ -1042,6 +1075,8 @@ glusterd_friend_sm ()          ret = 0;  out: +        if (quorum_action) +                glusterd_do_quorum_action ();          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index 011578da3d0..0af45deb651 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -36,6 +36,20 @@ struct glusterd_store_handle_ {  typedef struct glusterd_store_handle_  glusterd_store_handle_t; +typedef enum gd_quorum_contribution_ { +        QUORUM_NONE, +        QUORUM_WAITING, +        QUORUM_DOWN, +        QUORUM_UP +} gd_quorum_contrib_t; + +typedef enum gd_quorum_status_ { +        QUORUM_UNKNOWN, +        QUORUM_NOT_APPLICABLE, +        QUORUM_MEETS, +        QUORUM_DOES_NOT_MEET +} gd_quorum_status_t; +  typedef enum glusterd_friend_sm_state_ {          GD_FRIEND_STATE_DEFAULT = 0,          GD_FRIEND_STATE_REQ_SENT, @@ -91,6 +105,8 @@ struct glusterd_peerinfo_ {          int                             connected;          glusterd_store_handle_t         *shandle;          glusterd_sm_tr_log_t            sm_log; +        gf_boolean_t                    quorum_action; +        gd_quorum_contrib_t             quorum_contrib;  };  typedef struct glusterd_peerinfo_ glusterd_peerinfo_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 6fa490e47d5..471a24e6622 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -1908,10 +1908,13 @@ glusterd_store_iter_destroy (glusterd_store_iter_t *iter)  {          int32_t         ret = -1; -        GF_ASSERT (iter); -        GF_ASSERT (iter->fd > 0); +        if (!iter) +                return 0; -        ret = fclose (iter->file); +        if (iter->file) +                ret = fclose (iter->file); +        else +                ret = 0;          if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to close fd: %d, ret: %d, " @@ -2203,7 +2206,6 @@ glusterd_store_retrieve_node_state (char   *volname)          ret = glusterd_store_handle_retrieve (path,                                                &volinfo->node_state_shandle); -          if (ret)                  goto out; @@ -2215,6 +2217,7 @@ glusterd_store_retrieve_node_state (char   *volname)          ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno);          if (ret)                  goto out; +          if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG,                         strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) {                   volinfo->defrag_cmd = atoi (value); @@ -2440,6 +2443,102 @@ out:          return ret;  } +inline void +glusterd_store_set_options_path (glusterd_conf_t *conf, char *path, size_t len) +{ +        snprintf (path, len, "%s/options", conf->workdir); +} + +int +_store_global_opts (dict_t *this, char *key, data_t *value, void *data) +{ +        glusterd_store_handle_t *shandle = data; + +        glusterd_store_save_value (shandle->fd, key, (char*)value->data); +        return 0; +} + +int32_t +glusterd_store_options (xlator_t *this, dict_t *opts) +{ +        glusterd_store_handle_t         *shandle = NULL; +        glusterd_conf_t                 *conf = NULL; +        char                            path[PATH_MAX] = {0}; +        int                             fd = -1; +        int32_t                         ret = -1; + +        conf = this->private; +        glusterd_store_set_options_path (conf, path, sizeof (path)); + +        ret = glusterd_store_handle_new (path, &shandle); +        if (ret) +                goto out; + +        fd = glusterd_store_mkstemp (shandle); +        if (fd <= 0) { +                ret = -1; +                goto out; +        } + +        shandle->fd = fd; +        dict_foreach (opts, _store_global_opts, shandle); +        shandle->fd = 0; +        ret = glusterd_store_rename_tmppath (shandle); +        if (ret) +                goto out; +out: +        glusterd_store_handle_destroy (shandle); +        if (fd >=0 ) +                close (fd); +        return ret; +} + +int32_t +glusterd_store_retrieve_options (xlator_t *this) +{ +        char                            path[PATH_MAX] = {0}; +        glusterd_conf_t                 *conf = NULL; +        glusterd_store_handle_t         *shandle = NULL; +        glusterd_store_iter_t           *iter = NULL; +        char                            *key = NULL; +        char                            *value = NULL; +        glusterd_store_op_errno_t       op_errno = 0; +        int                             ret = -1; + +        conf = this->private; +        glusterd_store_set_options_path (conf, path, sizeof (path)); + +        ret = glusterd_store_handle_retrieve (path, &shandle); +        if (ret) +                goto out; + +        ret = glusterd_store_iter_new (shandle, &iter); +        if (ret) +                goto out; + +        ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); +        while (!ret) { +                ret = dict_set_dynstr (conf->opts, key, value); +                if (ret) { +                        GF_FREE (key); +                        GF_FREE (value); +                        goto out; +                } +                GF_FREE (key); +                key = NULL; +                value = NULL; + +                ret = glusterd_store_iter_get_next (iter, &key, &value, +                                                    &op_errno); +        } +        if (op_errno != GD_STORE_EOF) +                goto out; +        ret = 0; +out: +        glusterd_store_iter_destroy (iter); +        glusterd_store_handle_destroy (shandle); +        return ret; +}  int32_t  glusterd_store_retrieve_volumes (xlator_t  *this) @@ -2840,9 +2939,8 @@ glusterd_store_retrieve_peers (xlator_t *this)                  (void) glusterd_store_iter_destroy (iter); -                args.mode = GD_MODE_SWITCH_ON;                  ret = glusterd_friend_add (hostname, 0, state, &uuid, -                                           &peerinfo, 1, &args); +                                           &peerinfo, 1, NULL);                  GF_FREE (hostname);                  if (ret) @@ -2852,6 +2950,13 @@ glusterd_store_retrieve_peers (xlator_t *this)                  glusterd_for_each_entry (entry, dir);          } +        args.mode = GD_MODE_ON; +        list_for_each_entry (peerinfo, &priv->peers, uuid_list) { +                ret = glusterd_friend_rpc_create (this, peerinfo, &args); +                if (ret) +                        goto out; +        } +  out:          if (dir)                  closedir (dir); @@ -2905,7 +3010,6 @@ glusterd_restore ()                  goto out;          } -          ret = glusterd_store_retrieve_volumes (this);          if (ret)                  goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 1e9e599a429..d8c1567a661 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -138,4 +138,9 @@ glusterd_retrieve_op_version (xlator_t *this, int *op_version);  int  glusterd_store_global_info (xlator_t *this); +int32_t +glusterd_store_retrieve_options (xlator_t *this); + +int32_t +glusterd_store_options (xlator_t *this, dict_t *opts);  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 726aa3a1bb3..6c2870238bd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -68,6 +68,8 @@  #define NLMV4_VERSION       4  #define NLMV1_VERSION       1 +#define CEILING_POS(X) (((X)-(int)(X)) > 0 ? (int)((X)+1) : (int)(X)) +  char    *glusterd_sock_dir = "/var/run";  static glusterd_lock_t lock; @@ -939,7 +941,10 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)  {          GF_ASSERT (peerinfo);          glusterd_peerctx_t      *peerctx = NULL; +        gf_boolean_t            quorum_action = _gf_false; +        if (peerinfo->quorum_contrib != QUORUM_NONE) +                quorum_action = _gf_true;          if (peerinfo->rpc) {                  /* cleanup the saved-frames before last unref */                  rpc_clnt_connection_cleanup (&peerinfo->rpc->conn); @@ -955,6 +960,8 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)          }          glusterd_peer_destroy (peerinfo); +        if (quorum_action) +                glusterd_do_quorum_action ();          return 0;  } @@ -982,12 +989,10 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo)                  }          } -          gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);          return ret;  } -  int32_t  glusterd_service_stop (const char *service, char *pidfile, int sig,                         gf_boolean_t force_kill) @@ -1655,25 +1660,25 @@ out:  }  int -_add_volinfo_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data) +_add_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data)  { -        glusterd_voldict_ctx_t   *ctx = NULL; +        glusterd_dict_ctx_t     *ctx = NULL;          char                    optkey[512] = {0,};          int                     ret = -1;          ctx = data; -        snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count, +        snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix,                    ctx->key_name, ctx->opt_count);          ret = dict_set_str (ctx->dict, optkey, key);          if (ret)                  gf_log ("", GF_LOG_ERROR, "option add for %s%d %s", -                        ctx->key_name, ctx->count, key); -        snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count, +                        ctx->key_name, ctx->opt_count, key); +        snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix,                    ctx->val_name, ctx->opt_count);          ret = dict_set_str (ctx->dict, optkey, value->data);          if (ret)                  gf_log ("", GF_LOG_ERROR, "option add for %s%d %s", -                        ctx->val_name, ctx->count, value->data); +                        ctx->val_name, ctx->opt_count, value->data);          ctx->opt_count++;          return ret; @@ -1711,6 +1716,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,                               dict_t  *dict, int32_t count)  {          int32_t                 ret             = -1; +        char                    prefix[512]     = {0,};          char                    key[512]        = {0,};          glusterd_brickinfo_t    *brickinfo      = NULL;          int32_t                 i               = 1; @@ -1718,7 +1724,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,          char                    *src_brick      = NULL;          char                    *dst_brick      = NULL;          char                    *str            = NULL; -        glusterd_voldict_ctx_t   ctx            = {0}; +        glusterd_dict_ctx_t     ctx            = {0};          GF_ASSERT (dict);          GF_ASSERT (volinfo); @@ -1850,14 +1856,15 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,                          goto out;          } +        snprintf (prefix, sizeof (prefix), "volume%d", count);          ctx.dict = dict; -        ctx.count = count; +        ctx.prefix = prefix;          ctx.opt_count = 1;          ctx.key_name = "key";          ctx.val_name = "value";          GF_ASSERT (volinfo->dict); -        dict_foreach (volinfo->dict, _add_volinfo_dict_to_prdict, &ctx); +        dict_foreach (volinfo->dict, _add_dict_to_prdict, &ctx);          ctx.opt_count--;          memset (key, 0, sizeof (key));          snprintf (key, sizeof (key), "volume%d.opt-count", count); @@ -1866,13 +1873,13 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,                  goto out;          ctx.dict = dict; -        ctx.count = count; +        ctx.prefix = prefix;          ctx.opt_count = 1;          ctx.key_name = "slave-num";          ctx.val_name = "slave-val";          GF_ASSERT (volinfo->gsync_slaves); -        dict_foreach (volinfo->gsync_slaves, _add_volinfo_dict_to_prdict, &ctx); +        dict_foreach (volinfo->gsync_slaves, _add_dict_to_prdict, &ctx);          ctx.opt_count--;          memset (key, 0, sizeof (key)); @@ -1914,6 +1921,7 @@ glusterd_build_volume_dict (dict_t **vols)          glusterd_conf_t         *priv = NULL;          glusterd_volinfo_t      *volinfo = NULL;          int32_t                 count = 0; +        glusterd_dict_ctx_t     ctx            = {0};          priv = THIS->private; @@ -1934,6 +1942,17 @@ glusterd_build_volume_dict (dict_t **vols)          if (ret)                  goto out; +        ctx.dict = dict; +        ctx.prefix = "global"; +        ctx.opt_count = 1; +        ctx.key_name = "key"; +        ctx.val_name = "val"; +        dict_foreach (priv->opts, _add_dict_to_prdict, &ctx); +        ctx.opt_count--; +        ret = dict_set_int32 (dict, "global-opt-count", ctx.opt_count); +        if (ret) +                goto out; +          *vols = dict;  out:          gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -2015,8 +2034,8 @@ out:  }  static int32_t -import_prdict_volinfo_dict (dict_t *vols, dict_t  *dst_dict, char *key_prefix, -                            char *value_prefix, int opt_count, int count) +import_prdict_dict (dict_t *vols, dict_t  *dst_dict, char *key_prefix, +                    char *value_prefix, int opt_count, char *prefix)  {          char                    key[512] = {0,};          int32_t                 ret = 0; @@ -2028,8 +2047,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t  *dst_dict, char *key_prefix,          while (i <= opt_count) {                  memset (key, 0, sizeof (key)); -                snprintf (key, sizeof (key), "volume%d.%s%d", -                          count, key_prefix, i); +                snprintf (key, sizeof (key), "%s.%s%d", +                          prefix, key_prefix, i);                  ret = dict_get_str (vols, key, &opt_key);                  if (ret) {                          snprintf (msg, sizeof (msg), "Volume dict key not " @@ -2038,8 +2057,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t  *dst_dict, char *key_prefix,                  }                  memset (key, 0, sizeof (key)); -                snprintf (key, sizeof (key), "volume%d.%s%d", -                          count, value_prefix, i); +                snprintf (key, sizeof (key), "%s.%s%d", +                          prefix, value_prefix, i);                  ret = dict_get_str (vols, key, &opt_val);                  if (ret) {                          snprintf (msg, sizeof (msg), "Volume dict value not " @@ -2068,6 +2087,250 @@ out:  } +gf_boolean_t +glusterd_is_quorum_option (char *option) +{ +        gf_boolean_t    res = _gf_false; +        int             i = 0; +        char            *keys[] = {GLUSTERD_QUORUM_TYPE_KEY, +                                   GLUSTERD_QUORUM_RATIO_KEY, NULL}; + +        for (i = 0; keys[i]; i++) { +                if (strcmp (option, keys[i]) == 0) { +                        res = _gf_true; +                        break; +                } +        } +        return res; +} + +gf_boolean_t +glusterd_is_quorum_changed (dict_t *options, char *option, char *value) +{ +        int             ret = 0; +        gf_boolean_t    reconfigured = _gf_false; +        gf_boolean_t    all = _gf_false; +        char            *oldquorum = NULL; +        char            *newquorum = NULL; +        char            *oldratio = NULL; +        char            *newratio = NULL; + +        if ((strcmp ("all", option) != 0) && +            !glusterd_is_quorum_option (option)) +                goto out; + +        if (strcmp ("all", option) == 0) +                all = _gf_true; + +        if (all || (strcmp (GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) { +                newquorum = value; +                ret = dict_get_str (options, GLUSTERD_QUORUM_TYPE_KEY, +                                    &oldquorum); +        } + +        if (all || (strcmp (GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) { +                newratio = value; +                ret = dict_get_str (options, GLUSTERD_QUORUM_RATIO_KEY, +                                    &oldratio); +        } + +        reconfigured = _gf_true; + +        if (oldquorum && newquorum && (strcmp (oldquorum, newquorum) == 0)) +                reconfigured = _gf_false; +        if (oldratio && newratio && (strcmp (oldratio, newratio) == 0)) +                reconfigured = _gf_false; + +        if ((oldratio == NULL) && (newratio == NULL) && (oldquorum == NULL) && +            (newquorum == NULL)) +                reconfigured = _gf_false; +out: +        return reconfigured; +} + +static inline gf_boolean_t +_is_contributing_to_quorum (gd_quorum_contrib_t contrib) +{ +        if ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN)) +                return _gf_true; +        return _gf_false; +} + +static inline gf_boolean_t +_does_quorum_meet (int active_count, int quorum_count) +{ +        return (active_count >= quorum_count); +} + +int +glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, +                                    int *quorum_count) +{ +        glusterd_peerinfo_t *peerinfo      = NULL; +        glusterd_conf_t     *conf          = NULL; +        int                 ret            = -1; +        int                 inquorum_count = 0; +        char                *val           = NULL; +        double              quorum_percentage = 0.0; +        gf_boolean_t        ratio          = _gf_false; +        int                 count          = 0; + +        conf = this->private; +        //Start with counting self +        inquorum_count = 1; +        if (active_count) +                *active_count = 1; +        list_for_each_entry (peerinfo, &conf->peers, uuid_list) { +                if (peerinfo->quorum_contrib == QUORUM_WAITING) +                        goto out; + +                if (_is_contributing_to_quorum (peerinfo->quorum_contrib)) +                        inquorum_count = inquorum_count + 1; + +                if (active_count && (peerinfo->quorum_contrib == QUORUM_UP)) +                        *active_count = *active_count + 1; +        } + +        ret = dict_get_str (conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val); +        if (ret == 0) { +                ratio = _gf_true; +                ret = gf_string2percent (val, &quorum_percentage); +                if (!ret) +                        ratio = _gf_true; +        } +        if (ratio) +                count = CEILING_POS (inquorum_count * +                                     quorum_percentage / 100.0); +        else +                count = (inquorum_count * 50 / 100) + 1; + +        *quorum_count = count; +        ret = 0; +out: +        return ret; +} + +gf_boolean_t +glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo) +{ +        gf_boolean_t    res = _gf_false; +        char            *quorum_type = NULL; +        int             ret = 0; + +        ret = dict_get_str (volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, +                            &quorum_type); +        if (ret) +                goto out; + +        if (strcmp (quorum_type, GLUSTERD_SERVER_QUORUM) == 0) +                res = _gf_true; +out: +        return res; +} + +gf_boolean_t +glusterd_is_any_volume_in_server_quorum (xlator_t *this) +{ +        glusterd_conf_t         *conf = NULL; +        glusterd_volinfo_t      *volinfo = NULL; + +        conf = this->private; +        list_for_each_entry (volinfo, &conf->volumes, vol_list) { +                if (glusterd_is_volume_in_server_quorum (volinfo)) { +                        return _gf_true; +                } +        } +        return _gf_false; +} + +gf_boolean_t +does_gd_meet_server_quorum (xlator_t *this) +{ +        int                     quorum_count = 0; +        int                     active_count   = 0; +        gf_boolean_t            in = _gf_false; +        glusterd_conf_t         *conf = NULL; +        int                     ret = -1; + +        conf = this->private; +        ret = glusterd_get_quorum_cluster_counts (this, &active_count, +                                                  &quorum_count); +        if (ret) +                goto out; + +        if (!_does_quorum_meet (active_count, quorum_count)) { +                goto out; +        } + +        in = _gf_true; +out: +        return in; +} + +void +glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo, +                                  gf_boolean_t meets_quorum) +{ +        glusterd_brickinfo_t    *brickinfo = NULL; +        glusterd_conf_t         *conf = NULL; + +        conf = this->private; +        if (volinfo->status != GLUSTERD_STATUS_STARTED) +                goto out; + +        if (!glusterd_is_volume_in_server_quorum (volinfo)) +                meets_quorum = _gf_true; + +        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { +                if (!glusterd_is_local_brick (this, volinfo, brickinfo)) +                        continue; +                if (meets_quorum) +                        glusterd_brick_start (volinfo, brickinfo, _gf_false); +                else +                        glusterd_brick_stop (volinfo, brickinfo, _gf_false); +        } +out: +        return; +} + +int +glusterd_do_quorum_action () +{ +        xlator_t            *this          = NULL; +        glusterd_conf_t     *conf          = NULL; +        glusterd_volinfo_t  *volinfo       = NULL; +        int                 ret            = 0; +        int                 active_count   = 0; +        int                 quorum_count   = 0; +        gf_boolean_t        meets          = _gf_false; + +        this = THIS; +        conf = this->private; + +        conf->pending_quorum_action = _gf_true; +        ret = glusterd_lock (conf->uuid); +        if (ret) +                goto out; + +        { +                ret = glusterd_get_quorum_cluster_counts (this, &active_count, +                                                          &quorum_count); +                if (ret) +                        goto unlock; + +                if (_does_quorum_meet (active_count, quorum_count)) +                        meets = _gf_true; +                list_for_each_entry (volinfo, &conf->volumes, vol_list) { +                        glusterd_do_volume_quorum_action (this, volinfo, meets); +                } +        } +unlock: +        (void)glusterd_unlock (conf->uuid); +        conf->pending_quorum_action = _gf_false; +out: +        return ret; +} +  int32_t  glusterd_import_friend_volume_opts (dict_t *vols, int count,                                      glusterd_volinfo_t *volinfo) @@ -2076,6 +2339,7 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count,          int32_t                 ret = -1;          int                     opt_count = 0;          char                    msg[2048] = {0}; +        char                    volume_prefix[1024] = {0};          memset (key, 0, sizeof (key));          snprintf (key, sizeof (key), "volume%d.opt-count", count); @@ -2086,8 +2350,9 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count,                  goto out;          } -        ret = import_prdict_volinfo_dict (vols, volinfo->dict, "key", -                                          "value", opt_count, count); +        snprintf (volume_prefix, sizeof (volume_prefix), "volume%d", count); +        ret = import_prdict_dict (vols, volinfo->dict, "key", "value", +                                  opt_count, volume_prefix);          if (ret) {                  snprintf (msg, sizeof (msg), "Unable to import options dict "                            "specified for %s", volinfo->volname); @@ -2103,9 +2368,8 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count,                  goto out;          } -        ret = import_prdict_volinfo_dict (vols, volinfo->gsync_slaves, -                                          "slave-num", "slave-val", opt_count, -                                          count); +        ret = import_prdict_dict (vols, volinfo->gsync_slaves, "slave-num", +                                  "slave-val", opt_count, volume_prefix);          if (ret) {                  snprintf (msg, sizeof (msg), "Unable to import gsync sessions "                            "specified for %s", volinfo->volname); @@ -2602,6 +2866,95 @@ out:          return ret;  } +int +glusterd_get_global_opt_version (dict_t *opts, uint32_t *version) +{ +        int     ret = -1; +        char    *version_str = NULL; + +        ret = dict_get_str (opts, GLUSTERD_GLOBAL_OPT_VERSION, &version_str); +        if (ret) +                goto out; + +        ret = gf_string2uint (version_str, version); +        if (ret) +                goto out; +        ret = 0; +out: +        return ret; +} + +int +glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str) +{ +        int             ret = -1; +        char            version_string[64] = {0}; +        uint32_t        version = 0; + +        ret = glusterd_get_global_opt_version (opts, &version); +        if (ret) +                goto out; +        version++; +        snprintf (version_string, sizeof (version_string), "%"PRIu32, version); +        *version_str = gf_strdup (version_string); +        if (*version_str) +                ret = 0; +out: +        return ret; +} + +int32_t +glusterd_import_global_opts (dict_t *friend_data) +{ +        xlator_t        *this = NULL; +        glusterd_conf_t *conf = NULL; +        int             ret = -1; +        dict_t          *import_options = NULL; +        int             count = 0; +        uint32_t        local_version = 0; +        uint32_t        remote_version = 0; + +        this = THIS; +        conf = this->private; + +        ret = dict_get_int32 (friend_data, "global-opt-count", &count); +        if (ret) { +                //old version peer +                ret = 0; +                goto out; +        } + +        import_options = dict_new (); +        if (!import_options) +                goto out; +        ret = import_prdict_dict (friend_data, import_options, "key", "val", +                                  count, "global"); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to import" +                        " global options"); +                goto out; +        } + +        ret = glusterd_get_global_opt_version (conf->opts, &local_version); +        if (ret) +                goto out; +        ret = glusterd_get_global_opt_version (import_options, &remote_version); +        if (ret) +                goto out; +        if (remote_version > local_version) { +                ret = glusterd_store_options (this, import_options); +                if (ret) +                        goto out; +                dict_unref (conf->opts); +                conf->opts = dict_ref (import_options); +        } +        ret = 0; +out: +        if (import_options) +                dict_unref (import_options); +        return ret; +} +  int32_t  glusterd_compare_friend_data (dict_t  *vols, int32_t *status)  { @@ -2639,6 +2992,9 @@ glusterd_compare_friend_data (dict_t  *vols, int32_t *status)                          stale_nfs = _gf_true;                  if (glusterd_is_nodesvc_running ("glustershd"))                          stale_shd = _gf_true; +                ret = glusterd_import_global_opts (vols); +                if (ret) +                        goto out;                  ret = glusterd_import_friend_volumes (vols);                  if (ret)                          goto out; @@ -3523,14 +3879,16 @@ glusterd_restart_bricks (glusterd_conf_t *conf)          int                   ret            = 0;          list_for_each_entry (volinfo, &conf->volumes, vol_list) { -                /* If volume status is not started, do not proceed */ -                if (volinfo->status == GLUSTERD_STATUS_STARTED) { -                        list_for_each_entry (brickinfo, &volinfo->bricks, -                                             brick_list) { -                                glusterd_brick_start (volinfo, brickinfo, -                                                     _gf_true); -                        } -                        start_nodesvcs = _gf_true; +                if (volinfo->status != GLUSTERD_STATUS_STARTED) +                        continue; +                start_nodesvcs = _gf_true; +                if (glusterd_is_volume_in_server_quorum (volinfo)) { +                        //these bricks will be restarted once the quorum is met +                        continue; +                } + +                list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { +                        glusterd_brick_start (volinfo, brickinfo, _gf_true);                  }          } @@ -4811,8 +5169,8 @@ out:  int  glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, -                       glusterd_friend_sm_state_t state, -                       uuid_t *uuid, const char *hostname) +                       glusterd_friend_sm_state_t state, uuid_t *uuid, +                       const char *hostname, int port)  {          glusterd_peerinfo_t      *new_peer = NULL;          int                      ret = -1; @@ -4842,6 +5200,9 @@ glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo,          if (ret)                  goto out; +        if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED) +                new_peer->quorum_contrib = QUORUM_WAITING; +        new_peer->port = port;          *peerinfo = new_peer;  out:          if (ret && new_peer) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 545fa3385ac..3f956deee67 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -33,13 +33,13 @@ struct glusterd_lock_ {          time_t  timestamp;  }; -typedef struct glusterd_voldict_ctx_ { +typedef struct glusterd_dict_ctx_ {          dict_t  *dict; -        int     count;          int     opt_count;          char    *key_name;          char    *val_name; -} glusterd_voldict_ctx_t; +        char    *prefix; +} glusterd_dict_ctx_t;  int  glusterd_compare_lines (const void *a, const void *b); @@ -323,8 +323,8 @@ glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log,                                             int event);  int  glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, -                       glusterd_friend_sm_state_t state, -                       uuid_t *uuid, const char *hostname); +                       glusterd_friend_sm_state_t state, uuid_t *uuid, +                       const char *hostname, int port);  int  glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log,                           char * (*state_name_get) (int), @@ -463,4 +463,25 @@ glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);   */  gf_boolean_t  is_origin_glusterd (); + +gf_boolean_t +glusterd_is_quorum_changed (dict_t *options, char *option, char *value); + +int +glusterd_do_quorum_action (); + +int +glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, +                                    int *quorum_count); + +int +glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str); +gf_boolean_t +glusterd_is_quorum_option (char *option); +gf_boolean_t +glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo); +gf_boolean_t +glusterd_is_any_volume_in_server_quorum (xlator_t *this); +gf_boolean_t +does_gd_meet_server_quorum (xlator_t *this);  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 6f7b3034cbf..3629361ad56 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -184,6 +184,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = {          {"network.inode-lru-limit",              "protocol/server",           NULL, NULL, NO_DOC, 0, 1},          {AUTH_ALLOW_MAP_KEY,                     "protocol/server",           "!server-auth", "*", DOC, 0, 1},          {AUTH_REJECT_MAP_KEY,                    "protocol/server",           "!server-auth", NULL, DOC, 0}, +          {"transport.keepalive",                  "protocol/server",           "transport.socket.keepalive", NULL, NO_DOC, 0, 1},          {"server.allow-insecure",                "protocol/server",           "rpc-auth-allow-insecure", NULL, NO_DOC, 0, 1},          {"server.statedump-path",                "protocol/server",           "statedump-path", NULL, DOC, 0, 1}, @@ -252,6 +253,8 @@ static struct volopt_map_entry glusterd_volopt_map[] = {          {"storage.owner-gid",                    "storage/posix",             "brick-gid", NULL, DOC, 0, 2},          {"config.memory-accounting",             "configuration",             "!config", NULL, DOC, 0, 2},          {"config.transport",                     "configuration",             "!config", NULL, DOC, 0, 2}, +        {GLUSTERD_QUORUM_TYPE_KEY,               "mgmt/glusterd",             NULL,         "off", DOC, 0}, +        {GLUSTERD_QUORUM_RATIO_KEY,              "mgmt/glusterd",             NULL,         "0", DOC, 0},          {NULL,                                                                }  }; diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index cb5ac321c2f..02dd5fac78b 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -118,6 +118,36 @@ glusterd_uuid_init ()  }  int +glusterd_options_init (xlator_t *this) +{ +        int             ret = -1; +        glusterd_conf_t *priv = NULL; +        char            *initial_version = "0"; + +        priv = this->private; + +        priv->opts = dict_new (); +        if (!priv->opts) +                goto out; + +        ret = glusterd_store_retrieve_options (this); +        if (ret == 0) +                goto out; + +        ret = dict_set_str (priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, +                            initial_version); +        if (ret) +                goto out; +        ret = glusterd_store_options (this, priv->opts); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Unable to store version"); +                return ret; +        } +out: + +        return 0; +} +int  glusterd_fetchspec_notify (xlator_t *this)  {          int              ret   = -1; @@ -1028,6 +1058,10 @@ init (xlator_t *this)          if (ret < 0)                  goto out; +        ret = glusterd_options_init (this); +        if (ret < 0) +                goto out; +          ret = glusterd_handle_upgrade_downgrade (this->options, conf);          if (ret)                  goto out; @@ -1172,5 +1206,12 @@ struct volume_options options[] = {            .type = GF_OPTION_TYPE_BOOL,          },  #endif +        { .key = {"server-quorum-type"}, +          .type = GF_OPTION_TYPE_STR, +          .value = { "none", "server"}, +        }, +        { .key = {"server-quorum-ratio"}, +          .type = GF_OPTION_TYPE_PERCENT, +        },          { .key   = {NULL} },  }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 00a6bd16343..c2041ad3194 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -43,7 +43,14 @@  #define GLUSTERD_TR_LOG_SIZE            50  #define GLUSTERD_NAME                   "glusterd"  #define GLUSTERD_SOCKET_LISTEN_BACKLOG  128 +#define GLUSTERD_QUORUM_TYPE_KEY        "cluster.server-quorum-type" +#define GLUSTERD_QUORUM_RATIO_KEY       "cluster.server-quorum-ratio" +#define GLUSTERD_GLOBAL_OPT_VERSION     "global-option-version" +#define GLUSTERD_SERVER_QUORUM "server" + +struct glusterd_volinfo_; +typedef struct glusterd_volinfo_ glusterd_volinfo_t;  typedef enum glusterd_op_ {          GD_OP_NONE = 0, @@ -74,7 +81,6 @@ typedef enum glusterd_op_ {          GD_OP_MAX,  } glusterd_op_t; -  struct glusterd_store_iter_ {          int     fd;          FILE    *file; @@ -86,6 +92,7 @@ typedef struct glusterd_store_iter_     glusterd_store_iter_t;  struct glusterd_volgen {          dict_t *dict;  }; +  typedef struct {          struct rpc_clnt         *rpc;          gf_boolean_t            running; @@ -109,6 +116,12 @@ typedef struct {                                  versions */  typedef struct { +        gf_boolean_t    quorum; +        double          quorum_ratio; +        uint64_t        gl_opt_version; +} gd_global_opts_t; + +typedef struct {          struct _volfile_ctx *volfile;  	pthread_mutex_t   mutex;  	struct list_head  peers; @@ -134,11 +147,11 @@ typedef struct {  #endif          pthread_t       brick_thread;          void           *hooks_priv; -        xlator_t       *xl; /* Should be set to 'THIS' before creating thread */ -          /* need for proper handshake_t */          int             op_version; /* Starts with 1 for 3.3.0 */ - +        xlator_t       *xl;  /* Should be set to 'THIS' before creating thread */ +        gf_boolean_t   pending_quorum_action; +        dict_t             *opts;  } glusterd_conf_t; @@ -170,9 +183,6 @@ struct gf_defrag_brickinfo_ {          int   size;  }; -struct glusterd_volinfo_; -typedef struct glusterd_volinfo_ glusterd_volinfo_t; -  typedef int (*defrag_cbk_fn_t) (glusterd_volinfo_t *volinfo,                                  gf_defrag_status_t status); @@ -411,6 +421,9 @@ glusterd_friend_add (const char *hoststr, int port,                       gf_boolean_t restore, glusterd_peerctx_args_t *args);  int +glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, +                            glusterd_peerctx_args_t *args); +int  glusterd_friend_remove (uuid_t uuid, char *hostname);  int  | 
