diff options
author | Jeff Darcy <jdarcy@redhat.com> | 2016-12-08 16:24:15 -0500 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2017-01-30 19:13:58 -0500 |
commit | 1a95fc3036db51b82b6a80952f0908bc2019d24a (patch) | |
tree | b983ac196a8165d5cb5e860a5ef97d3e9a41b5c9 /xlators/protocol | |
parent | 7f7d7a939e46b330a084d974451eee4757ba61b4 (diff) |
core: run many bricks within one glusterfsd process
This patch adds support for multiple brick translator stacks running
in a single brick server process. This reduces our per-brick memory usage by
approximately 3x, and our appetite for TCP ports even more. It also creates
potential to avoid process/thread thrashing, and to improve QoS by scheduling
more carefully across the bricks, but realizing that potential will require
further work.
Multiplexing is controlled by the "cluster.brick-multiplex" global option. By
default it's off, and bricks are started in separate processes as before. If
multiplexing is enabled, then *compatible* bricks (mostly those with the same
transport options) will be started in the same process.
Change-Id: I45059454e51d6f4cbb29a4953359c09a408695cb
BUG: 1385758
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: https://review.gluster.org/14763
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/protocol')
-rw-r--r-- | xlators/protocol/auth/addr/src/addr.c | 69 | ||||
-rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 5 | ||||
-rw-r--r-- | xlators/protocol/server/src/server-handshake.c | 152 | ||||
-rw-r--r-- | xlators/protocol/server/src/server-rpc-fops.c | 5 | ||||
-rw-r--r-- | xlators/protocol/server/src/server.c | 171 |
5 files changed, 203 insertions, 199 deletions
diff --git a/xlators/protocol/auth/addr/src/addr.c b/xlators/protocol/auth/addr/src/addr.c index 6965da01b7a..1b4557134f9 100644 --- a/xlators/protocol/auth/addr/src/addr.c +++ b/xlators/protocol/auth/addr/src/addr.c @@ -30,21 +30,14 @@ gf_auth (dict_t *input_params, dict_t *config_params) int ret = 0; char *name = NULL; char *searchstr = NULL; - peer_info_t *peer_info = NULL; - data_t *peer_info_data = NULL; data_t *allow_addr = NULL; data_t *reject_addr = NULL; char *addr_str = NULL; char *tmp = NULL; char *addr_cpy = NULL; - char *service = NULL; - uint16_t peer_port = 0; - char is_inet_sdp = 0; char negate = 0; char match = 0; char peer_addr[UNIX_PATH_MAX]; - char *type = NULL; - gf_boolean_t allow_insecure = _gf_false; name = data_to_str (dict_get (input_params, "remote-subvolume")); if (!name) { @@ -73,7 +66,7 @@ gf_auth (dict_t *input_params, dict_t *config_params) GF_FREE (searchstr); if (!allow_addr) { - /* TODO: backword compatibility */ + /* TODO: backward compatibility */ ret = gf_asprintf (&searchstr, "auth.ip.%s.allow", name); if (-1 == ret) { gf_log ("auth/addr", GF_LOG_ERROR, @@ -92,66 +85,6 @@ gf_auth (dict_t *input_params, dict_t *config_params) goto out; } - peer_info_data = dict_get (input_params, "peer-info"); - if (!peer_info_data) { - gf_log ("auth/addr", GF_LOG_ERROR, - "peer-info not present"); - goto out; - } - - peer_info = data_to_ptr (peer_info_data); - - switch (((struct sockaddr *) &peer_info->sockaddr)->sa_family) - { - case AF_INET_SDP: - is_inet_sdp = 1; - ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET; - - case AF_INET: - case AF_INET6: - { - strcpy (peer_addr, peer_info->identifier); - service = strrchr (peer_addr, ':'); - *service = '\0'; - service ++; - - if (is_inet_sdp) { - ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET_SDP; - } - - ret = dict_get_str (config_params, "rpc-auth-allow-insecure", - &type); - if (ret == 0) { - ret = gf_string2boolean (type, &allow_insecure); - if (ret < 0) { - gf_log ("auth/addr", GF_LOG_WARNING, - "rpc-auth-allow-insecure option %s " - "is not a valid bool option", type); - goto out; - } - } - - peer_port = atoi (service); - if (peer_port >= PRIVILEGED_PORT_CEILING && !allow_insecure) { - gf_log ("auth/addr", GF_LOG_ERROR, - "client is bound to port %d which is not privileged", - peer_port); - goto out; - } - break; - - case AF_UNIX: - strcpy (peer_addr, peer_info->identifier); - break; - - default: - gf_log ("authenticate/addr", GF_LOG_ERROR, - "unknown address family %d", - ((struct sockaddr *) &peer_info->sockaddr)->sa_family); - goto out; - } - } - if (reject_addr) { addr_cpy = gf_strdup (reject_addr->data); if (!addr_cpy) diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 354b9167810..6d1f14b2aa7 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -1272,6 +1272,11 @@ out: PC_MSG_CHILD_CONNECTING_NOTIFY_FAILED, "notify of CHILD_CONNECTING failed"); conf->connecting= 1; + /* + * The reconnection *won't* happen in the background (see + * previous comment) unless we kill the current connection. + */ + rpc_transport_disconnect (conf->rpc->conn.trans, _gf_false); ret = 0; } diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index a33efb8c33a..249dde7de76 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -36,27 +36,6 @@ gf_compare_client_version (rpcsvc_request_t *req, int fop_prognum, return ret; } -void __check_and_set (xlator_t *each, void *data) -{ - if (!strcmp (each->name, - ((struct __get_xl_struct *) data)->name)) - ((struct __get_xl_struct *) data)->reply = each; -} - -static xlator_t * -get_xlator_by_name (xlator_t *some_xl, const char *name) -{ - struct __get_xl_struct get = { - .name = name, - .reply = NULL - }; - - xlator_foreach (some_xl, __check_and_set, &get); - - return get.reply; -} - - int _volfile_update_checksum (xlator_t *this, char *key, uint32_t checksum) { @@ -426,13 +405,14 @@ server_setvolume (rpcsvc_request_t *req) int32_t ret = -1; int32_t op_ret = -1; int32_t op_errno = EINVAL; - int32_t fop_version = 0; - int32_t mgmt_version = 0; uint32_t lk_version = 0; char *buf = NULL; gf_boolean_t cancelled = _gf_false; uint32_t opversion = 0; rpc_transport_t *xprt = NULL; + int32_t fop_version = 0; + int32_t mgmt_version = 0; + params = dict_new (); reply = dict_new (); @@ -446,32 +426,6 @@ server_setvolume (rpcsvc_request_t *req) this = req->svc->xl; - config_params = dict_copy_with_ref (this->options, NULL); - conf = this->private; - - if (conf->parent_up == _gf_false) { - /* PARENT_UP indicates that all xlators in graph are inited - * successfully - */ - op_ret = -1; - op_errno = EAGAIN; - - ret = dict_set_str (reply, "ERROR", - "xlator graph in server is not initialised " - "yet. Try again later"); - if (ret < 0) - gf_msg_debug (this->name, 0, "failed to set error: " - "xlator graph in server is not " - "initialised yet. Try again later"); - goto fail; - } - - ret = dict_set_int32 (reply, "child_up", conf->child_up); - if (ret < 0) - gf_msg (this->name, GF_LOG_ERROR, 0, - PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' " - "in the reply dict"); - buf = memdup (args.dict.dict_val, args.dict.dict_len); if (buf == NULL) { op_ret = -1; @@ -497,6 +451,65 @@ server_setvolume (rpcsvc_request_t *req) params->extra_free = buf; buf = NULL; + ret = dict_get_str (params, "remote-subvolume", &name); + if (ret < 0) { + ret = dict_set_str (reply, "ERROR", + "No remote-subvolume option specified"); + if (ret < 0) + gf_msg_debug (this->name, 0, "failed to set error " + "msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + xl = get_xlator_by_name (this, name); + if (xl == NULL) { + ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found", + name); + if (-1 == ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + PS_MSG_ASPRINTF_FAILED, + "asprintf failed while setting error msg"); + goto fail; + } + ret = dict_set_dynstr (reply, "ERROR", msg); + if (ret < 0) + gf_msg_debug (this->name, 0, "failed to set error " + "msg"); + + op_ret = -1; + op_errno = ENOENT; + goto fail; + } + + config_params = dict_copy_with_ref (xl->options, NULL); + conf = this->private; + + if (conf->parent_up == _gf_false) { + /* PARENT_UP indicates that all xlators in graph are inited + * successfully + */ + op_ret = -1; + op_errno = EAGAIN; + + ret = dict_set_str (reply, "ERROR", + "xlator graph in server is not initialised " + "yet. Try again later"); + if (ret < 0) + gf_msg_debug (this->name, 0, "failed to set error: " + "xlator graph in server is not " + "initialised yet. Try again later"); + goto fail; + } + + ret = dict_set_int32 (reply, "child_up", conf->child_up); + if (ret < 0) + gf_msg (this->name, GF_LOG_ERROR, 0, + PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' " + "in the reply dict"); + ret = dict_get_str (params, "process-uuid", &client_uid); if (ret < 0) { ret = dict_set_str (reply, "ERROR", @@ -603,39 +616,6 @@ server_setvolume (rpcsvc_request_t *req) goto fail; } - ret = dict_get_str (params, "remote-subvolume", &name); - if (ret < 0) { - ret = dict_set_str (reply, "ERROR", - "No remote-subvolume option specified"); - if (ret < 0) - gf_msg_debug (this->name, 0, "failed to set error " - "msg"); - - op_ret = -1; - op_errno = EINVAL; - goto fail; - } - - xl = get_xlator_by_name (this, name); - if (xl == NULL) { - ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found", - name); - if (-1 == ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - PS_MSG_ASPRINTF_FAILED, - "asprintf failed while setting error msg"); - goto fail; - } - ret = dict_set_dynstr (reply, "ERROR", msg); - if (ret < 0) - gf_msg_debug (this->name, 0, "failed to set error " - "msg"); - - op_ret = -1; - op_errno = ENOENT; - goto fail; - } - if (conf->verify_volfile) { ret = dict_get_uint32 (params, "volfile-checksum", &checksum); if (ret == 0) { @@ -850,7 +830,13 @@ fail: dict_unref (params); dict_unref (reply); - dict_unref (config_params); + if (config_params) { + /* + * This might be null if we couldn't even find the translator + * (brick) to copy it from. + */ + dict_unref (config_params); + } GF_FREE (buf); diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c index 0a5497f22e0..5bb40a77515 100644 --- a/xlators/protocol/server/src/server-rpc-fops.c +++ b/xlators/protocol/server/src/server-rpc-fops.c @@ -3385,10 +3385,8 @@ server_compound_resume (call_frame_t *frame, xlator_t *bound_xl) int length = 0; int op_errno = ENOMEM; compound_req *c_req = NULL; - xlator_t *this = NULL; state = CALL_STATE (frame); - this = frame->this; if (state->resolve.op_ret != 0) { ret = state->resolve.op_ret; @@ -3422,8 +3420,7 @@ server_compound_resume (call_frame_t *frame, xlator_t *bound_xl) } STACK_WIND (frame, server_compound_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->compound, + bound_xl, bound_xl->fops->compound, args, state->xdata); return 0; diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index db2f06ad582..5be900a6db0 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -524,30 +524,30 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, */ pthread_mutex_lock (&conf->mutex); - { - list_add_tail (&trans->list, &conf->xprt_list); - } + rpc_transport_ref (trans); + list_add_tail (&trans->list, &conf->xprt_list); pthread_mutex_unlock (&conf->mutex); break; } case RPCSVC_EVENT_DISCONNECT: + /* A DISCONNECT event could come without an ACCEPT event * happening for this transport. This happens when the server is * expecting encrypted connections by the client tries to * connect unecnrypted */ - if (list_empty (&trans->list)) + if (list_empty (&trans->list)) { break; + } /* transport has to be removed from the list upon disconnect * irrespective of whether lock self heal is off or on, since * new transport will be created upon reconnect. */ pthread_mutex_lock (&conf->mutex); - { - list_del_init (&trans->list); - } + list_del_init (&trans->list); + rpc_transport_unref (trans); pthread_mutex_unlock (&conf->mutex); client = trans->xl_private; @@ -667,6 +667,8 @@ _delete_auth_opt (dict_t *this, char *key, data_t *value, void *data) { char *auth_option_pattern[] = { "auth.addr.*.allow", "auth.addr.*.reject", + "auth.login.*.allow", + "auth.login.*.password", "auth.login.*.ssl-allow", NULL}; int i = 0; @@ -687,6 +689,8 @@ _copy_auth_opt (dict_t *unused, char *key, data_t *value, void *xl_dict) { char *auth_option_pattern[] = { "auth.addr.*.allow", "auth.addr.*.reject", + "auth.login.*.allow", + "auth.login.*.password", "auth.login.*.ssl-allow", NULL}; int i = 0; @@ -729,15 +733,19 @@ out: } int -server_check_event_threads (xlator_t *this, server_conf_t *conf, int32_t old, - int32_t new) +server_check_event_threads (xlator_t *this, server_conf_t *conf, int32_t new) { - if (old == new) - return 0; + struct event_pool *pool = this->ctx->event_pool; + int target; + target = new + pool->auto_thread_count; conf->event_threads = new; - return event_reconfigure_threads (this->ctx->event_pool, - conf->event_threads); + + if (target == pool->eventthreadcount) { + return 0; + } + + return event_reconfigure_threads (pool, target); } int @@ -748,6 +756,7 @@ reconfigure (xlator_t *this, dict_t *options) rpcsvc_t *rpc_conf; rpcsvc_listener_t *listeners; rpc_transport_t *xprt = NULL; + rpc_transport_t *xp_next = NULL; int inode_lru_limit; gf_boolean_t trace; data_t *data; @@ -755,6 +764,19 @@ reconfigure (xlator_t *this, dict_t *options) char *statedump_path = NULL; int32_t new_nthread = 0; char *auth_path = NULL; + char *xprt_path = NULL; + xlator_t *oldTHIS; + xlator_t *kid; + + /* + * Since we're not a fop, we can't really count on THIS being set + * correctly, and it needs to be or else GF_OPTION_RECONF won't work + * (because it won't find our options list). This is another thing + * that "just happened" to work before multiplexing, but now we need to + * handle it more explicitly. + */ + oldTHIS = THIS; + THIS = this; conf = this->private; @@ -764,6 +786,19 @@ reconfigure (xlator_t *this, dict_t *options) goto out; } + /* + * For some of the auth/rpc stuff, we need to operate on the correct + * child, but for other stuff we need to operate on the server + * translator itself. + */ + kid = NULL; + if (dict_get_str (options, "auth-path", &auth_path) == 0) { + kid = get_xlator_by_name (this, auth_path); + } + if (!kid) { + kid = this; + } + if (dict_get_int32 ( options, "inode-lru-limit", &inode_lru_limit) == 0){ conf->inode_lru_limit = inode_lru_limit; gf_msg_trace (this->name, 0, "Reconfigured inode-lru-limit to " @@ -795,48 +830,50 @@ reconfigure (xlator_t *this, dict_t *options) } GF_OPTION_RECONF ("statedump-path", statedump_path, - options, path, out); + options, path, do_auth); if (!statedump_path) { gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_STATEDUMP_PATH_ERROR, "Error while reconfiguring statedump path"); ret = -1; - goto out; + goto do_auth; } gf_path_strip_trailing_slashes (statedump_path); GF_FREE (this->ctx->statedump_path); this->ctx->statedump_path = gf_strdup (statedump_path); +do_auth: if (!conf->auth_modules) conf->auth_modules = dict_new (); dict_foreach (options, get_auth_types, conf->auth_modules); - ret = validate_auth_options (this, options); + ret = validate_auth_options (kid, options); if (ret == -1) { /* logging already done in validate_auth_options function. */ goto out; } - dict_foreach (this->options, _delete_auth_opt, this->options); - dict_foreach (options, _copy_auth_opt, this->options); + dict_foreach (kid->options, _delete_auth_opt, NULL); + dict_foreach (options, _copy_auth_opt, kid->options); - ret = gf_auth_init (this, conf->auth_modules); + ret = gf_auth_init (kid, conf->auth_modules); if (ret) { dict_unref (conf->auth_modules); goto out; } GF_OPTION_RECONF ("manage-gids", conf->server_manage_gids, options, - bool, out); + bool, do_rpc); GF_OPTION_RECONF ("gid-timeout", conf->gid_cache_timeout, options, - int32, out); + int32, do_rpc); if (gid_cache_reconf (&conf->gid_cache, conf->gid_cache_timeout) < 0) { gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_GRP_CACHE_ERROR, "Failed to reconfigure group cache."); - goto out; + goto do_rpc; } +do_rpc: rpc_conf = conf->rpc; if (!rpc_conf) { gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_RPC_CONF_ERROR, @@ -857,7 +894,14 @@ reconfigure (xlator_t *this, dict_t *options) if (conf->dync_auth) { pthread_mutex_lock (&conf->mutex); { - list_for_each_entry (xprt, &conf->xprt_list, list) { + /* + * Disconnecting will (usually) drop the last ref, + * which will cause the transport to be unlinked and + * freed while we're still traversing, which will cause + * us to crash unless we use list_for_each_entry_safe. + */ + list_for_each_entry_safe (xprt, xp_next, + &conf->xprt_list, list) { /* check for client authorization */ if (!xprt->clnt_options) { /* If clnt_options dictionary is null, @@ -871,25 +915,28 @@ reconfigure (xlator_t *this, dict_t *options) */ continue; } + /* + * Make sure we're only operating on + * connections that are relevant to the brick + * we're reconfiguring. + */ + if (dict_get_str (xprt->clnt_options, + "remote-subvolume", + &xprt_path) != 0) { + continue; + } + if (strcmp (xprt_path, auth_path) != 0) { + continue; + } ret = gf_authenticate (xprt->clnt_options, - options, conf->auth_modules); + options, + conf->auth_modules); if (ret == AUTH_ACCEPT) { - gf_msg (this->name, GF_LOG_TRACE, 0, + gf_msg (kid->name, GF_LOG_TRACE, 0, PS_MSG_CLIENT_ACCEPTED, "authorized client, hence we " "continue with this connection"); } else { - ret = dict_get_str (this->options, - "auth-path", - &auth_path); - if (ret) { - gf_msg (this->name, - GF_LOG_WARNING, 0, - PS_MSG_DICT_GET_FAILED, - "failed to get " - "auth-path"); - auth_path = NULL; - } gf_event (EVENT_CLIENT_AUTH_REJECT, "client_uid=%s;" "client_identifier=%s;" @@ -932,15 +979,21 @@ reconfigure (xlator_t *this, dict_t *options) } } + /* + * Let the event subsystem know that we're auto-scaling, with an + * initial count of one. + */ + ((struct event_pool *)(this->ctx->event_pool))->auto_thread_count = 1; + GF_OPTION_RECONF ("event-threads", new_nthread, options, int32, out); - ret = server_check_event_threads (this, conf, conf->event_threads, - new_nthread); + ret = server_check_event_threads (this, conf, new_nthread); if (ret) goto out; ret = server_init_grace_timer (this, options, conf); out: + THIS = oldTHIS; gf_msg_debug ("", 0, "returning %d", ret); return ret; } @@ -1001,8 +1054,7 @@ init (xlator_t *this) /* Set event threads to the configured default */ GF_OPTION_INIT("event-threads", conf->event_threads, int32, out); - ret = server_check_event_threads (this, conf, STARTING_EVENT_THREADS, - conf->event_threads); + ret = server_check_event_threads (this, conf, conf->event_threads); if (ret) goto out; @@ -1183,9 +1235,13 @@ init (xlator_t *this) } } #endif - this->private = conf; + FIRST_CHILD(this)->volfile_id + = gf_strdup (this->ctx->cmd_args.volfile_id); + + this->private = conf; ret = 0; + out: if (ret) { if (this != NULL) { @@ -1350,6 +1406,8 @@ notify (xlator_t *this, int32_t event, void *data, ...) { int ret = -1; server_conf_t *conf = NULL; + rpc_transport_t *xprt = NULL; + rpc_transport_t *xp_next = NULL; GF_VALIDATE_OR_GOTO (THIS->name, this, out); conf = this->private; @@ -1413,6 +1471,31 @@ notify (xlator_t *this, int32_t event, void *data, ...) } + case GF_EVENT_TRANSPORT_CLEANUP: + conf = this->private; + pthread_mutex_lock (&conf->mutex); + /* + * Disconnecting will (usually) drop the last ref, which will + * cause the transport to be unlinked and freed while we're + * still traversing, which will cause us to crash unless we use + * list_for_each_entry_safe. + */ + list_for_each_entry_safe (xprt, xp_next, + &conf->xprt_list, list) { + if (!xprt->xl_private) { + continue; + } + if (xprt->xl_private->bound_xl == data) { + gf_log (this->name, GF_LOG_INFO, + "disconnecting %s", + xprt->peerinfo.identifier); + rpc_transport_disconnect (xprt, _gf_false); + } + } + pthread_mutex_unlock (&conf->mutex); + /* NB: do *not* propagate anywhere else */ + break; + default: default_notify (this, event, data); break; @@ -1568,12 +1651,12 @@ struct volume_options options[] = { { .key = {"event-threads"}, .type = GF_OPTION_TYPE_INT, .min = 1, - .max = 32, - .default_value = "2", + .max = 1024, + .default_value = "1", .description = "Specifies the number of event threads to execute " "in parallel. Larger values would help process" " responses faster, depending on available processing" - " power. Range 1-32 threads." + " power." }, { .key = {"dynamic-auth"}, .type = GF_OPTION_TYPE_BOOL, |