summary | refs | log | tree | commit | diff | stats
path: root/glusterfsd/src/gf_attach.c
diff options
context:
space:
mode:
author: Jeff Darcy <jdarcy@redhat.com> 2016-12-08 16:24:15 -0500
committer: Vijay Bellur <vbellur@redhat.com> 2017-01-30 19:13:58 -0500
commit: 1a95fc3036db51b82b6a80952f0908bc2019d24a (patch)
tree: b983ac196a8165d5cb5e860a5ef97d3e9a41b5c9 /glusterfsd/src/gf_attach.c
parent: 7f7d7a939e46b330a084d974451eee4757ba61b4 (diff)
core: run many bricks within one glusterfsd process
This patch adds support for multiple brick translator stacks running in a single brick server process. This reduces our per-brick memory usage by approximately 3x, and our appetite for TCP ports even more. It also creates potential to avoid process/thread thrashing, and to improve QoS by scheduling more carefully across the bricks, but realizing that potential will require further work. Multiplexing is controlled by the "cluster.brick-multiplex" global option. By default it's off, and bricks are started in separate processes as before. If multiplexing is enabled, then *compatible* bricks (mostly those with the same transport options) will be started in the same process. Change-Id: I45059454e51d6f4cbb29a4953359c09a408695cb BUG: 1385758 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: https://review.gluster.org/14763 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'glusterfsd/src/gf_attach.c')
-rw-r--r-- glusterfsd/src/gf_attach.c 247
1 files changed, 247 insertions, 0 deletions
diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c
new file mode 100644
index 00000000000..0393dc5f42f
--- /dev/null
+++ b/glusterfsd/src/gf_attach.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+//#include "config.h"
+#include "glusterfs.h"
+#include "globals.h"
+#include "glfs-internal.h"
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+
+/* Completion flag: set to 1 by my_callback, polled by send_brick_req. */
+int done = 0;
+/*
+ * RPC status copied out of the request by my_callback; send_brick_req
+ * treats any nonzero value as an error.
+ * NOTE(review): the writer presumably runs on a different thread than the
+ * polling reader -- confirm, and consider an atomic type if so.
+ */
+int rpc_status;
+
+/*
+ * Procedure table for the brick-operations program, indexed by procedure
+ * number.  Each entry is a name plus a NULL second field; requests are not
+ * dispatched through this table but handed to rpc_clnt_submit directly with
+ * my_callback as the completion handler.
+ */
+struct rpc_clnt_procedure gf_attach_actors[GLUSTERD_BRICK_MAXVALUE] = {
+ [GLUSTERD_BRICK_NULL] = {"NULL", NULL },
+ [GLUSTERD_BRICK_OP] = {"BRICK_OP", NULL },
+};
+
+/*
+ * RPC program descriptor passed to rpc_clnt_submit for every request sent
+ * by this tool.
+ */
+struct rpc_clnt_program gf_attach_prog = {
+        /* program identity */
+        .prognum   = GD_BRICK_PROGRAM,
+        .progver   = GD_BRICK_VERSION,
+        .progname  = "brick operations",
+        /* procedure table and its size */
+        .numproc   = GLUSTERD_BRICK_MAXVALUE,
+        .proctable = gf_attach_actors,
+};
+
+/*
+ * Connection-state notification callback for the RPC client.
+ *
+ * Ideally the generic RPC layer would track connection status on its own and
+ * call us, if at all, purely for information.  It does not: the connected
+ * state is only ever updated through rpc_clnt_{set,unset}_connected, and it
+ * is this callback's job to call them.  Without that, rpc_clnt_submit
+ * believes we are disconnected even when we are not and asks the socket code
+ * to reconnect; the socket code, which does track state correctly, answers
+ * EINPROGRESS, and we end up connected yet unable to send anything.
+ * Registering this trivial callback avoids that dead end.
+ *
+ * Always returns 0.
+ */
+int
+my_notify (struct rpc_clnt *rpc, void *mydata,
+           rpc_clnt_event_t event, void *data)
+{
+        if (event == RPC_CLNT_CONNECT) {
+                printf ("connected\n");
+                rpc_clnt_set_connected (&rpc->conn);
+        } else if (event == RPC_CLNT_DISCONNECT) {
+                printf ("disconnected\n");
+                rpc_clnt_unset_connected (&rpc->conn);
+        } else {
+                /* any other event is only reported, never acted upon */
+                fprintf (stderr, "unknown RPC event\n");
+        }
+
+        return 0;
+}
+
+/*
+ * Reply callback handed to rpc_clnt_submit by send_brick_req.
+ *
+ * Copies the request's RPC status into the global rpc_status and then raises
+ * the global done flag that send_brick_req polls.  The iov, count and frame
+ * arguments are not used.  Always returns 0.
+ */
+int32_t
+my_callback (struct rpc_req *req, struct iovec *iov, int count, void *frame)
+{
+ /* Store the status before signalling completion: the waiter reads
+  * rpc_status once it has seen done become nonzero. */
+ rpc_status = req->rpc_status;
+ done = 1;
+ return 0;
+}
+
+/*
+ * Build a brick-op request, send it over "rpc" and wait for the reply.
+ * Modeled on gd_syncop_submit_request.
+ *
+ * this: xlator whose context supplies the call-frame pool
+ * rpc:  RPC client used to submit the request
+ * path: string placed in the request's "name" field
+ * op:   stored in the request's "op" field and also used as the RPC
+ *       procedure number
+ *
+ * Returns EXIT_SUCCESS only when the request was submitted, a reply arrived
+ * within the wait limit and its RPC status was zero; EXIT_FAILURE otherwise.
+ */
+int
+send_brick_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
+{
+        /* upper bounds, in seconds, for the two polling loops below */
+        enum { CONNECT_WAIT_SECS = 60, REPLY_WAIT_SECS = 120 };
+
+        int                    ret       = -1;
+        struct iobuf          *iobuf     = NULL;
+        struct iobref         *iobref    = NULL;
+        struct iovec           iov       = {0, };
+        ssize_t                req_size  = 0;
+        call_frame_t          *frame     = NULL;
+        gd1_mgmt_brick_op_req  brick_req = {0, };
+        void                  *req       = &brick_req;
+        int                    i;
+
+        brick_req.op = op;
+        brick_req.name = path;
+        brick_req.input.input_val = NULL;
+        brick_req.input.input_len = 0;
+
+        req_size = xdr_sizeof ((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+        iobuf = iobuf_get2 (rpc->ctx->iobuf_pool, req_size);
+        if (!iobuf)
+                goto out;
+
+        iobref = iobref_new ();
+        if (!iobref)
+                goto out;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame)
+                goto out;
+
+        /* ret is still -1 here, so a failed add is reported as a failure */
+        if (iobref_add (iobref, iobuf) != 0)
+                goto out;
+
+        iov.iov_base = iobuf->ptr;
+        iov.iov_len = iobuf_pagesize (iobuf);
+
+        /* Create the xdr payload */
+        ret = xdr_serialize_generic (iov, req,
+                                     (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+        if (ret == -1)
+                goto out;
+
+        /* on success the serializer returned the encoded length */
+        iov.iov_len = ret;
+
+        /* Give the connection a chance to come up before submitting. */
+        for (i = 0; i < CONNECT_WAIT_SECS; ++i) {
+                if (rpc->conn.connected) {
+                        break;
+                }
+                sleep (1);
+        }
+
+        /* Send the msg */
+        ret = rpc_clnt_submit (rpc, &gf_attach_prog, op,
+                               my_callback, &iov, 1, NULL, 0, iobref, frame,
+                               NULL, 0, NULL, 0, NULL);
+        if (!ret) {
+                /* my_callback sets "done" when the reply comes in */
+                for (i = 0; !done && (i < REPLY_WAIT_SECS); ++i) {
+                        sleep (1);
+                }
+        }
+
+out:
+        /*
+         * Any of these may still be NULL when we arrive here from an early
+         * failure; in particular frame->root must not be touched then.
+         */
+        if (iobref)
+                iobref_unref (iobref);
+        if (iobuf)
+                iobuf_unref (iobuf);
+        if (frame)
+                STACK_DESTROY (frame->root);
+
+        if (rpc_status != 0) {
+                fprintf (stderr, "got error %d on RPC\n", rpc_status);
+                return EXIT_FAILURE;
+        }
+
+        /*
+         * rpc_status only reflects a reply that was actually received.  A
+         * request that was never sent, or never answered, is a failure too.
+         */
+        if (ret != 0) {
+                fprintf (stderr, "failed to send request\n");
+                return EXIT_FAILURE;
+        }
+        if (!done) {
+                fprintf (stderr, "timed out waiting for reply\n");
+                return EXIT_FAILURE;
+        }
+
+        printf ("OK\n");
+        return EXIT_SUCCESS;
+}
+
+/*
+ * Print the two supported command-line forms to stderr.
+ *
+ * prog: program name to show in the message (callers pass argv[0])
+ *
+ * Always returns EXIT_FAILURE so callers can write "return usage (...)".
+ */
+int
+usage (char *prog)
+{
+        FILE *out = stderr;
+
+        fprintf (out, "Usage: %s uds_path volfile_path (to attach)\n", prog);
+        fprintf (out, " %s -d uds_path brick_path (to detach)\n", prog);
+
+        return EXIT_FAILURE;
+}
+
+/*
+ * Entry point.
+ *
+ * Parses the command line (an optional -d followed by exactly two
+ * positional arguments), creates a glfs instance to obtain a context,
+ * sets up an RPC client over a unix-domain socket at the first positional
+ * argument, and sends one request naming the second positional argument:
+ * GLUSTERD_BRICK_ATTACH by default, GLUSTERD_BRICK_TERMINATE with -d.
+ *
+ * Returns the result of send_brick_req, or EXIT_FAILURE if setup fails.
+ */
+int
+main (int argc, char *argv[])
+{
+        glfs_t          *fs      = NULL;
+        struct rpc_clnt *rpc     = NULL;
+        dict_t          *options = NULL;
+        int              ret     = 0;
+        int              op      = GLUSTERD_BRICK_ATTACH;
+        int              opt;
+
+        while ((opt = getopt (argc, argv, "d")) != -1) {
+                switch (opt) {
+                case 'd':
+                        op = GLUSTERD_BRICK_TERMINATE;
+                        break;
+                default:
+                        return usage (argv[0]);
+                }
+        }
+
+        /* exactly two positional arguments must remain */
+        if (optind != (argc - 2)) {
+                return usage (argv[0]);
+        }
+
+        fs = glfs_new ("gf-attach");
+        if (!fs) {
+                fprintf (stderr, "glfs_new failed\n");
+                return EXIT_FAILURE;
+        }
+
+        (void) glfs_set_logging (fs, "/dev/stderr", 7);
+        /*
+         * This will actually fail because we haven't defined a volume, but
+         * it will do enough initialization to get us going.
+         */
+        (void) glfs_init (fs);
+
+        options = dict_new ();
+        if (!options) {
+                fprintf (stderr, "dict_new failed\n");
+                return EXIT_FAILURE;
+        }
+        ret = dict_set_str (options, "transport-type", "socket");
+        if (ret != 0) {
+                fprintf (stderr, "failed to set transport type\n");
+                return EXIT_FAILURE;
+        }
+        ret = dict_set_str (options, "transport.address-family", "unix");
+        if (ret != 0) {
+                fprintf (stderr, "failed to set address family\n");
+                return EXIT_FAILURE;
+        }
+        ret = dict_set_str (options, "transport.socket.connect-path",
+                            argv[optind]);
+        if (ret != 0) {
+                fprintf (stderr, "failed to set connect path\n");
+                return EXIT_FAILURE;
+        }
+
+        rpc = rpc_clnt_new (options, fs->ctx->master, "gf-attach-rpc", 0);
+        if (!rpc) {
+                fprintf (stderr, "rpc_clnt_new failed\n");
+                return EXIT_FAILURE;
+        }
+
+        /* my_notify keeps the client's connected state up to date */
+        if (rpc_clnt_register_notify (rpc, my_notify, NULL) != 0) {
+                fprintf (stderr, "rpc_clnt_register_notify failed\n");
+                return EXIT_FAILURE;
+        }
+
+        if (rpc_clnt_start (rpc) != 0) {
+                fprintf (stderr, "rpc_clnt_start failed\n");
+                return EXIT_FAILURE;
+        }
+
+        return send_brick_req (fs->ctx->master, rpc, argv[optind+1], op);
+}