summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-op-sm.c
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@redhat.com>2016-12-08 16:24:15 -0500
committerVijay Bellur <vbellur@redhat.com>2017-01-30 19:13:58 -0500
commit1a95fc3036db51b82b6a80952f0908bc2019d24a (patch)
treeb983ac196a8165d5cb5e860a5ef97d3e9a41b5c9 /xlators/mgmt/glusterd/src/glusterd-op-sm.c
parent7f7d7a939e46b330a084d974451eee4757ba61b4 (diff)
core: run many bricks within one glusterfsd process
This patch adds support for multiple brick translator stacks running in a single brick server process. This reduces our per-brick memory usage by approximately 3x, and our appetite for TCP ports even more. It also creates potential to avoid process/thread thrashing, and to improve QoS by scheduling more carefully across the bricks, but realizing that potential will require further work. Multiplexing is controlled by the "cluster.brick-multiplex" global option. By default it's off, and bricks are started in separate processes as before. If multiplexing is enabled, then *compatible* bricks (mostly those with the same transport options) will be started in the same process. Change-Id: I45059454e51d6f4cbb29a4953359c09a408695cb BUG: 1385758 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: https://review.gluster.org/14763 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-op-sm.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c127
1 files changed, 101 insertions, 26 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index b24e91a457c..d9b18e00195 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -58,16 +58,27 @@ static int
glusterd_set_shared_storage (dict_t *dict, char *key, char *value,
char **op_errstr);
-/* Valid options for all volumes to be listed in the *
- * valid_all_vol_opts table. To add newer options to *
- * all volumes, we can just add more entries to this *
- * table *
+/*
+ * Valid options for all volumes to be listed in the valid_all_vol_opts table.
+ * To add newer options to all volumes, we can just add more entries to this
+ * table.
+ *
+ * It's important that every value have a default, or have a special handler
+ * in glusterd_get_global_options_for_all_vols, or else we might crash there.
*/
glusterd_all_vol_opts valid_all_vol_opts[] = {
- { GLUSTERD_QUORUM_RATIO_KEY },
- { GLUSTERD_SHARED_STORAGE_KEY },
- { GLUSTERD_GLOBAL_OP_VERSION_KEY },
- { GLUSTERD_MAX_OP_VERSION_KEY },
+ { GLUSTERD_QUORUM_RATIO_KEY, "0" },
+ { GLUSTERD_SHARED_STORAGE_KEY, "disable" },
+ /* This one actually gets filled in dynamically. */
+ { GLUSTERD_GLOBAL_OP_VERSION_KEY, "BUG_NO_OP_VERSION"},
+ /*
+ * This one should be filled in dynamically, but it didn't used to be
+ * (before the defaults were added here) so the value is unclear.
+ *
+ * TBD: add a dynamic handler to set the appropriate value
+ */
+ { GLUSTERD_MAX_OP_VERSION_KEY, "BUG_NO_MAX_OP_VERSION"},
+ { GLUSTERD_BRICK_MULTIPLEX_KEY, "disable"},
{ NULL },
};
@@ -557,7 +568,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
if (!brick_req)
goto out;
brick_req->op = GLUSTERD_BRICK_TERMINATE;
- brick_req->name = "";
+ brick_req->name = brickinfo->path;
glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPING);
break;
case GD_OP_PROFILE_VOLUME:
@@ -618,28 +629,13 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
break;
case GD_OP_SNAP:
- brick_req = GF_CALLOC (1, sizeof (*brick_req),
- gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
- goto out;
-
- brick_req->op = GLUSTERD_BRICK_BARRIER;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- brick_req->name = gf_strdup (volname);
-
- break;
case GD_OP_BARRIER:
brick_req = GF_CALLOC (1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
if (!brick_req)
goto out;
brick_req->op = GLUSTERD_BRICK_BARRIER;
- ret = dict_get_str(dict, "volname", &volname);
- if (ret)
- goto out;
- brick_req->name = gf_strdup (volname);
+ brick_req->name = brickinfo->path;
break;
default:
@@ -754,6 +750,17 @@ out:
}
static int
+glusterd_validate_brick_mx_options (xlator_t *this, char *fullkey, char *value,
+ char **op_errstr)
+{
+ int ret = 0;
+
+ //Placeholder function for now
+
+ return ret;
+}
+
+static int
glusterd_validate_shared_storage (char *key, char *value, char *errstr)
{
int32_t ret = -1;
@@ -1191,6 +1198,11 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
if (ret)
goto out;
+ ret = glusterd_validate_brick_mx_options (this, key, value,
+ op_errstr);
+ if (ret)
+ goto out;
+
local_key_op_version = glusterd_get_op_version_for_key (key);
if (local_key_op_version > local_new_op_version)
local_new_op_version = local_key_op_version;
@@ -2351,6 +2363,33 @@ out:
}
static int
+glusterd_set_brick_mx_opts (dict_t *dict, char *key, char *value,
+ char **op_errstr)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, dict, out);
+ GF_VALIDATE_OR_GOTO (this->name, key, out);
+ GF_VALIDATE_OR_GOTO (this->name, value, out);
+ GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+ ret = 0;
+
+ priv = this->private;
+
+ if (!strcmp (key, GLUSTERD_BRICK_MULTIPLEX_KEY)) {
+ ret = dict_set_dynstr (priv->opts, key, gf_strdup (value));
+ }
+
+out:
+ return ret;
+}
+
+static int
glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict,
char **op_errstr)
{
@@ -2399,6 +2438,14 @@ glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict,
goto out;
}
+ ret = glusterd_set_brick_mx_opts (dict, key, value, op_errstr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_MX_SET_FAIL,
+ "Failed to set brick multiplexing option");
+ goto out;
+ }
+
/* If the key is cluster.op-version, set conf->op_version to the value
* if needed and save it.
*/
@@ -2629,6 +2676,7 @@ out:
}
+
static int
glusterd_op_set_volume (dict_t *dict, char **errstr)
{
@@ -6094,6 +6142,8 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr,
glusterd_volinfo_t *volinfo = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_pending_node_t *pending_node = NULL;
+ glusterd_conf_t *conf = THIS->private;
+ char pidfile[1024];
ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
if (ret)
@@ -6122,6 +6172,18 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr,
selected);
pending_node = NULL;
}
+ /*
+ * This is not really the right place to do it, but
+ * it's the most convenient.
+ * TBD: move this to *after* the RPC
+ */
+ brickinfo->status = GF_BRICK_STOPPED;
+ brickinfo->started_here = _gf_false;
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
+ brickinfo, conf);
+ gf_log (THIS->name, GF_LOG_INFO,
+ "unlinking pidfile %s", pidfile);
+ (void) sys_unlink (pidfile);
}
}
@@ -6144,7 +6206,8 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
glusterd_pending_node_t *pending_node = NULL;
int32_t command = 0;
int32_t force = 0;
-
+ glusterd_conf_t *conf = THIS->private;
+ char pidfile[1024];
ret = dict_get_str (dict, "volname", &volname);
@@ -6218,6 +6281,18 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
selected);
pending_node = NULL;
}
+ /*
+ * This is not really the right place to do it, but
+ * it's the most convenient.
+ * TBD: move this to *after* the RPC
+ */
+ brickinfo->status = GF_BRICK_STOPPED;
+ brickinfo->started_here = _gf_false;
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
+ brickinfo, conf);
+ gf_log (THIS->name, GF_LOG_INFO,
+ "unlinking pidfile %s", pidfile);
+ (void) sys_unlink (pidfile);
}
i++;
}