summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/basic/multiplex.t2
-rw-r--r--tests/bugs/core/multiplex-limit-issue-151.t57
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h34
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c19
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c19
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c394
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h13
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c38
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h12
13 files changed, 542 insertions, 58 deletions
diff --git a/tests/basic/multiplex.t b/tests/basic/multiplex.t
index ed8788ff3bb..2f0f462f14d 100644
--- a/tests/basic/multiplex.t
+++ b/tests/basic/multiplex.t
@@ -17,6 +17,8 @@ function count_brick_pids {
| grep -v "N/A" | sort | uniq | wc -l
}
+cleanup
+
TEST glusterd
TEST $CLI volume set all cluster.brick-multiplex on
push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/bugs/core/multiplex-limit-issue-151.t
new file mode 100644
index 00000000000..9511756ecde
--- /dev/null
+++ b/tests/bugs/core/multiplex-limit-issue-151.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Regression test for issue 151: checks that, with brick multiplexing
+# enabled, cluster.max-bricks-per-process bounds the number of bricks that
+# end up sharing a single glusterfsd process.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../volume.rc
+
+# Number of bricks reported with <status>1 in the XML volume status.
+function count_up_bricks {
+ $CLI --xml volume status all | grep '<status>1' | wc -l
+}
+
+# Number of processes matched by "pgrep glusterfsd".
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+# Number of distinct <pid> values (ignoring "N/A") in the XML volume status.
+function count_brick_pids {
+ $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume set all cluster.brick-multiplex on
+# Negative and non-numeric limits must be rejected; 3 must be accepted.
+TEST ! $CLI volume set all cluster.max-bricks-per-process -1
+TEST ! $CLI volume set all cluster.max-bricks-per-process foobar
+TEST $CLI volume set all cluster.max-bricks-per-process 3
+
+push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
+push_trapfunc "cleanup"
+
+TEST $CLI volume create $V0 $H0:$B0/brick{0..5}
+TEST $CLI volume start $V0
+
+# 6 bricks with a limit of 3 per process: 2 brick processes are expected.
+EXPECT 2 count_brick_processes
+EXPECT 2 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
+# Kill every process matching "gluster" and start glusterd again; the same
+# distribution of bricks over processes is expected to come back.
+pkill gluster
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
+# A 7th brick is expected to land in a third brick process.
+TEST $CLI volume add-brick $V0 $H0:$B0/brick6
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks
+
+# Removing one brick leaves 6 bricks up, still spread over 3 processes.
+TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start
+TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 commit
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
index ed171b69b66..33aac2f3dc8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
@@ -71,7 +71,8 @@ typedef enum gf_gld_mem_types_ {
gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55,
gf_gld_mt_snap_create_args_t = gf_common_mt_end + 56,
gf_gld_mt_local_peers_t = gf_common_mt_end + 57,
- gf_gld_mt_end = gf_common_mt_end + 58,
+ gf_gld_mt_glusterd_brick_proc_t = gf_common_mt_end + 58,
+ gf_gld_mt_end = gf_common_mt_end + 59,
} gf_gld_mem_types_t;
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index 9161d9058f0..14424d36890 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -41,7 +41,7 @@
#define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD
-#define GLFS_NUM_MESSAGES 602
+#define GLFS_NUM_MESSAGES 606
#define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1)
/* Messaged with message IDs */
@@ -4869,6 +4869,38 @@
*/
#define GD_MSG_VOL_SET_VALIDATION_INFO (GLUSTERD_COMP_BASE + 602)
+/*!
+ * @messageid GLUSTERD_COMP_BASE + 603
+ * @diagnosis cluster.max-bricks-per-process is not set, so brick
+ *            multiplexing continues without a per-process brick limit.
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_MUX_LIMIT (GLUSTERD_COMP_BASE + 603)
+
+/*!
+ * @messageid GLUSTERD_COMP_BASE + 604
+ * @diagnosis Removing a brick from its brick process bookkeeping failed.
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICKPROC_REM_BRICK_FAILED (GLUSTERD_COMP_BASE + 604)
+
+/*!
+ * @messageid GLUSTERD_COMP_BASE + 605
+ * @diagnosis Adding a brick to a brick process failed.
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICKPROC_ADD_BRICK_FAILED (GLUSTERD_COMP_BASE + 605)
+
+/*!
+ * @messageid GLUSTERD_COMP_BASE + 606
+ * @diagnosis Creating a new brick process instance failed.
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICKPROC_NEW_FAILED (GLUSTERD_COMP_BASE + 606)
+
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 9eba6025427..6b12d603728 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -79,6 +79,10 @@ glusterd_all_vol_opts valid_all_vol_opts[] = {
*/
{ GLUSTERD_MAX_OP_VERSION_KEY, "BUG_NO_MAX_OP_VERSION"},
{ GLUSTERD_BRICK_MULTIPLEX_KEY, "disable"},
+ /* Set this value to 1 by default implying non-multiplexed behaviour.
+ * TBD: Discuss the default value for this. Maybe this should be a
+ * dynamic value depending on the memory specifications per node */
+ { GLUSTERD_BRICKMUX_LIMIT_KEY, "1"},
{ NULL },
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index c3f25ebe84c..1fc7a250748 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -488,8 +488,8 @@ __gluster_pmap_signin (rpcsvc_request_t *req)
{
pmap_signin_req args = {0,};
pmap_signin_rsp rsp = {0,};
- glusterd_brickinfo_t *brickinfo = NULL;
int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
ret = xdr_to_generic (req->msg[0], &args,
(xdrproc_t)xdr_pmap_signin_req);
@@ -502,6 +502,7 @@ __gluster_pmap_signin (rpcsvc_request_t *req)
GF_PMAP_PORT_BRICKSERVER, req->trans);
ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo);
+
fail:
glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
(xdrproc_t)xdr_pmap_signin_rsp);
@@ -569,6 +570,22 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
* glusterd end when a brick is killed from the
* backend */
brickinfo->status = GF_BRICK_STOPPED;
+
+ /* Remove brick from brick process if not already
+ * removed in the brick op phase. This situation would
+ * arise when the brick is killed explicitly from the
+ * backend */
+ ret = glusterd_brick_process_remove_brick (brickinfo);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Couldn't remove "
+ "brick %s:%s from brick process",
+ brickinfo->hostname,
+ brickinfo->path);
+ /* Ignore 'ret' here since the brick might
+ * have already been deleted in brick op phase
+ */
+ ret = 0;
+ }
}
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index e5fba968c07..913b5946b6e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -957,6 +957,7 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
gd1_mgmt_brick_op_req *req = NULL;
int ret = 0;
xlator_t *this = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
this = THIS;
args.op_ret = -1;
@@ -986,6 +987,23 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
GF_FREE (args.errstr);
}
+ if (op == GD_OP_STOP_VOLUME || op == GD_OP_REMOVE_BRICK) {
+ if (args.op_ret == 0) {
+ brickinfo = pnode->node;
+ ret = glusterd_brick_process_remove_brick (brickinfo);
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_BRICKPROC_REM_BRICK_FAILED,
+ "Removing brick %s:%s from brick"
+ " process failed",
+ brickinfo->hostname,
+ brickinfo->path);
+ args.op_ret = ret;
+ goto out;
+ }
+ }
+ }
+
if (GD_OP_STATUS_VOLUME == op) {
ret = dict_set_int32 (args.dict, "index", pnode->index);
if (ret) {
@@ -1023,7 +1041,6 @@ out:
dict_unref (args.dict);
gd_brick_op_req_free (req);
return args.op_ret;
-
}
int32_t
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 7e50d597fc2..5edf9805f9c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -117,6 +117,46 @@ is_brick_mx_enabled (void)
return ret ? _gf_false: enabled;
}
+/*
+ * Look up how many bricks a single brick process may host.
+ *
+ * @mux_limit: out parameter, written on every path before returning:
+ *             - 1 when brick multiplexing is disabled;
+ *             - the value parsed from cluster.max-bricks-per-process when
+ *               that key is present in priv->opts;
+ *             - -1 when the key is absent (the function still returns 0 in
+ *               that case) or when THIS / its private data is missing.
+ *
+ * Returns 0 on success, non-zero when validation of THIS/priv fails or when
+ * gf_string2int() rejects the stored value.
+ */
+int
+get_mux_limit_per_process (int *mux_limit)
+{
+        xlator_t        *this            = THIS;
+        glusterd_conf_t *priv            = NULL;
+        char            *limit_str       = NULL;
+        int              bricks_per_proc = -1;
+        int              ret             = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        if (is_brick_mx_enabled ()) {
+                if (dict_get_str (priv->opts, GLUSTERD_BRICKMUX_LIMIT_KEY,
+                                  &limit_str) == 0) {
+                        ret = gf_string2int (limit_str, &bricks_per_proc);
+                } else {
+                        /* A missing key is not an error for the caller:
+                         * report it, keep the -1 sentinel and succeed. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Can't get limit for number of bricks per brick "
+                                "process from dict");
+                        ret = 0;
+                }
+        } else {
+                /* Without multiplexing every brick gets its own process. */
+                bricks_per_proc = 1;
+                ret = 0;
+        }
+
+out:
+        *mux_limit = bricks_per_proc;
+
+        gf_msg_debug ("glusterd", 0, "Mux limit set to %d bricks per process", *mux_limit);
+
+        return ret;
+}
+
extern struct volopt_map_entry glusterd_volopt_map[];
extern glusterd_all_vol_opts valid_all_vol_opts[];
@@ -972,6 +1012,33 @@ out:
}
+/*
+ * Allocate a brick process descriptor with both of its list heads
+ * initialised and a brick count of zero, and hand it back through
+ * @brickprocess.
+ *
+ * Returns 0 on success, -1 when @brickprocess is NULL or the allocation
+ * fails (in which case *brickprocess is left untouched).
+ */
int32_t
+glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess)
+{
+        int32_t                ret  = -1;
+        glusterd_brick_proc_t *proc = NULL;
+
+        GF_VALIDATE_OR_GOTO (THIS->name, brickprocess, out);
+
+        proc = GF_CALLOC (1, sizeof (*proc), gf_gld_mt_glusterd_brick_proc_t);
+        if (proc != NULL) {
+                CDS_INIT_LIST_HEAD (&proc->bricks);
+                CDS_INIT_LIST_HEAD (&proc->brick_proc_list);
+                proc->brick_count = 0;
+
+                *brickprocess = proc;
+                ret = 0;
+        }
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+int32_t
glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo)
{
glusterd_brickinfo_t *new_brickinfo = NULL;
@@ -2033,6 +2100,15 @@ retry:
goto out;
}
+ ret = glusterd_brick_process_add_brick (brickinfo, volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s "
+ "to brick process failed.", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
connect:
ret = glusterd_brick_connect (volinfo, brickinfo, socketpath);
if (ret) {
@@ -2096,6 +2172,200 @@ glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo)
return 0;
}
+/*
+ * Decide whether option @key needs an explicit comparison before two
+ * bricks may share a process.
+ *
+ * Options matching "*auth*" or "*event-threads" are treated as safe,
+ * because they are already handled in other ways (auth options are copied
+ * down to the bricks, event-threads is made irrelevant). Every other
+ * option is suspect and is left for opts_mismatch() to check.
+ *
+ * Returns _gf_false for the safe options, _gf_true otherwise. @this,
+ * @value and @arg are not used.
+ */
+static gf_boolean_t
+unsafe_option (dict_t *this, char *key, data_t *value, void *arg)
+{
+        if ((fnmatch ("*auth*", key, 0) == 0) ||
+            (fnmatch ("*event-threads", key, 0) == 0)) {
+                return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Compare the value of option @key (@value1, taken from @dict1) with the
+ * value stored under the same key in @dict2.
+ *
+ * An option present on only one side counts as a mismatch: looking at the
+ * default instead would be hard, since it lives in a structure inside each
+ * translator and there is no dlopen interface to reach it. A user who
+ * really wants the options to match (and the bricks to be multiplexed) can
+ * always reset the option.
+ *
+ * Returns 0 when the values agree over the shorter of the two lengths,
+ * -1 when the option is missing from @dict2 or the values differ.
+ */
+static int
+opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2)
+{
+        data_t  *other   = dict_get (dict2, key);
+        int32_t  cmp_len = 0;
+
+        if (other == NULL) {
+                gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key);
+                return -1;
+        }
+
+        cmp_len = MIN (value1->len, other->len);
+        if (strncmp (value1->data, other->data, cmp_len) == 0) {
+                return 0;
+        }
+
+        gf_log (THIS->name, GF_LOG_DEBUG,
+                "option mismatch, %s, %s != %s",
+                key, value1->data, other->data);
+        return -1;
+}
+
+/*
+ * Release a brick process descriptor: re-initialise its bricks list head,
+ * unlink it from the list it is queued on through brick_proc_list, and
+ * free it. Entries still linked on the bricks list are not freed here.
+ *
+ * Always returns 0. @brick_proc must not be NULL and must not be used
+ * after this call.
+ */
+int
+glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc)
+{
+        cds_list_del_init (&brick_proc->bricks);
+        cds_list_del_init (&brick_proc->brick_proc_list);
+
+        GF_FREE (brick_proc);
+
+        return 0;
+}
+
+/*
+ * Drop @brickinfo from the bookkeeping of the brick process that serves it.
+ *
+ * The owning process is the first entry of priv->brick_procs whose port
+ * equals brickinfo->port; within it, the first tracked brick with the same
+ * path is unlinked and freed. When that leaves the process without bricks,
+ * the process entry itself is deleted.
+ *
+ * Returns 0 on success, and also when no process or no brick matched.
+ * Returns -1 when THIS, its private data or @brickinfo is NULL, or when the
+ * matching process unexpectedly has a brick count of zero; a failure from
+ * glusterd_brickprocess_delete() is passed through.
+ */
+int
+glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t              *this       = THIS;
+        glusterd_conf_t       *priv       = NULL;
+        glusterd_brick_proc_t *brick_proc = NULL;
+        glusterd_brick_proc_t *next_proc  = NULL;
+        glusterd_brickinfo_t  *tracked    = NULL;
+        glusterd_brickinfo_t  *next_brick = NULL;
+        int                    ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+        GF_VALIDATE_OR_GOTO (this->name, brickinfo, out);
+
+        cds_list_for_each_entry_safe (brick_proc, next_proc,
+                                      &priv->brick_procs, brick_proc_list) {
+                if (brickinfo->port == brick_proc->port) {
+                        GF_VALIDATE_OR_GOTO (this->name, (brick_proc->brick_count > 0), out);
+
+                        cds_list_for_each_entry_safe (tracked, next_brick,
+                                                      &brick_proc->bricks,
+                                                      brick_list) {
+                                if (strcmp (tracked->path,
+                                            brickinfo->path) != 0) {
+                                        continue;
+                                }
+
+                                cds_list_del_init (&tracked->brick_list);
+
+                                GF_FREE (tracked->logfile);
+                                GF_FREE (tracked);
+                                brick_proc->brick_count--;
+                                break;
+                        }
+
+                        /* If all bricks have been removed, delete the
+                         * brick process */
+                        if (brick_proc->brick_count == 0) {
+                                ret = glusterd_brickprocess_delete (brick_proc);
+                                if (ret)
+                                        goto out;
+                        }
+
+                        /* Only the first process on this port is handled. */
+                        break;
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Record @brickinfo as being served by the brick process listening on
+ * brickinfo->port.
+ *
+ * A private duplicate of @brickinfo is created and appended to the bricks
+ * list of the matching entry in priv->brick_procs; when no entry exists for
+ * that port yet, a new one is allocated and queued first. The process'
+ * brick_count is incremented on success.
+ *
+ * @brickinfo: brick to track; must not be NULL.
+ * @volinfo:   not used by this function.
+ *
+ * Returns 0 on success and non-zero on failure. On failure nothing is added
+ * to any brick process and the duplicate, if already allocated, is freed.
+ */
+int
+glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo,
+                                  glusterd_volinfo_t *volinfo)
+{
+        int                    ret           = -1;
+        xlator_t              *this          = NULL;
+        glusterd_conf_t       *priv          = NULL;
+        glusterd_brick_proc_t *brick_proc    = NULL;
+        glusterd_brickinfo_t  *brickinfo_dup = NULL;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+        GF_VALIDATE_OR_GOTO (this->name, brickinfo, out);
+
+        ret = glusterd_brickinfo_new (&brickinfo_dup);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_NEW_INFO_FAIL,
+                        "Failed to create new brickinfo");
+                goto out;
+        }
+
+        ret = glusterd_brickinfo_dup (brickinfo, brickinfo_dup);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_SET_INFO_FAIL, "Failed to dup brickinfo");
+                goto out;
+        }
+
+        /* A non-zero return here only means "no process on this port yet". */
+        ret = glusterd_brick_proc_for_port (brickinfo->port, &brick_proc);
+        if (ret) {
+                ret = glusterd_brickprocess_new (&brick_proc);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICKPROC_NEW_FAILED, "Failed to create "
+                                "new brick process instance");
+                        goto out;
+                }
+
+                brick_proc->port = brickinfo->port;
+
+                cds_list_add_tail (&brick_proc->brick_proc_list, &priv->brick_procs);
+        }
+
+        cds_list_add_tail (&brickinfo_dup->brick_list, &brick_proc->bricks);
+        brick_proc->brick_count++;
+out:
+        /* Every failure path is reached before the duplicate is linked into
+         * a brick process, so nothing else references it: release it here
+         * the same way glusterd_brick_process_remove_brick() releases a
+         * tracked duplicate, instead of leaking it. */
+        if (ret && brickinfo_dup) {
+                GF_FREE (brickinfo_dup->logfile);
+                GF_FREE (brickinfo_dup);
+        }
+
+        return ret;
+}
+
+/*
+ * Find the brick process entry in priv->brick_procs that uses @port.
+ *
+ * On a match, *brickprocess is set to the first such entry and 0 is
+ * returned. In every other case (no entry for that port, or THIS / its
+ * private data missing) -1 is returned and *brickprocess is not written.
+ */
+int
+glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess)
+{
+        xlator_t              *this      = THIS;
+        glusterd_conf_t       *priv      = NULL;
+        glusterd_brick_proc_t *candidate = NULL;
+        int                    ret       = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        cds_list_for_each_entry (candidate, &priv->brick_procs, brick_proc_list) {
+                if (candidate->port != port) {
+                        continue;
+                }
+
+                *brickprocess = candidate;
+                ret = 0;
+                break;
+        }
+out:
+        return ret;
+}
+
int32_t
glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
@@ -2118,6 +2388,13 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
ret = 0;
+ ret = glusterd_brick_process_remove_brick (brickinfo);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Couldn't remove brick from"
+ " brick process");
+ goto out;
+ }
+
if (del_brick)
cds_list_del_init (&brickinfo->brick_list);
@@ -2149,11 +2426,13 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
GF_FREE (op_errstr);
}
}
+
(void) glusterd_brick_disconnect (brickinfo);
ret = 0;
}
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
+
gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile);
(void) sys_unlink (pidfile);
@@ -2161,7 +2440,6 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
if (del_brick)
glusterd_delete_brick (volinfo, brickinfo);
-
out:
return ret;
}
@@ -5090,6 +5368,7 @@ attach_brick (xlator_t *this,
}
(void) build_volfile_path (full_id, path, sizeof(path), NULL);
+
for (tries = 15; tries > 0; --tries) {
rpc = rpc_clnt_ref (other_brick->rpc);
if (rpc) {
@@ -5105,6 +5384,23 @@ attach_brick (xlator_t *this,
brickinfo->status = GF_BRICK_STARTED;
brickinfo->rpc =
rpc_clnt_ref (other_brick->rpc);
+ ret = glusterd_brick_process_add_brick (brickinfo,
+ volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICKPROC_ADD_BRICK_FAILED,
+ "Adding brick %s:%s to brick "
+ "process failed", brickinfo->hostname,
+ brickinfo->path);
+ return ret;
+ }
+
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Add brick"
+ " to brick process failed");
+ return ret;
+ }
+
return 0;
}
}
@@ -5126,56 +5422,6 @@ attach_brick (xlator_t *this,
return ret;
}
-static gf_boolean_t
-unsafe_option (dict_t *this, char *key, data_t *value, void *arg)
-{
- /*
- * Certain options are safe because they're already being handled other
- * ways, such as being copied down to the bricks (all auth options) or
- * being made irrelevant (event-threads). All others are suspect and
- * must be checked in the next function.
- */
- if (fnmatch ("*auth*", key, 0) == 0) {
- return _gf_false;
- }
-
- if (fnmatch ("*event-threads", key, 0) == 0) {
- return _gf_false;
- }
-
- return _gf_true;
-}
-
-static int
-opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2)
-{
- data_t *value2 = dict_get (dict2, key);
- int32_t min_len;
-
- /*
- * If the option is only present on one, we can either look at the
- * default or assume a mismatch. Looking at the default is pretty
- * hard, because that's part of a structure within each translator and
- * there's no dlopen interface to get at it, so we assume a mismatch.
- * If the user really wants them to match (and for their bricks to be
- * multiplexed, they can always reset the option).
- */
- if (!value2) {
- gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key);
- return -1;
- }
-
- min_len = MIN (value1->len, value2->len);
- if (strncmp (value1->data, value2->data, min_len) != 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "option mismatch, %s, %s != %s",
- key, value1->data, value2->data);
- return -1;
- }
-
- return 0;
-}
-
/* This name was just getting too long, hence the abbreviations. */
static glusterd_brickinfo_t *
find_compat_brick_in_vol (glusterd_conf_t *conf,
@@ -5184,10 +5430,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
glusterd_brickinfo_t *brickinfo)
{
xlator_t *this = THIS;
- glusterd_brickinfo_t *other_brick;
+ glusterd_brickinfo_t *other_brick = NULL;
+ glusterd_brick_proc_t *brick_proc = NULL;
char pidfile2[PATH_MAX] = {0};
int32_t pid2 = -1;
int16_t retries = 15;
+ int mux_limit = -1;
+ int ret = -1;
/*
* If comp_vol is provided, we have to check *volume* compatibility
@@ -5219,6 +5468,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
gf_log (THIS->name, GF_LOG_DEBUG, "all options match");
}
+ ret = get_mux_limit_per_process (&mux_limit);
+ if (ret) {
+ gf_msg_debug (THIS->name, 0, "Retrieving brick mux "
+ "limit failed. Returning NULL");
+ return NULL;
+ }
+
cds_list_for_each_entry (other_brick, &srch_vol->bricks,
brick_list) {
if (other_brick == brickinfo) {
@@ -5232,6 +5488,30 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
continue;
}
+ ret = glusterd_brick_proc_for_port (other_brick->port,
+ &brick_proc);
+ if (ret) {
+ gf_msg_debug (THIS->name, 0, "Couldn't get brick "
+ "process corresponding to brick %s:%s",
+ other_brick->hostname, other_brick->path);
+ continue;
+ }
+
+ if (mux_limit != -1) {
+ if (brick_proc->brick_count >= mux_limit)
+ continue;
+ } else {
+ /* This means that the "cluster.max-bricks-per-process"
+ * options hasn't yet been explicitly set. Continue
+ * as if there's no limit set
+ */
+ gf_msg (THIS->name, GF_LOG_WARNING, 0,
+ GD_MSG_NO_MUX_LIMIT,
+ "cluster.max-bricks-per-process options isn't "
+ "set. Continuing with no limit set for "
+ "brick multiplexing.");
+ }
+
GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,
conf);
@@ -5508,6 +5788,16 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
(void) glusterd_brick_connect (volinfo, brickinfo,
socketpath);
+
+ ret = glusterd_brick_process_add_brick (brickinfo, volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICKPROC_ADD_BRICK_FAILED,
+ "Adding brick %s:%s to brick process "
+ "failed.", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
}
return 0;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 7a739c85ebd..cf50e82e849 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -147,6 +147,9 @@ gf_boolean_t
glusterd_check_volume_exists (char *volname);
int32_t
+glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess);
+
+int32_t
glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo);
int32_t
@@ -175,6 +178,16 @@ glusterd_get_next_available_brickid (glusterd_volinfo_t *volinfo);
int32_t
glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo);
+int
+glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo,
+ glusterd_volinfo_t *volinfo);
+
+int
+glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo);
+
+int
+glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess);
+
int32_t
glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 95056d501a3..7254e281497 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2214,8 +2214,6 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
goto out;
}
- count = volinfo->brick_count;
-
ret = dict_get_str (dict, "bricks", &bricks);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -2364,6 +2362,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
free_ptr = brick_list;
}
+ count = volinfo->brick_count;
+
if (count)
brick = strtok_r (brick_list+1, " \n", &saveptr);
caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 629d18ff507..c1aa66cbffb 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1000,6 +1000,38 @@ out:
return ret;
}
+/*
+ * Validation callback for cluster.max-bricks-per-process.
+ *
+ * The option is rejected outright while brick multiplexing is disabled;
+ * otherwise @value is accepted exactly when gf_string2uint() can parse it.
+ * On rejection a message is formatted into *op_errstr and logged.
+ *
+ * Returns 0 when @value is acceptable, non-zero otherwise. @volinfo and
+ * @dict are not used.
+ */
+static int
+validate_mux_limit (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                    char *value, char **op_errstr)
+{
+        int       ret   = -1;
+        uint      limit = 0;
+        xlator_t *this  = THIS;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        if (is_brick_mx_enabled ()) {
+                ret = gf_string2uint (value, &limit);
+                if (ret != 0) {
+                        gf_asprintf (op_errstr, "%s is not a valid count. "
+                                     "%s expects an unsigned integer.",
+                                     value, key);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_INVALID_ENTRY, "%s", *op_errstr);
+                }
+        } else {
+                /* ret is still -1 here, so the set request is refused. */
+                gf_asprintf (op_errstr, "Brick-multiplexing is not enabled. "
+                             "Please enable brick multiplexing before trying "
+                             "to set this option.");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_WRONG_OPTS_SETTING, "%s", *op_errstr);
+        }
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+        return ret;
+}
static int
validate_boolean (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
@@ -3408,6 +3440,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_10_0,
.validate_fn = validate_boolean
},
+ { .key = GLUSTERD_BRICKMUX_LIMIT_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = "1",
+ .op_version = GD_OP_VERSION_3_12_0,
+ .validate_fn = validate_mux_limit
+ },
{ .key = "disperse.optimistic-change-log",
.voltype = "cluster/disperse",
.type = NO_DOC,
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index 14c1c6ae942..f8a38f965a6 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1723,6 +1723,7 @@ init (xlator_t *this)
CDS_INIT_LIST_HEAD (&conf->volumes);
CDS_INIT_LIST_HEAD (&conf->snapshots);
CDS_INIT_LIST_HEAD (&conf->missed_snaps_list);
+ CDS_INIT_LIST_HEAD (&conf->brick_procs);
pthread_mutex_init (&conf->mutex, NULL);
conf->rpc = rpc;
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 9546a389900..b2141853db4 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -55,6 +55,8 @@
#define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage"
#define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex"
+#define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process"
+
#define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256
#define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90
#define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100
@@ -154,6 +156,7 @@ typedef struct {
struct pmap_registry *pmap;
struct cds_list_head volumes;
struct cds_list_head snapshots; /*List of snap volumes */
+ struct cds_list_head brick_procs; /* List of brick processes */
pthread_mutex_t xprt_lock;
struct list_head xprt_list;
gf_store_handle_t *handle;
@@ -233,6 +236,15 @@ struct glusterd_brickinfo {
typedef struct glusterd_brickinfo glusterd_brickinfo_t;
+/*
+ * Bookkeeping for one brick process and the bricks tracked as running in
+ * it. Entries are identified by port and queued on the brick_procs list of
+ * glusterd_conf_t.
+ */
+struct glusterd_brick_proc {
+ /* Port used to match bricks (brickinfo->port) to this process. */
+ int port;
+ /* Number of bricks currently linked on 'bricks'. */
+ uint32_t brick_count;
+ /* Link into the brick_procs list of glusterd_conf_t. */
+ struct cds_list_head brick_proc_list;
+ /* Head of the list of brickinfo duplicates, linked via brick_list. */
+ struct cds_list_head bricks;
+};
+
+typedef struct glusterd_brick_proc glusterd_brick_proc_t;
+
struct gf_defrag_brickinfo_ {
char *name;
int files;