diff options
author | Samikshan Bairagya <samikshan@gmail.com> | 2017-06-02 10:12:12 +0530 |
---|---|---|
committer | Atin Mukherjee <amukherj@redhat.com> | 2017-07-10 04:33:19 +0000 |
commit | 9e8ee31e643b7fbf7d46092c395ea27aaeb82f6b (patch) | |
tree | c48e28158aa31462bba580a8ef38e01ce1a5af6d | |
parent | e304f48fa262e5cdbe181fb3fee5dfb9c893108c (diff) |
glusterd: Introduce option to limit no. of muxed bricks per process
This commit introduces a new global option that can be set to limit
the number of multiplexed bricks in one process.
Usage:
`# gluster volume set all cluster.max-bricks-per-process <value>`
If this option is not set then multiplexing will happen for now
with no limitations set; i.e. a brick process will have as many
bricks multiplexed to it as possible. In other words the current
multiplexing behaviour won't change if this option isn't set to
any value.
This commit also introduces a brick process instance that contains
information about brick processes, like the number of bricks
handled by the process (which is 1 in non-multiplexing cases), list
of bricks, and port number which also serves as a unique identifier
for each brick process instance. The brick process list is
maintained in 'glusterd_conf_t'.
Updates: #151
Change-Id: Ib987d14ab0a4f6034dac01b73a4b2839f7b0b695
Signed-off-by: Samikshan Bairagya <samikshan@gmail.com>
Reviewed-on: https://review.gluster.org/17469
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
-rw-r--r-- | tests/basic/multiplex.t | 2 | ||||
-rw-r--r-- | tests/bugs/core/multiplex-limit-issue-151.t | 57 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-mem-types.h | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-messages.h | 34 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.c | 19 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-syncop.c | 19 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 394 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 13 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 38 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 12 |
13 files changed, 542 insertions, 58 deletions
diff --git a/tests/basic/multiplex.t b/tests/basic/multiplex.t index ed8788ff3bb..2f0f462f14d 100644 --- a/tests/basic/multiplex.t +++ b/tests/basic/multiplex.t @@ -17,6 +17,8 @@ function count_brick_pids { | grep -v "N/A" | sort | uniq | wc -l } +cleanup + TEST glusterd TEST $CLI volume set all cluster.brick-multiplex on push_trapfunc "$CLI volume set all cluster.brick-multiplex off" diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/bugs/core/multiplex-limit-issue-151.t new file mode 100644 index 00000000000..9511756ecde --- /dev/null +++ b/tests/bugs/core/multiplex-limit-issue-151.t @@ -0,0 +1,57 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../traps.rc +. $(dirname $0)/../../volume.rc + +function count_up_bricks { + $CLI --xml volume status all | grep '<status>1' | wc -l +} + +function count_brick_processes { + pgrep glusterfsd | wc -l +} + +function count_brick_pids { + $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \ + | grep -v "N/A" | sort | uniq | wc -l +} + +cleanup; + +TEST glusterd + +TEST $CLI volume set all cluster.brick-multiplex on +TEST ! $CLI volume set all cluster.max-bricks-per-process -1 +TEST ! 
$CLI volume set all cluster.max-bricks-per-process foobar +TEST $CLI volume set all cluster.max-bricks-per-process 3 + +push_trapfunc "$CLI volume set all cluster.brick-multiplex off" +push_trapfunc "cleanup" + +TEST $CLI volume create $V0 $H0:$B0/brick{0..5} +TEST $CLI volume start $V0 + +EXPECT 2 count_brick_processes +EXPECT 2 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks + +pkill gluster +TEST glusterd + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks + +TEST $CLI volume add-brick $V0 $H0:$B0/brick6 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks + +TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start +TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 commit + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index ed171b69b66..33aac2f3dc8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -71,7 +71,8 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55, gf_gld_mt_snap_create_args_t = gf_common_mt_end + 56, gf_gld_mt_local_peers_t = gf_common_mt_end + 57, - gf_gld_mt_end = gf_common_mt_end + 58, + gf_gld_mt_glusterd_brick_proc_t = gf_common_mt_end + 58, + gf_gld_mt_end = gf_common_mt_end + 59, } gf_gld_mem_types_t; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index 9161d9058f0..14424d36890 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -41,7 +41,7 @@ 
#define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD -#define GLFS_NUM_MESSAGES 602 +#define GLFS_NUM_MESSAGES 606 #define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ @@ -4869,6 +4869,38 @@ */ #define GD_MSG_VOL_SET_VALIDATION_INFO (GLUSTERD_COMP_BASE + 602) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_NO_MUX_LIMIT (GLUSTERD_COMP_BASE + 603) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_BRICKPROC_REM_BRICK_FAILED (GLUSTERD_COMP_BASE + 604) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_BRICKPROC_ADD_BRICK_FAILED (GLUSTERD_COMP_BASE + 605) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_BRICKPROC_NEW_FAILED (GLUSTERD_COMP_BASE + 606) + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 9eba6025427..6b12d603728 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -79,6 +79,10 @@ glusterd_all_vol_opts valid_all_vol_opts[] = { */ { GLUSTERD_MAX_OP_VERSION_KEY, "BUG_NO_MAX_OP_VERSION"}, { GLUSTERD_BRICK_MULTIPLEX_KEY, "disable"}, + /* Set this value to 1 by default implying non-multiplexed behaviour. + * TBD: Discuss the default value for this. 
Maybe this should be a + * dynamic value depending on the memory specifications per node */ + { GLUSTERD_BRICKMUX_LIMIT_KEY, "1"}, { NULL }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index c3f25ebe84c..1fc7a250748 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -488,8 +488,8 @@ __gluster_pmap_signin (rpcsvc_request_t *req) { pmap_signin_req args = {0,}; pmap_signin_rsp rsp = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; ret = xdr_to_generic (req->msg[0], &args, (xdrproc_t)xdr_pmap_signin_req); @@ -502,6 +502,7 @@ __gluster_pmap_signin (rpcsvc_request_t *req) GF_PMAP_PORT_BRICKSERVER, req->trans); ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo); + fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_pmap_signin_rsp); @@ -569,6 +570,22 @@ __gluster_pmap_signout (rpcsvc_request_t *req) * glusterd end when a brick is killed from the * backend */ brickinfo->status = GF_BRICK_STOPPED; + + /* Remove brick from brick process if not already + * removed in the brick op phase. 
This situation would + * arise when the brick is killed explicitly from the + * backend */ + ret = glusterd_brick_process_remove_brick (brickinfo); + if (ret) { + gf_msg_debug (this->name, 0, "Couldn't remove " + "brick %s:%s from brick process", + brickinfo->hostname, + brickinfo->path); + /* Ignore 'ret' here since the brick might + * have already been deleted in brick op phase + */ + ret = 0; + } } } diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index e5fba968c07..913b5946b6e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -957,6 +957,7 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, gd1_mgmt_brick_op_req *req = NULL; int ret = 0; xlator_t *this = NULL; + glusterd_brickinfo_t *brickinfo = NULL; this = THIS; args.op_ret = -1; @@ -986,6 +987,23 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, GF_FREE (args.errstr); } + if (op == GD_OP_STOP_VOLUME || op == GD_OP_REMOVE_BRICK) { + if (args.op_ret == 0) { + brickinfo = pnode->node; + ret = glusterd_brick_process_remove_brick (brickinfo); + if (ret) { + gf_msg ("glusterd", GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_REM_BRICK_FAILED, + "Removing brick %s:%s from brick" + " process failed", + brickinfo->hostname, + brickinfo->path); + args.op_ret = ret; + goto out; + } + } + } + if (GD_OP_STATUS_VOLUME == op) { ret = dict_set_int32 (args.dict, "index", pnode->index); if (ret) { @@ -1023,7 +1041,6 @@ out: dict_unref (args.dict); gd_brick_op_req_free (req); return args.op_ret; - } int32_t diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 7e50d597fc2..5edf9805f9c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -117,6 +117,46 @@ is_brick_mx_enabled (void) return ret ? 
_gf_false: enabled; } +int +get_mux_limit_per_process (int *mux_limit) +{ + char *value = NULL; + int ret = -1; + int max_bricks_per_proc = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + if (!is_brick_mx_enabled()) { + max_bricks_per_proc = 1; + ret = 0; + goto out; + } + + ret = dict_get_str (priv->opts, GLUSTERD_BRICKMUX_LIMIT_KEY, &value); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Can't get limit for number of bricks per brick " + "process from dict"); + ret = 0; + } else { + ret = gf_string2int (value, &max_bricks_per_proc); + if (ret) + goto out; + } +out: + *mux_limit = max_bricks_per_proc; + + gf_msg_debug ("glusterd", 0, "Mux limit set to %d bricks per process", *mux_limit); + + return ret; +} + extern struct volopt_map_entry glusterd_volopt_map[]; extern glusterd_all_vol_opts valid_all_vol_opts[]; @@ -972,6 +1012,33 @@ out: } int32_t +glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess) +{ + glusterd_brick_proc_t *new_brickprocess = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO (THIS->name, brickprocess, out); + + new_brickprocess = GF_CALLOC (1, sizeof(*new_brickprocess), + gf_gld_mt_glusterd_brick_proc_t); + + if (!new_brickprocess) + goto out; + + CDS_INIT_LIST_HEAD (&new_brickprocess->bricks); + CDS_INIT_LIST_HEAD (&new_brickprocess->brick_proc_list); + + new_brickprocess->brick_count = 0; + *brickprocess = new_brickprocess; + + ret = 0; + +out: + gf_msg_debug (THIS->name, 0, "Returning %d", ret); + return ret; +} + +int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) { glusterd_brickinfo_t *new_brickinfo = NULL; @@ -2033,6 +2100,15 @@ retry: goto out; } + ret = glusterd_brick_process_add_brick (brickinfo, volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s " + "to 
brick process failed.", brickinfo->hostname, + brickinfo->path); + goto out; + } + connect: ret = glusterd_brick_connect (volinfo, brickinfo, socketpath); if (ret) { @@ -2096,6 +2172,200 @@ glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo) return 0; } +static gf_boolean_t +unsafe_option (dict_t *this, char *key, data_t *value, void *arg) +{ + /* + * Certain options are safe because they're already being handled other + * ways, such as being copied down to the bricks (all auth options) or + * being made irrelevant (event-threads). All others are suspect and + * must be checked in the next function. + */ + if (fnmatch ("*auth*", key, 0) == 0) { + return _gf_false; + } + + if (fnmatch ("*event-threads", key, 0) == 0) { + return _gf_false; + } + + return _gf_true; +} + +static int +opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2) +{ + data_t *value2 = dict_get (dict2, key); + int32_t min_len; + + /* + * If the option is only present on one, we can either look at the + * default or assume a mismatch. Looking at the default is pretty + * hard, because that's part of a structure within each translator and + * there's no dlopen interface to get at it, so we assume a mismatch. + * If the user really wants them to match (and for their bricks to be + * multiplexed, they can always reset the option). 
+ */ + if (!value2) { + gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key); + return -1; + } + + min_len = MIN (value1->len, value2->len); + if (strncmp (value1->data, value2->data, min_len) != 0) { + gf_log (THIS->name, GF_LOG_DEBUG, + "option mismatch, %s, %s != %s", + key, value1->data, value2->data); + return -1; + } + + return 0; +} + +int +glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc) +{ + cds_list_del_init (&brick_proc->brick_proc_list); + cds_list_del_init (&brick_proc->bricks); + + GF_FREE (brick_proc); + + return 0; +} + +int +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + glusterd_brickinfo_t *brickinfoiter = NULL; + glusterd_brick_proc_t *brick_proc_tmp = NULL; + glusterd_brickinfo_t *tmp = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + + cds_list_for_each_entry_safe (brick_proc, brick_proc_tmp, + &priv->brick_procs, brick_proc_list) { + if (brickinfo->port != brick_proc->port) { + continue; + } + + GF_VALIDATE_OR_GOTO (this->name, (brick_proc->brick_count > 0), out); + + cds_list_for_each_entry_safe (brickinfoiter, tmp, + &brick_proc->bricks, brick_list) { + if (strcmp (brickinfoiter->path, brickinfo->path) == 0) { + cds_list_del_init (&brickinfoiter->brick_list); + + GF_FREE (brickinfoiter->logfile); + GF_FREE (brickinfoiter); + brick_proc->brick_count--; + break; + } + } + + /* If all bricks have been removed, delete the brick process */ + if (brick_proc->brick_count == 0) { + ret = glusterd_brickprocess_delete (brick_proc); + if (ret) + goto out; + } + break; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, + glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t 
*this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + glusterd_brickinfo_t *brickinfo_dup = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + + ret = glusterd_brickinfo_new (&brickinfo_dup); + if (ret) { + gf_msg ("glusterd", GF_LOG_ERROR, 0, + GD_MSG_BRICK_NEW_INFO_FAIL, + "Failed to create new brickinfo"); + goto out; + } + + ret = glusterd_brickinfo_dup (brickinfo, brickinfo_dup); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_SET_INFO_FAIL, "Failed to dup brickinfo"); + goto out; + } + + ret = glusterd_brick_proc_for_port (brickinfo->port, &brick_proc); + if (ret) { + ret = glusterd_brickprocess_new (&brick_proc); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_NEW_FAILED, "Failed to create " + "new brick process instance"); + goto out; + } + + brick_proc->port = brickinfo->port; + + cds_list_add_tail (&brick_proc->brick_proc_list, &priv->brick_procs); + } + + cds_list_add_tail (&brickinfo_dup->brick_list, &brick_proc->bricks); + brick_proc->brick_count++; +out: + return ret; +} + +/* ret = 0 only when you get a brick process associated with the port + * ret = -1 otherwise + */ +int +glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + cds_list_for_each_entry (brick_proc, &priv->brick_procs, brick_proc_list) { + if (brick_proc->port == port) { + *brickprocess = brick_proc; + ret = 0; + break; + } + } +out: + return ret; +} + int32_t glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, @@ -2118,6 +2388,13 @@ 
glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, ret = 0; + ret = glusterd_brick_process_remove_brick (brickinfo); + if (ret) { + gf_msg_debug (this->name, 0, "Couldn't remove brick from" + " brick process"); + goto out; + } + if (del_brick) cds_list_del_init (&brickinfo->brick_list); @@ -2149,11 +2426,13 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, GF_FREE (op_errstr); } } + (void) glusterd_brick_disconnect (brickinfo); ret = 0; } GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); + gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile); (void) sys_unlink (pidfile); @@ -2161,7 +2440,6 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, if (del_brick) glusterd_delete_brick (volinfo, brickinfo); - out: return ret; } @@ -5090,6 +5368,7 @@ attach_brick (xlator_t *this, } (void) build_volfile_path (full_id, path, sizeof(path), NULL); + for (tries = 15; tries > 0; --tries) { rpc = rpc_clnt_ref (other_brick->rpc); if (rpc) { @@ -5105,6 +5384,23 @@ attach_brick (xlator_t *this, brickinfo->status = GF_BRICK_STARTED; brickinfo->rpc = rpc_clnt_ref (other_brick->rpc); + ret = glusterd_brick_process_add_brick (brickinfo, + volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, + "Adding brick %s:%s to brick " + "process failed", brickinfo->hostname, + brickinfo->path); + return ret; + } + + if (ret) { + gf_msg_debug (this->name, 0, "Add brick" + " to brick process failed"); + return ret; + } + return 0; } } @@ -5126,56 +5422,6 @@ attach_brick (xlator_t *this, return ret; } -static gf_boolean_t -unsafe_option (dict_t *this, char *key, data_t *value, void *arg) -{ - /* - * Certain options are safe because they're already being handled other - * ways, such as being copied down to the bricks (all auth options) or - * being made irrelevant (event-threads). All others are suspect and - * must be checked in the next function. 
- */ - if (fnmatch ("*auth*", key, 0) == 0) { - return _gf_false; - } - - if (fnmatch ("*event-threads", key, 0) == 0) { - return _gf_false; - } - - return _gf_true; -} - -static int -opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2) -{ - data_t *value2 = dict_get (dict2, key); - int32_t min_len; - - /* - * If the option is only present on one, we can either look at the - * default or assume a mismatch. Looking at the default is pretty - * hard, because that's part of a structure within each translator and - * there's no dlopen interface to get at it, so we assume a mismatch. - * If the user really wants them to match (and for their bricks to be - * multiplexed, they can always reset the option). - */ - if (!value2) { - gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key); - return -1; - } - - min_len = MIN (value1->len, value2->len); - if (strncmp (value1->data, value2->data, min_len) != 0) { - gf_log (THIS->name, GF_LOG_DEBUG, - "option mismatch, %s, %s != %s", - key, value1->data, value2->data); - return -1; - } - - return 0; -} - /* This name was just getting too long, hence the abbreviations. */ static glusterd_brickinfo_t * find_compat_brick_in_vol (glusterd_conf_t *conf, @@ -5184,10 +5430,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, glusterd_brickinfo_t *brickinfo) { xlator_t *this = THIS; - glusterd_brickinfo_t *other_brick; + glusterd_brickinfo_t *other_brick = NULL; + glusterd_brick_proc_t *brick_proc = NULL; char pidfile2[PATH_MAX] = {0}; int32_t pid2 = -1; int16_t retries = 15; + int mux_limit = -1; + int ret = -1; /* * If comp_vol is provided, we have to check *volume* compatibility @@ -5219,6 +5468,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, gf_log (THIS->name, GF_LOG_DEBUG, "all options match"); } + ret = get_mux_limit_per_process (&mux_limit); + if (ret) { + gf_msg_debug (THIS->name, 0, "Retrieving brick mux " + "limit failed. 
Returning NULL"); + return NULL; + } + cds_list_for_each_entry (other_brick, &srch_vol->bricks, brick_list) { if (other_brick == brickinfo) { @@ -5232,6 +5488,30 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, continue; } + ret = glusterd_brick_proc_for_port (other_brick->port, + &brick_proc); + if (ret) { + gf_msg_debug (THIS->name, 0, "Couldn't get brick " + "process corresponding to brick %s:%s", + other_brick->hostname, other_brick->path); + continue; + } + + if (mux_limit != -1) { + if (brick_proc->brick_count >= mux_limit) + continue; + } else { + /* This means that the "cluster.max-bricks-per-process" + * options hasn't yet been explicitly set. Continue + * as if there's no limit set + */ + gf_msg (THIS->name, GF_LOG_WARNING, 0, + GD_MSG_NO_MUX_LIMIT, + "cluster.max-bricks-per-process options isn't " + "set. Continuing with no limit set for " + "brick multiplexing."); + } + GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick, conf); @@ -5508,6 +5788,16 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, (void) glusterd_brick_connect (volinfo, brickinfo, socketpath); + + ret = glusterd_brick_process_add_brick (brickinfo, volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, + "Adding brick %s:%s to brick process " + "failed.", brickinfo->hostname, + brickinfo->path); + goto out; + } } return 0; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 7a739c85ebd..cf50e82e849 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -147,6 +147,9 @@ gf_boolean_t glusterd_check_volume_exists (char *volname); int32_t +glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess); + +int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo); int32_t @@ -175,6 +178,16 @@ glusterd_get_next_available_brickid (glusterd_volinfo_t *volinfo); int32_t glusterd_resolve_brick (glusterd_brickinfo_t 
*brickinfo); +int +glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, + glusterd_volinfo_t *volinfo); + +int +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo); + +int +glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess); + int32_t glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 95056d501a3..7254e281497 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -2214,8 +2214,6 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) goto out; } - count = volinfo->brick_count; - ret = dict_get_str (dict, "bricks", &bricks); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2364,6 +2362,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) free_ptr = brick_list; } + count = volinfo->brick_count; + if (count) brick = strtok_r (brick_list+1, " \n", &saveptr); caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 629d18ff507..c1aa66cbffb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1000,6 +1000,38 @@ out: return ret; } +static int +validate_mux_limit (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + xlator_t *this = NULL; + uint val = 0; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + if (!is_brick_mx_enabled()) { + gf_asprintf (op_errstr, "Brick-multiplexing is not enabled. 
" + "Please enable brick multiplexing before trying " + "to set this option."); + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_WRONG_OPTS_SETTING, "%s", *op_errstr); + goto out; + } + + ret = gf_string2uint (value, &val); + if (ret) { + gf_asprintf (op_errstr, "%s is not a valid count. " + "%s expects an unsigned integer.", value, key); + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_INVALID_ENTRY, "%s", *op_errstr); + } +out: + gf_msg_debug ("glusterd", 0, "Returning %d", ret); + + return ret; +} static int validate_boolean (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, @@ -3408,6 +3440,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_10_0, .validate_fn = validate_boolean }, + { .key = GLUSTERD_BRICKMUX_LIMIT_KEY, + .voltype = "mgmt/glusterd", + .value = "1", + .op_version = GD_OP_VERSION_3_12_0, + .validate_fn = validate_mux_limit + }, { .key = "disperse.optimistic-change-log", .voltype = "cluster/disperse", .type = NO_DOC, diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 14c1c6ae942..f8a38f965a6 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1723,6 +1723,7 @@ init (xlator_t *this) CDS_INIT_LIST_HEAD (&conf->volumes); CDS_INIT_LIST_HEAD (&conf->snapshots); CDS_INIT_LIST_HEAD (&conf->missed_snaps_list); + CDS_INIT_LIST_HEAD (&conf->brick_procs); pthread_mutex_init (&conf->mutex, NULL); conf->rpc = rpc; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 9546a389900..b2141853db4 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -55,6 +55,8 @@ #define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage" #define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex" +#define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process" + #define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256 #define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90 #define 
GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100 @@ -154,6 +156,7 @@ typedef struct { struct pmap_registry *pmap; struct cds_list_head volumes; struct cds_list_head snapshots; /*List of snap volumes */ + struct cds_list_head brick_procs; /* List of brick processes */ pthread_mutex_t xprt_lock; struct list_head xprt_list; gf_store_handle_t *handle; @@ -233,6 +236,15 @@ struct glusterd_brickinfo { typedef struct glusterd_brickinfo glusterd_brickinfo_t; +struct glusterd_brick_proc { + int port; + uint32_t brick_count; + struct cds_list_head brick_proc_list; + struct cds_list_head bricks; +}; + +typedef struct glusterd_brick_proc glusterd_brick_proc_t; + struct gf_defrag_brickinfo_ { char *name; int files; |