diff options
-rw-r--r-- | tests/basic/multiplex.t | 2 | ||||
-rw-r--r-- | tests/bugs/core/multiplex-limit-issue-151.t | 57 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-mem-types.h | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-messages.h | 34 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.c | 19 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-syncop.c | 19 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 394 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 13 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 38 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 12 |
13 files changed, 542 insertions, 58 deletions
diff --git a/tests/basic/multiplex.t b/tests/basic/multiplex.t index ed8788ff3bb..2f0f462f14d 100644 --- a/tests/basic/multiplex.t +++ b/tests/basic/multiplex.t @@ -17,6 +17,8 @@ function count_brick_pids { | grep -v "N/A" | sort | uniq | wc -l } +cleanup + TEST glusterd TEST $CLI volume set all cluster.brick-multiplex on push_trapfunc "$CLI volume set all cluster.brick-multiplex off" diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/bugs/core/multiplex-limit-issue-151.t new file mode 100644 index 00000000000..9511756ecde --- /dev/null +++ b/tests/bugs/core/multiplex-limit-issue-151.t @@ -0,0 +1,57 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../traps.rc +. $(dirname $0)/../../volume.rc + +function count_up_bricks { + $CLI --xml volume status all | grep '<status>1' | wc -l +} + +function count_brick_processes { + pgrep glusterfsd | wc -l +} + +function count_brick_pids { + $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \ + | grep -v "N/A" | sort | uniq | wc -l +} + +cleanup; + +TEST glusterd + +TEST $CLI volume set all cluster.brick-multiplex on +TEST ! $CLI volume set all cluster.max-bricks-per-process -1 +TEST ! $CLI volume set all cluster.max-bricks-per-process foobar +TEST $CLI volume set all cluster.max-bricks-per-process 3 + +push_trapfunc "$CLI volume set all cluster.brick-multiplex off" +push_trapfunc "cleanup" + +TEST $CLI volume create $V0 $H0:$B0/brick{0..5} +TEST $CLI volume start $V0 + +EXPECT 2 count_brick_processes +EXPECT 2 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks + +pkill gluster +TEST glusterd + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks + +TEST $CLI volume add-brick $V0 $H0:$B0/brick6 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks + +TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start +TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 commit + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index ed171b69b66..33aac2f3dc8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -71,7 +71,8 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55, gf_gld_mt_snap_create_args_t = gf_common_mt_end + 56, gf_gld_mt_local_peers_t = gf_common_mt_end + 57, - gf_gld_mt_end = gf_common_mt_end + 58, + gf_gld_mt_glusterd_brick_proc_t = gf_common_mt_end + 58, + gf_gld_mt_end = gf_common_mt_end + 59, } gf_gld_mem_types_t; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index 9161d9058f0..14424d36890 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -41,7 +41,7 @@ #define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD -#define GLFS_NUM_MESSAGES 602 +#define GLFS_NUM_MESSAGES 606 #define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ @@ -4869,6 +4869,38 @@ */ #define GD_MSG_VOL_SET_VALIDATION_INFO (GLUSTERD_COMP_BASE + 602) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_NO_MUX_LIMIT (GLUSTERD_COMP_BASE + 603) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_BRICKPROC_REM_BRICK_FAILED (GLUSTERD_COMP_BASE + 604) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_BRICKPROC_ADD_BRICK_FAILED (GLUSTERD_COMP_BASE + 605) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_BRICKPROC_NEW_FAILED (GLUSTERD_COMP_BASE + 606) + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 9eba6025427..6b12d603728 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -79,6 +79,10 @@ glusterd_all_vol_opts valid_all_vol_opts[] = { */ { GLUSTERD_MAX_OP_VERSION_KEY, "BUG_NO_MAX_OP_VERSION"}, { GLUSTERD_BRICK_MULTIPLEX_KEY, "disable"}, + /* Set this value to 1 by default implying non-multiplexed behaviour. + * TBD: Discuss the default value for this. Maybe this should be a + * dynamic value depending on the memory specifications per node */ + { GLUSTERD_BRICKMUX_LIMIT_KEY, "1"}, { NULL }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index c3f25ebe84c..1fc7a250748 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -488,8 +488,8 @@ __gluster_pmap_signin (rpcsvc_request_t *req) { pmap_signin_req args = {0,}; pmap_signin_rsp rsp = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; ret = xdr_to_generic (req->msg[0], &args, (xdrproc_t)xdr_pmap_signin_req); @@ -502,6 +502,7 @@ __gluster_pmap_signin (rpcsvc_request_t *req) GF_PMAP_PORT_BRICKSERVER, req->trans); ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo); + fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_pmap_signin_rsp); @@ -569,6 +570,22 @@ __gluster_pmap_signout (rpcsvc_request_t *req) * glusterd end when a brick is killed from the * backend */ brickinfo->status = GF_BRICK_STOPPED; + + /* Remove brick from brick process if not already + * removed in the brick op phase. This situation would + * arise when the brick is killed explicitly from the + * backend */ + ret = glusterd_brick_process_remove_brick (brickinfo); + if (ret) { + gf_msg_debug (this->name, 0, "Couldn't remove " + "brick %s:%s from brick process", + brickinfo->hostname, + brickinfo->path); + /* Ignore 'ret' here since the brick might + * have already been deleted in brick op phase + */ + ret = 0; + } } } diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index e5fba968c07..913b5946b6e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -957,6 +957,7 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, gd1_mgmt_brick_op_req *req = NULL; int ret = 0; xlator_t *this = NULL; + glusterd_brickinfo_t *brickinfo = NULL; this = THIS; args.op_ret = -1; @@ -986,6 +987,23 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, GF_FREE (args.errstr); } + if (op == GD_OP_STOP_VOLUME || op == GD_OP_REMOVE_BRICK) { + if (args.op_ret == 0) { + brickinfo = pnode->node; + ret = glusterd_brick_process_remove_brick (brickinfo); + if (ret) { + gf_msg ("glusterd", GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_REM_BRICK_FAILED, + "Removing brick %s:%s from brick" + " process failed", + brickinfo->hostname, + brickinfo->path); + args.op_ret = ret; + goto out; + } + } + } + if (GD_OP_STATUS_VOLUME == op) { ret = dict_set_int32 (args.dict, "index", pnode->index); if (ret) { @@ -1023,7 +1041,6 @@ out: dict_unref (args.dict); gd_brick_op_req_free (req); return args.op_ret; - } int32_t diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 7e50d597fc2..5edf9805f9c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -117,6 +117,46 @@ is_brick_mx_enabled (void) return ret ? _gf_false: enabled; } +int +get_mux_limit_per_process (int *mux_limit) +{ + char *value = NULL; + int ret = -1; + int max_bricks_per_proc = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + if (!is_brick_mx_enabled()) { + max_bricks_per_proc = 1; + ret = 0; + goto out; + } + + ret = dict_get_str (priv->opts, GLUSTERD_BRICKMUX_LIMIT_KEY, &value); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Can't get limit for number of bricks per brick " + "process from dict"); + ret = 0; + } else { + ret = gf_string2int (value, &max_bricks_per_proc); + if (ret) + goto out; + } +out: + *mux_limit = max_bricks_per_proc; + + gf_msg_debug ("glusterd", 0, "Mux limit set to %d bricks per process", *mux_limit); + + return ret; +} + extern struct volopt_map_entry glusterd_volopt_map[]; extern glusterd_all_vol_opts valid_all_vol_opts[]; @@ -972,6 +1012,33 @@ out: } int32_t +glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess) +{ + glusterd_brick_proc_t *new_brickprocess = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO (THIS->name, brickprocess, out); + + new_brickprocess = GF_CALLOC (1, sizeof(*new_brickprocess), + gf_gld_mt_glusterd_brick_proc_t); + + if (!new_brickprocess) + goto out; + + CDS_INIT_LIST_HEAD (&new_brickprocess->bricks); + CDS_INIT_LIST_HEAD (&new_brickprocess->brick_proc_list); + + new_brickprocess->brick_count = 0; + *brickprocess = new_brickprocess; + + ret = 0; + +out: + gf_msg_debug (THIS->name, 0, "Returning %d", ret); + return ret; +} + +int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) { glusterd_brickinfo_t *new_brickinfo = NULL; @@ -2033,6 +2100,15 @@ retry: goto out; } + ret = glusterd_brick_process_add_brick (brickinfo, volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s " + "to brick process failed.", brickinfo->hostname, + brickinfo->path); + goto out; + } + connect: ret = glusterd_brick_connect (volinfo, brickinfo, socketpath); if (ret) { @@ -2096,6 +2172,200 @@ glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo) return 0; } +static gf_boolean_t +unsafe_option (dict_t *this, char *key, data_t *value, void *arg) +{ + /* + * Certain options are safe because they're already being handled other + * ways, such as being copied down to the bricks (all auth options) or + * being made irrelevant (event-threads). All others are suspect and + * must be checked in the next function. + */ + if (fnmatch ("*auth*", key, 0) == 0) { + return _gf_false; + } + + if (fnmatch ("*event-threads", key, 0) == 0) { + return _gf_false; + } + + return _gf_true; +} + +static int +opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2) +{ + data_t *value2 = dict_get (dict2, key); + int32_t min_len; + + /* + * If the option is only present on one, we can either look at the + * default or assume a mismatch. Looking at the default is pretty + * hard, because that's part of a structure within each translator and + * there's no dlopen interface to get at it, so we assume a mismatch. + * If the user really wants them to match (and for their bricks to be + * multiplexed, they can always reset the option). + */ + if (!value2) { + gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key); + return -1; + } + + min_len = MIN (value1->len, value2->len); + if (strncmp (value1->data, value2->data, min_len) != 0) { + gf_log (THIS->name, GF_LOG_DEBUG, + "option mismatch, %s, %s != %s", + key, value1->data, value2->data); + return -1; + } + + return 0; +} + +int +glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc) +{ + cds_list_del_init (&brick_proc->brick_proc_list); + cds_list_del_init (&brick_proc->bricks); + + GF_FREE (brick_proc); + + return 0; +} + +int +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + glusterd_brickinfo_t *brickinfoiter = NULL; + glusterd_brick_proc_t *brick_proc_tmp = NULL; + glusterd_brickinfo_t *tmp = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + + cds_list_for_each_entry_safe (brick_proc, brick_proc_tmp, + &priv->brick_procs, brick_proc_list) { + if (brickinfo->port != brick_proc->port) { + continue; + } + + GF_VALIDATE_OR_GOTO (this->name, (brick_proc->brick_count > 0), out); + + cds_list_for_each_entry_safe (brickinfoiter, tmp, + &brick_proc->bricks, brick_list) { + if (strcmp (brickinfoiter->path, brickinfo->path) == 0) { + cds_list_del_init (&brickinfoiter->brick_list); + + GF_FREE (brickinfoiter->logfile); + GF_FREE (brickinfoiter); + brick_proc->brick_count--; + break; + } + } + + /* If all bricks have been removed, delete the brick process */ + if (brick_proc->brick_count == 0) { + ret = glusterd_brickprocess_delete (brick_proc); + if (ret) + goto out; + } + break; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, + glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + glusterd_brickinfo_t *brickinfo_dup = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + + ret = glusterd_brickinfo_new (&brickinfo_dup); + if (ret) { + gf_msg ("glusterd", GF_LOG_ERROR, 0, + GD_MSG_BRICK_NEW_INFO_FAIL, + "Failed to create new brickinfo"); + goto out; + } + + ret = glusterd_brickinfo_dup (brickinfo, brickinfo_dup); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_SET_INFO_FAIL, "Failed to dup brickinfo"); + goto out; + } + + ret = glusterd_brick_proc_for_port (brickinfo->port, &brick_proc); + if (ret) { + ret = glusterd_brickprocess_new (&brick_proc); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_NEW_FAILED, "Failed to create " + "new brick process instance"); + goto out; + } + + brick_proc->port = brickinfo->port; + + cds_list_add_tail (&brick_proc->brick_proc_list, &priv->brick_procs); + } + + cds_list_add_tail (&brickinfo_dup->brick_list, &brick_proc->bricks); + brick_proc->brick_count++; +out: + return ret; +} + +/* ret = 0 only when you get a brick process associated with the port + * ret = -1 otherwise + */ +int +glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + cds_list_for_each_entry (brick_proc, &priv->brick_procs, brick_proc_list) { + if (brick_proc->port == port) { + *brickprocess = brick_proc; + ret = 0; + break; + } + } +out: + return ret; +} + int32_t glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, @@ -2118,6 +2388,13 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, ret = 0; + ret = glusterd_brick_process_remove_brick (brickinfo); + if (ret) { + gf_msg_debug (this->name, 0, "Couldn't remove brick from" + " brick process"); + goto out; + } + if (del_brick) cds_list_del_init (&brickinfo->brick_list); @@ -2149,11 +2426,13 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, GF_FREE (op_errstr); } } + (void) glusterd_brick_disconnect (brickinfo); ret = 0; } GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); + gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile); (void) sys_unlink (pidfile); @@ -2161,7 +2440,6 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, if (del_brick) glusterd_delete_brick (volinfo, brickinfo); - out: return ret; } @@ -5090,6 +5368,7 @@ attach_brick (xlator_t *this, } (void) build_volfile_path (full_id, path, sizeof(path), NULL); + for (tries = 15; tries > 0; --tries) { rpc = rpc_clnt_ref (other_brick->rpc); if (rpc) { @@ -5105,6 +5384,23 @@ attach_brick (xlator_t *this, brickinfo->status = GF_BRICK_STARTED; brickinfo->rpc = rpc_clnt_ref (other_brick->rpc); + ret = glusterd_brick_process_add_brick (brickinfo, + volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, + "Adding brick %s:%s to brick " + "process failed", brickinfo->hostname, + brickinfo->path); + return ret; + } + + if (ret) { + gf_msg_debug (this->name, 0, "Add brick" + " to brick process failed"); + return ret; + } + return 0; } } @@ -5126,56 +5422,6 @@ attach_brick (xlator_t *this, return ret; } -static gf_boolean_t -unsafe_option (dict_t *this, char *key, data_t *value, void *arg) -{ - /* - * Certain options are safe because they're already being handled other - * ways, such as being copied down to the bricks (all auth options) or - * being made irrelevant (event-threads). All others are suspect and - * must be checked in the next function. - */ - if (fnmatch ("*auth*", key, 0) == 0) { - return _gf_false; - } - - if (fnmatch ("*event-threads", key, 0) == 0) { - return _gf_false; - } - - return _gf_true; -} - -static int -opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2) -{ - data_t *value2 = dict_get (dict2, key); - int32_t min_len; - - /* - * If the option is only present on one, we can either look at the - * default or assume a mismatch. Looking at the default is pretty - * hard, because that's part of a structure within each translator and - * there's no dlopen interface to get at it, so we assume a mismatch. - * If the user really wants them to match (and for their bricks to be - * multiplexed, they can always reset the option). - */ - if (!value2) { - gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key); - return -1; - } - - min_len = MIN (value1->len, value2->len); - if (strncmp (value1->data, value2->data, min_len) != 0) { - gf_log (THIS->name, GF_LOG_DEBUG, - "option mismatch, %s, %s != %s", - key, value1->data, value2->data); - return -1; - } - - return 0; -} - /* This name was just getting too long, hence the abbreviations. */ static glusterd_brickinfo_t * find_compat_brick_in_vol (glusterd_conf_t *conf, @@ -5184,10 +5430,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, glusterd_brickinfo_t *brickinfo) { xlator_t *this = THIS; - glusterd_brickinfo_t *other_brick; + glusterd_brickinfo_t *other_brick = NULL; + glusterd_brick_proc_t *brick_proc = NULL; char pidfile2[PATH_MAX] = {0}; int32_t pid2 = -1; int16_t retries = 15; + int mux_limit = -1; + int ret = -1; /* * If comp_vol is provided, we have to check *volume* compatibility @@ -5219,6 +5468,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, gf_log (THIS->name, GF_LOG_DEBUG, "all options match"); } + ret = get_mux_limit_per_process (&mux_limit); + if (ret) { + gf_msg_debug (THIS->name, 0, "Retrieving brick mux " + "limit failed. Returning NULL"); + return NULL; + } + cds_list_for_each_entry (other_brick, &srch_vol->bricks, brick_list) { if (other_brick == brickinfo) { @@ -5232,6 +5488,30 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, continue; } + ret = glusterd_brick_proc_for_port (other_brick->port, + &brick_proc); + if (ret) { + gf_msg_debug (THIS->name, 0, "Couldn't get brick " + "process corresponding to brick %s:%s", + other_brick->hostname, other_brick->path); + continue; + } + + if (mux_limit != -1) { + if (brick_proc->brick_count >= mux_limit) + continue; + } else { + /* This means that the "cluster.max-bricks-per-process" + * options hasn't yet been explicitly set. Continue + * as if there's no limit set + */ + gf_msg (THIS->name, GF_LOG_WARNING, 0, + GD_MSG_NO_MUX_LIMIT, + "cluster.max-bricks-per-process options isn't " + "set. Continuing with no limit set for " + "brick multiplexing."); + } + GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick, conf); @@ -5508,6 +5788,16 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, (void) glusterd_brick_connect (volinfo, brickinfo, socketpath); + + ret = glusterd_brick_process_add_brick (brickinfo, volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, + "Adding brick %s:%s to brick process " + "failed.", brickinfo->hostname, + brickinfo->path); + goto out; + } } return 0; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 7a739c85ebd..cf50e82e849 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -147,6 +147,9 @@ gf_boolean_t glusterd_check_volume_exists (char *volname); int32_t +glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess); + +int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo); int32_t @@ -175,6 +178,16 @@ glusterd_get_next_available_brickid (glusterd_volinfo_t *volinfo); int32_t glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo); +int +glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, + glusterd_volinfo_t *volinfo); + +int +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo); + +int +glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess); + int32_t glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 95056d501a3..7254e281497 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -2214,8 +2214,6 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) goto out; } - count = volinfo->brick_count; - ret = dict_get_str (dict, "bricks", &bricks); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2364,6 +2362,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) free_ptr = brick_list; } + count = volinfo->brick_count; + if (count) brick = strtok_r (brick_list+1, " \n", &saveptr); caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 629d18ff507..c1aa66cbffb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1000,6 +1000,38 @@ out: return ret; } +static int +validate_mux_limit (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + xlator_t *this = NULL; + uint val = 0; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + if (!is_brick_mx_enabled()) { + gf_asprintf (op_errstr, "Brick-multiplexing is not enabled. " + "Please enable brick multiplexing before trying " + "to set this option."); + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_WRONG_OPTS_SETTING, "%s", *op_errstr); + goto out; + } + + ret = gf_string2uint (value, &val); + if (ret) { + gf_asprintf (op_errstr, "%s is not a valid count. " + "%s expects an unsigned integer.", value, key); + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_INVALID_ENTRY, "%s", *op_errstr); + } +out: + gf_msg_debug ("glusterd", 0, "Returning %d", ret); + + return ret; +} static int validate_boolean (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, @@ -3408,6 +3440,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_10_0, .validate_fn = validate_boolean }, + { .key = GLUSTERD_BRICKMUX_LIMIT_KEY, + .voltype = "mgmt/glusterd", + .value = "1", + .op_version = GD_OP_VERSION_3_12_0, + .validate_fn = validate_mux_limit + }, { .key = "disperse.optimistic-change-log", .voltype = "cluster/disperse", .type = NO_DOC, diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 14c1c6ae942..f8a38f965a6 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1723,6 +1723,7 @@ init (xlator_t *this) CDS_INIT_LIST_HEAD (&conf->volumes); CDS_INIT_LIST_HEAD (&conf->snapshots); CDS_INIT_LIST_HEAD (&conf->missed_snaps_list); + CDS_INIT_LIST_HEAD (&conf->brick_procs); pthread_mutex_init (&conf->mutex, NULL); conf->rpc = rpc; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 9546a389900..b2141853db4 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -55,6 +55,8 @@ #define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage" #define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex" +#define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process" + #define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256 #define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90 #define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100 @@ -154,6 +156,7 @@ typedef struct { struct pmap_registry *pmap; struct cds_list_head volumes; struct cds_list_head snapshots; /*List of snap volumes */ + struct cds_list_head brick_procs; /* List of brick processes */ pthread_mutex_t xprt_lock; struct list_head xprt_list; gf_store_handle_t *handle; @@ -233,6 +236,15 @@ struct glusterd_brickinfo { typedef struct glusterd_brickinfo glusterd_brickinfo_t; +struct glusterd_brick_proc { + int port; + uint32_t brick_count; + struct cds_list_head brick_proc_list; + struct cds_list_head bricks; +}; + +typedef struct glusterd_brick_proc glusterd_brick_proc_t; + struct gf_defrag_brickinfo_ { char *name; int files; |