path: root/xlators/mgmt
author     Jeff Darcy <jdarcy@redhat.com>       2016-12-08 16:24:15 -0500
committer  Vijay Bellur <vbellur@redhat.com>    2017-01-30 19:13:58 -0500
commit     1a95fc3036db51b82b6a80952f0908bc2019d24a (patch)
tree       b983ac196a8165d5cb5e860a5ef97d3e9a41b5c9 /xlators/mgmt
parent     7f7d7a939e46b330a084d974451eee4757ba61b4 (diff)
core: run many bricks within one glusterfsd process
This patch adds support for multiple brick translator stacks running in a
single brick server process. This reduces our per-brick memory usage by
approximately 3x, and our appetite for TCP ports even more. It also creates
potential to avoid process/thread thrashing, and to improve QoS by scheduling
more carefully across the bricks, but realizing that potential will require
further work.

Multiplexing is controlled by the "cluster.brick-multiplex" global option. By
default it's off, and bricks are started in separate processes as before. If
multiplexing is enabled, then *compatible* bricks (mostly those with the same
transport options) will be started in the same process.

Change-Id: I45059454e51d6f4cbb29a4953359c09a408695cb
BUG: 1385758
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: https://review.gluster.org/14763
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
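For context (usage, not part of this patch): global options like this one are
set cluster-wide through the gluster CLI using the special volume name "all",
so enabling multiplexing looks like:

    # gluster volume set all cluster.brick-multiplex on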
Diffstat (limited to 'xlators/mgmt')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-brick-ops.c       12
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-handler.c         42
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-handshake.c        3
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-messages.h        17
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.c          127
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.h            3
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-pmap.c           171
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-pmap.h             3
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-rebalance.c       51
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-replace-brick.c   27
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-snapshot.c        68
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-syncop.c          17
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.c          613
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.h            6
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-volgen.c           7
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-volume-ops.c       5
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-volume-set.c       7
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd.h                 10
18 files changed, 955 insertions, 234 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 938663ba863..c78fbd8345c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -2905,18 +2905,24 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
defrag_cmd = GF_DEFRAG_CMD_START_FORCE;
if (cmd == GF_OP_CMD_DETACH_START)
defrag_cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
+ /*
+ * We need to set this *before* we issue commands to the
+ * bricks, or else we might end up setting it after the bricks
+ * have responded. If we fail to send the request(s) we'll
+ * clear it ourselves because nobody else will.
+ */
+ volinfo->decommission_in_progress = 1;
ret = glusterd_handle_defrag_start
(volinfo, err_str, sizeof (err_str),
defrag_cmd,
glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
- if (!ret)
- volinfo->decommission_in_progress = 1;
-
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_REBALANCE_START_FAIL,
"failed to start the rebalance");
+ /* TBD: shouldn't we do more than print a message? */
+ volinfo->decommission_in_progress = 0;
}
} else {
if (GLUSTERD_STATUS_STARTED == volinfo->status)
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 364623317ef..b6f0197aa19 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -3365,7 +3365,8 @@ int
glusterd_rpc_create (struct rpc_clnt **rpc,
dict_t *options,
rpc_clnt_notify_t notify_fn,
- void *notify_data)
+ void *notify_data,
+ gf_boolean_t force)
{
struct rpc_clnt *new_rpc = NULL;
int ret = -1;
@@ -3376,6 +3377,11 @@ glusterd_rpc_create (struct rpc_clnt **rpc,
GF_ASSERT (options);
+ if (force && rpc && *rpc) {
+ (void) rpc_clnt_unref (*rpc);
+ *rpc = NULL;
+ }
+
/* TODO: is 32 enough? or more ? */
new_rpc = rpc_clnt_new (options, this, this->name, 16);
if (!new_rpc)
@@ -3531,7 +3537,8 @@ glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo,
}
ret = glusterd_rpc_create (&peerinfo->rpc, options,
- glusterd_peer_rpc_notify, peerctx);
+ glusterd_peer_rpc_notify, peerctx,
+ _gf_false);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_RPC_CREATE_FAIL,
@@ -4638,6 +4645,7 @@ gd_is_global_option (char *opt_key)
return (strcmp (opt_key, GLUSTERD_SHARED_STORAGE_KEY) == 0 ||
strcmp (opt_key, GLUSTERD_QUORUM_RATIO_KEY) == 0 ||
strcmp (opt_key, GLUSTERD_GLOBAL_OP_VERSION_KEY) == 0 ||
+ strcmp (opt_key, GLUSTERD_BRICK_MULTIPLEX_KEY) == 0 ||
strcmp (opt_key, GLUSTERD_MAX_OP_VERSION_KEY) == 0);
out:
@@ -5308,8 +5316,6 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
count, brickinfo->rdma_port);
fprintf (fp, "Volume%d.Brick%d.status: %s\n", count_bkp,
count, brickinfo->status ? "Started" : "Stopped");
- fprintf (fp, "Volume%d.Brick%d.signedin: %s\n", count_bkp,
- count, brickinfo->signed_in ? "True" : "False");
/*FIXME: This is a hacky way of figuring out whether a
* brick belongs to the hot or cold tier */
@@ -5495,6 +5501,9 @@ __glusterd_handle_get_state (rpcsvc_request_t *req)
GF_VALIDATE_OR_GOTO (THIS->name, this, out);
GF_VALIDATE_OR_GOTO (this->name, req, out);
+ gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DAEMON_STATE_REQ_RCVD,
+ "Received request to get state for glusterd");
+
ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
snprintf (err_str, sizeof (err_str), "Failed to decode "
@@ -5525,14 +5534,17 @@ __glusterd_handle_get_state (rpcsvc_request_t *req)
}
}
- gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DAEMON_STATE_REQ_RCVD,
- "Received request to get state for glusterd");
-
ret = glusterd_get_state (req, dict);
out:
- if (dict)
+ if (dict && ret) {
+ /*
+ * When glusterd_to_cli (called from glusterd_get_state)
+ * succeeds, it frees the dict for us, so this would be a
+ * double free, but in other cases it's our responsibility.
+ */
dict_unref (dict);
+ }
return ret;
}
@@ -5658,6 +5670,20 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
case RPC_CLNT_DISCONNECT:
rpc_clnt_unset_connected (&rpc->conn);
+ if (rpc != brickinfo->rpc) {
+ /*
+ * There used to be a bunch of races in the volume
+ * start/stop code that could result in us getting here
+ * and setting the brick status incorrectly. Many of
+ * those have been fixed or avoided, but just in case
+ * any are still left it doesn't hurt to keep the extra
+ * check and avoid further damage.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "got disconnect from stale rpc on %s",
+ brickinfo->path);
+ break;
+ }
if (glusterd_is_brick_started (brickinfo)) {
gf_msg (this->name, GF_LOG_INFO, 0,
GD_MSG_BRICK_DISCONNECTED,
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
index c1392734d79..96d39f03007 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -178,7 +178,7 @@ out:
return ret;
}
-static size_t
+size_t
build_volfile_path (char *volume_id, char *path,
size_t path_len, char *trusted_str)
{
@@ -841,6 +841,7 @@ __server_getspec (rpcsvc_request_t *req)
peerinfo = &req->trans->peerinfo;
volume = args.key;
+
/* Need to strip leading '/' from volnames. This was introduced to
* support nfs style mount parameters for native gluster mount
*/
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index 00de88f4e36..5f1339cb5fd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -28,7 +28,7 @@
* - Append to the list of messages defined, towards the end
* - Retain macro naming as glfs_msg_X (for redability across developers)
* NOTE: Rules for message format modifications
- * 3) Check acorss the code if the message ID macro in question is reused
+ * 3) Check across the code if the message ID macro in question is reused
* anywhere. If reused then then the modifications should ensure correctness
* everywhere, or needs a new message ID as (1) above was not adhered to. If
* not used anywhere, proceed with the required modification.
@@ -41,7 +41,7 @@
#define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD
-#define GLFS_NUM_MESSAGES 595
+#define GLFS_NUM_MESSAGES 597
#define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1)
/* Messaged with message IDs */
@@ -4817,5 +4817,18 @@
*/
/*------------*/
+
+#define GD_MSG_BRICK_MX_SET_FAIL (GLUSTERD_COMP_BASE + 596)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define GD_MSG_NO_SIG_TO_PID_ZERO (GLUSTERD_COMP_BASE + 597)
+
+/*------------*/
+
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index b24e91a457c..d9b18e00195 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -58,16 +58,27 @@ static int
glusterd_set_shared_storage (dict_t *dict, char *key, char *value,
char **op_errstr);
-/* Valid options for all volumes to be listed in the *
- * valid_all_vol_opts table. To add newer options to *
- * all volumes, we can just add more entries to this *
- * table *
+/*
+ * Valid options for all volumes to be listed in the valid_all_vol_opts table.
+ * To add newer options to all volumes, we can just add more entries to this
+ * table.
+ *
+ * It's important that every value have a default, or have a special handler
+ * in glusterd_get_global_options_for_all_vols, or else we might crash there.
*/
glusterd_all_vol_opts valid_all_vol_opts[] = {
- { GLUSTERD_QUORUM_RATIO_KEY },
- { GLUSTERD_SHARED_STORAGE_KEY },
- { GLUSTERD_GLOBAL_OP_VERSION_KEY },
- { GLUSTERD_MAX_OP_VERSION_KEY },
+ { GLUSTERD_QUORUM_RATIO_KEY, "0" },
+ { GLUSTERD_SHARED_STORAGE_KEY, "disable" },
+ /* This one actually gets filled in dynamically. */
+ { GLUSTERD_GLOBAL_OP_VERSION_KEY, "BUG_NO_OP_VERSION"},
+ /*
+ * This one should be filled in dynamically, but it didn't used to be
+ * (before the defaults were added here) so the value is unclear.
+ *
+ * TBD: add a dynamic handler to set the appropriate value
+ */
+ { GLUSTERD_MAX_OP_VERSION_KEY, "BUG_NO_MAX_OP_VERSION"},
+ { GLUSTERD_BRICK_MULTIPLEX_KEY, "disable"},
{ NULL },
};
@@ -557,7 +568,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
if (!brick_req)
goto out;
brick_req->op = GLUSTERD_BRICK_TERMINATE;
- brick_req->name = "";
+ brick_req->name = brickinfo->path;
glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPING);
break;
case GD_OP_PROFILE_VOLUME:
@@ -618,28 +629,13 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
break;
case GD_OP_SNAP:
- brick_req = GF_CALLOC (1, sizeof (*brick_req),
- gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
- goto out;
-
- brick_req->op = GLUSTERD_BRICK_BARRIER;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- brick_req->name = gf_strdup (volname);
-
- break;
case GD_OP_BARRIER:
brick_req = GF_CALLOC (1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
if (!brick_req)
goto out;
brick_req->op = GLUSTERD_BRICK_BARRIER;
- ret = dict_get_str(dict, "volname", &volname);
- if (ret)
- goto out;
- brick_req->name = gf_strdup (volname);
+ brick_req->name = brickinfo->path;
break;
default:
@@ -754,6 +750,17 @@ out:
}
static int
+glusterd_validate_brick_mx_options (xlator_t *this, char *fullkey, char *value,
+ char **op_errstr)
+{
+ int ret = 0;
+
+ //Placeholder function for now
+
+ return ret;
+}
+
+static int
glusterd_validate_shared_storage (char *key, char *value, char *errstr)
{
int32_t ret = -1;
@@ -1191,6 +1198,11 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
if (ret)
goto out;
+ ret = glusterd_validate_brick_mx_options (this, key, value,
+ op_errstr);
+ if (ret)
+ goto out;
+
local_key_op_version = glusterd_get_op_version_for_key (key);
if (local_key_op_version > local_new_op_version)
local_new_op_version = local_key_op_version;
@@ -2351,6 +2363,33 @@ out:
}
static int
+glusterd_set_brick_mx_opts (dict_t *dict, char *key, char *value,
+ char **op_errstr)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, dict, out);
+ GF_VALIDATE_OR_GOTO (this->name, key, out);
+ GF_VALIDATE_OR_GOTO (this->name, value, out);
+ GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+ ret = 0;
+
+ priv = this->private;
+
+ if (!strcmp (key, GLUSTERD_BRICK_MULTIPLEX_KEY)) {
+ ret = dict_set_dynstr (priv->opts, key, gf_strdup (value));
+ }
+
+out:
+ return ret;
+}
+
+static int
glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict,
char **op_errstr)
{
@@ -2399,6 +2438,14 @@ glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict,
goto out;
}
+ ret = glusterd_set_brick_mx_opts (dict, key, value, op_errstr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_MX_SET_FAIL,
+ "Failed to set brick multiplexing option");
+ goto out;
+ }
+
/* If the key is cluster.op-version, set conf->op_version to the value
* if needed and save it.
*/
@@ -2629,6 +2676,7 @@ out:
}
+
static int
glusterd_op_set_volume (dict_t *dict, char **errstr)
{
@@ -6094,6 +6142,8 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr,
glusterd_volinfo_t *volinfo = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_pending_node_t *pending_node = NULL;
+ glusterd_conf_t *conf = THIS->private;
+ char pidfile[1024];
ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
if (ret)
@@ -6122,6 +6172,18 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr,
selected);
pending_node = NULL;
}
+ /*
+ * This is not really the right place to do it, but
+ * it's the most convenient.
+ * TBD: move this to *after* the RPC
+ */
+ brickinfo->status = GF_BRICK_STOPPED;
+ brickinfo->started_here = _gf_false;
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
+ brickinfo, conf);
+ gf_log (THIS->name, GF_LOG_INFO,
+ "unlinking pidfile %s", pidfile);
+ (void) sys_unlink (pidfile);
}
}
@@ -6144,7 +6206,8 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
glusterd_pending_node_t *pending_node = NULL;
int32_t command = 0;
int32_t force = 0;
-
+ glusterd_conf_t *conf = THIS->private;
+ char pidfile[1024];
ret = dict_get_str (dict, "volname", &volname);
@@ -6218,6 +6281,18 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
selected);
pending_node = NULL;
}
+ /*
+ * This is not really the right place to do it, but
+ * it's the most convenient.
+ * TBD: move this to *after* the RPC
+ */
+ brickinfo->status = GF_BRICK_STOPPED;
+ brickinfo->started_here = _gf_false;
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
+ brickinfo, conf);
+ gf_log (THIS->name, GF_LOG_INFO,
+ "unlinking pidfile %s", pidfile);
+ (void) sys_unlink (pidfile);
}
i++;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index 142f7ba89f7..48275c57e12 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -166,7 +166,8 @@ typedef enum cli_cmd_type_ {
} cli_cmd_type;
typedef struct glusterd_all_volume_options {
- char *option;
+ char *option;
+ char *dflt_val;
} glusterd_all_vol_opts;
int
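A minimal sketch (not from the patch; names simplified) of how a
NULL-terminated {option, default} table like valid_all_vol_opts is walked to
find a per-option default, the lookup that
glusterd_get_global_options_for_all_vols relies on every entry supporting,
either via a static default here or a dynamic handler:

    #include <string.h>

    typedef struct { char *option; char *dflt_val; } all_vol_opt;

    static char *
    lookup_default (const all_vol_opt *table, const char *key)
    {
            size_t i;

            /* the table ends with a { NULL } sentinel entry */
            for (i = 0; table[i].option; i++) {
                    if (strcmp (table[i].option, key) == 0)
                            return table[i].dflt_val;
            }
            return NULL;    /* unknown option: caller must cope */
    }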
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index 2c27473f190..2e87ff6ecdf 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -93,25 +93,21 @@ pmap_registry_get (xlator_t *this)
}
-static char*
-nextword (char *str)
-{
- while (*str && !isspace (*str))
- str++;
- while (*str && isspace (*str))
- str++;
-
- return str;
-}
-
+/*
+ * The "destroy" argument avoids a double search in pmap_registry_remove - one
+ * to find the entry in the table, and the other to find the particular
+ * brickname within that entry (which might cover multiple bricks). We do the
+ * actual deletion here by "whiting out" the brick name with spaces. It's up
+ * to pmap_registry_remove to figure out what to do from there.
+ */
int
pmap_registry_search (xlator_t *this, const char *brickname,
- gf_pmap_port_type_t type)
+ gf_pmap_port_type_t type, gf_boolean_t destroy)
{
struct pmap_registry *pmap = NULL;
int p = 0;
char *brck = NULL;
- char *nbrck = NULL;
+ size_t i;
pmap = pmap_registry_get (this);
@@ -119,13 +115,38 @@ pmap_registry_search (xlator_t *this, const char *brickname,
if (!pmap->ports[p].brickname || pmap->ports[p].type != type)
continue;
- for (brck = pmap->ports[p].brickname;;) {
- nbrck = strtail (brck, brickname);
- if (nbrck && (!*nbrck || isspace (*nbrck)))
- return p;
- brck = nextword (brck);
- if (!*brck)
+ brck = pmap->ports[p].brickname;
+ for (;;) {
+ for (i = 0; brck[i] && !isspace (brck[i]); ++i)
+ ;
+ if (!i) {
break;
+ }
+ if (strncmp (brck, brickname, i) == 0) {
+ /*
+ * Without this check, we'd break when brck
+ * is merely a substring of brickname.
+ */
+ if (brickname[i] == '\0') {
+ if (destroy) do {
+ *(brck++) = ' ';
+ } while (--i);
+ return p;
+ }
+ }
+ brck += i;
+ /*
+ * Skip over *any* amount of whitespace, including
+ * none (if we're already at the end of the string).
+ */
+ while (isspace (*brck))
+ ++brck;
+ /*
+ * We're either at the end of the string (which will be
+ * handled above strncmp on the next iteration) or at
+ * the next non-whitespace substring (which will be
+ * handled by strncmp itself).
+ */
}
}
@@ -240,8 +261,13 @@ pmap_registry_bind (xlator_t *this, int port, const char *brickname,
p = port;
pmap->ports[p].type = type;
- free (pmap->ports[p].brickname);
- pmap->ports[p].brickname = strdup (brickname);
+ if (pmap->ports[p].brickname) {
+ char *tmp = pmap->ports[p].brickname;
+ asprintf (&pmap->ports[p].brickname, "%s %s", tmp, brickname);
+ free (tmp);
+ } else {
+ pmap->ports[p].brickname = strdup (brickname);
+ }
pmap->ports[p].type = type;
pmap->ports[p].xprt = xprt;
@@ -256,12 +282,69 @@ out:
}
int
+pmap_registry_extend (xlator_t *this, int port, const char *brickname)
+{
+ struct pmap_registry *pmap = NULL;
+ char *old_bn;
+ char *new_bn;
+ size_t bn_len;
+ char *entry;
+ int found = 0;
+
+ pmap = pmap_registry_get (this);
+
+ if (port > GF_PORT_MAX) {
+ return -1;
+ }
+
+ switch (pmap->ports[port].type) {
+ case GF_PMAP_PORT_LEASED:
+ case GF_PMAP_PORT_BRICKSERVER:
+ break;
+ default:
+ return -1;
+ }
+
+ old_bn = pmap->ports[port].brickname;
+ if (old_bn) {
+ bn_len = strlen(brickname);
+ entry = strstr (old_bn, brickname);
+ while (entry) {
+ found = 1;
+ if ((entry != old_bn) && (entry[-1] != ' ')) {
+ found = 0;
+ }
+ if ((entry[bn_len] != ' ') && (entry[bn_len] != '\0')) {
+ found = 0;
+ }
+ if (found) {
+ return 0;
+ }
+ entry = strstr (entry + bn_len, brickname);
+ }
+ asprintf (&new_bn, "%s %s", old_bn, brickname);
+ } else {
+ new_bn = strdup (brickname);
+ }
+
+ if (!new_bn) {
+ return -1;
+ }
+
+ pmap->ports[port].brickname = new_bn;
+ free (old_bn);
+
+ return 0;
+}
+
+int
pmap_registry_remove (xlator_t *this, int port, const char *brickname,
gf_pmap_port_type_t type, void *xprt)
{
struct pmap_registry *pmap = NULL;
int p = 0;
glusterd_conf_t *priv = NULL;
+ char *brick_str;
priv = this->private;
pmap = priv->pmap;
@@ -277,7 +360,7 @@ pmap_registry_remove (xlator_t *this, int port, const char *brickname,
}
if (brickname && strchr (brickname, '/')) {
- p = pmap_registry_search (this, brickname, type);
+ p = pmap_registry_search (this, brickname, type, _gf_true);
if (p)
goto remove;
}
@@ -294,11 +377,29 @@ remove:
GD_MSG_BRICK_REMOVE, "removing brick %s on port %d",
pmap->ports[p].brickname, p);
- free (pmap->ports[p].brickname);
+ if (xprt && (xprt == pmap->ports[p].xprt)) {
+ pmap->ports[p].xprt = NULL;
+ }
- pmap->ports[p].type = GF_PMAP_PORT_FREE;
- pmap->ports[p].brickname = NULL;
- pmap->ports[p].xprt = NULL;
+ /*
+ * This is where we garbage-collect. If all of the brick names have
+ * been "whited out" by pmap_registry_search(...,destroy=_gf_true) and
+ * there's no xprt either, then we have nothing left worth saving and
+ * can delete the entire entry.
+ */
+ if (!pmap->ports[p].xprt) {
+ brick_str = pmap->ports[p].brickname;
+ if (brick_str) {
+ while (*brick_str != '\0') {
+ if (*(brick_str++) != ' ') {
+ goto out;
+ }
+ }
+ }
+ free (pmap->ports[p].brickname);
+ pmap->ports[p].brickname = NULL;
+ pmap->ports[p].type = GF_PMAP_PORT_FREE;
+ }
out:
return 0;
@@ -322,7 +423,8 @@ __gluster_pmap_portbybrick (rpcsvc_request_t *req)
brick = args.brick;
- port = pmap_registry_search (THIS, brick, GF_PMAP_PORT_BRICKSERVER);
+ port = pmap_registry_search (THIS, brick, GF_PMAP_PORT_BRICKSERVER,
+ _gf_false);
if (!port)
rsp.op_ret = -1;
@@ -380,15 +482,6 @@ gluster_pmap_brickbyport (rpcsvc_request_t *req)
}
-static int
-glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo,
- gf_boolean_t value)
-{
- brickinfo->signed_in = value;
-
- return 0;
-}
-
int
__gluster_pmap_signin (rpcsvc_request_t *req)
{
@@ -413,9 +506,6 @@ fail:
(xdrproc_t)xdr_pmap_signin_rsp);
free (args.brick);//malloced by xdr
- if (!ret)
- glusterd_brick_update_signin (brickinfo, _gf_true);
-
return 0;
}
@@ -454,9 +544,6 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
req->trans);
}
- if (!ret)
- glusterd_brick_update_signin (brickinfo, _gf_false);
-
fail:
glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
(xdrproc_t)xdr_pmap_signout_rsp);
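A standalone sketch (not part of the patch) of the "whiteout" technique used
in pmap_registry_search above: each port entry stores several space-separated
brick names, and a destructive search blanks the matched name in place rather
than reallocating the string, leaving pmap_registry_remove to garbage-collect
the entry once nothing but spaces remains:

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    static int
    whiteout_search (char *names, const char *target, int destroy)
    {
            char   *p = names;
            size_t  i;

            for (;;) {
                    /* measure the next space-delimited word */
                    for (i = 0; p[i] && !isspace ((unsigned char)p[i]); ++i)
                            ;
                    if (!i)
                            return 0;       /* end of string: no match */
                    /* require a whole-word match, not a mere prefix */
                    if (strncmp (p, target, i) == 0 && target[i] == '\0') {
                            if (destroy)
                                    memset (p, ' ', i);     /* white out */
                            return 1;
                    }
                    p += i;
                    while (isspace ((unsigned char)*p))
                            ++p;
            }
    }

    int
    main (void)
    {
            char names[] = "vol1-brick0 vol2-brick0 vol3-brick0";

            whiteout_search (names, "vol2-brick0", 1);
            printf ("[%s]\n", names);   /* middle name is now spaces */
            return 0;
    }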
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h
index 14187daee2b..9965a9577b5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.h
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h
@@ -40,10 +40,11 @@ int pmap_mark_port_leased (xlator_t *this, int port);
int pmap_registry_alloc (xlator_t *this);
int pmap_registry_bind (xlator_t *this, int port, const char *brickname,
gf_pmap_port_type_t type, void *xprt);
+int pmap_registry_extend (xlator_t *this, int port, const char *brickname);
int pmap_registry_remove (xlator_t *this, int port, const char *brickname,
gf_pmap_port_type_t type, void *xprt);
int pmap_registry_search (xlator_t *this, const char *brickname,
- gf_pmap_port_type_t type);
+ gf_pmap_port_type_t type, gf_boolean_t destroy);
struct pmap_registry *pmap_registry_get (xlator_t *this);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 00b84e076c3..bc6cddea7f7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -315,7 +315,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
sleep (5);
- ret = glusterd_rebalance_rpc_create (volinfo, _gf_false);
+ ret = glusterd_rebalance_rpc_create (volinfo);
//FIXME: this cbk is passed as NULL in all occurrences. May be
//we never needed it.
@@ -363,8 +363,7 @@ out:
}
int
-glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
- gf_boolean_t reconnect)
+glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo)
{
dict_t *options = NULL;
char sockfile[PATH_MAX] = {0,};
@@ -383,35 +382,27 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
if (!defrag)
goto out;
- //rpc obj for rebalance process already in place.
- if (glusterd_defrag_rpc_get (defrag)) {
- ret = 0;
- glusterd_defrag_rpc_put (defrag);
- goto out;
- }
GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo);
- /* If reconnecting check if defrag sockfile exists in the new location
+ /* Check if defrag sockfile exists in the new location
* in /var/run/ , if it does not try the old location
*/
- if (reconnect) {
- ret = sys_stat (sockfile, &buf);
- /* TODO: Remove this once we don't need backward compatibility
- * with the older path
- */
- if (ret && (errno == ENOENT)) {
- gf_msg (this->name, GF_LOG_WARNING, errno,
- GD_MSG_FILE_OP_FAILED, "Rebalance sockfile "
- "%s does not exist. Trying old path.",
- sockfile);
- GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD (sockfile, volinfo,
- priv);
- ret =sys_stat (sockfile, &buf);
- if (ret && (ENOENT == errno)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_REBAL_NO_SOCK_FILE, "Rebalance "
- "sockfile %s does not exist", sockfile);
- goto out;
- }
+ ret = sys_stat (sockfile, &buf);
+ /* TODO: Remove this once we don't need backward compatibility
+ * with the older path
+ */
+ if (ret && (errno == ENOENT)) {
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ GD_MSG_FILE_OP_FAILED, "Rebalance sockfile "
+ "%s does not exist. Trying old path.",
+ sockfile);
+ GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD (sockfile, volinfo,
+ priv);
+ ret = sys_stat (sockfile, &buf);
+ if (ret && (ENOENT == errno)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_REBAL_NO_SOCK_FILE, "Rebalance "
+ "sockfile %s does not exist", sockfile);
+ goto out;
}
}
@@ -429,7 +420,7 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
glusterd_volinfo_ref (volinfo);
ret = glusterd_rpc_create (&defrag->rpc, options,
- glusterd_defrag_notify, volinfo);
+ glusterd_defrag_notify, volinfo, _gf_true);
if (ret) {
gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
"Glusterd RPC creation failed");
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index eb1a714bfd5..fb29c6efcfd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -326,22 +326,6 @@ out:
return ret;
}
-static int
-rb_kill_destination_brick (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *dst_brickinfo)
-{
- glusterd_conf_t *priv = NULL;
- char pidfile[PATH_MAX] = {0,};
-
- priv = THIS->private;
-
- snprintf (pidfile, PATH_MAX, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname,
- RB_DSTBRICK_PIDFILE);
-
- return glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_true);
-}
-
int
glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo,
@@ -526,17 +510,6 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict)
goto out;
}
- if (gf_is_local_addr (dst_brickinfo->hostname)) {
- gf_msg_debug (this->name, 0, "I AM THE DESTINATION HOST");
- ret = rb_kill_destination_brick (volinfo, dst_brickinfo);
- if (ret) {
- gf_msg (this->name, GF_LOG_CRITICAL, 0,
- GD_MSG_BRK_CLEANUP_FAIL,
- "Unable to cleanup dst brick");
- goto out;
- }
- }
-
ret = glusterd_svcs_stop (volinfo);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 6a350361998..c75a1011fb3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -886,19 +886,6 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- /* Restore is successful therefore delete the original volume's
- * volinfo. If the volinfo is already restored then we should
- * delete the backend LVMs */
- if (!gf_uuid_is_null (parent_volinfo->restored_from_snap)) {
- ret = glusterd_lvm_snapshot_remove (rsp_dict,
- parent_volinfo);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_LVM_REMOVE_FAILED,
- "Failed to remove LVM backend");
- }
- }
-
/* Detach the volinfo from priv->volumes, so that no new
* command can ref it any more and then unref it.
*/
@@ -2847,13 +2834,12 @@ glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol,
GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv);
if (gf_is_service_running (pidfile, &pid)) {
- ret = kill (pid, SIGKILL);
- if (ret && errno != ESRCH) {
- gf_msg (this->name, GF_LOG_ERROR, errno,
- GD_MSG_PID_KILL_FAIL, "Unable to kill pid "
- "%d reason : %s", pid, strerror(errno));
- goto out;
- }
+ int send_attach_req (xlator_t *this, struct rpc_clnt *rpc,
+ char *path, int op);
+ (void) send_attach_req (this, brickinfo->rpc,
+ brickinfo->path,
+ GLUSTERD_BRICK_TERMINATE);
+ brickinfo->status = GF_BRICK_STOPPED;
}
/* Check if the brick is mounted and then try unmounting the brick */
@@ -2895,13 +2881,28 @@ glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol,
"path %s (brick: %s): %s. Retry(%d)", mount_pt,
brickinfo->path, strerror (errno), retry_count);
- sleep (1);
+ /*
+ * This used to be one second, but that wasn't long enough
+ * to get past the spurious EPERM errors that prevent some
+ * tests (especially bug-1162462.t) from passing reliably.
+ *
+ * TBD: figure out where that garbage is coming from
+ */
+ sleep (3);
}
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_UNOUNT_FAILED, "umount failed for "
"path %s (brick: %s): %s.", mount_pt,
brickinfo->path, strerror (errno));
+ /*
+ * This is cheating, but necessary until we figure out how to
+ * shut down a brick within a still-living brick daemon so that
+ * random translators aren't keeping the mountpoint alive.
+ *
+ * TBD: figure out a real solution
+ */
+ ret = 0;
goto out;
}
@@ -7599,20 +7600,21 @@ glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,
GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo,
brickinfo, priv);
- ret = gf_is_service_running (pidfile, &pid);
- ret = snprintf (key, sizeof (key), "%s.brick%d.pid",
- keyprefix, index);
- if (ret < 0) {
- goto out;
- }
+ if (gf_is_service_running (pidfile, &pid)) {
+ ret = snprintf (key, sizeof (key), "%s.brick%d.pid",
+ keyprefix, index);
+ if (ret < 0) {
+ goto out;
+ }
- ret = dict_set_int32 (rsp_dict, key, pid);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SET_FAILED,
- "Could not save pid %d", pid);
- goto out;
+ ret = dict_set_int32 (rsp_dict, key, pid);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Could not save pid %d", pid);
+ goto out;
+ }
}
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index 970aed2924c..07501f2407d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -152,8 +152,6 @@ gd_brick_op_req_free (gd1_mgmt_brick_op_req *req)
if (!req)
return;
- if (strcmp (req->name, "") != 0)
- GF_FREE (req->name);
GF_FREE (req->input.input_val);
GF_FREE (req);
}
@@ -998,6 +996,21 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
goto out;
}
}
+
+ if (req->op == GLUSTERD_BRICK_TERMINATE) {
+ if (args.op_ret && (args.op_errno == ENOTCONN)) {
+ /*
+ * This is actually OK. It happens when the target
+ * brick process exits and we saw the closed connection
+ * before we read the response. If we didn't read the
+ * response quickly enough that's kind of our own
+ * fault, and the fact that the process exited means
+ * that our goal of terminating the brick was achieved.
+ */
+ args.op_ret = 0;
+ }
+ }
+
if (args.op_ret == 0)
glusterd_handle_node_rsp (dict_out, pnode->node, op,
args.dict, op_ctx, errstr,
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 5f9098f3e9d..5cad58cbb2e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -93,6 +93,30 @@
#define NLMV4_VERSION 4
#define NLMV1_VERSION 1
+int
+send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op);
+
+static gf_boolean_t
+is_brick_mx_enabled ()
+{
+ char *value = NULL;
+ int ret = 0;
+ gf_boolean_t enabled = _gf_false;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+
+ priv = this->private;
+
+ ret = dict_get_str (priv->opts, GLUSTERD_BRICK_MULTIPLEX_KEY, &value);
+
+ if (!ret)
+ ret = gf_string2boolean (value, &enabled);
+
+ return ret ? _gf_false: enabled;
+}
+
extern struct volopt_map_entry glusterd_volopt_map[];
extern glusterd_all_vol_opts valid_all_vol_opts[];
@@ -1690,8 +1714,6 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
char *sockpath, size_t len)
{
- char export_path[PATH_MAX] = {0,};
- char sock_filepath[PATH_MAX] = {0,};
char volume_dir[PATH_MAX] = {0,};
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
@@ -1706,11 +1728,18 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
priv = this->private;
GLUSTERD_GET_VOLUME_DIR (volume_dir, volinfo, priv);
- GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path);
- snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s",
- volume_dir, brickinfo->hostname, export_path);
+ if (is_brick_mx_enabled ()) {
+ snprintf (sockpath, len, "%s/run/daemon-%s.socket",
+ volume_dir, brickinfo->hostname);
+ } else {
+ char export_path[PATH_MAX] = {0,};
+ char sock_filepath[PATH_MAX] = {0,};
+ GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path);
+ snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s",
+ volume_dir, brickinfo->hostname, export_path);
- glusterd_set_socket_filepath (sock_filepath, sockpath, len);
+ glusterd_set_socket_filepath (sock_filepath, sockpath, len);
+ }
}
/* connection happens only if it is not aleady connected,
@@ -1749,7 +1778,7 @@ glusterd_brick_connect (glusterd_volinfo_t *volinfo,
ret = glusterd_rpc_create (&rpc, options,
glusterd_brick_rpc_notify,
- brickid);
+ brickid, _gf_false);
if (ret) {
GF_FREE (brickid);
goto out;
@@ -1802,6 +1831,8 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
char glusterd_uuid[1024] = {0,};
char valgrind_logfile[PATH_MAX] = {0};
char rdma_brick_path[PATH_MAX] = {0,};
+ struct rpc_clnt *rpc = NULL;
+ rpc_clnt_connection_t *conn = NULL;
GF_ASSERT (volinfo);
GF_ASSERT (brickinfo);
@@ -1823,16 +1854,33 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
goto out;
}
- ret = _mk_rundir_p (volinfo);
- if (ret)
- goto out;
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
+ if (gf_is_service_running (pidfile, NULL)) {
+ goto connect;
+ }
+ /*
+ * There are all sorts of races in the start/stop code that could leave
+ * a UNIX-domain socket or RPC-client object associated with a
+ * long-dead incarnation of this brick, while the new incarnation is
+ * listening on a new socket at the same path and wondering why we
+ * haven't shown up. To avoid the whole mess and be on the safe side,
+ * we just blow away anything that might have been left over, and start
+ * over again.
+ */
glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath,
sizeof (socketpath));
-
- GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
- if (gf_is_service_running (pidfile, NULL))
- goto connect;
+ (void) glusterd_unlink_file (socketpath);
+ rpc = brickinfo->rpc;
+ if (rpc) {
+ brickinfo->rpc = NULL;
+ conn = &rpc->conn;
+ if (conn->reconnect) {
+ (void ) gf_timer_call_cancel (rpc->ctx, conn->reconnect);
+ //rpc_clnt_unref (rpc);
+ }
+ rpc_clnt_unref (rpc);
+ }
port = pmap_assign_port (THIS, brickinfo->port, brickinfo->path);
@@ -1933,6 +1981,7 @@ retry:
brickinfo->port = port;
brickinfo->rdma_port = rdma_port;
+ brickinfo->started_here = _gf_true;
if (wait) {
synclock_unlock (&priv->big_lock);
@@ -1978,6 +2027,7 @@ connect:
brickinfo->hostname, brickinfo->path, socketpath);
goto out;
}
+
out:
return ret;
}
@@ -2035,9 +2085,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
gf_boolean_t del_brick)
{
xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
- char pidfile[PATH_MAX] = {0,};
int ret = 0;
+ char *op_errstr = NULL;
GF_ASSERT (volinfo);
GF_ASSERT (brickinfo);
@@ -2045,18 +2094,32 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
this = THIS;
GF_ASSERT (this);
- priv = this->private;
if (del_brick)
cds_list_del_init (&brickinfo->brick_list);
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- (void) glusterd_brick_disconnect (brickinfo);
- GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
- ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false);
- if (ret == 0) {
- glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
- (void) glusterd_brick_unlink_socket_file (volinfo, brickinfo);
+ /*
+ * In a post-multiplexing world, even if we're not actually
+ * doing any multiplexing, just dropping the RPC connection
+ * isn't enough. There might be many such connections during
+ * the brick daemon's lifetime, even if we only consider the
+ * management RPC port (because tests etc. might be manually
+ * attaching and detaching bricks). Therefore, we have to send
+ * an actual signal instead.
+ */
+ if (is_brick_mx_enabled ()) {
+ (void) send_attach_req (this, brickinfo->rpc,
+ brickinfo->path,
+ GLUSTERD_BRICK_TERMINATE);
+ } else {
+ (void) glusterd_brick_terminate (volinfo, brickinfo,
+ NULL, 0, &op_errstr);
+ if (op_errstr) {
+ GF_FREE (op_errstr);
+ }
+ (void) glusterd_brick_disconnect (brickinfo);
}
+ ret = 0;
}
if (del_brick)
@@ -4843,16 +4906,350 @@ out:
return ret;
}
+static int32_t
+my_callback (struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
+{
+ call_frame_t *frame = v_frame;
+
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+int
+send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
+{
+ int ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {0, };
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+ gd1_mgmt_brick_op_req brick_req;
+ void *req = &brick_req;
+ void *errlbl = &&err;
+ extern struct rpc_clnt_program gd_brick_prog;
+
+ if (!rpc) {
+ gf_log (this->name, GF_LOG_ERROR, "called with null rpc");
+ return -1;
+ }
+
+ brick_req.op = op;
+ brick_req.name = path;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
+
+ req_size = xdr_sizeof ((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2 (rpc->ctx->iobuf_pool, req_size);
+ if (!iobuf) {
+ goto *errlbl;
+ }
+ errlbl = &&maybe_free_iobuf;
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize (iobuf);
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto *errlbl;
+ }
+ errlbl = &&free_iobref;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ goto *errlbl;
+ }
+
+ iobref_add (iobref, iobuf);
+ /*
+ * Drop our reference to the iobuf. The iobref should already have
+ * one after iobref_add, so when we unref that we'll free the iobuf as
+ * well. This allows us to pass just the iobref as frame->local.
+ */
+ iobuf_unref (iobuf);
+ /* Set the pointer to null so we don't free it on a later error. */
+ iobuf = NULL;
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret == -1) {
+ goto *errlbl;
+ }
+
+ iov.iov_len = ret;
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (rpc, &gd_brick_prog, op,
+ my_callback, &iov, 1, NULL, 0, iobref, frame,
+ NULL, 0, NULL, 0, NULL);
+ return ret;
+
+free_iobref:
+ iobref_unref (iobref);
+maybe_free_iobuf:
+ if (iobuf) {
+ iobuf_unref (iobuf);
+ }
+err:
+ return -1;
+}
+
+extern size_t
+build_volfile_path (char *volume_id, char *path,
+ size_t path_len, char *trusted_str);
+
+
+static int
+attach_brick (xlator_t *this,
+ glusterd_brickinfo_t *brickinfo,
+ glusterd_brickinfo_t *other_brick,
+ glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_t *other_vol)
+{
+ glusterd_conf_t *conf = this->private;
+ char pidfile1[PATH_MAX] = {0};
+ char pidfile2[PATH_MAX] = {0};
+ char unslashed[PATH_MAX] = {'\0',};
+ char full_id[PATH_MAX] = {'\0',};
+ char path[PATH_MAX] = {'\0',};
+ int ret;
+
+ gf_log (this->name, GF_LOG_INFO,
+ "add brick %s to existing process for %s",
+ brickinfo->path, other_brick->path);
+
+ GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, unslashed);
+
+ ret = pmap_registry_extend (this, other_brick->port,
+ brickinfo->path);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "adding brick to process failed");
+ return -1;
+ }
+
+ brickinfo->port = other_brick->port;
+ brickinfo->status = GF_BRICK_STARTED;
+ brickinfo->started_here = _gf_true;
+ brickinfo->rpc = rpc_clnt_ref (other_brick->rpc);
+
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile1, other_vol, other_brick, conf);
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf);
+ (void) sys_unlink (pidfile2);
+ (void) sys_link (pidfile1, pidfile2);
+
+ if (volinfo->is_snap_volume) {
+ snprintf (full_id, sizeof(full_id), "/%s/%s/%s.%s.%s",
+ GLUSTERD_VOL_SNAP_DIR_PREFIX,
+ volinfo->snapshot->snapname,
+ volinfo->volname, brickinfo->hostname, unslashed);
+ } else {
+ snprintf (full_id, sizeof(full_id), "%s.%s.%s",
+ volinfo->volname, brickinfo->hostname, unslashed);
+ }
+ (void) build_volfile_path (full_id, path, sizeof(path), NULL);
+
+ int tries = 0;
+ while (tries++ <= 10) {
+ ret = send_attach_req (this, other_brick->rpc, path,
+ GLUSTERD_BRICK_ATTACH);
+ if (!ret) {
+ return 0;
+ }
+ /*
+ * It might not actually be safe to manipulate the lock like
+ * this, but if we don't then the connection can never actually
+ * complete and retries are useless. Unfortunately, all of the
+ * alternatives (e.g. doing all of this in a separate thread)
+ * are much more complicated and risky. TBD: see if there's a
+ * better way
+ */
+ synclock_unlock (&conf->big_lock);
+ sleep (1);
+ synclock_lock (&conf->big_lock);
+ }
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "attach failed for %s", brickinfo->path);
+ return ret;
+}
+
+static glusterd_brickinfo_t *
+find_compatible_brick_in_volume (glusterd_conf_t *conf,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ xlator_t *this = THIS;
+ glusterd_brickinfo_t *other_brick;
+ char pidfile2[PATH_MAX] = {0};
+ int32_t pid2 = -1;
+
+ cds_list_for_each_entry (other_brick, &volinfo->bricks,
+ brick_list) {
+ if (other_brick == brickinfo) {
+ continue;
+ }
+ if (!other_brick->started_here) {
+ continue;
+ }
+ if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
+ continue;
+ }
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, other_brick,
+ conf);
+ if (!gf_is_service_running (pidfile2, &pid2)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "cleaning up dead brick %s:%s",
+ other_brick->hostname, other_brick->path);
+ other_brick->started_here = _gf_false;
+ sys_unlink (pidfile2);
+ continue;
+ }
+ return other_brick;
+ }
+
+ return NULL;
+}
+
+static gf_boolean_t
+unsafe_option (dict_t *this, char *key, data_t *value, void *arg)
+{
+ /*
+ * Certain options are safe because they're already being handled other
+ * ways, such as being copied down to the bricks (all auth options) or
+ * being made irrelevant (event-threads). All others are suspect and
+ * must be checked in the next function.
+ */
+ if (fnmatch ("*auth*", key, 0) == 0) {
+ return _gf_false;
+ }
+
+ if (fnmatch ("*event-threads", key, 0) == 0) {
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+static int
+opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2)
+{
+ data_t *value2 = dict_get (dict2, key);
+ int32_t min_len;
+
+ /*
+ * If the option is only present on one, we can either look at the
+ * default or assume a mismatch. Looking at the default is pretty
+ * hard, because that's part of a structure within each translator and
+ * there's no dlopen interface to get at it, so we assume a mismatch.
+ * If the user really wants them to match (and their bricks to be
+ * multiplexed), they can always reset the option.
+ */
+ if (!value2) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key);
+ return -1;
+ }
+
+ min_len = MIN (value1->len, value2->len);
+ if (strncmp (value1->data, value2->data, min_len) != 0) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "option mismatch, %s, %s != %s",
+ key, value1->data, value2->data);
+ return -1;
+ }
+
+ return 0;
+}
+
+static glusterd_brickinfo_t *
+find_compatible_brick (glusterd_conf_t *conf,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ glusterd_volinfo_t **other_vol_p)
+{
+ glusterd_brickinfo_t *other_brick;
+ glusterd_volinfo_t *other_vol;
+
+ /* Just return NULL here if multiplexing is disabled. */
+ if (!is_brick_mx_enabled ()) {
+ return NULL;
+ }
+
+ other_brick = find_compatible_brick_in_volume (conf, volinfo,
+ brickinfo);
+ if (other_brick) {
+ *other_vol_p = volinfo;
+ return other_brick;
+ }
+
+ cds_list_for_each_entry (other_vol, &conf->volumes, vol_list) {
+ if (other_vol == volinfo) {
+ continue;
+ }
+ if (volinfo->is_snap_volume) {
+ /*
+ * Snap volumes do have different options than their
+ * parents, but are nonetheless generally compatible.
+ * Skip the option comparison for now, until we figure
+ * out how to handle this (e.g. compare at the brick
+ * level instead of the volume level for this case).
+ *
+ * TBD: figure out compatibility for snap bricks
+ */
+ goto no_opt_compare;
+ }
+ /*
+ * It's kind of a shame that we have to do this check in both
+ * directions, but an option might only exist on one of the two
+ * dictionaries and dict_foreach_match will only find that one.
+ */
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "comparing options for %s and %s",
+ volinfo->volname, other_vol->volname);
+ if (dict_foreach_match (volinfo->dict, unsafe_option, NULL,
+ opts_mismatch, other_vol->dict) < 0) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "failure forward");
+ continue;
+ }
+ if (dict_foreach_match (other_vol->dict, unsafe_option, NULL,
+ opts_mismatch, volinfo->dict) < 0) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "failure backward");
+ continue;
+ }
+ gf_log (THIS->name, GF_LOG_DEBUG, "all options match");
+no_opt_compare:
+ other_brick = find_compatible_brick_in_volume (conf,
+ other_vol,
+ brickinfo);
+ if (other_brick) {
+ *other_vol_p = other_vol;
+ return other_brick;
+ }
+ }
+
+ return NULL;
+}
+
int
glusterd_brick_start (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
gf_boolean_t wait)
{
- int ret = -1;
- xlator_t *this = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_brickinfo_t *other_brick;
+ glusterd_conf_t *conf = NULL;
+ int32_t pid = -1;
+ char pidfile[PATH_MAX] = {0};
+ FILE *fp;
+ char socketpath[PATH_MAX] = {0};
+ glusterd_volinfo_t *other_vol;
this = THIS;
GF_ASSERT (this);
+ conf = this->private;
if ((!brickinfo) || (!volinfo))
goto out;
@@ -4876,6 +5273,77 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
ret = 0;
goto out;
}
+
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
+ if (gf_is_service_running (pidfile, &pid)) {
+ /*
+ * In general, if the pidfile exists and points to a running
+ * process, this will already be set. However, that's not the
+ * case when we're starting up and bricks are already running.
+ */
+ if (brickinfo->status != GF_BRICK_STARTED) {
+ gf_log (this->name, GF_LOG_INFO,
+ "discovered already-running brick %s",
+ brickinfo->path);
+ //brickinfo->status = GF_BRICK_STARTED;
+ (void) pmap_registry_bind (this,
+ brickinfo->port, brickinfo->path,
+ GF_PMAP_PORT_BRICKSERVER, NULL);
+ /*
+ * This will unfortunately result in a separate RPC
+ * connection per brick, even though they're all in
+ * the same process. It works, but it would be nicer
+ * if we could find a pre-existing connection to that
+ * same port (on another brick) and re-use that.
+ * TBD: re-use RPC connection across bricks
+ */
+ glusterd_set_brick_socket_filepath (volinfo, brickinfo,
+ socketpath, sizeof (socketpath));
+ (void) glusterd_brick_connect (volinfo, brickinfo,
+ socketpath);
+ }
+ return 0;
+ }
+
+ ret = _mk_rundir_p (volinfo);
+ if (ret)
+ goto out;
+
+ other_brick = find_compatible_brick (conf, volinfo, brickinfo,
+ &other_vol);
+ if (other_brick) {
+ ret = attach_brick (this, brickinfo, other_brick,
+ volinfo, other_vol);
+ if (ret == 0) {
+ goto out;
+ }
+ }
+
+ /*
+ * This hack is necessary because our brick-process management is a
+ * total nightmare. We expect a brick process's socket and pid files
+ * to be ready *immediately* after we start it. Ditto for it calling
+ * back to bind its port. Unfortunately, none of that is realistic.
+ * Any process takes non-zero time to start up. This has *always* been
+ * racy and unsafe; it just became more visible with multiplexing.
+ *
+ * The right fix would be to do all of this setup *in the parent*,
+ * which would include (among other things) getting the PID back from
+ * the "runner" code. That's all prohibitively difficult and risky.
+ * To work around the more immediate problems, we create a stub pidfile
+ * here to let gf_is_service_running know that we expect the process to
+ * be there shortly, and then it gets filled in with a real PID when
+ * the process does finish starting up.
+ *
+ * TBD: pray for GlusterD 2 to be ready soon.
+ */
+ (void) sys_unlink (pidfile);
+ fp = fopen (pidfile, "w+");
+ if (fp) {
+ (void) fprintf (fp, "0\n");
+ (void) fclose (fp);
+ }
+
ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -5813,11 +6281,12 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
-
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
if (glusterd_is_brick_started (brickinfo)) {
- brick_online = gf_is_service_running (pidfile, &pid);
+ if (gf_is_service_running (pidfile, &pid)) {
+ brick_online = _gf_true;
+ }
}
memset (key, 0, sizeof (key));
@@ -6880,10 +7349,12 @@ out:
return ret;
}
-int
-glusterd_brick_statedump (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo,
- char *options, int option_cnt, char **op_errstr)
+
+static int
+glusterd_brick_signal (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *options, int option_cnt, char **op_errstr,
+ int sig)
{
int ret = -1;
xlator_t *this = NULL;
@@ -6916,6 +7387,7 @@ glusterd_brick_statedump (glusterd_volinfo_t *volinfo,
GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf);
+ /* TBD: use gf_is_service_running instead of almost-identical code? */
pidfile = fopen (pidfile_path, "r");
if (!pidfile) {
gf_msg ("glusterd", GF_LOG_ERROR, errno,
@@ -6934,24 +7406,35 @@ glusterd_brick_statedump (glusterd_volinfo_t *volinfo,
goto out;
}
- snprintf (dumpoptions_path, sizeof (dumpoptions_path),
- DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid);
- ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt);
- if (ret < 0) {
- gf_msg ("glusterd", GF_LOG_ERROR, 0,
- GD_MSG_BRK_STATEDUMP_FAIL,
- "error while parsing the statedump "
- "options");
- ret = -1;
+ if (pid == 0) {
+ gf_msg ("glusterd", GF_LOG_WARNING, 0,
+ GD_MSG_NO_SIG_TO_PID_ZERO,
+ "refusing to send signal %d to pid zero", sig);
goto out;
}
+ if (sig == SIGUSR1) {
+ snprintf (dumpoptions_path, sizeof (dumpoptions_path),
+ DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options",
+ pid);
+ ret = glusterd_set_dump_options (dumpoptions_path, options,
+ option_cnt);
+ if (ret < 0) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_BRK_STATEDUMP_FAIL,
+ "error while parsing the statedump "
+ "options");
+ ret = -1;
+ goto out;
+ }
+ }
+
gf_msg ("glusterd", GF_LOG_INFO, 0,
GD_MSG_STATEDUMP_INFO,
- "Performing statedump on brick with pid %d",
- pid);
+ "sending signal %d to brick with pid %d",
+ sig, pid);
- kill (pid, SIGUSR1);
+ kill (pid, sig);
sleep (1);
ret = 0;
@@ -6963,6 +7446,26 @@ out:
}
int
+glusterd_brick_statedump (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *options, int option_cnt, char **op_errstr)
+{
+ return glusterd_brick_signal (volinfo, brickinfo,
+ options, option_cnt, op_errstr,
+ SIGUSR1);
+}
+
+int
+glusterd_brick_terminate (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *options, int option_cnt, char **op_errstr)
+{
+ return glusterd_brick_signal (volinfo, brickinfo,
+ options, option_cnt, op_errstr,
+ SIGTERM);
+}
+
+int
glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr)
{
int ret = -1;
@@ -7446,7 +7949,7 @@ glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr,
"volume=%s", volinfo->volname);
goto out;
}
- ret = glusterd_rebalance_rpc_create (volinfo, _gf_true);
+ ret = glusterd_rebalance_rpc_create (volinfo);
break;
}
case GF_DEFRAG_STATUS_NOT_STARTED:
@@ -7978,9 +8481,10 @@ glusterd_to_cli (rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload,
glusterd_submit_reply (req, arg, payload, payloadcount, iobref,
(xdrproc_t) xdrproc);
- if (dict)
- dict_unref (dict);
+ if (dict) {
+ dict_unref (dict);
+ }
return ret;
}
@@ -11356,6 +11860,7 @@ glusterd_get_global_options_for_all_vols (rpcsvc_request_t *req, dict_t *ctx,
char *allvolopt = NULL;
int32_t i = 0;
gf_boolean_t exists = _gf_false;
+ gf_boolean_t need_free;
this = THIS;
GF_VALIDATE_OR_GOTO (THIS->name, this, out);
@@ -11414,13 +11919,16 @@ glusterd_get_global_options_for_all_vols (rpcsvc_request_t *req, dict_t *ctx,
ret = dict_get_str (priv->opts, allvolopt, &def_val);
/* If global option isn't set explicitly */
+
+ need_free = _gf_false;
if (!def_val) {
- if (!strcmp (allvolopt, GLUSTERD_GLOBAL_OP_VERSION_KEY))
+ if (!strcmp (allvolopt,
+ GLUSTERD_GLOBAL_OP_VERSION_KEY)) {
gf_asprintf (&def_val, "%d", priv->op_version);
- else if (!strcmp (allvolopt, GLUSTERD_QUORUM_RATIO_KEY))
- gf_asprintf (&def_val, "%d", 0);
- else if (!strcmp (allvolopt, GLUSTERD_SHARED_STORAGE_KEY))
- gf_asprintf (&def_val, "%s", "disable");
+ need_free = _gf_true;
+ } else {
+ def_val = valid_all_vol_opts[i].dflt_val;
+ }
}
count++;
@@ -11443,6 +11951,9 @@ glusterd_get_global_options_for_all_vols (rpcsvc_request_t *req, dict_t *ctx,
goto out;
}
+ if (need_free) {
+ GF_FREE (def_val);
+ }
def_val = NULL;
allvolopt = NULL;
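A standalone sketch (not part of the patch; buffer names are hypothetical) of
the staged-cleanup pattern send_attach_req uses above: "errlbl" always holds
the address of the deepest cleanup label reached so far (a GCC/Clang
computed-goto extension), so a single "goto *errlbl" unwinds exactly the
resources acquired up to the point of failure:

    #include <stdlib.h>

    int
    setup_two_buffers (void)
    {
            void *errlbl = &&err;
            char *buf1   = NULL;
            char *buf2   = NULL;

            buf1 = malloc (4096);
            if (!buf1)
                    goto *errlbl;
            errlbl = &&free_buf1;   /* from here on, unwind buf1 too */

            buf2 = malloc (4096);
            if (!buf2)
                    goto *errlbl;

            /* ... success path uses both buffers ... */
            free (buf2);
            free (buf1);
            return 0;

    free_buf1:
            free (buf1);
    err:
            return -1;
    }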
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index e801c1a03a3..a9aefb85246 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -386,6 +386,12 @@ int
glusterd_brick_statedump (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
char *options, int option_cnt, char **op_errstr);
+
+int
+glusterd_brick_terminate (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *options, int option_cnt, char **op_errstr);
+
int
glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index f5ddef4755d..957bbfcee25 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1516,6 +1516,8 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
out:
return ret;
}
+
+#if 0
static int
brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
@@ -1538,6 +1540,7 @@ brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
out:
return ret;
}
+#endif
static int
brick_graph_add_decompounder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
@@ -2456,7 +2459,11 @@ static volgen_brick_xlator_t server_graph_table[] = {
{brick_graph_add_changetimerecorder, "changetimerecorder"},
#endif
{brick_graph_add_bd, "bd"},
+ /*
+ * TBD: Figure out why trash breaks multiplexing. AFAICT it should fail
+ * the same way already.
{brick_graph_add_trash, "trash"},
+ */
{brick_graph_add_arbiter, "arbiter"},
{brick_graph_add_posix, "posix"},
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index ecc4f9609c1..ad5fe909578 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2612,7 +2612,7 @@ glusterd_op_start_volume (dict_t *dict, char **op_errstr)
}
ret = dict_get_str (conf->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
- if (ret == -1) {
+ if (ret != 0) {
gf_msg (this->name, GF_LOG_INFO, 0,
GD_MSG_DICT_GET_FAILED, "Global dict not present.");
ret = 0;
@@ -3069,7 +3069,8 @@ glusterd_clearlocks_get_local_client_ports (glusterd_volinfo_t *volinfo,
brickinfo->path);
port = pmap_registry_search (THIS, brickname,
- GF_PMAP_PORT_BRICKSERVER);
+ GF_PMAP_PORT_BRICKSERVER,
+ _gf_false);
if (!port) {
ret = -1;
gf_msg_debug (THIS->name, 0, "Couldn't get port "
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 7da0de20291..9f877b6d620 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3145,6 +3145,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.flags = OPT_FLAG_CLIENT_OPT,
.op_version = GD_OP_VERSION_3_9_1,
},
+
+ /* Brick multiplexing options */
+ { .key = GLUSTERD_BRICK_MULTIPLEX_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = "off",
+ .op_version = GD_OP_VERSION_3_10_0
+ },
{ .key = NULL
}
};
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index d00e4e20811..f3c7e1d6891 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -54,6 +54,7 @@
"S32gluster_enable_shared_storage.sh"
#define GLUSTER_SHARED_STORAGE "gluster_shared_storage"
#define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage"
+#define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex"
#define GANESHA_HA_CONF CONFDIR "/ganesha-ha.conf"
#define GANESHA_EXPORT_DIRECTORY CONFDIR"/exports"
@@ -77,7 +78,6 @@
"for more details."
#define OPERRSTR_COMMIT_FAIL "Commit failed on %s. Please check the log file "\
"for more details."
-
struct glusterd_volinfo_;
typedef struct glusterd_volinfo_ glusterd_volinfo_t;
@@ -215,7 +215,6 @@ struct glusterd_brickinfo {
int port;
int rdma_port;
char *logfile;
- gf_boolean_t signed_in;
gf_store_handle_t *shandle;
gf_brick_status_t status;
struct rpc_clnt *rpc;
@@ -232,6 +231,7 @@ struct glusterd_brickinfo {
*/
uint16_t group;
uuid_t jbr_uuid;
+ gf_boolean_t started_here;
};
typedef struct glusterd_brickinfo glusterd_brickinfo_t;
@@ -1048,7 +1048,8 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
int
glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options,
- rpc_clnt_notify_t notify_fn, void *notify_data);
+ rpc_clnt_notify_t notify_fn, void *notify_data,
+ gf_boolean_t force);
/* handler functions */
@@ -1064,8 +1065,7 @@ int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
size_t len, int cmd, defrag_cbk_fn_t cbk,
glusterd_op_t op);
int
-glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
- gf_boolean_t reconnect);
+glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo);
int glusterd_rebalance_defrag_init (glusterd_volinfo_t *volinfo,
defrag_cbk_fn_t cbk);