summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@redhat.com>2017-03-20 12:32:33 -0400
committerAtin Mukherjee <amukherj@redhat.com>2017-03-30 23:33:32 -0400
commita7ce0548b7969050644891cd90c0bf134fa1594c (patch)
tree484e634b55b55830e1003c797c8e9e4c47418313
parented89f3ef064b6100bb0731f2493d915f6fb141c3 (diff)
glusterd: hold off volume deletes while still restarting bricks
We need to do this because modifying the volume/brick tree while glusterd_restart_bricks is still walking it can lead to segfaults. Without waiting we could accidentally "slip in" while attach_brick has released big_lock between retries and make such a modification. Change-Id: I30ccc4efa8d286aae847250f5d4fb28956a74b03 BUG: 1432542 Signed-off-by: Jeff Darcy <jeff@pl.atyp.us> Reviewed-on: https://review.gluster.org/16927 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
-rw-r--r--tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t (renamed from tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t)5
-rw-r--r--tests/bugs/core/bug-1432542-mpx-restart-crash.t91
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c15
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c39
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1
7 files changed, 141 insertions, 14 deletions
diff --git a/tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t b/tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t
index ed401f6e6ad..a227f8275ed 100644
--- a/tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t
+++ b/tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t
@@ -21,6 +21,11 @@ push_trapfunc "cleanup"
TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
TEST $CLI volume start $V0
+# We can't expect a valid port number instantly. We need to wait for the
+# bricks to finish coming up. In every other case we use EXPECT_WITHIN, but
+# this first time we need to wait more explicitly.
+sleep $PROCESS_UP_TIMEOUT
+
port_brick0=$(get_nth_brick_port_for_volume $V0 1)
# restart the volume
diff --git a/tests/bugs/core/bug-1432542-mpx-restart-crash.t b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
new file mode 100644
index 00000000000..970a181c83d
--- /dev/null
+++ b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+NUM_VOLS=20
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried. That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail. Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate. After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures. Arguably,
+# those are spurious because we will eventually catch up. We're just not
+# ready *yet*. More to the point, even if the errors aren't spurious that's
+# not what we're testing right now. Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {
+ printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ local brick_base=$(get_brick_base $1)
+ local cmd="$CLI volume create $vol_name replica 2"
+ local b
+ for b in $(seq 0 5); do
+ local this_brick=${brick_base}/brick$b
+ mkdir -p $this_brick
+ cmd="$cmd $H0:$this_brick"
+ done
+ TEST $cmd
+ TEST $CLI volume start $vol_name
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $vol_name "Status"
+ local mount_point=$(get_mount_point $1)
+ mkdir -p $mount_point
+ TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
+}
+
+cleanup_func () {
+ local v
+ for v in $(seq 1 $NUM_VOLS); do
+ local mount_point=$(get_mount_point $v)
+ force_umount $mount_point
+ rm -rf $mount_point
+ local vol_name=$(printf "%s-vol%02d" $V0 $v)
+ $CLI volume stop $vol_name
+ $CLI volume delete $vol_name
+ rm -rf $(get_brick_base $1) &
+ done &> /dev/null
+ wait
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=95
+for i in $(seq 1 $NUM_VOLS); do
+ create_volume $i
+ TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+done
+
+# Kill glusterd, and wait a bit for all traces to disappear.
+TEST killall -9 glusterd
+sleep 5
+TEST killall -9 glusterfsd
+sleep 5
+
+# Restart glusterd. This is where the brick daemon supposedly dumps core,
+# though I (jdarcy) have yet to see that. Again, give it a while to settle,
+# just to be sure.
+TEST glusterd
+
+cleanup_func
+trap - EXIT
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 55c2bda1dd1..3558ae91059 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -6044,6 +6044,15 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
return ret;
}
+static void
+glusterd_wait_for_blockers (glusterd_conf_t *priv)
+{
+ while (priv->blockers) {
+ synclock_unlock (&priv->big_lock);
+ sleep (1);
+ synclock_lock (&priv->big_lock);
+ }
+}
int32_t
glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
@@ -6063,18 +6072,22 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
break;
case GD_OP_STOP_VOLUME:
+ glusterd_wait_for_blockers (this->private);
ret = glusterd_op_stop_volume (dict);
break;
case GD_OP_DELETE_VOLUME:
+ glusterd_wait_for_blockers (this->private);
ret = glusterd_op_delete_volume (dict);
break;
case GD_OP_ADD_BRICK:
+ glusterd_wait_for_blockers (this->private);
ret = glusterd_op_add_brick (dict, op_errstr);
break;
case GD_OP_REPLACE_BRICK:
+ glusterd_wait_for_blockers (this->private);
ret = glusterd_op_replace_brick (dict, rsp_dict);
break;
@@ -6082,11 +6095,13 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
ret = glusterd_op_set_volume (dict, op_errstr);
break;
+
case GD_OP_RESET_VOLUME:
ret = glusterd_op_reset_volume (dict, op_errstr);
break;
case GD_OP_REMOVE_BRICK:
+ glusterd_wait_for_blockers (this->private);
ret = glusterd_op_remove_brick (dict, op_errstr);
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index f8525611ed4..a0df27c8bae 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -3134,8 +3134,8 @@ out:
int
glusterd_spawn_daemons (void *opaque)
{
- glusterd_conf_t *conf = THIS->private;
- int ret = -1;
+ glusterd_conf_t *conf = THIS->private;
+ int ret = -1;
synclock_lock (&conf->big_lock);
glusterd_restart_bricks (conf);
@@ -4904,9 +4904,13 @@ static int32_t
my_callback (struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
{
call_frame_t *frame = v_frame;
+ glusterd_conf_t *conf = frame->this->private;
- STACK_DESTROY (frame->root);
+ synclock_lock (&conf->big_lock);
+ --(conf->blockers);
+ synclock_unlock (&conf->big_lock);
+ STACK_DESTROY (frame->root);
return 0;
}
@@ -4923,6 +4927,7 @@ send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
void *req = &brick_req;
void *errlbl = &&err;
struct rpc_clnt_connection *conn;
+ glusterd_conf_t *conf = this->private;
extern struct rpc_clnt_program gd_brick_prog;
if (!rpc) {
@@ -4982,9 +4987,13 @@ send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
iov.iov_len = ret;
/* Send the msg */
+ ++(conf->blockers);
ret = rpc_clnt_submit (rpc, &gd_brick_prog, op,
- my_callback, &iov, 1, NULL, 0, iobref, frame,
- NULL, 0, NULL, 0, NULL);
+ my_callback, &iov, 1, NULL, 0, iobref,
+ frame, NULL, 0, NULL, 0, NULL);
+ if (ret) {
+ --(conf->blockers);
+ }
return ret;
free_iobref:
@@ -5016,6 +5025,8 @@ attach_brick (xlator_t *this,
char full_id[PATH_MAX] = {'\0',};
char path[PATH_MAX] = {'\0',};
int ret;
+ int tries;
+ rpc_clnt_t *rpc;
gf_log (this->name, GF_LOG_INFO,
"add brick %s to existing process for %s",
@@ -5052,12 +5063,15 @@ attach_brick (xlator_t *this,
}
(void) build_volfile_path (full_id, path, sizeof(path), NULL);
- int tries = 0;
- while (tries++ <= 15) {
- ret = send_attach_req (this, other_brick->rpc, path,
- GLUSTERD_BRICK_ATTACH);
- if (!ret) {
- return 0;
+ for (tries = 15; tries > 0; --tries) {
+ rpc = rpc_clnt_ref (other_brick->rpc);
+ if (rpc) {
+ ret = send_attach_req (this, rpc, path,
+ GLUSTERD_BRICK_ATTACH);
+ rpc_clnt_unref (rpc);
+ if (!ret) {
+ return 0;
+ }
}
/*
* It might not actually be safe to manipulate the lock like
@@ -5423,6 +5437,8 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
conf = this->private;
GF_VALIDATE_OR_GOTO (this->name, conf, out);
+ ++(conf->blockers);
+
ret = glusterd_get_quorum_cluster_counts (this, &active_count,
&quorum_count);
if (ret)
@@ -5502,6 +5518,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
ret = 0;
out:
+ --(conf->blockers);
conf->restart_done = _gf_true;
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index d7983f6fd71..5d164fd8df3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2761,14 +2761,11 @@ glusterd_op_delete_volume (dict_t *dict)
{
int ret = 0;
char *volname = NULL;
- glusterd_conf_t *priv = NULL;
glusterd_volinfo_t *volinfo = NULL;
xlator_t *this = NULL;
this = THIS;
GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index 11d4b8bc79f..d26e959d3a3 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1867,6 +1867,7 @@ init (xlator_t *this)
if (ret < 0)
goto out;
+ conf->blockers = 0;
/* If the peer count is less than 2 then this would be the best time to
* spawn process/bricks that may need (re)starting since last time
* (this) glusterd was up. */
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index e5b7b9f8f38..a773a2b6221 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -190,6 +190,7 @@ typedef struct {
int ping_timeout;
uint32_t generation;
int32_t workers;
+ uint32_t blockers;
} glusterd_conf_t;