glusterd: add "volume label" command

This command is necessary when the local disk/filesystem containing a brick is unexpectedly lost and then recreated. Since 961bc80c, trying to start the brick will fail because the trusted.glusterfs.volume-id xattr is missing, and if we can't start it then we can't replace-brick or self-heal so we're stuck in a permanently degraded state. This command provides a way to label the empty brick with the proper volume ID so that further repair actions become possible. Change-Id: I1c1e5273a018b7a6b8d0852daf111ddc3fddfdc2 BUG: 860297 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: http://review.gluster.org/4259 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
author: Jeff Darcy <jdarcy@redhat.com> 2012-12-03 12:16:28 -0500
committer: Anand Avati <avati@redhat.com> 2012-12-04 14:42:30 -0800
commit: 924702de358160b2536138c073d293b76512838a (patch)
tree: 4b44790ef6500b3eca00d1cc78954b7ee19aad83 /tests
parent: 86b01a278b55b19adefe9625e586252a5c75b474 (diff)
1 files changed, 81 insertions, 7 deletions
diff --git a/tests/bugs/bug-860297.t b/tests/bugs/bug-860297.t
index 2a3ca7a7a6c..fa1b1ff285a 100644..100755
--- a/tests/bugs/bug-860297.t
+++ b/tests/bugs/bug-860297.t
@@ -1,13 +1,87 @@
 #!/bin/bash
+
 . $(dirname $0)/../include.rc
-cleanup;
+cleanup
+
+function recreate {
+	# The rm is necessary so we don't get fooled by leftovers from old runs.
+	rm -rf $1 && mkdir -p $1
+}
+
+function count_bricks {
+	local count
+	local pid
+	count=0
+	for pid in /var/lib/glusterd/vols/${1}/run/*pid; do
+		if kill -0 $(cat $pid); then
+			count=$((count+1))
+		fi
+	done
+	echo $count
+}
 
 TEST glusterd
 TEST pidof glusterd
-TEST $CLI volume info
-TEST $CLI volume create $V0 $H0:$B0/brick1
-setfattr -x trusted.glusterfs.volume-id $B0/brick1
-## If Extended attribute trusted.glusterfs.volume-id is not present
-## then volume should not be able to start
-TEST ! $CLI volume start $V0;
+TEST $CLI volume info;
+
+## Start and create a volume
+TEST recreate ${B0}/${V0}-0
+TEST recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+function volinfo_field()
+{
+    local vol=$1;
+    local field=$2;
+
+    $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify that all bricks start.
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 2 count_bricks $V0
+TEST $CLI volume stop $V0
+
+# Nuke one of the bricks and make sure it *doesn't* start.
+TEST recreate ${B0}/${V0}-1
+# We can't do the usual TEST/startup thing here because of another bug.  If
+# a server fails to start a brick, it won't start any others either.  Since
+# all of our bricks in testing are on one server, that means no bricks start
+# and so the volume doesn't start either.  Changing the order etc. doesn't
+# help, because the attempted startup order is non-deterministic.  Instead,
+# we just don't rely on whether or not the volume starts; the brick count is
+# sufficient for our purposes.
+$CLI volume start $V0;
+EXPECT 1 count_bricks $V0
+# If we can't depend on the volume starting, we can't depend on it stopping
+# either.
+$CLI volume stop $V0
+
+# Label the recreated brick and make sure it starts now.
+TEST $CLI volume label $V0 ${H0}:${B0}/${V0}-1
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 2 count_bricks $V0
+
+# Make sure we can mount and use the volume.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST dd if=/dev/zero of=$M0/block bs=4k count=1
+
+if [ "$EXIT_EARLY" = "1" ]; then
+	exit 0;
+fi
+
+## Finish up
+TEST umount $M0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
 cleanup;
author	Jeff Darcy <jdarcy@redhat.com>	2012-12-03 12:16:28 -0500
committer	Anand Avati <avati@redhat.com>	2012-12-04 14:42:30 -0800
commit	924702de358160b2536138c073d293b76512838a (patch)
tree	4b44790ef6500b3eca00d1cc78954b7ee19aad83 /tests
parent	86b01a278b55b19adefe9625e586252a5c75b474 (diff)