From 3fcf536f4983fc8a3da7c5204f3dd9b75259a7a8 Mon Sep 17 00:00:00 2001 From: Richard Wareing Date: Thu, 24 Sep 2015 15:53:12 -0700 Subject: cluster/afr: Non-destructive GFID unsplit brain support for v3.6.x Summary: - v3.6.3 port of non-destructive GFID unsplit-brain code, almost a re-write for AFR2, but the original behavior lives on. - This feature allows the GlusterFS filesystem to automagically resolve GFID split-brain situations by choosing the authoritative file based on the last modification time. Other policies such as majority or size are also possible but not implemented just yet. - Core feature to Halo Geo-Replication, as this (gfid) form of split-brain is an everyday possibility with async mounts, so there needs to be an automated & scalable method to resolve them via the SHD or optionally in-line by FUSE clients or NFS daemons. - Operational notes: 1. Files or directory entries are supported, you can even write files into a directory and they will not be lost. 2. Streamed writes to a file are fully supported while a split-brain resolution happens, i.e. the writes will not be interrupted while the unsplit takes place. 3. 
Un-split (ones which are determined not to be "authoritative") files are renamed like so: "._" Test Plan: - Run prove -v tests/basic/gfid_unsplit.t - Test output: https://phabricator.fb.com/P20041740 Reviewers: moox, dph, sshreyas Reviewed By: sshreyas Differential Revision: https://phabricator.fb.com/D2479409 Signature: t1:2479409:1443208319:373218aa9758a1b48db23ea5e211ec303fa92e64 Blame Revision: Change-Id: I5b3d2e79fad74b4372c02b86219e8ee98f5e29dc Change-Id: I8ef719bcccb19ab6674647e02b72e1b36155fed9 Signed-off-by: Jeff Darcy Reviewed-on: https://review.gluster.org/17720 Smoke: Gluster Build System Tested-by: Jeff Darcy Reviewed-by: Jeff Darcy CentOS-regression: Gluster Build System --- tests/basic/gfid_unsplit.t | 102 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tests/basic/gfid_unsplit.t (limited to 'tests/basic') diff --git a/tests/basic/gfid_unsplit.t b/tests/basic/gfid_unsplit.t new file mode 100644 index 00000000000..9bb52f4533a --- /dev/null +++ b/tests/basic/gfid_unsplit.t @@ -0,0 +1,102 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +# Setup a cluster with 3 replicas, and fav child by majority on +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}; +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 cluster.choose-local off +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume set $V0 nfs.disable off +#EST $CLI volume set $V0 cluster.favorite-child-by-majority on +#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on +TEST $CLI volume set $V0 cluster.favorite-child-policy majority +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume start $V0 +sleep 5 + +# Part I: FUSE Test +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \ + --attribute-timeout=0 --entry-timeout=0 + +dd if=/dev/urandom of=$M0/splitfile bs=128k count=5 2>/dev/null + +MD5=$(md5sum $M0/splitfile | cut -d\ -f1) + +# Create a split-brain by downing a brick, and flipping the +# gfid on the down brick, then bring the brick back up. 
+TEST kill_brick $V0 $H0 $B0/${V0}1 +GFID_DIR_B1="$B0/${V0}1/.glusterfs/$(getfattr -n trusted.gfid -e hex $B0/${V0}1/splitfile 2>/dev/null | grep ^trusted | cut -d= -f2 | awk '{print substr($0,3,2)}')" +rm -rf $GFID_DIR_B1 +TEST setfattr -n "trusted.gfid" -v "0xfd551a5cfddd4c1aa4d096ef09ef5c08" $B0/${V0}1/splitfile + +GFID_DIR_B3="$B0/${V0}3/.glusterfs/$(getfattr -n trusted.gfid -e hex $B0/${V0}3/splitfile 2>/dev/null | grep ^trusted | cut -d= -f2 | awk '{print substr($0,3,2)}')" +#EST rm -f $B0/${V0}3/splitfile +#m -rf $GFID_DIR_B3 + +# Restart the down brick +TEST $CLI volume start $V0 force +EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0 +sleep 5 + +# Tickle the file to trigger the gfid unsplit +TEST stat $M0/splitfile +sleep 1 + +# Verify the file is readable +TEST dd if=$M0/splitfile of=/dev/null 2>/dev/null + +# Verify the MD5 signature of the file +HEALED_MD5=$(md5sum $M0/splitfile | cut -d\ -f1) +TEST [ "$MD5" == "$HEALED_MD5" ] + +# Verify the file can be removed +TEST rm -f $M0/splitfile + +# Part II: NFS test +TEST mount -t nfs -o nolock,noatime,noacl,soft,intr $H0:/$V0 $N0; + +dd if=/dev/urandom of=$N0/splitfile bs=128k count=5 2>/dev/null + +MD5=$(md5sum $N0/splitfile | cut -d\ -f1) + +# Create a split-brain by downing a brick, and flipping the +# gfid on the down brick, then bring the brick back up. 
+TEST kill_brick $V0 $H0 $B0/${V0}1 +GFID_DIR_B1="$B0/${V0}1/.glusterfs/$(getfattr -n trusted.gfid -e hex $B0/${V0}1/splitfile 2>/dev/null | grep ^trusted | cut -d= -f2 | awk '{print substr($0,3,2)}')" +rm -rf $GFID_DIR_B1 +TEST setfattr -n "trusted.gfid" -v "0xfd551a5cfddd4c1aa4d096ef09ef5c08" $B0/${V0}1/splitfile + +GFID_DIR_B3="$B0/${V0}3/.glusterfs/$(getfattr -n trusted.gfid -e hex $B0/${V0}3/splitfile 2>/dev/null | grep ^trusted | cut -d= -f2 | awk '{print substr($0,3,2)}')" +#EST rm -f $B0/${V0}3/splitfile +#m -rf $GFID_DIR_B3 + +# Restart the down brick +TEST $CLI volume start $V0 force +EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0 +sleep 5 + +# Tickle the file to trigger the gfid unsplit +TEST stat $N0/splitfile +sleep 1 + +# Verify the file is readable +TEST dd if=$N0/splitfile of=/dev/null 2>/dev/null + +# Verify the MD5 signature of the file +HEALED_MD5=$(md5sum $N0/splitfile | cut -d\ -f1) +TEST [ "$MD5" == "$HEALED_MD5" ] + +# Verify the file can be removed +TEST rm -f $N0/splitfile + +cleanup -- cgit