summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKotresh HR <khiremat@redhat.com>2016-09-06 18:28:42 +0530
committerRaghavendra Bhat <raghavendra@redhat.com>2016-09-08 10:09:33 -0700
commitb86a7de9b5ea9dcd0a630dbe09fce6d9ad0d8944 (patch)
treee9507103a00cc7ce0da30ebc8bc9fc8c8f2f2571
parent593b7a83f7408e59ab7b3ef7dfc4fe4096d6e3cd (diff)
feature/bitrot: Fix recovery of corrupted hardlink
Problem: When a file with hardlinks is corrupted in an EC volume, the recovery steps (described below) did not work. Only the name and metadata were healed, not the data. Cause: The bad file marker in the inode context is not removed. Hence when self-heal tries to open the file for data healing, it fails with EIO. Background: Bitrot deletes the inode context during forget. Briefly, recovery involves the following steps. 1. Delete the entry marked with the bad file xattr from the backend. Delete all the hardlinks, including the .glusterfs hardlink as well. 2. Access each hardlink of the file, including the original, from the mount. Step 2 sends a lookup to the brick where the files were deleted from the backend, and the lookup returns ENOENT. On ENOENT, the server xlator forgets the inode if there are no dentries associated with it. But in the case of hardlinks, forget won't be called, as dentries (the other hardlink files) are still associated with the inode. Hence bitrot-stub won't delete its context, causing the data self-heal to fail. Fix: Bitrot-stub should delete the inode context on getting ENOENT during lookup. Change-Id: Ice6adc18625799e7afd842ab33b3517c2be264c1 BUG: 1373520 Signed-off-by: Kotresh HR <khiremat@redhat.com> Reviewed-on: http://review.gluster.org/15408 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
-rw-r--r--tests/bitrot/bug-1294786.t6
-rw-r--r--tests/bitrot/bug-1373520.t63
-rw-r--r--tests/volume.rc4
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c12
4 files changed, 80 insertions, 5 deletions
diff --git a/tests/bitrot/bug-1294786.t b/tests/bitrot/bug-1294786.t
index 4911c1dd6c2..d395d76b7ed 100644
--- a/tests/bitrot/bug-1294786.t
+++ b/tests/bitrot/bug-1294786.t
@@ -16,10 +16,6 @@ function get_node_uuid {
getfattr -n trusted.glusterfs.node-uuid --only-values $M0/FILE1 2>/dev/null
}
-function get_quarantine_count {
- ls -l "$B1/.glusterfs/quanrantine" | wc -l
-}
-
cleanup;
TEST launch_cluster 2
@@ -64,7 +60,7 @@ TEST touch "$B1/.glusterfs/quanrantine/$gfid1"
TEST chmod 000 "$B1/.glusterfs/quanrantine/$gfid1"
TEST touch "$B1/.glusterfs/quanrantine/$gfid2"
TEST chmod 000 "$B1/.glusterfs/quanrantine/$gfid2"
-EXPECT "4" get_quarantine_count;
+EXPECT "4" get_quarantine_count "$B1";
TEST $CLI_1 volume stop $V0
TEST $CLI_1 volume start $V0
diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t
new file mode 100644
index 00000000000..3a0ac5293e0
--- /dev/null
+++ b/tests/bitrot/bug-1373520.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create a disperse volume
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+#Disable md-cache
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+#Mount the volume
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Enable bitrot
+TEST $CLI volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+#Create hardlink
+TEST `ln $M0/FILE1 $M0/HL_FILE1`
+
+#Corrupt file from back-end
+TEST stat $B0/${V0}5/FILE1
+SIZE=$(stat -c %s $B0/${V0}5/FILE1)
+echo "Corrupted data" >> $B0/${V0}5/FILE1
+gfid1=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE1)
+
+#Manually set bad-file xattr
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/${V0}5/FILE1
+TEST touch "$B0/${V0}5/.glusterfs/quanrantine/$gfid1"
+TEST chmod 000 "$B0/${V0}5/.glusterfs/quanrantine/$gfid1"
+EXPECT "3" get_quarantine_count "$B0/${V0}5";
+
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+#Trigger lookup so that bitrot xlator marks file as bad in its inode context.
+TEST stat $M0/FILE1
+
+#Delete file and all links from backend
+TEST stat $B0/${V0}5/FILE1
+TEST `ls -li $B0/${V0}5/FILE1 | awk '{print $1}' | xargs find $B0/${V0}5/ -inum | xargs -r rm -rf`
+
+#Access files
+TEST cat $M0/FILE1
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" stat -c %s $B0/${V0}5/FILE1
+
+TEST cat $M0/HL_FILE1
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" stat -c %s $B0/${V0}5/HL_FILE1
+
+cleanup;
diff --git a/tests/volume.rc b/tests/volume.rc
index 1b62c026a28..aa614c50489 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -611,6 +611,10 @@ function get_scrubd_count {
ps auxww | grep glusterfs | grep scrub.pid | grep -v grep | wc -l
}
+function get_quarantine_count {
+ ls -l "$1/.glusterfs/quanrantine" | wc -l
+}
+
function get_quotad_count {
ps auxww | grep glusterfs | grep quotad.pid | grep -v grep | wc -l
}
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 67103f6b5e1..4e01f5c86b1 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -2631,6 +2631,18 @@ br_stub_handle_lookup_error (xlator_t *this, inode_t *inode, int32_t op_errno)
}
UNLOCK (&inode->lock);
+ if (__br_stub_is_bad_object (ctx)) {
+ /* File is not present, might be deleted for recovery,
+ * del the bitrot inode context
+ */
+ ctx_addr = 0;
+ inode_ctx_del (inode, this, &ctx_addr);
+ if (ctx_addr) {
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE (ctx);
+ }
+ }
+
out:
return;
}