summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2018-09-28 17:00:00 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2018-12-12 12:57:59 +0000
commitfbdaffdb6d90409124507b3d9b15fc5d6b3ed8e6 (patch)
treeb3aa463db2e970750f87f6f7a341adda3d0e170e
parent2438964129d37a6ec62006916ff4e454f6b034b9 (diff)
afr: assign gfid during name heal when no 'source' is present.
Problem: If parent dir is in split-brain or has dirty xattrs set, and the file has gfid missing on one of the bricks, then name heal won't assign the gfid. Fix: Use the brick we select the gfid from as the 'source'. Note: Problem was found while trying to debug a split-brain issue on Cynthia Zhou's setup. fixes: bz#1655545 Change-Id: Id088d4f0fb017aa35122de426654194e581ed742 Reported-by: Cynthia Zhou <cynthia.zhou@nokia-sbell.com> Signed-off-by: Ravishankar N <ravishankar@redhat.com> (cherry picked from commit 4d58730c0cd6ab5db39aec8a15276f7bd3371b04)
-rw-r--r--tests/bugs/replicate/bug-1637249-gfid-heal.t149
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c51
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c4
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c47
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h2
5 files changed, 201 insertions, 52 deletions
diff --git a/tests/bugs/replicate/bug-1637249-gfid-heal.t b/tests/bugs/replicate/bug-1637249-gfid-heal.t
new file mode 100644
index 00000000000..e824f14531e
--- /dev/null
+++ b/tests/bugs/replicate/bug-1637249-gfid-heal.t
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+###############################################################################
+
+# Test for gfid + name heal when there is no 'source' brick, i.e. parent dir
+# xattrs are in split-brain or have dirty xattrs.
+
+TEST mkdir $M0/dir_pending
+TEST dd if=/dev/urandom of=$M0/dir_pending/file1 bs=1024 count=1024
+TEST mkdir $M0/dir_pending/dir11
+TEST mkdir $M0/dir_dirty
+TEST touch $M0/dir_dirty/file2
+
+# Set pending entry xattrs on dir_pending and remove gfid of entries under it on one brick.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir_pending
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/dir_pending
+
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/file1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/dir11)
+gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/dir11
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+
+# Set dirty entry xattrs on dir_dirty and remove gfid of entries under it on one brick.
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dir_dirty
+gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir_dirty/file2)
+gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_dirty/file2
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+# Create a file under dir_pending directly on the backend only on 1 brick
+TEST touch $B0/${V0}1/dir_pending/file3
+
+# Create a file under dir_pending directly on the backend on all bricks
+TEST touch $B0/${V0}0/dir_pending/file4
+TEST touch $B0/${V0}1/dir_pending/file4
+
+# Stop & start the volume and mount client again.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/dir_pending/file1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+TEST stat $M0/dir_pending/dir11
+EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_pending/dir11
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+
+TEST stat $M0/dir_dirty/file2
+EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir_dirty/file2
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+TEST stat $M0/dir_pending/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick.
+gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_pending/file3)
+TEST [ ! -z "$gfid_f3" ]
+EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_pending/file3
+
+TEST stat $M0/dir_pending/file4
+gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file4)
+TEST [ ! -z "$gfid_f4" ]
+EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file4
+###############################################################################
+
+# Test for gfid + name heal when all bricks are 'source', i.e. parent dir
+# does not have any pending or dirty xattrs.
+
+TEST mkdir $M0/dir_clean
+TEST dd if=/dev/urandom of=$M0/dir_clean/file1 bs=1024 count=1024
+TEST mkdir $M0/dir_clean/dir11
+
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/file1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/dir11)
+gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/dir11
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+# Create a file under dir_clean directly on the backend only on 1 brick
+TEST touch $B0/${V0}1/dir_clean/file3
+
+# Create a file under dir_clean directly on the backend on all bricks
+TEST touch $B0/${V0}0/dir_clean/file4
+TEST touch $B0/${V0}1/dir_clean/file4
+
+# Stop & start the volume and mount client again.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/dir_clean/file1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+TEST stat $M0/dir_clean/dir11
+EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_clean/dir11
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+TEST stat $M0/dir_clean/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick.
+gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_clean/file3)
+TEST [ ! -z "$gfid_f3" ]
+EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_clean/file3
+
+TEST stat $M0/dir_clean/file4
+gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file4)
+TEST [ ! -z "$gfid_f4" ]
+EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file4
+###############################################################################
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 402f5ea5888..c0498b31578 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -21,7 +21,7 @@ afr_heal_synctask(xlator_t *this, afr_local_t *local);
int
afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
inode_t *inode, struct afr_reply *replies, int source,
- unsigned char *sources, void *gfid)
+ unsigned char *sources, void *gfid, int *gfid_idx)
{
afr_private_t *priv = NULL;
call_frame_t *frame = NULL;
@@ -37,15 +37,30 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
priv = this->private;
wind_on = alloca0(priv->child_count);
- ia_type = replies[source].poststat.ia_type;
- if ((ia_type == IA_INVAL) &&
- (AFR_COUNT(sources, priv->child_count) == priv->child_count)) {
- /* If a file is present on some bricks of the replica but parent
- * dir does not have pending xattrs, all bricks are sources and
- * the 'source' we selected earlier might be one where the file
- * is not actually present. Hence check if file is present in
- * any of the sources.*/
- for (i = 0; i < priv->child_count; i++) {
+ if (source >= 0 && replies[source].valid && replies[source].op_ret == 0)
+ ia_type = replies[source].poststat.ia_type;
+
+ if (ia_type != IA_INVAL)
+ goto heal;
+
+ /* If ia_type is still invalid, it means either
+ * (a)'source' was -1, i.e. parent dir pending xattrs are in split-brain
+ * (or) (b) The parent dir pending xattrs are all zeroes (i.e. all bricks
+ * are sources) and the 'source' we selected earlier might be the one where
+ * the file is not actually present.
+ *
+ * In both cases, let us pick a brick with a successful reply and use its
+ * ia_type.
+ * */
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ /* case (a) above. */
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+ } else {
+ /* case (b) above. */
if (i == source)
continue;
if (sources[i] && replies[i].valid && replies[i].op_ret == 0) {
@@ -55,6 +70,7 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
}
}
+heal:
/* gfid heal on those subvolumes that do not have gfid associated
* with the inode and update those replies.
*/
@@ -103,7 +119,22 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
afr_reply_wipe(&replies[i]);
afr_reply_copy(&replies[i], &local->replies[i]);
}
+ if (gfid_idx && (*gfid_idx == -1)) {
+ /*Pick a brick where the gifd heal was successful.*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid)) {
+ *gfid_idx = i;
+ break;
+ }
+ }
+ }
out:
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) {
+ ret = -afr_final_errno(local, priv);
+ }
loc_wipe(&loc);
if (frame)
AFR_STACK_DESTROY(frame);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 619558e94b7..fb6952c10fd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -192,7 +192,7 @@ __afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (replies[source].op_ret == 0) {
ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
source, sources,
- &replies[source].poststat.ia_gfid);
+ &replies[source].poststat.ia_gfid, NULL);
if (ret)
return ret;
}
@@ -319,7 +319,7 @@ __afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
source, sources,
- &replies[source].poststat.ia_gfid);
+ &replies[source].poststat.ia_gfid, NULL);
if (ret)
return ret;
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 39aacee6ecf..aa20ad1b835 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -18,7 +18,8 @@ __afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies, void *gfid,
unsigned char *locked_on, int source,
- unsigned char *sources, gf_boolean_t is_gfid_absent)
+ unsigned char *sources, gf_boolean_t is_gfid_absent,
+ int *gfid_idx)
{
int ret = 0;
int up_count = 0;
@@ -46,8 +47,8 @@ __afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
}
}
- afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
- sources, gfid);
+ ret = afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
+ sources, gfid, gfid_idx);
out:
return ret;
@@ -146,35 +147,6 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
return ret;
}
-/* This function is to be called after ensuring that there is no gfid mismatch
- * for the inode across multiple sources
- */
-static int
-afr_selfheal_gfid_idx_get(xlator_t *this, struct afr_reply *replies,
- unsigned char *sources)
-{
- int i = 0;
- int gfid_idx = -1;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid || replies[i].op_ret != 0)
- continue;
-
- if (!sources[i])
- continue;
-
- if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
- continue;
-
- gfid_idx = i;
- break;
- }
- return gfid_idx;
-}
-
static gf_boolean_t
afr_selfheal_name_need_heal_check(xlator_t *this, struct afr_reply *replies)
{
@@ -400,21 +372,18 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
gfid = gfid_req;
} else {
gfid = &replies[gfid_idx].poststat.ia_gfid;
+ if (source == -1)
+ /* Either entry split-brain or dirty xattrs are present on parent.*/
+ source = gfid_idx;
}
is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
replies, gfid, locked_on, source, sources,
- is_gfid_absent);
+ is_gfid_absent, &gfid_idx);
if (ret)
return ret;
- if (gfid_idx == -1) {
- gfid_idx = afr_selfheal_gfid_idx_get(this, replies, sources);
- if (gfid_idx == -1)
- return -1;
- }
-
ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
inode, replies, gfid_idx);
if (ret == -EIO)
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 9c7418c7169..41adbe05dc8 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -119,7 +119,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode);
int
afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
inode_t *inode, struct afr_reply *replies, int source,
- unsigned char *sources, void *gfid);
+ unsigned char *sources, void *gfid, int *gfid_idx);
int
afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,