diff options
-rw-r--r-- | tests/bugs/replicate/bug-1493415-gfid-heal.t | 68 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 78 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 41 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 64 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 4 |
5 files changed, 188 insertions, 67 deletions
diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t new file mode 100644 index 00000000000..125c35a7a21 --- /dev/null +++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t @@ -0,0 +1,68 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST $CLI volume set $V0 self-heal-daemon off + +# Create base entry in indices/xattrop +echo "Data" > $M0/FILE + +#------------------------------------------------------------------------------# +TEST touch $M0/f1 +gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1) +gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1) + +# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a +# brick crash at the point where file got created but no xattrs were set. +TEST setfattr -x trusted.gfid $B0/${V0}1/f1 +TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 + +# Assume there were no pending xattrs on parent dir due to 1st brick crashing +# too. Then name heal from client must heal the gfid. +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST stat $M0/f1 +EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1 +TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 + +#------------------------------------------------------------------------------# +TEST mkdir $M0/dir +TEST touch $M0/dir/f2 +gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2) +gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2) + +# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a +# brick crash at the point where file got created but no xattrs were set. +TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2 +TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 + +#Now simulate setting of pending entry xattr on parent dir of 1st brick. +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir +create_brick_xattrop_entry $B0/${V0}0 dir + +#Trigger entry-heal via shd +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2 +TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 + +#------------------------------------------------------------------------------# +cleanup; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 55b978bf962..20e81dd43e0 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -20,6 +20,84 @@ void afr_heal_synctask (xlator_t *this, afr_local_t *local); int +afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name, + inode_t *inode, struct afr_reply *replies, + int source, void *gfid) +{ + afr_private_t *priv = NULL; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + unsigned char *wind_on = NULL; + ia_type_t ia_type = IA_INVAL; + dict_t *xdata = NULL; + loc_t loc = {0, }; + int ret = 0; + int i = 0; + + priv = this->private; + wind_on = alloca0 (priv->child_count); + ia_type = replies[source].poststat.ia_type; + + /* gfid heal on those subvolumes that do not have gfid associated + * with the inode and update those replies. + */ + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; + if (!gf_uuid_is_null (replies[i].poststat.ia_gfid) || + replies[i].poststat.ia_type != ia_type) + continue; + + wind_on[i] = 1; + } + + if (AFR_COUNT(wind_on, priv->child_count) == 0) + return 0; + + xdata = dict_new (); + if (!xdata) { + ret = -ENOMEM; + goto out; + } + + ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16); + if (ret) { + ret = -ENOMEM; + goto out; + } + + frame = afr_frame_create (this); + if (!frame) { + ret = -ENOMEM; + goto out; + } + + local = frame->local; + loc.parent = inode_ref (parent); + gf_uuid_copy (loc.pargfid, parent->gfid); + loc.name = name; + loc.inode = inode_ref (inode); + + AFR_ONLIST (wind_on, frame, afr_selfheal_discover_cbk, lookup, + &loc, xdata); + + for (i = 0; i < priv->child_count; i++) { + if (!wind_on[i]) + continue; + afr_reply_wipe (&replies[i]); + afr_reply_copy (&replies[i], &local->replies[i]); + } +out: + loc_wipe (&loc); + if (frame) + AFR_STACK_DESTROY (frame); + if (xdata) + dict_unref (xdata); + + return ret; +} + +int afr_gfid_sbrain_source_from_src_brick (xlator_t *this, struct afr_reply *replies, char *src_brick) diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index afa3f9044e1..647dd71911b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -64,7 +64,6 @@ afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name, return ret; } - int afr_selfheal_recreate_entry (call_frame_t *frame, int dst, int source, unsigned char *sources, inode_t *dir, @@ -164,7 +163,6 @@ out: return ret; } - static int __afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, char *name, inode_t *inode, int source, @@ -187,6 +185,14 @@ __afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, (replies[source].op_errno != ENOENT)) return -replies[source].op_errno; + if (replies[source].op_ret == 0) { + ret = afr_lookup_and_heal_gfid (this, fd->inode, name, + inode, replies, source, + &replies[source].poststat.ia_gfid); + if (ret) + return ret; + } + for (i = 0; i < priv->child_count; i++) { if (!healed_sinks[i]) continue; @@ -223,8 +229,12 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this, int i = 0; int ret = -1; afr_private_t *priv = NULL; + void *gfid = NULL; + ia_type_t ia_type = IA_INVAL; priv = this->private; + gfid = &replies[src_idx].poststat.ia_gfid; + ia_type = replies[src_idx].poststat.ia_type; for (i = 0; i < priv->child_count; i++) { if (i == src_idx) @@ -236,8 +246,8 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this, if (replies[i].op_ret != 0) continue; - if (gf_uuid_compare (replies[src_idx].poststat.ia_gfid, - replies[i].poststat.ia_gfid)) { + if (gf_uuid_compare (gfid, replies[i].poststat.ia_gfid) && + (ia_type == replies[i].poststat.ia_type)) { ret = afr_gfid_split_brain_source (this, replies, inode, pargfid, bname, src_idx, i, @@ -251,8 +261,7 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this, return ret; } - if ((replies[src_idx].poststat.ia_type) != - (replies[i].poststat.ia_type)) { + if (ia_type != replies[i].poststat.ia_type) { gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, "Type mismatch detected " "for <gfid:%s>/%s>, %s on %s and %s on %s. " @@ -310,6 +319,12 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, } } + ret = afr_lookup_and_heal_gfid (this, fd->inode, name, inode, replies, + source, + &replies[source].poststat.ia_gfid); + if (ret) + return ret; + /* In case of type mismatch / unable to resolve gfid mismatch on the * entry, return -1.*/ ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies, inode, @@ -542,9 +557,19 @@ afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, struct afr_reply *replies = NULL; struct afr_reply *par_replies = NULL; afr_private_t *priv = NULL; + dict_t *xattr = NULL; priv = this->private; + xattr = dict_new (); + if (!xattr) + return -ENOMEM; + ret = dict_set_int32 (xattr, GF_GFIDLESS_LOOKUP, 1); + if (ret) { + dict_unref (xattr); + return -1; + } + sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); @@ -576,7 +601,7 @@ afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, inode = afr_selfheal_unlocked_lookup_on (frame, fd->inode, name, replies, locked_on, - NULL); + xattr); if (!inode) { ret = -ENOMEM; goto unlock; @@ -608,6 +633,8 @@ unlock: afr_replies_wipe (replies, priv->child_count); if (par_replies) afr_replies_wipe (par_replies, priv->child_count); + if (xattr) + dict_unref (xattr); return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 2aad4c75bee..352d151207e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -18,50 +18,18 @@ int __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, inode_t *inode, struct afr_reply *replies, void *gfid, - unsigned char *locked_on, + unsigned char *locked_on, int source, gf_boolean_t is_gfid_absent) { int ret = 0; int up_count = 0; int locked_count = 0; - int i = 0; afr_private_t *priv = NULL; - dict_t *xdata = NULL; - loc_t loc = {0, }; - call_frame_t *new_frame = NULL; - afr_local_t *new_local = NULL; - unsigned char *wind_on = NULL; priv = this->private; - wind_on = alloca0 (priv->child_count); - - new_frame = afr_frame_create (this); - if (!new_frame) { - ret = -ENOMEM; - goto out; - } - - new_local = new_frame->local; gf_uuid_copy (parent->gfid, pargfid); - xdata = dict_new (); - if (!xdata) { - ret = -ENOMEM; - goto out; - } - - ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16); - if (ret) { - ret = -ENOMEM; - goto out; - } - - loc.parent = inode_ref (parent); - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.pargfid, pargfid); - loc.name = bname; - if (is_gfid_absent) { /* Ensure all children of AFR are up before performing gfid heal, to * guard against the possibility of gfid split brain. */ @@ -79,34 +47,10 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, } } - /* gfid heal on those subvolumes that do not have gfid associated - * with the inode and update those replies. - */ - for (i = 0; i < priv->child_count; i++) { - if (replies[i].valid && replies[i].op_ret == 0 && - !gf_uuid_is_null (replies[i].poststat.ia_gfid) && - !gf_uuid_compare (replies[i].poststat.ia_gfid, gfid)) - continue; - wind_on[i] = 1; - } - - AFR_ONLIST (wind_on, new_frame, afr_selfheal_discover_cbk, lookup, - &loc, xdata); - - for (i = 0; i < priv->child_count; i++) { - if (!wind_on[i]) - continue; - afr_reply_wipe (&replies[i]); - afr_reply_copy (&replies[i], &new_local->replies[i]); - } + afr_lookup_and_heal_gfid (this, parent, bname, inode, replies, source, + gfid); out: - loc_wipe (&loc); - if (xdata) - dict_unref (xdata); - if (new_frame) - AFR_STACK_DESTROY (new_frame); - return ret; } @@ -481,7 +425,7 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false; ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode, - replies, gfid, locked_on, + replies, gfid, locked_on, source, is_gfid_absent); if (ret) return ret; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index b54da1facfd..8b0047df28f 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -110,6 +110,10 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode); int afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode); +int +afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name, + inode_t *inode, struct afr_reply *replies, int source, + void *gfid); int afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, |