diff options
-rw-r--r-- | tests/basic/afr/resolve.t | 8 | ||||
-rw-r--r-- | tests/volume.rc | 7 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 56 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 61 |
4 files changed, 105 insertions, 27 deletions
diff --git a/tests/basic/afr/resolve.t b/tests/basic/afr/resolve.t index ce7ec841418..2d400563c2e 100644 --- a/tests/basic/afr/resolve.t +++ b/tests/basic/afr/resolve.t @@ -24,7 +24,7 @@ TEST kill_brick $V0 $H0 $B0/${V0}0 rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a TEST $CLI volume start $V0 force -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 #Test that the lookup returns ENOENT instead of ESTALE #If lookup returns ESTALE this command will fail with ESTALE TEST touch f @@ -44,8 +44,8 @@ echo jkl > $M1/b #gfid-mismatch happened. This should result in EIO TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1 TEST $CLI volume start $V0 force -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 -# The kernel knows nothing about the tricks done to the volume, and the file +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +# The kernel knows nothing about the tricks done to the volume, and the file # may still be in page cache. Wait for timeout. -EXPECT_WITHIN 10 "" cat $M0/b +EXPECT_WITHIN 10 "^$" cat $M0/b cleanup diff --git a/tests/volume.rc b/tests/volume.rc index 36f1350b9bc..7e8810852f5 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -103,6 +103,13 @@ function _afr_child_up_status { echo "$up" } +function afr_child_up_status_meta { + local mnt=$1 + local repl=$2 + local child=$3 + grep "child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}' +} + function afr_child_up_status { local vol=$1 #brick_id is (brick-num in volume info - 1) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 533a7b5d5a1..04e4ca315f2 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1219,6 +1219,50 @@ afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2) return _gf_true; } +static int +afr_get_parent_read_subvol (xlator_t *this, inode_t *parent, + struct afr_reply *replies, unsigned char *readable) +{ + int i = 0; + int par_read_subvol = -1; + int par_read_subvol_iter = -1; + afr_private_t *priv = NULL; + + priv = this->private; + + if (parent) + par_read_subvol = afr_data_subvol_get (parent, this, 0, 0); + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + + if (replies[i].op_ret < 0) + continue; + + if (par_read_subvol_iter == -1) { + par_read_subvol_iter = i; + continue; + } + + if ((i != par_read_subvol) && readable[i]) + par_read_subvol_iter = i; + + if (i == par_read_subvol) + par_read_subvol_iter = i; + } + /* At the end of the for-loop, the only reason why @par_read_subvol_iter + * could be -1 is when this LOOKUP has failed on all sub-volumes. + * So it is okay to send an arbitrary subvolume (0 in this case) + * as parent read subvol. + */ + if (par_read_subvol_iter == -1) + par_read_subvol_iter = 0; + + return par_read_subvol_iter; + +} + static void afr_lookup_done (call_frame_t *frame, xlator_t *this) { @@ -1227,23 +1271,25 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) int i = -1; int op_errno = 0; int read_subvol = 0; + int par_read_subvol = 0; unsigned char *readable = NULL; int event = 0; struct afr_reply *replies = NULL; uuid_t read_gfid = {0, }; gf_boolean_t locked_entry = _gf_false; gf_boolean_t can_interpret = _gf_true; + inode_t *parent = NULL; priv = this->private; local = frame->local; replies = local->replies; + parent = local->loc.parent; locked_entry = afr_is_entry_possibly_under_txn (local, this); readable = alloca0 (priv->child_count); - afr_inode_read_subvol_get (local->loc.parent, this, readable, - NULL, &event); + afr_inode_read_subvol_get (parent, this, readable, NULL, &event); /* First, check if we have a gfid-change from somewhere, If so, propagate that so that a fresh lookup can be @@ -1269,7 +1315,6 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) "underway" in creation */ local->op_ret = -1; local->op_errno = ENOENT; - read_subvol = i; goto unwind; } @@ -1347,10 +1392,13 @@ unwind: if (read_subvol == -1) read_subvol = 0; + par_read_subvol = afr_get_parent_read_subvol (this, parent, replies, + readable); + AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, local->inode, &local->replies[read_subvol].poststat, local->replies[read_subvol].xdata, - &local->replies[read_subvol].postparent); + &local->replies[par_read_subvol].postparent); } /* diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index af6a1787593..28bf89f2842 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -123,22 +123,50 @@ out: return 0; } +static int +afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol) +{ + int gen = 0; + int entry_read_subvol = 0; + unsigned char *data_readable = NULL; + unsigned char *metadata_readable = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + data_readable = alloca0 (priv->child_count); + metadata_readable = alloca0 (priv->child_count); + + afr_inode_read_subvol_get (inode, this, data_readable, + metadata_readable, &gen); + + if (gen != priv->event_generation || + !data_readable[par_read_subvol] || + !metadata_readable[par_read_subvol]) + return -1; + + /* Once the control reaches the following statement, it means that the + * parent's read subvol is perfectly readable. So calling + * either afr_data_subvol_get() or afr_metadata_subvol_get() would + * yield the same result. Hence, choosing afr_data_subvol_get() below. + */ + entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0); + if (entry_read_subvol != par_read_subvol) + return -1; + + return 0; + +} static void afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol, gf_dirent_t *entries, fd_t *fd) { - afr_private_t *priv = NULL; - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; - unsigned char *data_readable = NULL; - unsigned char *metadata_readable = NULL; - int gen = 0; + int ret = -1; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + xlator_t *this = NULL; - priv = THIS->private; - - data_readable = alloca0 (priv->child_count); - metadata_readable = alloca0 (priv->child_count); + this = THIS; list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) { if (__is_root_gfid (fd->inode->gfid) && @@ -150,17 +178,12 @@ afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol, list_add_tail (&entry->list, &entries->list); if (entry->inode) { - gen = 0; - afr_inode_read_subvol_get (entry->inode, THIS, - data_readable, - metadata_readable, &gen); - - if (gen != priv->event_generation || - !data_readable[subvol] || - !metadata_readable[subvol]) { - + ret = afr_validate_read_subvol (entry->inode, this, + subvol); + if (ret == -1) { inode_unref (entry->inode); entry->inode = NULL; + continue; } } } |