diff options
author | Ravishankar N <ravishankar@redhat.com> | 2017-04-02 18:08:04 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2017-04-18 22:29:25 -0400 |
commit | 10dad995c989e9d77c341135d7c48817baba966c (patch) | |
tree | be377c185b2bdaa186d7d534b26d14e7d0765c5f | |
parent | 43695cfba3bcbb53d7127128e37a724862f65017 (diff) |
afr: don't do a post-op on a brick if op failed
Problem:
In afr-v2, self-blaming xattrs are not there by design. But if the FOP
failed on a brick due to an error other than ENOTCONN (or even due to
ENOTCONN, but we regained connection before postop was wound), we wind
the post-op also on the failed brick, leading to setting self-blaming
xattrs on that brick. This can lead to undesired results like healing of
files in split-brain etc.
Fix:
If a fop failed on a brick on which pre-op was successful, do not
perform post-op on it. This also produces the desired effect of not
resetting the dirty xattr on the brick, which is how it should be
because if the fop failed on a brick, there is no reason to clear the
dirty bit which actually serves as an indication of the failure.
Change-Id: I5f1caf4d1b39f36cf8093ccef940118638caa9c4
BUG: 1438255
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://review.gluster.org/16976
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
-rw-r--r-- | tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t | 46 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 21 |
2 files changed, 61 insertions, 6 deletions
diff --git a/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t new file mode 100644 index 00000000000..edfd0d7820d --- /dev/null +++ b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t @@ -0,0 +1,46 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +NEW_USER=bug1438255 +NEW_UID=1438255 +NEW_GID=1438255 + +TEST groupadd -o -g ${NEW_GID} ${NEW_USER}-${NEW_GID} +TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -K MAIL_DIR=/dev/null ${NEW_USER} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off + +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +TEST touch $M0/FILE +TEST kill_brick $V0 $H0 $B0/${V0}2 +chown $NEW_UID:$NEW_GID $M0/FILE +EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE +EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +# setfattr done as NEW_USER fails on 3rd brick with EPERM but suceeds on +# the first 2 and hence on the mount. +su -m bug1438255 -c "setfattr -n user.myattr -v myvalue $M0/FILE" +TEST [ $? -eq 0 ] +EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE +EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE +# Brick 3 does not have any self-blaming pending xattr. +TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE + +TEST userdel --force ${NEW_USER} +TEST groupdel ${NEW_USER}-${NEW_GID} +cleanup + + diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index cbfddf2009d..7dda3da67ab 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -35,6 +35,7 @@ afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this); int afr_changelog_call_count (afr_transaction_type type, unsigned char *pre_op_subvols, + unsigned char *failed_subvols, unsigned int child_count); int afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, @@ -590,11 +591,17 @@ afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock) int afr_changelog_call_count (afr_transaction_type type, unsigned char *pre_op_subvols, + unsigned char *failed_subvols, unsigned int child_count) { + int i = 0; int call_count = 0; - call_count = AFR_COUNT(pre_op_subvols, child_count); + for (i = 0; i < child_count; i++) { + if (pre_op_subvols[i] && !failed_subvols[i]) { + call_count++; + } + } if (type == AFR_ENTRY_RENAME_TRANSACTION) call_count *= 2; @@ -1352,6 +1359,7 @@ afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count, *call_count = afr_changelog_call_count (local->transaction.type, local->transaction.pre_op, + local->transaction.failed_subvols, priv->child_count); if (*call_count == 0) { @@ -1418,7 +1426,8 @@ afr_pre_op_fop_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, for (i = 0; i < priv->child_count; i++) { /* Means lock did not succeed on this brick */ - if (!local->transaction.pre_op[i]) + if (!local->transaction.pre_op[i] || + local->transaction.failed_subvols[i]) continue; STACK_WIND_COOKIE (frame, compound_cbk, @@ -1562,9 +1571,8 @@ afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, local->c_args = args; for (i = 0; i < priv->child_count; i++) { - /* pre_op[i] has to be true for all nodes that were - * successfully locked. */ - if (!local->transaction.pre_op[i]) + if (!local->transaction.pre_op[i] || + local->transaction.failed_subvols[i]) continue; STACK_WIND_COOKIE (frame, afr_post_op_unlock_cbk, (void *) (long) i, @@ -1606,7 +1614,8 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, return 0; for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.pre_op[i]) + if (!local->transaction.pre_op[i] || + local->transaction.failed_subvols[i]) continue; switch (local->transaction.type) { |