diff options
-rwxr-xr-x | tests/bugs/bug-1161311.t | 129 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 143 |
2 files changed, 253 insertions, 19 deletions
diff --git a/tests/bugs/bug-1161311.t b/tests/bugs/bug-1161311.t new file mode 100755 index 00000000000..52ed1555c20 --- /dev/null +++ b/tests/bugs/bug-1161311.t @@ -0,0 +1,129 @@ +#!/bin/bash + +# This tests for hard link preservation for files that are linked, when the +# file is undergoing migration + +# --- Improvements and other tests --- +## Fail rebalance of the large file for which links are created during P1/2 +### phases of migration +## Start with multiple hard links to the file and then create more during P1/2 +### phases of migration +## Test the same with NFS as the mount rather than FUSE +## Create links when file is under P2 of migration specifically +## Test with quota, to error out during hard link creation (if possible) + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +checksticky () { + i=0; + while [ ! -k $1 ]; do + sleep 1 + i=$((i+1)); + # Try for 10 seconds to get the sticky bit state + # else fail the test, as we may never see it + if [[ $i == 10 ]]; then + return $i + fi + echo "Waiting... $i" + done + echo "Done... got out @ $i" + return 0 +} + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3}; + +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; +EXPECT '3' brick_count $V0 + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +## Mount FUSE with caching disabled (read-write) +TEST glusterfs -s $H0 --volfile-id $V0 $M0; + +# Create a directories to hold the links +TEST mkdir $M0/dir1 +TEST mkdir -p $M0/dir2/dir3 + +# Create a large file (1GB), so that rebalance takes time +dd if=/dev/urandom of=$M0/dir1/FILE2 bs=64k count=10240 + +# Rename the file to create a linkto, for rebalance to +# act on the file +## FILE1 and FILE2 hashes are, 678b1c4a e22c1ada, so they fall +## into separate bricks when brick count is 3 +TEST mv $M0/dir1/FILE2 $M0/dir1/FILE1 + +# unmount and remount the volume +TEST umount $M0 +TEST glusterfs -s $H0 --volfile-id $V0 $M0; + +# Start the rebalance +TEST $CLI volume rebalance $V0 start force + +# Wait for FILE to get the sticky bit on, so that file is under +# active rebalance, before creating the links +TEST checksticky $B0/${V0}3/dir1/FILE1 + +# Create the links +## FILE3 FILE5 FILE7 have hashes, c8c91469 566d26ce 22ce7eba +## Which fall into separate bricks on a 3 brick layout +cd $M0 +TEST ln ./dir1/FILE1 ./dir1/FILE7 +TEST ln ./dir1/FILE1 ./dir1/FILE5 +TEST ln ./dir1/FILE1 ./dir1/FILE3 + +TEST ln ./dir1/FILE1 ./dir2/FILE7 +TEST ln ./dir1/FILE1 ./dir2/FILE5 +TEST ln ./dir1/FILE1 ./dir2/FILE3 + +TEST ln ./dir1/FILE1 ./dir2/dir3/FILE7 +TEST ln ./dir1/FILE1 ./dir2/dir3/FILE5 +TEST ln ./dir1/FILE1 ./dir2/dir3/FILE3 +cd / + +# Ideally for this test to have done its job, the file should still be +# under migration, so check the sticky bit again +TEST checksticky $B0/${V0}3/dir1/FILE1 + +# Wait for rebalance to complete +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 + +# Check if all files are clean and migrated right +## stat on the original file should show linkcount of 10 +linkcountsrc=$(stat -c %h $M0/dir1/FILE1) +TEST [[ $linkcountsrc == 10 ]] + +## inode and size of every file should be same as original file +inodesrc=$(stat -c %i $M0/dir1/FILE1) +TEST [[ $(stat -c %i $M0/dir1/FILE3) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir1/FILE5) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir1/FILE7) == $inodesrc ]] + +TEST [[ $(stat -c %i $M0/dir2/FILE3) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir2/FILE5) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir2/FILE7) == $inodesrc ]] + +TEST [[ $(stat -c %i $M0/dir2/dir3/FILE3) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir2/dir3/FILE5) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir2/dir3/FILE7) == $inodesrc ]] + +# Check, newer link creations +cd $M0 +TEST ln ./dir1/FILE1 ./FILE1 +TEST ln ./dir2/FILE3 ./FILE3 +TEST ln ./dir2/dir3/FILE5 ./FILE5 +TEST ln ./dir1/FILE7 ./FILE7 +cd / +linkcountsrc=$(stat -c %h $M0/dir1/FILE1) +TEST [[ $linkcountsrc == 14 ]] + +cleanup; diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index d396c2ee4ab..bc9d04d36f8 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -28,6 +28,8 @@ #include <libgen.h> #include <signal.h> +int dht_link2 (xlator_t *this, call_frame_t *frame, int op_ret); + int dht_aggregate (dict_t *this, char *key, data_t *value, void *data) { @@ -4490,54 +4492,156 @@ err: return 0; } - int dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - call_frame_t *prev = NULL; - dht_layout_t *layout = NULL; dht_local_t *local = NULL; - - prev = cookie; + int ret = -1; + gf_boolean_t stbuf_merged = _gf_false; + xlator_t *subvol = NULL; local = frame->local; - if (op_ret == -1) - goto out; - - layout = dht_layout_for_subvol (this, prev->this); - if (!layout) { - gf_msg_debug (this->name, 0, - "no pre-set layout for subvolume %s", - prev->this->name); - op_ret = -1; - op_errno = EINVAL; + if (op_ret == -1) { + /* No continuation on DHT inode missing errors, as we should + * then have a good stbuf that states P2 happened. We would + * get inode missing if, the file completed migrated between + * the lookup and the link call */ goto out; } + /* Update parent on success, even if P1/2 checks are positve. + * The second call on success will further update the parent */ if (local->loc.parent) { dht_inode_ctx_time_update (local->loc.parent, this, preparent, 0); dht_inode_ctx_time_update (local->loc.parent, this, postparent, 1); } - if (local->linked == _gf_true) { - local->stbuf = *stbuf; + + /* Update linkto attrs, if this is the first call and non-P2, + * if we detect P2 then we need to trust the attrs from the + * second call, not the first */ + if (local->linked == _gf_true && + ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2 (stbuf)) + || (local->call_cnt != 1 && + IS_DHT_MIGRATION_PHASE2 (&local->stbuf)))) { + dht_iatt_merge (this, &local->stbuf, stbuf, NULL); + stbuf_merged = _gf_true; dht_linkfile_attr_heal (frame, this); } + + /* No further P1/2 checks if we are in the second iteration of + * the call */ + if (local->call_cnt != 1) { + goto out; + } else { + /* Preserve the return values, in case the migration decides + * to recreate the link on the same subvol that the current + * hased for the link was created on. */ + dht_iatt_merge (this, &local->preparent, + preparent, NULL); + dht_iatt_merge (this, &local->postparent, + postparent, NULL); + if (!stbuf_merged) { + dht_iatt_merge (this, &local->stbuf, + stbuf, NULL); + stbuf_merged = _gf_true; + } + + local->inode = inode_ref (inode); + } + + local->op_errno = op_errno; + local->rebalance.target_op_fn = dht_link2; + /* Check if the rebalance phase2 is true */ + if (IS_DHT_MIGRATION_PHASE2 (stbuf)) { + ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol); + if (!subvol) { + /* Phase 2 of migration */ + ret = dht_rebalance_complete_check (this, frame); + if (!ret) + return 0; + } else { + dht_link2 (this, frame, 0); + return 0; + } + } + + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1 (stbuf)) { + ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol); + if (subvol) { + dht_link2 (this, frame, 0); + return 0; + } + ret = dht_rebalance_in_progress_check (this, frame); + if (!ret) + return 0; + } out: DHT_STRIP_PHASE1_FLAGS (stbuf); - DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent, - postparent, NULL); + + DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, + preparent, postparent, NULL); return 0; } int +dht_link2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + int op_errno = EINVAL; + + local = frame->local; + if (!local) + goto err; + + op_errno = local->op_errno; + if (op_ret == -1) + goto err; + + dht_inode_ctx_get1 (this, local->loc.inode, &subvol); + if (!subvol) { + subvol = local->cached_subvol; + if (!subvol) { + op_errno = EINVAL; + goto err; + } + } + + /* Second call to create link file could result in EEXIST as the + * first call created the linkto in the currently + * migrating subvol, which could be the new hashed subvol */ + if (local->link_subvol == subvol) { + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + DHT_STACK_UNWIND (link, frame, 0, 0, local->inode, + &local->stbuf, &local->preparent, + &local->postparent, NULL); + + return 0; + } + + local->call_cnt = 2; + + STACK_WIND (frame, dht_link_cbk, subvol, subvol->fops->link, + &local->loc, &local->loc2, NULL); + + return 0; +err: + DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL); + + return 0; +} + +int dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, @@ -4588,6 +4692,7 @@ dht_link (call_frame_t *frame, xlator_t *this, goto err; } + local->call_cnt = 1; cached_subvol = local->cached_subvol; if (!cached_subvol) { |