diff options
| -rwxr-xr-x | tests/bugs/bug-1161311.t | 129 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 143 | 
2 files changed, 253 insertions, 19 deletions
diff --git a/tests/bugs/bug-1161311.t b/tests/bugs/bug-1161311.t new file mode 100755 index 00000000000..52ed1555c20 --- /dev/null +++ b/tests/bugs/bug-1161311.t @@ -0,0 +1,129 @@ +#!/bin/bash + +# This tests for hard link preservation for files that are linked, when the +# file is undergoing migration + +# --- Improvements and other tests --- +## Fail rebalance of the large file for which links are created during P1/2 +### phases of migration +## Start with multiple hard links to the file and then create more during P1/2 +### phases of migration +## Test the same with NFS as the mount rather than FUSE +## Create links when file is under P2 of migration specifically +## Test with quota, to error out during hard link creation (if possible) + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +checksticky () { +	i=0; +	while [ ! -k $1 ]; do +		sleep 1 +		i=$((i+1)); +		# Try for 10 seconds to get the sticky bit state +		# else fail the test, as we may never see it +		if [[ $i == 10 ]]; then +			return $i +		fi +		echo "Waiting... $i" +	done +        echo "Done... 
got out @ $i" +	return 0 +} + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3}; + +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; +EXPECT '3' brick_count $V0 + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +## Mount FUSE with caching disabled (read-write) +TEST glusterfs -s $H0 --volfile-id $V0 $M0; + +# Create a directories to hold the links +TEST mkdir $M0/dir1 +TEST mkdir -p $M0/dir2/dir3 + +# Create a large file (1GB), so that rebalance takes time +dd if=/dev/urandom of=$M0/dir1/FILE2 bs=64k count=10240 + +# Rename the file to create a linkto, for rebalance to +# act on the file +## FILE1 and FILE2 hashes are, 678b1c4a e22c1ada, so they fall +## into separate bricks when brick count is 3 +TEST mv $M0/dir1/FILE2 $M0/dir1/FILE1 + +# unmount and remount the volume +TEST umount $M0 +TEST glusterfs -s $H0 --volfile-id $V0 $M0; + +# Start the rebalance +TEST $CLI volume rebalance $V0 start force + +# Wait for FILE to get the sticky bit on, so that file is under +# active rebalance, before creating the links +TEST checksticky $B0/${V0}3/dir1/FILE1 + +# Create the links +## FILE3 FILE5 FILE7 have hashes, c8c91469 566d26ce 22ce7eba +## Which fall into separate bricks on a 3 brick layout +cd $M0 +TEST ln ./dir1/FILE1 ./dir1/FILE7 +TEST ln ./dir1/FILE1 ./dir1/FILE5 +TEST ln ./dir1/FILE1 ./dir1/FILE3 + +TEST ln ./dir1/FILE1 ./dir2/FILE7 +TEST ln ./dir1/FILE1 ./dir2/FILE5 +TEST ln ./dir1/FILE1 ./dir2/FILE3 + +TEST ln ./dir1/FILE1 ./dir2/dir3/FILE7 +TEST ln ./dir1/FILE1 ./dir2/dir3/FILE5 +TEST ln ./dir1/FILE1 ./dir2/dir3/FILE3 +cd / + +# Ideally for this test to have done its job, the file should still be +# under migration, so check the sticky bit again +TEST checksticky $B0/${V0}3/dir1/FILE1 + +# Wait for rebalance to complete +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 + +# Check if all files are 
clean and migrated right +## stat on the original file should show linkcount of 10 +linkcountsrc=$(stat -c %h $M0/dir1/FILE1) +TEST [[ $linkcountsrc == 10 ]] + +## inode and size of every file should be same as original file +inodesrc=$(stat -c %i $M0/dir1/FILE1) +TEST [[ $(stat -c %i $M0/dir1/FILE3) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir1/FILE5) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir1/FILE7) == $inodesrc ]] + +TEST [[ $(stat -c %i $M0/dir2/FILE3) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir2/FILE5) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir2/FILE7) == $inodesrc ]] + +TEST [[ $(stat -c %i $M0/dir2/dir3/FILE3) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir2/dir3/FILE5) == $inodesrc ]] +TEST [[ $(stat -c %i $M0/dir2/dir3/FILE7) == $inodesrc ]] + +# Check, newer link creations +cd $M0 +TEST ln ./dir1/FILE1 ./FILE1 +TEST ln ./dir2/FILE3 ./FILE3 +TEST ln ./dir2/dir3/FILE5 ./FILE5 +TEST ln ./dir1/FILE7 ./FILE7 +cd / +linkcountsrc=$(stat -c %h $M0/dir1/FILE1) +TEST [[ $linkcountsrc == 14 ]] + +cleanup; diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index d396c2ee4ab..bc9d04d36f8 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -28,6 +28,8 @@  #include <libgen.h>  #include <signal.h> +int dht_link2 (xlator_t *this, call_frame_t *frame, int op_ret); +  int  dht_aggregate (dict_t *this, char *key, data_t *value, void *data)  { @@ -4490,54 +4492,156 @@ err:          return 0;  } -  int  dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                int op_ret, int op_errno,                inode_t *inode, struct iatt *stbuf, struct iatt *preparent,                struct iatt *postparent, dict_t *xdata)  { -        call_frame_t *prev = NULL; -        dht_layout_t *layout = NULL;          dht_local_t  *local = NULL; - -        prev = cookie; +        int           ret = -1; +        gf_boolean_t  stbuf_merged = _gf_false; +        xlator_t     *subvol = 
NULL;          local = frame->local; -        if (op_ret == -1) -                goto out; - -        layout = dht_layout_for_subvol (this, prev->this); -        if (!layout) { -                gf_msg_debug (this->name, 0, -                              "no pre-set layout for subvolume %s", -                              prev->this->name); -                op_ret   = -1; -                op_errno = EINVAL; +        if (op_ret == -1) { +                /* No continuation on DHT inode missing errors, as we should +                 * then have a good stbuf that states P2 happened. We would +                 * get inode missing if the file completed migration between +                 * the lookup and the link call */                  goto out;          } +        /* Update parent on success, even if P1/2 checks are positive. +         * The second call on success will further update the parent */          if (local->loc.parent) {                  dht_inode_ctx_time_update (local->loc.parent, this,                                             preparent, 0);                  dht_inode_ctx_time_update (local->loc.parent, this,                                             postparent, 1);          } -        if (local->linked == _gf_true) { -                local->stbuf = *stbuf; + +        /* Update linkto attrs, if this is the first call and non-P2, +         * if we detect P2 then we need to trust the attrs from the +         * second call, not the first */ +        if (local->linked == _gf_true && +            ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2 (stbuf)) +             || (local->call_cnt != 1 && +                 IS_DHT_MIGRATION_PHASE2 (&local->stbuf)))) { +                dht_iatt_merge (this, &local->stbuf, stbuf, NULL); +                stbuf_merged = _gf_true;                  dht_linkfile_attr_heal (frame, this);          } + +        /* No further P1/2 checks if we are in the second iteration of +         * the call */ +        if (local->call_cnt != 
1) { +                goto out; +        } else { +                /* Preserve the return values, in case the migration decides +                 * to recreate the link on the same subvol that the current +                 * hashed for the link was created on. */ +                dht_iatt_merge (this, &local->preparent, +                                preparent, NULL); +                dht_iatt_merge (this, &local->postparent, +                                postparent, NULL); +                if (!stbuf_merged) { +                        dht_iatt_merge (this, &local->stbuf, +                                        stbuf, NULL); +                        stbuf_merged = _gf_true; +                } + +                local->inode = inode_ref (inode); +        } + +        local->op_errno = op_errno; +        local->rebalance.target_op_fn = dht_link2; +        /* Check if the rebalance phase2 is true */ +        if (IS_DHT_MIGRATION_PHASE2 (stbuf)) { +                ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol); +                if (!subvol) { +                        /* Phase 2 of migration */ +                        ret = dht_rebalance_complete_check (this, frame); +                        if (!ret) +                                return 0; +                } else { +                        dht_link2 (this, frame, 0); +                        return 0; +                } +        } + +        /* Check if the rebalance phase1 is true */ +        if (IS_DHT_MIGRATION_PHASE1 (stbuf)) { +                ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol); +                if (subvol) { +                        dht_link2 (this, frame, 0); +                        return 0; +                } +                ret = dht_rebalance_in_progress_check (this, frame); +                if (!ret) +                        return 0; +        }  out:          DHT_STRIP_PHASE1_FLAGS (stbuf); -        DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, 
stbuf, preparent, -                          postparent, NULL); + +        DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, +                          preparent, postparent, NULL);          return 0;  }  int +dht_link2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t *local  = NULL; +        xlator_t    *subvol = NULL; +        int          op_errno = EINVAL; + +        local = frame->local; +        if (!local) +                goto err; + +        op_errno = local->op_errno; +        if (op_ret == -1) +                goto err; + +        dht_inode_ctx_get1 (this, local->loc.inode, &subvol); +        if (!subvol) { +                subvol = local->cached_subvol; +                if (!subvol) { +                        op_errno = EINVAL; +                        goto err; +                } +        } + +        /* Second call to create link file could result in EEXIST as the +         * first call created the linkto in the currently +         * migrating subvol, which could be the new hashed subvol */ +        if (local->link_subvol == subvol) { +                DHT_STRIP_PHASE1_FLAGS (&local->stbuf); +                DHT_STACK_UNWIND (link, frame, 0, 0, local->inode, +                                  &local->stbuf, &local->preparent, +                                  &local->postparent, NULL); + +                return 0; +        } + +        local->call_cnt = 2; + +        STACK_WIND (frame, dht_link_cbk, subvol, subvol->fops->link, +                    &local->loc, &local->loc2, NULL); + +        return 0; +err: +        DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, +                          NULL, NULL); + +        return 0; +} + +int  dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                         int op_ret, int op_errno,                         inode_t *inode, struct iatt *stbuf, @@ -4588,6 +4692,7 @@ dht_link (call_frame_t *frame, xlator_t *this,                  
goto err;          } +        local->call_cnt = 1;          cached_subvol = local->cached_subvol;          if (!cached_subvol) {  | 
