summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xtests/bugs/bug-1161311.t129
-rw-r--r--xlators/cluster/dht/src/dht-common.c143
2 files changed, 253 insertions, 19 deletions
diff --git a/tests/bugs/bug-1161311.t b/tests/bugs/bug-1161311.t
new file mode 100755
index 00000000000..52ed1555c20
--- /dev/null
+++ b/tests/bugs/bug-1161311.t
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+# This tests for hard link preservation for files that are linked, when the
+# file is undergoing migration
+
+# --- Improvements and other tests ---
+## Fail rebalance of the large file for which links are created during P1/2
+### phases of migration
+## Start with multiple hard links to the file and then create more during P1/2
+### phases of migration
+## Test the same with NFS as the mount rather than FUSE
+## Create links when file is under P2 of migration specifically
+## Test with quota, to error out during hard link creation (if possible)
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+checksticky () {
+ i=0;
+ while [ ! -k $1 ]; do
+ sleep 1
+ i=$((i+1));
+ # Try for 10 seconds to get the sticky bit state
+ # else fail the test, as we may never see it
+ if [[ $i == 10 ]]; then
+ return $i
+ fi
+ echo "Waiting... $i"
+ done
+ echo "Done... got out @ $i"
+ return 0
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '3' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-write)
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+# Create directories to hold the links
+TEST mkdir $M0/dir1
+TEST mkdir -p $M0/dir2/dir3
+
+# Create a large file (1GB), so that rebalance takes time
+dd if=/dev/urandom of=$M0/dir1/FILE2 bs=64k count=10240
+
+# Rename the file to create a linkto, for rebalance to
+# act on the file
+## FILE1 and FILE2 hashes are, 678b1c4a e22c1ada, so they fall
+## into separate bricks when brick count is 3
+TEST mv $M0/dir1/FILE2 $M0/dir1/FILE1
+
+# unmount and remount the volume
+TEST umount $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+# Start the rebalance
+TEST $CLI volume rebalance $V0 start force
+
+# Wait for FILE to get the sticky bit on, so that file is under
+# active rebalance, before creating the links
+TEST checksticky $B0/${V0}3/dir1/FILE1
+
+# Create the links
+## FILE3 FILE5 FILE7 have hashes, c8c91469 566d26ce 22ce7eba
+## Which fall into separate bricks on a 3 brick layout
+cd $M0
+TEST ln ./dir1/FILE1 ./dir1/FILE7
+TEST ln ./dir1/FILE1 ./dir1/FILE5
+TEST ln ./dir1/FILE1 ./dir1/FILE3
+
+TEST ln ./dir1/FILE1 ./dir2/FILE7
+TEST ln ./dir1/FILE1 ./dir2/FILE5
+TEST ln ./dir1/FILE1 ./dir2/FILE3
+
+TEST ln ./dir1/FILE1 ./dir2/dir3/FILE7
+TEST ln ./dir1/FILE1 ./dir2/dir3/FILE5
+TEST ln ./dir1/FILE1 ./dir2/dir3/FILE3
+cd /
+
+# Ideally for this test to have done its job, the file should still be
+# under migration, so check the sticky bit again
+TEST checksticky $B0/${V0}3/dir1/FILE1
+
+# Wait for rebalance to complete
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+# Check if all files are clean and migrated right
+## stat on the original file should show linkcount of 10
+linkcountsrc=$(stat -c %h $M0/dir1/FILE1)
+TEST [[ $linkcountsrc == 10 ]]
+
+## inode and size of every file should be same as original file
+inodesrc=$(stat -c %i $M0/dir1/FILE1)
+TEST [[ $(stat -c %i $M0/dir1/FILE3) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir1/FILE5) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir1/FILE7) == $inodesrc ]]
+
+TEST [[ $(stat -c %i $M0/dir2/FILE3) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/FILE5) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/FILE7) == $inodesrc ]]
+
+TEST [[ $(stat -c %i $M0/dir2/dir3/FILE3) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/dir3/FILE5) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/dir3/FILE7) == $inodesrc ]]
+
+# Check, newer link creations
+cd $M0
+TEST ln ./dir1/FILE1 ./FILE1
+TEST ln ./dir2/FILE3 ./FILE3
+TEST ln ./dir2/dir3/FILE5 ./FILE5
+TEST ln ./dir1/FILE7 ./FILE7
+cd /
+linkcountsrc=$(stat -c %h $M0/dir1/FILE1)
+TEST [[ $linkcountsrc == 14 ]]
+
+cleanup;
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index d396c2ee4ab..bc9d04d36f8 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -28,6 +28,8 @@
#include <libgen.h>
#include <signal.h>
+int dht_link2 (xlator_t *this, call_frame_t *frame, int op_ret);
+
int
dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
{
@@ -4490,54 +4492,156 @@ err:
return 0;
}
-
int
dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
dht_local_t *local = NULL;
-
- prev = cookie;
+ int ret = -1;
+ gf_boolean_t stbuf_merged = _gf_false;
+ xlator_t *subvol = NULL;
local = frame->local;
- if (op_ret == -1)
- goto out;
-
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
+ if (op_ret == -1) {
+ /* No continuation on DHT inode missing errors, as we should
+ * then have a good stbuf that states P2 happened. We would
+ * get inode missing if the file completed migration between
+ * the lookup and the link call */
goto out;
}
+ /* Update parent on success, even if P1/2 checks are positive.
+ * The second call on success will further update the parent */
if (local->loc.parent) {
dht_inode_ctx_time_update (local->loc.parent, this,
preparent, 0);
dht_inode_ctx_time_update (local->loc.parent, this,
postparent, 1);
}
- if (local->linked == _gf_true) {
- local->stbuf = *stbuf;
+
+ /* Update linkto attrs, if this is the first call and non-P2,
+ * if we detect P2 then we need to trust the attrs from the
+ * second call, not the first */
+ if (local->linked == _gf_true &&
+ ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2 (stbuf))
+ || (local->call_cnt != 1 &&
+ IS_DHT_MIGRATION_PHASE2 (&local->stbuf)))) {
+ dht_iatt_merge (this, &local->stbuf, stbuf, NULL);
+ stbuf_merged = _gf_true;
dht_linkfile_attr_heal (frame, this);
}
+
+ /* No further P1/2 checks if we are in the second iteration of
+ * the call */
+ if (local->call_cnt != 1) {
+ goto out;
+ } else {
+ /* Preserve the return values, in case the migration decides
+ * to recreate the link on the same subvol that the current
+ * hashed for the link was created on. */
+ dht_iatt_merge (this, &local->preparent,
+ preparent, NULL);
+ dht_iatt_merge (this, &local->postparent,
+ postparent, NULL);
+ if (!stbuf_merged) {
+ dht_iatt_merge (this, &local->stbuf,
+ stbuf, NULL);
+ stbuf_merged = _gf_true;
+ }
+
+ local->inode = inode_ref (inode);
+ }
+
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_link2;
+ /* Check if the rebalance phase2 is true */
+ if (IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+ ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol);
+ if (!subvol) {
+ /* Phase 2 of migration */
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_link2 (this, frame, 0);
+ return 0;
+ }
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (stbuf)) {
+ ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol);
+ if (subvol) {
+ dht_link2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
out:
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
+
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, NULL);
return 0;
}
int
+dht_link2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto err;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto err;
+
+ dht_inode_ctx_get1 (this, local->loc.inode, &subvol);
+ if (!subvol) {
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ }
+
+ /* Second call to create link file could result in EEXIST as the
+ * first call created the linkto in the currently
+ * migrating subvol, which could be the new hashed subvol */
+ if (local->link_subvol == subvol) {
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (link, frame, 0, 0, local->inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
+
+ return 0;
+ }
+
+ local->call_cnt = 2;
+
+ STACK_WIND (frame, dht_link_cbk, subvol, subvol->fops->link,
+ &local->loc, &local->loc2, NULL);
+
+ return 0;
+err:
+ DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+}
+
+int
dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
@@ -4588,6 +4692,7 @@ dht_link (call_frame_t *frame, xlator_t *this,
goto err;
}
+ local->call_cnt = 1;
cached_subvol = local->cached_subvol;
if (!cached_subvol) {