diff options
-rwxr-xr-x | tests/basic/tier/fops-during-migration.t | 121 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 128 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 13 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 105 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-inode-read.c | 139 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-inode-write.c | 162 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-messages.h | 17 |
7 files changed, 596 insertions, 89 deletions
diff --git a/tests/basic/tier/fops-during-migration.t b/tests/basic/tier/fops-during-migration.t new file mode 100755 index 00000000000..ce25f0acc32 --- /dev/null +++ b/tests/basic/tier/fops-during-migration.t @@ -0,0 +1,121 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + + +NUM_BRICKS=3 +DEMOTE_FREQ=5 +PROMOTE_FREQ=5 + +TEST_STR="Testing write and truncate fops on tier migration" + +function is_sticky_set () { + echo $1 + if [ -k $1 ]; + then + echo "yes" + else + echo "no" + fi +} + + +# Creates a tiered volume with pure distribute hot and cold tiers +# Both hot and cold tiers will have an equal number of bricks. + +function create_dist_tier_vol () { + mkdir $B0/cold + mkdir $B0/hot + TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} + TEST $CLI volume set $V0 performance.quick-read off + TEST $CLI volume set $V0 performance.io-cache off + TEST $CLI volume set $V0 features.ctr-enabled on + TEST $CLI volume start $V0 + TEST $CLI volume attach-tier $V0 $H0:$B0/hot/${V0}{0..$1} + TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ + TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ + TEST $CLI volume set $V0 cluster.read-freq-threshold 0 + TEST $CLI volume set $V0 cluster.write-freq-threshold 0 +} + + +# Checks that the contents of the file matches the input string +#$1 : file_path +#$2 : comparison string + +function check_file_content () { + contents=`cat $1` + echo $contents + if [ "$contents" = "$2" ]; then + echo "1" + else + echo "0" + fi +} + + +cleanup; + +#Basic checks +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + + +#Create and start a tiered volume +create_dist_tier_vol $NUM_BRICKS + +# Mount FUSE +TEST glusterfs -s $H0 --volfile-id $V0 $M0 + +TEST mkdir $M0/dir1 + +# Create a large file (200MB), so that rebalance takes time +# The file will be created on the hot tier + +dd if=/dev/zero of=$M0/dir1/FILE1 bs=64k count=5120 + +# Get the path of the file on the hot 
tier +HPATH=`find $B0/hot/ -name FILE1` +echo "File path on hot tier: "$HPATH + + +# Wait for the tier process to demote the file +EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH + +# Get the path of the file on the cold tier +CPATH=`find $B0/cold/ -name FILE1` +echo "File path on cold tier: "$CPATH + +# Test setxattr +TEST setfattr -n "user.test_xattr" -v "qwerty" $M0/dir1/FILE1 + +# Test hard link creation +TEST ln $M0/dir1/FILE1 $M0/dir1/lnk1 +TEST ln $M0/dir1/FILE1 $M0/lnk2 + +# Change the file contents while it is being migrated +echo $TEST_STR > $M0/dir1/FILE1 + + +# The file contents should have changed even if the file +# is not done migrating +EXPECT "1" check_file_content $M0/dir1/FILE1 "$TEST_STR" + + +# Wait for the tier process to finish migrating the file +EXPECT_WITHIN $REBALANCE_TIMEOUT "no" is_sticky_set $CPATH + +# The file contents should have changed +EXPECT "1" check_file_content $M0/dir1/FILE1 "$TEST_STR" + + +linkcountsrc=$(stat -c %h $M0/dir1/FILE1) +echo $linkcountsrc +TEST [[ $linkcountsrc == 3 ]] + +TEST getfattr -n "user.test_xattr" $M0/dir1/FILE1 + +cleanup; + diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index e41e17a2d4f..4f84507162b 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -31,15 +31,16 @@ int run_defrag = 0; -int -dht_link2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); - +int dht_link2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame, + int ret); int -dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); +dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, + int ret); int -dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); +dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, + int ret); int dht_aggregate_quota_xattr (dict_t *dst, char *key, data_t *value) @@ -3386,9 +3387,10 @@ dht_file_setxattr_cbk (call_frame_t *frame, void 
*cookie, xlator_t *this, goto out; } - local->op_ret = 0; - + local->op_ret = op_ret; local->rebalance.target_op_fn = dht_setxattr2; + if (xdata) + local->rebalance.xdata = dict_ref (xdata); /* Phase 2 of migration */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { @@ -3405,7 +3407,7 @@ dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, &subvol1, &subvol2); if (!dht_mig_info_is_invalid (local->cached_subvol, subvol1, subvol2)) { - dht_setxattr2 (this, subvol2, frame); + dht_setxattr2 (this, subvol2, frame, 0); return 0; } @@ -3415,8 +3417,6 @@ dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } out: - if (local->rebalance.xdata) - dict_unref (local->rebalance.xdata); if (local->fop == GF_FOP_SETXATTR) { DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL); @@ -3489,7 +3489,7 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, } else { local->call_cnt = 1; - local->rebalance.xdata = dict_ref (xattr); + local->rebalance.xattr = dict_ref (xattr); local->rebalance.flags = flags; xdata = xdata ? dict_ref (xdata) : dict_new (); @@ -3572,27 +3572,42 @@ out: int -dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int op_errno = EINVAL; - if (!frame || !frame->local || !subvol) + if (!frame || !frame->local) goto err; local = frame->local; + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + DHT_STACK_UNWIND (setxattr, frame, local->op_ret, + local->op_errno, local->rebalance.xdata); + return 0; + } + + if (subvol == NULL) + goto err; + + op_errno = local->op_errno; + local->call_cnt = 2; /* This is the second attempt */ if (local->fop == GF_FOP_SETXATTR) { STACK_WIND (frame, dht_file_setxattr_cbk, subvol, subvol->fops->setxattr, &local->loc, - local->rebalance.xdata, local->rebalance.flags, + local->rebalance.xattr, local->rebalance.flags, NULL); } else { STACK_WIND (frame, dht_file_setxattr_cbk, subvol, subvol->fops->fsetxattr, local->fd, - local->rebalance.xdata, local->rebalance.flags, + local->rebalance.xattr, local->rebalance.flags, NULL); } @@ -3839,7 +3854,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, } else { - local->rebalance.xdata = dict_ref (xattr); + local->rebalance.xattr = dict_ref (xattr); local->rebalance.flags = flags; local->call_cnt = 1; @@ -3903,6 +3918,8 @@ dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_ret = 0; local->rebalance.target_op_fn = dht_removexattr2; + if (xdata) + local->rebalance.xdata = dict_ref (xdata); /* Phase 2 of migration */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { @@ -3918,7 +3935,7 @@ dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, &subvol1, &subvol2); if (!dht_mig_info_is_invalid (local->cached_subvol, subvol1, subvol2)) { - dht_removexattr2 (this, subvol2, frame); + dht_removexattr2 (this, subvol2, frame, 0); return 0; } @@ -3938,7 +3955,8 @@ out: } int -dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, + int ret) { dht_local_t *local = NULL; int op_errno = EINVAL; @@ -3950,6 +3968,17 @@ dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) local->call_cnt = 2; /* This is the second attempt */ + if (we_are_not_migrating (ret)) { + + /* This dht xlator is not migrating the file. 
Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND (removexattr, frame, local->op_ret, + local->op_errno, local->rebalance.xdata); + return 0; + } + if (local->fop == GF_FOP_REMOVEXATTR) { STACK_WIND (frame, dht_file_removexattr_cbk, subvol, subvol->fops->removexattr, &local->loc, @@ -5603,8 +5632,12 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->inode = inode_ref (inode); } + local->op_ret = op_ret; local->op_errno = op_errno; local->rebalance.target_op_fn = dht_link2; + dht_set_local_rebalance (this, local, stbuf, preparent, + postparent, xdata); + /* Check if the rebalance phase2 is true */ if (IS_DHT_MIGRATION_PHASE2 (stbuf)) { ret = dht_inode_ctx_get_mig_info (this, local->loc.inode, NULL, @@ -5615,7 +5648,7 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!ret) return 0; } else { - dht_link2 (this, subvol, frame); + dht_link2 (this, subvol, frame, 0); return 0; } } @@ -5625,7 +5658,7 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ret = dht_inode_ctx_get_mig_info (this, local->loc.inode, NULL, &subvol); if (subvol) { - dht_link2 (this, subvol, frame); + dht_link2 (this, subvol, frame, 0); return 0; } ret = dht_rebalance_in_progress_check (this, frame); @@ -5643,7 +5676,7 @@ out: int -dht_link2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_link2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int op_errno = EINVAL; @@ -5653,6 +5686,19 @@ dht_link2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) goto err; op_errno = local->op_errno; + + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + DHT_STACK_UNWIND (link, frame, local->op_ret, op_errno, + local->inode, + &local->stbuf, &local->preparent, + &local->postparent, NULL); + return 0; + } + if (subvol == NULL) { op_errno = EINVAL; goto err; @@ -7855,3 +7901,43 @@ int32_t dht_migration_needed(xlator_t *this) out: return ret; } + + + +/* +This function should not be called more than once during a FOP +handling path. It is valid only for ops on files +*/ +int32_t dht_set_local_rebalance (xlator_t *this, dht_local_t *local, + struct iatt *stbuf, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + + if (!local) + return -1; + + if (local->rebalance.set) { + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_REBAL_STRUCT_SET, + "local->rebalance already set"); + } + + + if (stbuf) + memcpy (&local->rebalance.stbuf, stbuf, sizeof (struct iatt)); + + if (prebuf) + memcpy (&local->rebalance.prebuf, prebuf, sizeof (struct iatt)); + + if (postbuf) + memcpy (&local->rebalance.postbuf, postbuf, + sizeof (struct iatt)); + + if (xdata) + local->rebalance.xdata = dict_ref (xdata); + + local->rebalance.set = 1; + + return 0; +} diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 1b5a0846c63..b1d12c84a9f 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -42,7 +42,7 @@ typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie, int32_t op_ret, int32_t op_errno, dict_t *xdata); typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, xlator_t *dst_node, - call_frame_t *frame); + call_frame_t *frame, int ret); typedef int (*dht_refresh_layout_unlock) (call_frame_t *frame, xlator_t *this, int op_ret); @@ -121,8 +121,12 @@ struct dht_rebalance_ { struct iobref *iobref; struct iovec *vector; struct iatt stbuf; + struct iatt prebuf; + struct iatt postbuf; dht_defrag_cbk_fn_t target_op_fn; dict_t *xdata; + dict_t *xattr; + int32_t set; }; /** @@ -553,6 +557,8 @@ typedef struct dht_migrate_info { #define 
layout_is_sane(layout) ((layout) && (layout->cnt > 0)) +#define we_are_not_migrating(x) ((x) == 1) + #define DHT_STACK_UNWIND(fop, frame, params ...) do { \ dht_local_t *__local = NULL; \ xlator_t *__xl = NULL; \ @@ -1083,4 +1089,9 @@ int dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child, int32_t *op_errno); +int32_t dht_set_local_rebalance (xlator_t *this, dht_local_t *local, + struct iatt *stbuf, + struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata); + #endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 8cd8617e1ee..221fe033ed6 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -137,17 +137,6 @@ dht_frame_return (call_frame_t *frame) return this_call_cnt; } -/* - * A slightly "updated" version of the algorithm described in the commit log - * is used here. - * - * The only enhancement is that: - * - * - The number of bits used by the backend filesystem for HUGE d_off which - * is described as 63, and - * - The number of bits used by the d_off presented by the transformation - * upwards which is described as 64, are both made "configurable." - */ int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc, @@ -460,6 +449,12 @@ dht_local_wipe (xlator_t *this, dht_local_t *local) GF_FREE (local->key); + if (local->rebalance.xdata) + dict_unref (local->rebalance.xdata); + + if (local->rebalance.xattr) + dict_unref (local->rebalance.xattr); + GF_FREE (local->rebalance.vector); if (local->rebalance.iobref) @@ -891,7 +886,12 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) } - +/* + op_ret values : + 0 : Success. + -1 : Failure. + 1 : File is being migrated but not by this DHT layer. 
+*/ static int dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data) @@ -901,7 +901,7 @@ dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data) local = frame->local; - if (op_ret == -1) + if (op_ret != 0) goto out; if (local->cached_subvol == NULL) { @@ -912,7 +912,7 @@ dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data) subvol = local->cached_subvol; out: - local->rebalance.target_op_fn (THIS, subvol, frame); + local->rebalance.target_op_fn (THIS, subvol, frame, op_ret); return 0; } @@ -965,15 +965,35 @@ dht_migration_complete_check_task (void *data) SYNCTASK_SETID (frame->root->uid, frame->root->gid); } + /* - * temporary check related to tier promoting/demoting the file; - * the lower level DHT detects the migration (due to sticky - * bits) when it is the responsibility of the tier translator - * to complete the rebalance transaction. It will be corrected - * when rebalance and tier migration are fixed to work together. + * Each DHT xlator layer has its own name for the linkto xattr. + * If the file mode bits indicate that the file is being migrated but + * this layer's linkto xattr is not set, it means that another + * DHT layer is migrating the file. In this case, return 1 so + * the mode bits can be passed on to the higher layer for appropriate + * action. */ - if (strcmp(this->parents->xlator->type, "cluster/tier") == 0) { - ret = 0; + if (-ret == ENODATA) { + /* This DHT translator is not migrating this file */ + + ret = inode_ctx_reset1 (inode, this, &tmp_miginfo); + if (tmp_miginfo) { + + /* This can be a problem if the file was + * migrated by two different layers. Raise + * a warning here. + */ + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_HAS_MIGINFO, + "%s: Found miginfo in the inode ctx", + tmp_loc.path ? 
tmp_loc.path : + uuid_utoa (tmp_loc.gfid)); + + miginfo = (void *)tmp_miginfo; + GF_REF_PUT (miginfo); + } + ret = 1; goto out; } @@ -1096,7 +1116,14 @@ dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame) return ret; } + /* During 'in-progress' state, both nodes should have the file */ +/* + op_ret values : + 0 : Success + -1 : Failure. + 1 : File is being migrated but not by this DHT layer. +*/ static int dht_inprogress_check_done (int op_ret, call_frame_t *frame, void *data) { @@ -1106,7 +1133,7 @@ dht_inprogress_check_done (int op_ret, call_frame_t *frame, void *data) local = frame->local; - if (op_ret == -1) + if (op_ret != 0) goto out; inode = local->loc.inode ? local->loc.inode : local->fd->inode; @@ -1122,7 +1149,7 @@ dht_inprogress_check_done (int op_ret, call_frame_t *frame, void *data) } out: - local->rebalance.target_op_fn (THIS, dst_subvol, frame); + local->rebalance.target_op_fn (THIS, dst_subvol, frame, op_ret); return 0; } @@ -1144,6 +1171,9 @@ dht_rebalance_inprogress_task (void *data) inode_t *inode = NULL; fd_t *iter_fd = NULL; int open_failed = 0; + uint64_t tmp_miginfo = 0; + dht_migrate_info_t *miginfo = NULL; + this = THIS; frame = data; @@ -1169,6 +1199,35 @@ dht_rebalance_inprogress_task (void *data) conf->link_xattr_name, NULL, NULL); } + /* + * Each DHT xlator layer has its own name for the linkto xattr. + * If the file mode bits indicate that the file is being migrated but + * this layer's linkto xattr is not present, it means that another + * DHT layer is migrating the file. In this case, return 1 so + * the mode bits can be passed on to the higher layer for appropriate + * action. + */ + + if (-ret == ENODATA) { + /* This DHT layer is not migrating this file */ + ret = inode_ctx_reset1 (inode, this, &tmp_miginfo); + if (tmp_miginfo) { + /* This can be a problem if the file was + * migrated by two different layers. Raise + * a warning here. 
+ */ + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_HAS_MIGINFO, + "%s: Found miginfo in the inode ctx", + tmp_loc.path ? tmp_loc.path : + uuid_utoa (tmp_loc.gfid)); + miginfo = (void *)tmp_miginfo; + GF_REF_PUT (miginfo); + } + ret = 1; + goto out; + } + if (ret < 0) { gf_msg (this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED, diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index 53ad4e7ff8c..3480151734f 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -15,13 +15,22 @@ #include "dht-common.h" -int dht_access2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame); -int dht_readv2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame); -int dht_attr2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame); -int dht_open2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame); -int dht_flush2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame); -int dht_lk2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame); -int dht_fsync2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame); +int dht_access2 (xlator_t *this, xlator_t *dst_node, + call_frame_t *frame, int ret); +int dht_readv2 (xlator_t *this, xlator_t *dst_node, + call_frame_t *frame, int ret); +int dht_attr2 (xlator_t *this, xlator_t *dst_node, + call_frame_t *frame, int ret); +int dht_open2 (xlator_t *this, xlator_t *dst_node, + call_frame_t *frame, int ret); +int dht_flush2 (xlator_t *this, xlator_t *dst_node, + call_frame_t *frame, int ret); +int dht_lk2 (xlator_t *this, xlator_t *dst_node, + call_frame_t *frame, int ret); +int dht_fsync2 (xlator_t *this, xlator_t *dst_node, + call_frame_t *frame, int ret); + + int dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -58,7 +67,7 @@ out: } int -dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local 
= NULL; int op_errno = EINVAL; @@ -69,6 +78,14 @@ dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) local = frame->local; op_errno = ENOENT; + if (we_are_not_migrating (ret)) { + /* This DHT layer is not migrating the file */ + DHT_STACK_UNWIND (open, frame, -1, local->op_errno, + NULL, NULL); + return 0; + + } + if (subvol == NULL) goto out; @@ -80,10 +97,11 @@ dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) return 0; out: - DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL); + DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL); return 0; } + int dht_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, dict_t *xdata) @@ -155,6 +173,8 @@ dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; local->op_errno = op_errno; + local->op_ret = op_ret; + /* Check if the rebalance phase2 is true */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { inode = (local->fd) ? local->fd->inode : local->loc.inode; @@ -162,13 +182,15 @@ dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!subvol) { /* Phase 2 of migration */ local->rebalance.target_op_fn = dht_attr2; + dht_set_local_rebalance (this, local, NULL, NULL, + stbuf, xdata); ret = dht_rebalance_complete_check (this, frame); if (!ret) return 0; } else { /* value is already set in fd_ctx, that means no need to check for whether its complete or not. */ - dht_attr2 (this, subvol, frame); + dht_attr2 (this, subvol, frame, 0); return 0; } } @@ -181,7 +203,7 @@ err: } int -dht_attr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_attr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int op_errno = EINVAL; @@ -191,6 +213,19 @@ dht_attr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) goto out; op_errno = local->op_errno; + + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. 
Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND (stat, frame, local->op_ret, op_errno, + &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } + + if (subvol == NULL) goto out; @@ -382,7 +417,8 @@ dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { dht_local_t *local = NULL; int ret = 0; - xlator_t *subvol = 0; + xlator_t *src_subvol = 0; + xlator_t *dst_subvol = 0; local = frame->local; if (!local) { @@ -401,23 +437,31 @@ dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { /* File would be migrated to other node */ - ret = dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, - &subvol); - if (!subvol) { + ret = dht_inode_ctx_get_mig_info (this, local->fd->inode, + &src_subvol, + &dst_subvol); + + if (dht_mig_info_is_invalid (local->cached_subvol, + src_subvol, dst_subvol)) { + local->op_ret = op_ret; local->rebalance.target_op_fn = dht_readv2; + dht_set_local_rebalance (this, local, NULL, NULL, + stbuf, xdata); + ret = dht_rebalance_complete_check (this, frame); if (!ret) return 0; } else { /* value is already set in fd_ctx, that means no need to check for whether its complete or not. */ - dht_readv2 (this, subvol, frame); + dht_readv2 (this, dst_subvol, frame, 0); return 0; } } out: DHT_STRIP_PHASE1_FLAGS (stbuf); + DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf, iobref, xdata); @@ -425,7 +469,7 @@ out: } int -dht_readv2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_readv2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int op_errno = EINVAL; @@ -435,6 +479,18 @@ dht_readv2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) goto out; op_errno = local->op_errno; + + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. 
Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND (readv, frame, local->op_ret, op_errno, + NULL, 0, &local->rebalance.postbuf, + NULL, local->rebalance.xdata); + return 0; + } + if (subvol == NULL) goto out; @@ -451,6 +507,7 @@ out: return 0; } + int dht_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata) @@ -543,7 +600,7 @@ out: } int -dht_access2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_access2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int op_errno = EINVAL; @@ -553,6 +610,17 @@ dht_access2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) goto out; op_errno = local->op_errno; + + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + + DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL); + return 0; + } + if (subvol == NULL) goto out; @@ -629,7 +697,7 @@ dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* If context is set, then send flush() it to the destination */ dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, &subvol); if (subvol) { - dht_flush2 (this, subvol, frame); + dht_flush2 (this, subvol, frame, 0); return 0; } @@ -640,7 +708,7 @@ out: } int -dht_flush2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_flush2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int32_t op_errno = EINVAL; @@ -652,7 +720,6 @@ dht_flush2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) op_errno = local->op_errno; - dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, &subvol); if (subvol == NULL) goto out; @@ -718,7 +785,8 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, call_frame_t *prev = NULL; int ret = -1; inode_t *inode = 
NULL; - xlator_t *subvol = 0; + xlator_t *src_subvol = 0; + xlator_t *dst_subvol = 0; local = frame->local; prev = cookie; @@ -741,9 +809,15 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, local->op_errno = op_errno; inode = local->fd->inode; - dht_inode_ctx_get_mig_info (this, inode, NULL, &subvol); - if (!subvol) { + + dht_inode_ctx_get_mig_info (this, inode, &src_subvol, &dst_subvol); + + if (dht_mig_info_is_invalid (local->cached_subvol, + src_subvol, dst_subvol)) { + local->rebalance.target_op_fn = dht_fsync2; + dht_set_local_rebalance (this, local, NULL, prebuf, + postbuf, xdata); /* Check if the rebalance phase1 is true */ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { @@ -760,13 +834,14 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, if (!ret) return 0; } else { - dht_fsync2 (this, subvol, frame); + dht_fsync2 (this, dst_subvol, frame, 0); return 0; } out: DHT_STRIP_PHASE1_FLAGS (postbuf); DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); @@ -774,7 +849,7 @@ out: } int -dht_fsync2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_fsync2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int32_t op_errno = EINVAL; @@ -785,6 +860,18 @@ dht_fsync2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) local = frame->local; op_errno = local->op_errno; + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + DHT_STACK_UNWIND (fsync, frame, local->op_ret, + op_errno, &local->rebalance.prebuf, + &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } + if (subvol == NULL) goto out; diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index b48fd7926f1..e57056b4f77 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -16,12 +16,18 @@ #include "dht-common.h" -int dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); -int dht_truncate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); -int dht_setattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); -int dht_fallocate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); -int dht_discard2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); -int dht_zerofill2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); +int dht_writev2 (xlator_t *this, xlator_t *subvol, + call_frame_t *frame, int ret); +int dht_truncate2 (xlator_t *this, xlator_t *subvol, + call_frame_t *frame, int ret); +int dht_setattr2 (xlator_t *this, xlator_t *subvol, + call_frame_t *frame, int ret); +int dht_fallocate2 (xlator_t *this, xlator_t *subvol, + call_frame_t *frame, int ret); +int dht_discard2 (xlator_t *this, xlator_t *subvol, + call_frame_t *frame, int ret); +int dht_zerofill2 (xlator_t *this, xlator_t *subvol, + call_frame_t *frame, int ret); int dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -63,7 +69,15 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->rebalance.target_op_fn = dht_writev2; + local->op_ret = op_ret; local->op_errno = op_errno; + + /* We might need to pass the stbuf information to the higher DHT + * layer for appropriate handling. 
+ */ + + dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); + /* Phase 2 of migration */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { ret = dht_rebalance_complete_check (this, frame); @@ -80,7 +94,7 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, &subvol1, &subvol2); if (!dht_mig_info_is_invalid (local->cached_subvol, subvol1, subvol2)) { - dht_writev2 (this, subvol2, frame); + dht_writev2 (this, subvol2, frame, 0); return 0; } ret = dht_rebalance_in_progress_check (this, frame); @@ -99,7 +113,7 @@ out: } int -dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int32_t op_errno = EINVAL; @@ -110,6 +124,19 @@ dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) local = frame->local; op_errno = local->op_errno; + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND (writev, frame, local->op_ret, + local->op_errno, &local->rebalance.prebuf, + &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } + + if (subvol == NULL) goto out; @@ -220,7 +247,15 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->rebalance.target_op_fn = dht_truncate2; + local->op_ret = op_ret; local->op_errno = op_errno; + + /* We might need to pass the stbuf information to the higher DHT + * layer for appropriate handling. 
+ */ + + dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); + /* Phase 2 of migration */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { ret = dht_rebalance_complete_check (this, frame); @@ -238,7 +273,7 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, &dst_subvol); if (!dht_mig_info_is_invalid (local->cached_subvol, src_subvol, dst_subvol)) { - dht_truncate2 (this, dst_subvol, frame); + dht_truncate2 (this, dst_subvol, frame, 0); return 0; } ret = dht_rebalance_in_progress_check (this, frame); @@ -249,6 +284,7 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, out: DHT_STRIP_PHASE1_FLAGS (postbuf); DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); err: @@ -257,7 +293,7 @@ err: int -dht_truncate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_truncate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int32_t op_errno = EINVAL; @@ -268,6 +304,16 @@ dht_truncate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) local = frame->local; op_errno = local->op_errno; + /* This dht xlator is not migrating the file */ + if (we_are_not_migrating (ret)) { + + DHT_STACK_UNWIND (truncate, frame, local->op_ret, + local->op_errno, &local->rebalance.prebuf, + &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } + if (subvol == NULL) goto out; @@ -411,8 +457,13 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } goto out; } + + local->op_ret = op_ret; + local->op_errno = op_errno; local->rebalance.target_op_fn = dht_fallocate2; + dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); + /* Phase 2 of migration */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { ret = dht_rebalance_complete_check (this, frame); @@ -429,7 +480,7 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, &dst_subvol); if 
(!dht_mig_info_is_invalid (local->cached_subvol, src_subvol, dst_subvol)) { - dht_fallocate2 (this, dst_subvol, frame); + dht_fallocate2 (this, dst_subvol, frame, 0); return 0; } ret = dht_rebalance_in_progress_check (this, frame); @@ -440,6 +491,7 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, out: DHT_STRIP_PHASE1_FLAGS (postbuf); DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf, xdata); err: @@ -447,7 +499,7 @@ err: } int -dht_fallocate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_fallocate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int32_t op_errno = EINVAL; @@ -458,6 +510,19 @@ dht_fallocate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) local = frame->local; op_errno = local->op_errno; + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + DHT_STACK_UNWIND (fallocate, frame, local->op_ret, + local->op_errno, + &local->rebalance.prebuf, + &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } + if (subvol == NULL) goto out; @@ -555,7 +620,12 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } goto out; } + local->rebalance.target_op_fn = dht_discard2; + local->op_ret = op_ret; + local->op_errno = op_errno; + + dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); /* Phase 2 of migration */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { @@ -573,7 +643,7 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, &dst_subvol); if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol, dst_subvol)) { - dht_discard2 (this, dst_subvol, frame); + dht_discard2 (this, dst_subvol, frame, 0); return 0; } ret = dht_rebalance_in_progress_check (this, frame); @@ -584,6 +654,7 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, out: DHT_STRIP_PHASE1_FLAGS (postbuf); DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STACK_UNWIND (discard, frame, op_ret, op_errno, prebuf, postbuf, xdata); err: @@ -591,7 +662,7 @@ err: } int -dht_discard2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_discard2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int32_t op_errno = EINVAL; @@ -602,6 +673,19 @@ dht_discard2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) local = frame->local; op_errno = local->op_errno; + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + DHT_STACK_UNWIND (discard, frame, local->op_ret, + local->op_errno, + &local->rebalance.prebuf, + &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } + if (subvol == NULL) goto out; @@ -694,7 +778,13 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } goto out; } + local->rebalance.target_op_fn = dht_zerofill2; + local->op_ret = op_ret; + local->op_errno = op_errno; + + dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); + /* Phase 2 of migration */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { ret = dht_rebalance_complete_check (this, frame); @@ -711,7 +801,7 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, &subvol1, &subvol2); if (!dht_mig_info_is_invalid (local->cached_subvol, subvol1, subvol2)) { - dht_zerofill2 (this, subvol2, frame); + dht_zerofill2 (this, subvol2, frame, 0); return 0; } @@ -723,6 +813,7 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, out: DHT_STRIP_PHASE1_FLAGS (postbuf); DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno, prebuf, postbuf, xdata); err: @@ -730,7 +821,7 @@ err: } int -dht_zerofill2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_zerofill2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int32_t op_errno = EINVAL; @@ -742,6 +833,20 @@ dht_zerofill2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) op_errno = local->op_errno; + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + DHT_STACK_UNWIND (zerofill, frame, local->op_ret, + local->op_errno, + &local->rebalance.prebuf, + &local->rebalance.postbuf, + local->rebalance.xdata); + + return 0; + } + if (subvol == NULL) goto out; @@ -754,6 +859,7 @@ dht_zerofill2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) return 0; out: + DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL); return 0; } @@ -826,10 +932,18 @@ dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (local->call_cnt != 1) goto out; + local->op_ret = op_ret; + local->op_errno = op_errno; + local->rebalance.target_op_fn = dht_setattr2; + /* Phase 2 of migration */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { + + dht_set_local_rebalance (this, local, NULL, prebuf, + postbuf, xdata); + ret = dht_rebalance_complete_check (this, frame); if (!ret) return 0; @@ -842,6 +956,7 @@ dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, out: DHT_STRIP_PHASE1_FLAGS (postbuf); DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf, xdata); @@ -849,7 +964,7 @@ out: } int -dht_setattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) +dht_setattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; int32_t op_errno = EINVAL; @@ -860,6 +975,19 @@ dht_setattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) local = frame->local; op_errno = local->op_errno; + if (we_are_not_migrating (ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + DHT_STACK_UNWIND (setattr, frame, local->op_ret, + local->op_errno, + &local->rebalance.prebuf, + &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } + if (subvol == NULL) goto out; diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index e0f051087fc..d6300d3741f 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -45,7 +45,7 @@ */ #define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT -#define GLFS_DHT_NUM_MESSAGES 104 +#define GLFS_DHT_NUM_MESSAGES 106 #define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1) /* Messages with message IDs */ @@ -983,5 +983,20 @@ #define DHT_MSG_COMMIT_HASH_INFO (GLFS_DHT_BASE + 104) +/* + * @messageid 109105 + * @diagnosis + * @recommendedaction None + */ + +#define DHT_MSG_REBAL_STRUCT_SET (GLFS_DHT_BASE + 105) + +/* + * @messageid 109106 + * @diagnosis + * @recommendedaction None + */ + +#define DHT_MSG_HAS_MIGINFO (GLFS_DHT_BASE + 106) #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* _DHT_MESSAGES_H_ */ |