From bb2370514598a99e6ab268af81df57dc16caa2c5 Mon Sep 17 00:00:00 2001 From: Mohammed Rafi KC Date: Tue, 21 Jul 2015 18:14:22 +0530 Subject: tier/dht: unlink fails after lookup in a directory unlink fails with invalid argument (EINVAL) for files that were already present on the cold tier before the hot tier was attached. All of the fops will be hashed to the hot tier after attach-tier (unless the "rule" option is explicitly set). A lookup sent to a directory will eventually search the directory using readdirp and will populate inode_ctx for the inodes based on the output, in the respective dht xlators. So readdirp will populate inode_ctx for the files (that were already present in the volume before attaching) in cold-dht only, because it got the entries from the cold tier. So when an unlink comes on such an inode, the lookup associated with the unlink will be sent as a revalidate request to the cold tier only, since a lookup was already performed on the inode, and the new lookup will succeed. Then, in dht's unlink, the file hashes to the hot tier but the cached_subvol is the cold tier; since the hashed and cached subvolumes mismatch, it chooses the hashed subvolume and sends the fop to hot-dht, and the fop fails with EINVAL from hot-dht since it does not have inode_ctx stored for that inode (because no lookup was performed from hot-dht). Change-Id: Ib7c14a9297a22d615f7a890a060be4809b5a745a BUG: 1236032 Signed-off-by: Mohammed Rafi KC Signed-off-by: Dan Lambright Reviewed-on: http://review.gluster.org/11675 Tested-by: NetBSD Build System Tested-by: Gluster Build System Reviewed-by: Raghavendra G --- tests/basic/tier/tier.t | 11 +++++++---- xlators/cluster/dht/src/dht-common.c | 31 +++++++++++++++++-------------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t index 289fc2935b0..b8aefdfa305 100755 --- a/tests/basic/tier/tier.t +++ b/tests/basic/tier/tier.t @@ -137,6 +137,11 @@ TEST ! $CLI volume set $V0 cluster.tier-demote-frequency 4 TEST ! 
$CLI volume tier $V0 detach commit force TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST +# create a file, make sure it can be deleted after attach tier. +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +cd $M0 +TEST touch delete_me.txt + # stop the volume and restart it. The rebalance daemon should restart. TEST $CLI volume stop $V0 @@ -156,10 +161,7 @@ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 TEST $CLI volume set $V0 cluster.write-freq-threshold 0 -TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; - # Basic operations. -cd $M0 TEST stat . TEST mkdir d1 TEST [ -d d1 ] @@ -167,6 +169,7 @@ TEST touch d1/file1 TEST mkdir d1/d2 TEST [ -d d1/d2 ] TEST find d1 +TEST rm --interactive=never delete_me.txt mkdir /tmp/d1 # Create a file. It should be on the fast tier. @@ -199,7 +202,7 @@ cat d1/data3.txt sleep $PROMOTE_TIMEOUT sleep $DEMOTE_FREQ -EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 2 6 +EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 2 7 # stop gluster, when it comes back info file should have tiered volume killall glusterd diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 4a55905f36a..ef1776beb30 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -4485,6 +4485,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, xlator_t *hashed_subvol = 0; int ret = 0; int readdir_optimize = 0; + dht_inode_ctx_t *ctx = NULL; INIT_LIST_HEAD (&entries.list); prev = cookie; @@ -4578,20 +4579,22 @@ list: if (orig_entry->dict) entry->dict = dict_ref (orig_entry->dict); - /* making sure we set the inode ctx right with layout, - currently possible only for non-directories, so for - directories don't set entry inodes */ - if (!IA_ISDIR(entry->d_stat.ia_type) && orig_entry->inode) { - ret = dht_layout_preset (this, 
prev->this, - orig_entry->inode); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SET_FAILED, - "failed to link the layout in inode"); - entry->inode = inode_ref (orig_entry->inode); - } else if (orig_entry->inode) { - dht_inode_ctx_time_update (orig_entry->inode, this, - &entry->d_stat, 1); + /* For non-directories don't set inode ctx from readdirp cbk, + * let them populate on first lookup, for directories + * don't set entry inodes */ + if (orig_entry->inode) { + ret = dht_inode_ctx_get (orig_entry->inode, this, &ctx); + if (ret == -1) { + entry->inode = NULL; + } else { + entry->inode = inode_ref (orig_entry->inode); + if (IA_ISDIR (entry->d_stat.ia_type)) { + dht_inode_ctx_time_update (orig_entry->inode, + this, &entry->d_stat, 1); + + } + + } } list_add_tail (&entry->list, &entries.list); -- cgit