diff options
author | vmallika <vmallika@redhat.com> | 2014-12-24 15:13:36 +0530 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2014-12-27 05:52:07 -0800 |
commit | b6ea761969f85fbb0f22810eb3a3bf1476c8792c (patch) | |
tree | 935f71888ebcd80b5ff428a02167e2aac45c5483 | |
parent | 2947752836bd3ddbc572b59cecd24557050ec2a5 (diff) |
quota: For a rename operation, do quota_check_limit only till the
common ancestor of src and dst file
Example:
set quota limit set to 1GB on /
create a file /a1/b1/file1 of 600MB
mv /a1/b1/file1 /a1/b1/file2
This rename fails as it takes delta into account which sums up to 1.2BG.
Though we are not creating new file, we still get quota exceeded error.
So quota enforce should happen only till b1.
Similarly:
mv /a/b/c/file /a/b/x/y/file
quota enforce should happen only till dir 'b'
Change-Id: Ia1e5363da876c3d71bd424e67a8bb28b7ac1c7c1
BUG: 1153964
Signed-off-by: vmallika <vmallika@redhat.com>
Reviewed-on: http://review.gluster.org/8940
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Tested-by: Raghavendra G <rgowdapp@redhat.com>
-rwxr-xr-x | tests/basic/quota-anon-fd-nfs.t | 84 | ||||
-rw-r--r-- | tests/basic/quota-nfs.c (renamed from tests/basic/quota-anon-fd-nfs.c) | 0 | ||||
-rwxr-xr-x | tests/basic/quota-nfs.t | 52 | ||||
-rwxr-xr-x | tests/bugs/bug-1161156.t | 10 | ||||
-rw-r--r-- | xlators/features/quota/src/quota.c | 325 | ||||
-rw-r--r-- | xlators/features/quota/src/quota.h | 20 |
6 files changed, 410 insertions, 81 deletions
diff --git a/tests/basic/quota-anon-fd-nfs.t b/tests/basic/quota-anon-fd-nfs.t index 0f7a9aac52e..5841580f9a8 100755 --- a/tests/basic/quota-anon-fd-nfs.t +++ b/tests/basic/quota-anon-fd-nfs.t @@ -1,17 +1,11 @@ #!/bin/bash . $(dirname $0)/../include.rc -. $(dirname $0)/../nfs.rc - -function usage() -{ - local QUOTA_PATH=$1; - $CLI volume quota $V0 list $QUOTA_PATH | \ - grep "$QUOTA_PATH" | awk '{print $4}' -} +. $(dirname $0)/../fileio.rc cleanup; +TESTS_EXPECTED_IN_LOOP=16 TEST glusterd TEST pidof glusterd TEST $CLI volume info; @@ -19,34 +13,74 @@ TEST $CLI volume info; TEST $CLI volume create $V0 $H0:$B0/brick1; EXPECT 'Created' volinfo_field $V0 'Status'; + +# The test makes use of inode-lru-limit to hit a scenario, where we +# find an inode whose ancestry is not there. Following is the +# hypothesis (which is confirmed by seeing logs indicating that +# codepath has been executed, but not through a good understanding of +# NFS internals). + +# At the end of an fop, the reference count of an inode would be +# zero. The inode (and its ancestry) persists in memory only +# because of non-zero lookup count. These looked up inodes are put +# in an lru queue of size 1 (here). So, there can be at most one +# such inode in memory. + +# NFS Server makes use of anonymous fds. So, if it cannot find +# valid fd, it does a nameless lookup. This gives us an inode +# whose ancestry is NULL. When a write happens on this inode, +# quota-enforcer/marker finds a NULL ancestry and asks +# storage/posix to build it. + TEST $CLI volume set $V0 network.inode-lru-limit 1 +TEST $CLI volume set $V0 performance.nfs.write-behind off TEST $CLI volume start $V0; EXPECT 'Started' volinfo_field $V0 'Status'; -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available -TEST mount_nfs $H0:/$V0 $N0 +TEST $CLI volume quota $V0 enable +TEST $CLI volume quota $V0 limit-usage / 1 +TEST $CLI volume quota $V0 soft-timeout 0 +TEST $CLI volume quota $V0 hard-timeout 0 + +TEST mount -t nfs -o noac,soft,nolock,vers=3 $H0:/$V0 $N0 deep=/0/1/2/3/4/5/6/7/8/9 TEST mkdir -p $N0/$deep -TEST dd if=/dev/zero of=$N0/$deep/file bs=1k count=10240 +TEST touch $N0/$deep/file1 $N0/$deep/file2 $N0/$deep/file3 $N0/$deep/file4 -TEST $CLI volume quota $V0 enable -TEST $CLI volume quota $V0 limit-usage / 20MB -TEST $CLI volume quota $V0 soft-timeout 0 -TEST $CLI volume quota $V0 hard-timeout 0 +TEST fd_open 3 'w' "$N0/$deep/file1" +TEST fd_open 4 'w' "$N0/$deep/file2" +TEST fd_open 5 'w' "$N0/$deep/file3" +TEST fd_open 6 'w' "$N0/$deep/file4" + +# consume all quota +TEST ! dd if=/dev/zero of="$N0/$deep/file" bs=1MB count=1 + +# At the end of each fop in server, reference count of the +# inode associated with each of the file above drops to zero and hence +# put into lru queue. Since lru-limit is set to 1, an fop next file +# will displace the current inode from itable. This will ensure that +# when writes happens on same fd, fd resolution results in +# nameless lookup from server and quota_writev encounters an fd +# associated with an inode whose parent is not present in itable. + +for j in $(seq 1 2); do + for i in $(seq 3 6); do + # failing writes indicate that we are enforcing quota set on / + # even with anonymous fds. + TEST_IN_LOOP ! fd_write $i "content" + TEST_IN_LOOP sync + done +done -TEST dd if=/dev/zero of=$N0/$deep/newfile_1 bs=512 count=10240 -# wait for write behind to complete. -EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" usage "/" +exec 3>&- +exec 4>&- +exec 5>&- +exec 6>&- -# compile the test write program and run it -TEST $CC $(dirname $0)/quota-anon-fd-nfs.c -o $(dirname $0)/quota-anon-fd-nfs; -# Try to create a 100Mb file which should fail -TEST ! $(dirname $0)/quota-anon-fd-nfs $N0/$deep/newfile_2 "104857600" -TEST rm -f $N0/$deep/newfile_2 +$CLI volume statedump $V0 all -## Before killing daemon to avoid deadlocks -umount_nfs $N0 +TEST umount -l $N0 cleanup; diff --git a/tests/basic/quota-anon-fd-nfs.c b/tests/basic/quota-nfs.c index 4cc0322e132..4cc0322e132 100644 --- a/tests/basic/quota-anon-fd-nfs.c +++ b/tests/basic/quota-nfs.c diff --git a/tests/basic/quota-nfs.t b/tests/basic/quota-nfs.t new file mode 100755 index 00000000000..501d8ab6381 --- /dev/null +++ b/tests/basic/quota-nfs.t @@ -0,0 +1,52 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../nfs.rc + +function usage() +{ + local QUOTA_PATH=$1; + $CLI volume quota $V0 list $QUOTA_PATH | \ + grep "$QUOTA_PATH" | awk '{print $4}' +} + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 $H0:$B0/brick1; +EXPECT 'Created' volinfo_field $V0 'Status'; + +TEST $CLI volume set $V0 network.inode-lru-limit 1 + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +TEST mount_nfs $H0:/$V0 $N0 +deep=/0/1/2/3/4/5/6/7/8/9 +TEST mkdir -p $N0/$deep + +TEST dd if=/dev/zero of=$N0/$deep/file bs=1k count=10240 + +TEST $CLI volume quota $V0 enable +TEST $CLI volume quota $V0 limit-usage / 20MB +TEST $CLI volume quota $V0 soft-timeout 0 +TEST $CLI volume quota $V0 hard-timeout 0 + +TEST dd if=/dev/zero of=$N0/$deep/newfile_1 bs=512 count=10240 +# wait for write behind to complete. +EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" usage "/" + +# compile the test write program and run it +TEST $CC $(dirname $0)/quota-nfs.c -o $(dirname $0)/quota-nfs; +# Try to create a 100Mb file which should fail +TEST ! $(dirname $0)/quota-nfs $N0/$deep/newfile_2 "104857600" +TEST rm -f $N0/$deep/newfile_2 + +## Before killing daemon to avoid deadlocks +umount_nfs $N0 + +cleanup; diff --git a/tests/bugs/bug-1161156.t b/tests/bugs/bug-1161156.t index 12ebc45bdd3..9f33391d744 100755 --- a/tests/bugs/bug-1161156.t +++ b/tests/bugs/bug-1161156.t @@ -27,11 +27,13 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available TEST mount_nfs $H0:/$V0 $N0 mydir="dir" TEST mkdir -p $N0/$mydir +TEST mkdir -p $N0/newdir TEST dd if=/dev/zero of=$N0/$mydir/file bs=1k count=10240 TEST $CLI volume quota $V0 enable TEST $CLI volume quota $V0 limit-usage / 20MB +TEST $CLI volume quota $V0 limit-usage /newdir 5MB TEST $CLI volume quota $V0 soft-timeout 0 TEST $CLI volume quota $V0 hard-timeout 0 @@ -40,8 +42,12 @@ TEST dd if=/dev/zero of=$N0/$mydir/newfile_1 bs=512 count=10240 EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" usage "/" TEST ! dd if=/dev/zero of=$N0/$mydir/newfile_2 bs=1k count=10240 -# Test the rename, it should fail due to space restrictions -TEST ! mv $N0/dir/file $N0/dir/newfile_3 +# Test rename within a directory. It should pass even when the +# corresponding directory quota is filled. +TEST mv $N0/dir/file $N0/dir/newfile_3 + +# rename should fail here with disk quota exceeded +TEST ! mv $N0/dir/newfile_3 $N0/newdir/ # cleanup umount_nfs $N0 diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 3c1b8e09c5c..f903b4e57b7 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -14,17 +14,6 @@ #include "defaults.h" #include "statedump.h" -void -quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this); - -int32_t -quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, - char *name, uuid_t par); - -int -quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict, - loc_t *loc, struct iatt *buf, int32_t *op_errno); - struct volume_options options[]; static int32_t @@ -251,6 +240,164 @@ out: return; } +static inline inode_t* +__quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name) +{ + inode_t *parent = NULL; + + parent = inode_parent (inode, pargfid, name); + inode_unref (inode); + return parent; +} + +static inline inode_t* +quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name) +{ + inode_t *parent = NULL; + + parent = __quota_inode_parent (inode, pargfid, name); + if (!parent) + gf_log_callingfn (THIS->name, GF_LOG_ERROR, "Failed to find " + "ancestor for inode (%s)", + uuid_utoa(inode->gfid)); + + return parent; +} + +int32_t +quota_inode_depth (inode_t *inode) +{ + int depth = 0; + inode_t *cur_inode = NULL; + + cur_inode = inode_ref (inode); + while (cur_inode && !__is_root_gfid (cur_inode->gfid)) { + depth++; + cur_inode = quota_inode_parent (cur_inode, 0 , NULL); + if (!cur_inode) + depth = -1; + } + + if (cur_inode) + inode_unref (cur_inode); + + return depth; +} + +int32_t quota_find_common_ancestor (inode_t *inode1, inode_t *inode2, + uuid_t *common_ancestor) +{ + int32_t depth1 = 0; + int32_t depth2 = 0; + int32_t ret = -1; + inode_t *cur_inode1 = NULL; + inode_t *cur_inode2 = NULL; + + depth1 = quota_inode_depth (inode1); + if (depth1 < 0) + goto out; + + depth2 = quota_inode_depth (inode2); + if (depth2 < 0) + goto out; + + cur_inode1 = inode_ref (inode1); + cur_inode2 = inode_ref (inode2); + + while (cur_inode1 && depth1 > depth2) { + cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL); + depth1--; + } + + while (cur_inode2 && depth2 > depth1) { + cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL); + depth2--; + } + + while (depth1 && cur_inode1 && cur_inode2 && cur_inode1 != cur_inode2) { + cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL); + cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL); + depth1--; + } + + if (cur_inode1 && cur_inode2) { + uuid_copy (*common_ancestor, cur_inode1->gfid); + ret = 0; + } +out: + if (cur_inode1) + inode_unref (cur_inode1); + + if (cur_inode2) + inode_unref (cur_inode2); + + return ret; + } + +void +check_ancestory_continue (struct list_head *parents, inode_t *inode, + int32_t op_ret, int32_t op_errno, void *data) +{ + call_frame_t *frame = NULL; + quota_local_t *local = NULL; + uint32_t link_count = 0; + + frame = data; + local = frame->local; + + if (op_ret < 0 || (parents && list_empty (parents))) { + if (op_ret >= 0) { + gf_log (THIS->name, GF_LOG_WARNING, + "Couldn't build ancestry for inode (gfid:%s). " + "Without knowing ancestors till root, quota " + "cannot be enforced. " + "Hence, failing fop with EIO", + uuid_utoa (inode->gfid)); + op_errno = EIO; + op_ret = -1; + } + } + + LOCK (&local->lock); + { + link_count = --local->link_count; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + } + UNLOCK (&local->lock); + + if (link_count == 0) + local->fop_continue_cbk (frame); +} + +void +check_ancestory (call_frame_t *frame, inode_t *inode) +{ + inode_t *cur_inode = NULL; + inode_t *parent = NULL; + + cur_inode = inode_ref (inode); + while (cur_inode && !__is_root_gfid (cur_inode->gfid)) { + parent = inode_parent (cur_inode, 0, NULL); + if (!parent) { + quota_build_ancestry (cur_inode, + check_ancestory_continue, frame); + return; + } + inode_unref (cur_inode); + cur_inode = parent; + } + + if (cur_inode) { + inode_unref (cur_inode); + check_ancestory_continue (NULL, NULL, 0, 0, frame); + } else { + check_ancestory_continue (NULL, NULL, -1, ESTALE, frame); + } +} + static inline void quota_link_count_decrement (quota_local_t *local) { @@ -827,6 +974,14 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, } do { + /* In a rename operation, enforce should be stopped at common + ancestor */ + if (!uuid_is_null (local->common_ancestor) && + !uuid_compare (_inode->gfid, local->common_ancestor)) { + quota_link_count_decrement (local); + break; + } + if (ctx != NULL && (ctx->hard_lim > 0 || ctx->soft_lim > 0)) { wouldbe_size = ctx->size + delta; @@ -2046,63 +2201,51 @@ out: return 0; } -int32_t -quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +void +quota_rename_continue (call_frame_t *frame) { - quota_priv_t *priv = NULL; - int32_t ret = -1, op_errno = ENOMEM; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - call_stub_t *stub = NULL; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - - local = quota_local_new (); - if (local == NULL) { - goto err; - } - - frame->local = local; + int32_t ret = -1; + int32_t op_errno = EIO; + quota_local_t *local = NULL; + uuid_t common_ancestor = {0}; + xlator_t *this = NULL; + quota_inode_ctx_t *ctx = NULL; - ret = loc_copy (&local->oldloc, oldloc); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); - goto err; - } + local = frame->local; + this = THIS; - ret = loc_copy (&local->newloc, newloc); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + if (local->op_ret < 0) { + op_errno = local->op_errno; goto err; } - stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc, - xdata); - if (stub == NULL) { + ret = quota_find_common_ancestor (local->oldloc.parent, + local->newloc.parent, + &common_ancestor); + if (ret < 0 || uuid_is_null(common_ancestor)) { + gf_log (this->name, GF_LOG_ERROR, "failed to get " + "common_ancestor for %s and %s", + local->oldloc.path, local->newloc.path); + op_errno = ESTALE; goto err; } LOCK (&local->lock); { local->link_count = 1; - local->stub = stub; + uuid_copy (local->common_ancestor, common_ancestor); } UNLOCK (&local->lock); - if (QUOTA_REG_OR_LNK_FILE (oldloc->inode->ia_type)) { - ret = quota_inode_ctx_get (oldloc->inode, this, &ctx, 0); + if (QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type)) { + ret = quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0); if (ctx == NULL) { gf_log (this->name, GF_LOG_WARNING, "quota context not set in inode (gfid:%s), " "considering file size as zero while enforcing " "quota on new ancestry", - oldloc->inode ? uuid_utoa (oldloc->inode->gfid) - : "0"); + uuid_utoa (local->oldloc.inode->gfid)); local->delta = 0; - } else { /* FIXME: We need to account for the size occupied by this @@ -2112,25 +2255,99 @@ quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, * directory inode*/ /* FIXME: The following code assumes that regular files and - *linkfiles are present, in their entirety, in a single - brick. This *assumption is invalid in the case of - stripe.*/ + * linkfiles are present, in their entirety, in a single + * brick. This *assumption is invalid in the case of + * stripe.*/ local->delta = ctx->buf.ia_blocks * 512; } - } else if (IA_ISDIR (oldloc->inode->ia_type)) { - ret = quota_validate (frame, oldloc->inode, this, + } else if (IA_ISDIR (local->oldloc.inode->ia_type)) { + ret = quota_validate (frame, local->oldloc.inode, this, quota_rename_get_size_cbk); if (ret){ op_errno = -ret; goto err; } - return 0; + return; } - quota_check_limit (frame, newloc->parent, this, NULL, NULL); + quota_check_limit (frame, local->newloc.parent, this, NULL, NULL); + return; + +err: + if (local && local->stub) + call_stub_destroy (local->stub); + + QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, + NULL, NULL, NULL, NULL, NULL); + return; + +} + +int32_t +quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + quota_priv_t *priv = NULL; + int32_t ret = -1; + int32_t op_errno = ENOMEM; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + uuid_t common_ancestor = {0}; + + priv = this->private; + + WIND_IF_QUOTAOFF (priv->is_quota_on, off); + + /* No need to check quota limit if src and dst parents are same */ + if (oldloc->parent && newloc->parent && + !uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) { + gf_log (this->name, GF_LOG_DEBUG, "rename %s -> %s are " + "in the same directory, so skip check limit", + oldloc->path, newloc->path); + goto off; + } + + local = quota_local_new (); + if (local == NULL) { + goto err; + } + + frame->local = local; + + ret = loc_copy (&local->oldloc, oldloc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + ret = loc_copy (&local->newloc, newloc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc, + xdata); + if (stub == NULL) { + goto err; + } + + LOCK (&local->lock); + { + /* link_count here tell how many check_ancestory should be done + * before continuing the FOP + */ + local->link_count = 2; + local->stub = stub; + local->fop_continue_cbk = quota_rename_continue; + } + UNLOCK (&local->lock); + + check_ancestory (frame, newloc->parent); + check_ancestory (frame, oldloc->parent); return 0; err: diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h index 5a4bcb2b1e0..3d6c65f8fb6 100644 --- a/xlators/features/quota/src/quota.h +++ b/xlators/features/quota/src/quota.h @@ -181,6 +181,9 @@ typedef void (*quota_ancestry_built_t) (struct list_head *parents, inode_t *inode, int32_t op_ret, int32_t op_errno, void *data); +typedef void +(*quota_fop_continue_t) (call_frame_t *frame); + struct quota_local { gf_lock_t lock; uint32_t validate_count; @@ -196,7 +199,9 @@ struct quota_local { gf_boolean_t skip_check; char just_validated; fop_lookup_cbk_t validate_cbk; + quota_fop_continue_t fop_continue_cbk; inode_t *inode; + uuid_t common_ancestor; /* Used by quota_rename */ call_stub_t *stub; struct iobref *iobref; quota_limit_t limit; @@ -235,4 +240,19 @@ void quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, int64_t delta); +int +quota_build_ancestry (inode_t *inode, quota_ancestry_built_t ancestry_cbk, + void *data); + +void +quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this); + +int32_t +quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, + char *name, uuid_t par); + +int +quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict, + loc_t *loc, struct iatt *buf, int32_t *op_errno); + #endif |