diff options
-rwxr-xr-x | tests/basic/quota-anon-fd-nfs.t | 84 | ||||
-rw-r--r-- | tests/basic/quota-nfs.c (renamed from tests/basic/quota-anon-fd-nfs.c) | 0 | ||||
-rwxr-xr-x | tests/basic/quota-nfs.t | 52 | ||||
-rwxr-xr-x | tests/bugs/bug-1161156.t | 10 | ||||
-rw-r--r-- | xlators/features/quota/src/quota.c | 325 | ||||
-rw-r--r-- | xlators/features/quota/src/quota.h | 20 |
6 files changed, 410 insertions, 81 deletions
diff --git a/tests/basic/quota-anon-fd-nfs.t b/tests/basic/quota-anon-fd-nfs.t index 0f7a9aac52e..5841580f9a8 100755 --- a/tests/basic/quota-anon-fd-nfs.t +++ b/tests/basic/quota-anon-fd-nfs.t @@ -1,17 +1,11 @@ #!/bin/bash . $(dirname $0)/../include.rc -. $(dirname $0)/../nfs.rc - -function usage() -{ - local QUOTA_PATH=$1; - $CLI volume quota $V0 list $QUOTA_PATH | \ - grep "$QUOTA_PATH" | awk '{print $4}' -} +. $(dirname $0)/../fileio.rc cleanup; +TESTS_EXPECTED_IN_LOOP=16 TEST glusterd TEST pidof glusterd TEST $CLI volume info; @@ -19,34 +13,74 @@ TEST $CLI volume info; TEST $CLI volume create $V0 $H0:$B0/brick1; EXPECT 'Created' volinfo_field $V0 'Status'; + +# The test makes use of inode-lru-limit to hit a scenario, where we +# find an inode whose ancestry is not there. Following is the +# hypothesis (which is confirmed by seeing logs indicating that +# codepath has been executed, but not through a good understanding of +# NFS internals). + +# At the end of an fop, the reference count of an inode would be +# zero. The inode (and its ancestry) persists in memory only +# because of non-zero lookup count. These looked up inodes are put +# in an lru queue of size 1 (here). So, there can be at most one +# such inode in memory. + +# NFS Server makes use of anonymous fds. So, if it cannot find +# valid fd, it does a nameless lookup. This gives us an inode +# whose ancestry is NULL. When a write happens on this inode, +# quota-enforcer/marker finds a NULL ancestry and asks +# storage/posix to build it. + TEST $CLI volume set $V0 network.inode-lru-limit 1 +TEST $CLI volume set $V0 performance.nfs.write-behind off TEST $CLI volume start $V0; EXPECT 'Started' volinfo_field $V0 'Status'; -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available -TEST mount_nfs $H0:/$V0 $N0 +TEST $CLI volume quota $V0 enable +TEST $CLI volume quota $V0 limit-usage / 1 +TEST $CLI volume quota $V0 soft-timeout 0 +TEST $CLI volume quota $V0 hard-timeout 0 + +TEST mount -t nfs -o noac,soft,nolock,vers=3 $H0:/$V0 $N0 deep=/0/1/2/3/4/5/6/7/8/9 TEST mkdir -p $N0/$deep -TEST dd if=/dev/zero of=$N0/$deep/file bs=1k count=10240 +TEST touch $N0/$deep/file1 $N0/$deep/file2 $N0/$deep/file3 $N0/$deep/file4 -TEST $CLI volume quota $V0 enable -TEST $CLI volume quota $V0 limit-usage / 20MB -TEST $CLI volume quota $V0 soft-timeout 0 -TEST $CLI volume quota $V0 hard-timeout 0 +TEST fd_open 3 'w' "$N0/$deep/file1" +TEST fd_open 4 'w' "$N0/$deep/file2" +TEST fd_open 5 'w' "$N0/$deep/file3" +TEST fd_open 6 'w' "$N0/$deep/file4" + +# consume all quota +TEST ! dd if=/dev/zero of="$N0/$deep/file" bs=1MB count=1 + +# At the end of each fop in server, reference count of the +# inode associated with each of the file above drops to zero and hence +# put into lru queue. Since lru-limit is set to 1, an fop next file +# will displace the current inode from itable. This will ensure that +# when writes happens on same fd, fd resolution results in +# nameless lookup from server and quota_writev encounters an fd +# associated with an inode whose parent is not present in itable. + +for j in $(seq 1 2); do + for i in $(seq 3 6); do + # failing writes indicate that we are enforcing quota set on / + # even with anonymous fds. + TEST_IN_LOOP ! fd_write $i "content" + TEST_IN_LOOP sync + done +done -TEST dd if=/dev/zero of=$N0/$deep/newfile_1 bs=512 count=10240 -# wait for write behind to complete. -EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" usage "/" +exec 3>&- +exec 4>&- +exec 5>&- +exec 6>&- -# compile the test write program and run it -TEST $CC $(dirname $0)/quota-anon-fd-nfs.c -o $(dirname $0)/quota-anon-fd-nfs; -# Try to create a 100Mb file which should fail -TEST ! $(dirname $0)/quota-anon-fd-nfs $N0/$deep/newfile_2 "104857600" -TEST rm -f $N0/$deep/newfile_2 +$CLI volume statedump $V0 all -## Before killing daemon to avoid deadlocks -umount_nfs $N0 +TEST umount -l $N0 cleanup; diff --git a/tests/basic/quota-anon-fd-nfs.c b/tests/basic/quota-nfs.c index 4cc0322e132..4cc0322e132 100644 --- a/tests/basic/quota-anon-fd-nfs.c +++ b/tests/basic/quota-nfs.c diff --git a/tests/basic/quota-nfs.t b/tests/basic/quota-nfs.t new file mode 100755 index 00000000000..501d8ab6381 --- /dev/null +++ b/tests/basic/quota-nfs.t @@ -0,0 +1,52 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../nfs.rc + +function usage() +{ + local QUOTA_PATH=$1; + $CLI volume quota $V0 list $QUOTA_PATH | \ + grep "$QUOTA_PATH" | awk '{print $4}' +} + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 $H0:$B0/brick1; +EXPECT 'Created' volinfo_field $V0 'Status'; + +TEST $CLI volume set $V0 network.inode-lru-limit 1 + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +TEST mount_nfs $H0:/$V0 $N0 +deep=/0/1/2/3/4/5/6/7/8/9 +TEST mkdir -p $N0/$deep + +TEST dd if=/dev/zero of=$N0/$deep/file bs=1k count=10240 + +TEST $CLI volume quota $V0 enable +TEST $CLI volume quota $V0 limit-usage / 20MB +TEST $CLI volume quota $V0 soft-timeout 0 +TEST $CLI volume quota $V0 hard-timeout 0 + +TEST dd if=/dev/zero of=$N0/$deep/newfile_1 bs=512 count=10240 +# wait for write behind to complete. +EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" usage "/" + +# compile the test write program and run it +TEST $CC $(dirname $0)/quota-nfs.c -o $(dirname $0)/quota-nfs; +# Try to create a 100Mb file which should fail +TEST ! $(dirname $0)/quota-nfs $N0/$deep/newfile_2 "104857600" +TEST rm -f $N0/$deep/newfile_2 + +## Before killing daemon to avoid deadlocks +umount_nfs $N0 + +cleanup; diff --git a/tests/bugs/bug-1161156.t b/tests/bugs/bug-1161156.t index 12ebc45bdd3..9f33391d744 100755 --- a/tests/bugs/bug-1161156.t +++ b/tests/bugs/bug-1161156.t @@ -27,11 +27,13 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available TEST mount_nfs $H0:/$V0 $N0 mydir="dir" TEST mkdir -p $N0/$mydir +TEST mkdir -p $N0/newdir TEST dd if=/dev/zero of=$N0/$mydir/file bs=1k count=10240 TEST $CLI volume quota $V0 enable TEST $CLI volume quota $V0 limit-usage / 20MB +TEST $CLI volume quota $V0 limit-usage /newdir 5MB TEST $CLI volume quota $V0 soft-timeout 0 TEST $CLI volume quota $V0 hard-timeout 0 @@ -40,8 +42,12 @@ TEST dd if=/dev/zero of=$N0/$mydir/newfile_1 bs=512 count=10240 EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" usage "/" TEST ! dd if=/dev/zero of=$N0/$mydir/newfile_2 bs=1k count=10240 -# Test the rename, it should fail due to space restrictions -TEST ! mv $N0/dir/file $N0/dir/newfile_3 +# Test rename within a directory. It should pass even when the +# corresponding directory quota is filled. +TEST mv $N0/dir/file $N0/dir/newfile_3 + +# rename should fail here with disk quota exceeded +TEST ! mv $N0/dir/newfile_3 $N0/newdir/ # cleanup umount_nfs $N0 diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 3c1b8e09c5c..f903b4e57b7 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -14,17 +14,6 @@ #include "defaults.h" #include "statedump.h" -void -quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this); - -int32_t -quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, - char *name, uuid_t par); - -int -quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict, - loc_t *loc, struct iatt *buf, int32_t *op_errno); - struct volume_options options[]; static int32_t @@ -251,6 +240,164 @@ out: return; } +static inline inode_t* +__quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name) +{ + inode_t *parent = NULL; + + parent = inode_parent (inode, pargfid, name); + inode_unref (inode); + return parent; +} + +static inline inode_t* +quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name) +{ + inode_t *parent = NULL; + + parent = __quota_inode_parent (inode, pargfid, name); + if (!parent) + gf_log_callingfn (THIS->name, GF_LOG_ERROR, "Failed to find " + "ancestor for inode (%s)", + uuid_utoa(inode->gfid)); + + return parent; +} + +int32_t +quota_inode_depth (inode_t *inode) +{ + int depth = 0; + inode_t *cur_inode = NULL; + + cur_inode = inode_ref (inode); + while (cur_inode && !__is_root_gfid (cur_inode->gfid)) { + depth++; + cur_inode = quota_inode_parent (cur_inode, 0 , NULL); + if (!cur_inode) + depth = -1; + } + + if (cur_inode) + inode_unref (cur_inode); + + return depth; +} + +int32_t quota_find_common_ancestor (inode_t *inode1, inode_t *inode2, + uuid_t *common_ancestor) +{ + int32_t depth1 = 0; + int32_t depth2 = 0; + int32_t ret = -1; + inode_t *cur_inode1 = NULL; + inode_t *cur_inode2 = NULL; + + depth1 = quota_inode_depth (inode1); + if (depth1 < 0) + goto out; + + depth2 = quota_inode_depth (inode2); + if (depth2 < 0) + goto out; + + cur_inode1 = inode_ref (inode1); + cur_inode2 = inode_ref (inode2); + + while (cur_inode1 && depth1 > depth2) { + cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL); + depth1--; + } + + while (cur_inode2 && depth2 > depth1) { + cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL); + depth2--; + } + + while (depth1 && cur_inode1 && cur_inode2 && cur_inode1 != cur_inode2) { + cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL); + cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL); + depth1--; + } + + if (cur_inode1 && cur_inode2) { + uuid_copy (*common_ancestor, cur_inode1->gfid); + ret = 0; + } +out: + if (cur_inode1) + inode_unref (cur_inode1); + + if (cur_inode2) + inode_unref (cur_inode2); + + return ret; + } + +void +check_ancestory_continue (struct list_head *parents, inode_t *inode, + int32_t op_ret, int32_t op_errno, void *data) +{ + call_frame_t *frame = NULL; + quota_local_t *local = NULL; + uint32_t link_count = 0; + + frame = data; + local = frame->local; + + if (op_ret < 0 || (parents && list_empty (parents))) { + if (op_ret >= 0) { + gf_log (THIS->name, GF_LOG_WARNING, + "Couldn't build ancestry for inode (gfid:%s). " + "Without knowing ancestors till root, quota " + "cannot be enforced. " + "Hence, failing fop with EIO", + uuid_utoa (inode->gfid)); + op_errno = EIO; + op_ret = -1; + } + } + + LOCK (&local->lock); + { + link_count = --local->link_count; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + } + UNLOCK (&local->lock); + + if (link_count == 0) + local->fop_continue_cbk (frame); +} + +void +check_ancestory (call_frame_t *frame, inode_t *inode) +{ + inode_t *cur_inode = NULL; + inode_t *parent = NULL; + + cur_inode = inode_ref (inode); + while (cur_inode && !__is_root_gfid (cur_inode->gfid)) { + parent = inode_parent (cur_inode, 0, NULL); + if (!parent) { + quota_build_ancestry (cur_inode, + check_ancestory_continue, frame); + return; + } + inode_unref (cur_inode); + cur_inode = parent; + } + + if (cur_inode) { + inode_unref (cur_inode); + check_ancestory_continue (NULL, NULL, 0, 0, frame); + } else { + check_ancestory_continue (NULL, NULL, -1, ESTALE, frame); + } +} + static inline void quota_link_count_decrement (quota_local_t *local) { @@ -827,6 +974,14 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, } do { + /* In a rename operation, enforce should be stopped at common + ancestor */ + if (!uuid_is_null (local->common_ancestor) && + !uuid_compare (_inode->gfid, local->common_ancestor)) { + quota_link_count_decrement (local); + break; + } + if (ctx != NULL && (ctx->hard_lim > 0 || ctx->soft_lim > 0)) { wouldbe_size = ctx->size + delta; @@ -2046,63 +2201,51 @@ out: return 0; } -int32_t -quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +void +quota_rename_continue (call_frame_t *frame) { - quota_priv_t *priv = NULL; - int32_t ret = -1, op_errno = ENOMEM; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - call_stub_t *stub = NULL; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - - local = quota_local_new (); - if (local == NULL) { - goto err; - } - - frame->local = local; + int32_t ret = -1; + int32_t op_errno = EIO; + quota_local_t *local = NULL; + uuid_t common_ancestor = {0}; + xlator_t *this = NULL; + quota_inode_ctx_t *ctx = NULL; - ret = loc_copy (&local->oldloc, oldloc); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); - goto err; - } + local = frame->local; + this = THIS; - ret = loc_copy (&local->newloc, newloc); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + if (local->op_ret < 0) { + op_errno = local->op_errno; goto err; } - stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc, - xdata); - if (stub == NULL) { + ret = quota_find_common_ancestor (local->oldloc.parent, + local->newloc.parent, + &common_ancestor); + if (ret < 0 || uuid_is_null(common_ancestor)) { + gf_log (this->name, GF_LOG_ERROR, "failed to get " + "common_ancestor for %s and %s", + local->oldloc.path, local->newloc.path); + op_errno = ESTALE; goto err; } LOCK (&local->lock); { local->link_count = 1; - local->stub = stub; + uuid_copy (local->common_ancestor, common_ancestor); } UNLOCK (&local->lock); - if (QUOTA_REG_OR_LNK_FILE (oldloc->inode->ia_type)) { - ret = quota_inode_ctx_get (oldloc->inode, this, &ctx, 0); + if (QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type)) { + ret = quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0); if (ctx == NULL) { gf_log (this->name, GF_LOG_WARNING, "quota context not set in inode (gfid:%s), " "considering file size as zero while enforcing " "quota on new ancestry", - oldloc->inode ? uuid_utoa (oldloc->inode->gfid) - : "0"); + uuid_utoa (local->oldloc.inode->gfid)); local->delta = 0; - } else { /* FIXME: We need to account for the size occupied by this @@ -2112,25 +2255,99 @@ quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, * directory inode*/ /* FIXME: The following code assumes that regular files and - *linkfiles are present, in their entirety, in a single - brick. This *assumption is invalid in the case of - stripe.*/ + * linkfiles are present, in their entirety, in a single + * brick. This *assumption is invalid in the case of + * stripe.*/ local->delta = ctx->buf.ia_blocks * 512; } - } else if (IA_ISDIR (oldloc->inode->ia_type)) { - ret = quota_validate (frame, oldloc->inode, this, + } else if (IA_ISDIR (local->oldloc.inode->ia_type)) { + ret = quota_validate (frame, local->oldloc.inode, this, quota_rename_get_size_cbk); if (ret){ op_errno = -ret; goto err; } - return 0; + return; } - quota_check_limit (frame, newloc->parent, this, NULL, NULL); + quota_check_limit (frame, local->newloc.parent, this, NULL, NULL); + return; + +err: + if (local && local->stub) + call_stub_destroy (local->stub); + + QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, + NULL, NULL, NULL, NULL, NULL); + return; + +} + +int32_t +quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + quota_priv_t *priv = NULL; + int32_t ret = -1; + int32_t op_errno = ENOMEM; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + uuid_t common_ancestor = {0}; + + priv = this->private; + + WIND_IF_QUOTAOFF (priv->is_quota_on, off); + + /* No need to check quota limit if src and dst parents are same */ + if (oldloc->parent && newloc->parent && + !uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) { + gf_log (this->name, GF_LOG_DEBUG, "rename %s -> %s are " + "in the same directory, so skip check limit", + oldloc->path, newloc->path); + goto off; + } + + local = quota_local_new (); + if (local == NULL) { + goto err; + } + + frame->local = local; + + ret = loc_copy (&local->oldloc, oldloc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + ret = loc_copy (&local->newloc, newloc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc, + xdata); + if (stub == NULL) { + goto err; + } + + LOCK (&local->lock); + { + /* link_count here tell how many check_ancestory should be done + * before continuing the FOP + */ + local->link_count = 2; + local->stub = stub; + local->fop_continue_cbk = quota_rename_continue; + } + UNLOCK (&local->lock); + + check_ancestory (frame, newloc->parent); + check_ancestory (frame, oldloc->parent); return 0; err: diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h index 5a4bcb2b1e0..3d6c65f8fb6 100644 --- a/xlators/features/quota/src/quota.h +++ b/xlators/features/quota/src/quota.h @@ -181,6 +181,9 @@ typedef void (*quota_ancestry_built_t) (struct list_head *parents, inode_t *inode, int32_t op_ret, int32_t op_errno, void *data); +typedef void +(*quota_fop_continue_t) (call_frame_t *frame); + struct quota_local { gf_lock_t lock; uint32_t validate_count; @@ -196,7 +199,9 @@ struct quota_local { gf_boolean_t skip_check; char just_validated; fop_lookup_cbk_t validate_cbk; + quota_fop_continue_t fop_continue_cbk; inode_t *inode; + uuid_t common_ancestor; /* Used by quota_rename */ call_stub_t *stub; struct iobref *iobref; quota_limit_t limit; @@ -235,4 +240,19 @@ void quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, int64_t delta); +int +quota_build_ancestry (inode_t *inode, quota_ancestry_built_t ancestry_cbk, + void *data); + +void +quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this); + +int32_t +quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, + char *name, uuid_t par); + +int +quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict, + loc_t *loc, struct iatt *buf, int32_t *op_errno); + #endif |