diff options
author | Jeff Darcy <jdarcy@redhat.com> | 2016-04-25 12:17:12 -0400 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2016-04-26 01:21:47 -0700 |
commit | 7241756f94c975c89f6dc1023d2403f531e6f5ac (patch) | |
tree | 4485385624c7b851d5c6ae6dc07a2dbe03987ad4 | |
parent | 256959e9e4e98a8da041778a9607ee279c46d2e5 (diff) |
dht: add "nuke" functionality for efficient server-side deletion
This is a backport of the following two patches (of which the second is a
trivial adjustment to a timeout for a test added by the first).
http://review.gluster.org/13878
http://review.gluster.org/13935
This turns a setxattr request with a special key into an rmdir with flags set. When that hits
the posix translator on the server side, that causes the file/directory
to be moved into the special "landfill" directory. From there, the
posix janitor thread will take care of deleting it entirely on the
server side - traversing it recursively if necessary. A couple of
secondary issues were fixed to make this effective.
* FUSE now ensures that setxattr values are NUL terminated.
* The janitor thread now gets woken up immediately when something is
placed in 'landfill' instead of only when file descriptors need to be
closed.
* The default landfill-emptying interval was reduced to 10s.
To use the feature, issue a setxattr request like this:
setfattr -n glusterfs.dht.nuke -v "" /mnt/glusterfs/vol/some_dir
The value doesn't actually matter; the mere receipt of a request with
this key is sufficient. Some day it might be useful to allow setting a
required value as a sort of password, so that only those who know it can
access the underlying special functionality.
Change-Id: I4132a30d1faa53a6682399ad1d9041e2c4519951
BUG: 1330241
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/14065
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: N Balachandran <nbalacha@redhat.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r-- | tests/features/nuke.t | 41 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 45 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 9 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.c | 25 |
4 files changed, 106 insertions, 14 deletions
diff --git a/tests/features/nuke.t b/tests/features/nuke.t new file mode 100644 index 00000000000..ace847bc99b --- /dev/null +++ b/tests/features/nuke.t @@ -0,0 +1,41 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc + +create_files () { + mkdir $1 + for i in $(seq 0 99); do + mkdir $1/dir$i + for j in $(seq 0 99); do + touch $1/dir$i/file$j + done + done +} + +count_files () { + ls $1 | wc -l +} + +LANDFILL=$B0/${V0}1/.glusterfs/landfill + +TEST glusterd + +TEST $CLI volume create $V0 $H0:$B0/${V0}1 +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id $V0 $M0 + +TEST create_files $M0/foo +TEST [ $(count_files $LANDFILL) = "0" ] + +# This should immediately send the whole directory to the landfill. +TEST setfattr -n glusterfs.dht.nuke -v trinity $M0/foo + +# Make sure the directory's not visible on the mountpoint, and is visible in +# the brick's landfill. +TEST ! ls $M0/foo +TEST [ $(count_files $LANDFILL) = "1" ] + +# Make sure the janitor thread cleans it up in a timely fashion. +EXPECT_WITHIN 60 "0" count_files $LANDFILL + +cleanup diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 96f540d721c..3421c1de657 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3736,6 +3736,42 @@ err: return 0; } +int +dht_nuke_dir (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp) +{ + if (!IA_ISDIR(loc->inode->ia_type)) { + DHT_STACK_UNWIND (setxattr, frame, -1, ENOTSUP, NULL); + return 0; + } + + /* Setxattr didn't need the parent, but rmdir does. */ + loc->parent = inode_parent (loc->inode, NULL, NULL); + if (!loc->parent) { + DHT_STACK_UNWIND (setxattr, frame, -1, ENOENT, NULL); + return 0; + } + gf_uuid_copy (loc->pargfid, loc->parent->gfid); + + if (!loc->name && loc->path) { + loc->name = strrchr (loc->path, '/'); + if (loc->name) { + ++(loc->name); + } + } + + /* + * We do this instead of calling dht_rmdir_do directly for two reasons. 
+ * The first is that we want to reuse all of the initialization that + * dht_rmdir does, so if it ever changes we'll just follow along. The + * second (i.e. why we don't use STACK_WIND_TAIL) is so that we don't + * obscure the fact that we came in via this path instead of a genuine + * rmdir. That makes debugging just a tiny bit easier. + */ + STACK_WIND (frame, default_rmdir_cbk, this, this->fops->rmdir, + loc, 1, NULL); + + return 0; +} int dht_setxattr (call_frame_t *frame, xlator_t *this, @@ -3960,6 +3996,11 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, goto err; } + tmp = dict_get (xattr, "glusterfs.dht.nuke"); + if (tmp) { + return dht_nuke_dir (frame, this, loc, tmp); + } + if (IA_ISDIR (loc->inode->ia_type)) { for (i = 0; i < call_cnt; i++) { @@ -8231,6 +8272,10 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, goto err; } + if (flags) { + return dht_rmdir_do (frame, this); + } + for (i = 0; i < conf->subvolume_cnt; i++) { STACK_WIND (frame, dht_rmdir_opendir_cbk, conf->subvolumes[i], diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 2fbea1383d8..3dac22a6976 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -3234,7 +3234,14 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) } if (fsi->size > 0) { - dict_value = memdup (value, fsi->size); + /* + * Many translators expect setxattr values to be strings, but + * neither dict_get_str nor data_to_str do any checking or + * fixups to make sure that's the case. To avoid nasty + * surprises, allocate an extra byte and add a NUL here. 
+ */ + dict_value = memdup (value, fsi->size+1); + dict_value[fsi->size] = '\0'; } dict_set (state->xattr, newkey, data_from_dynptr ((void *)dict_value, fsi->size)); diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 3ffeb5b30ba..474c47c911f 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1,11 +1,11 @@ /* Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. + This file is part of GlusterFS This file is licensed to you under your choice of the GNU Lesser General Public License, version 3 or any later version (LGPLv3 or later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. + cases as published by the Free Software Foundation */ #ifndef _CONFIG_H #define _CONFIG_H @@ -2027,6 +2027,7 @@ posix_rmdir (call_frame_t *frame, xlator_t *this, } else { sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str); op_ret = rename (real_path, tmp_path); + pthread_cond_signal (&priv->janitor_cond); } } else { op_ret = rmdir (real_path); @@ -6554,7 +6555,6 @@ init (xlator_t *this) int ret = 0; int op_ret = -1; ssize_t size = -1; - int32_t janitor_sleep = 0; uuid_t old_uuid = {0,}; uuid_t dict_uuid = {0,}; uuid_t gfid = {0,}; @@ -6883,16 +6883,9 @@ init (xlator_t *this) } ret = 0; - _private->janitor_sleep_duration = 600; + GF_OPTION_INIT ("janitor-sleep-duration", + _private->janitor_sleep_duration, int32, out); - dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration", - &janitor_sleep); - if (dict_ret == 0) { - gf_msg_debug (this->name, 0, "Setting janitor sleep duration " - "to %d.", janitor_sleep); - - _private->janitor_sleep_duration = janitor_sleep; - } /* performing open dir on brick dir locks the brick dir * and prevents it from being unmounted */ @@ -7128,7 +7121,13 @@ struct volume_options options[] = { { .key = {"background-unlink"}, .type = GF_OPTION_TYPE_BOOL }, { .key = 
{"janitor-sleep-duration"}, - .type = GF_OPTION_TYPE_INT }, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "10", + .description = "Interval (in seconds) between times the internal " + "'landfill' directory is emptied." + }, { .key = {"volume-id"}, .type = GF_OPTION_TYPE_ANY }, { .key = {"glusterd-uuid"}, |