diff options
| author | Jeff Darcy <jdarcy@redhat.com> | 2016-03-31 17:15:37 -0400 | 
|---|---|---|
| committer | Jeff Darcy <jdarcy@redhat.com> | 2016-04-07 08:07:55 -0700 | 
| commit | 6602376e3e9e6d9f4f695475569322b61ccc2411 (patch) | |
| tree | 58d4352c641b43615cccce363ce9f764e4cdd8eb | |
| parent | e6c7da3769105f0e6fc8b6627f3b11727a2a216d (diff) | |
dht: add "nuke" functionality for efficient server-side deletion
This turns a special xattr into an rmdir with flags set.  When that hits
the posix translator on the server side, that causes the file/directory
to be moved into the special "landfill" directory.  From there, the
posix janitor thread will take care of deleting it entirely on the
server side - traversing it recursively if necessary.  A few secondary
changes were made to make this effective:
 * FUSE now ensures that setxattr values are NUL terminated.
 * The janitor thread now gets woken up immediately when something is
   placed in 'landfill' instead of only when file descriptors need to be
   closed.
 * The default landfill-emptying interval was reduced to 10s.
To use the feature, issue a setxattr like this:
   setfattr -n glusterfs.dht.nuke -v "" /mnt/glusterfs/vol/some_dir
The value doesn't actually matter; the mere receipt of a request with
this key is sufficient.  Some day it might be useful to allow setting a
required value as a sort of password, so that only those who know it can
access the underlying special functionality.
Change-Id: I8a343c2cdb40a76d5a06c707191fb67babb8514f
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/13878
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
| -rwxr-xr-x | tests/features/nuke.t | 41 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 45 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 9 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 21 | 
4 files changed, 104 insertions, 12 deletions
diff --git a/tests/features/nuke.t b/tests/features/nuke.t new file mode 100755 index 00000000000..ad9479ffaa5 --- /dev/null +++ b/tests/features/nuke.t @@ -0,0 +1,41 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc + +create_files () { +	mkdir $1 +	for i in $(seq 0 99); do +		mkdir $1/dir$i +		for j in $(seq 0 99); do +			touch $1/dir$i/file$j +		done +	done +} + +count_files () { +	ls $1 | wc -l +} + +LANDFILL=$B0/${V0}1/.glusterfs/landfill + +TEST glusterd + +TEST $CLI volume create $V0 $H0:$B0/${V0}1 +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id $V0 $M0 + +TEST create_files $M0/foo +TEST [ $(count_files $LANDFILL) = "0" ] + +# This should immediately send the whole directory to the landfill. +TEST setfattr -n glusterfs.dht.nuke -v trinity $M0/foo + +# Make sure the directory's not visible on the mountpoint, and is visible in +# the brick's landfill. +TEST ! ls $M0/foo +TEST [ $(count_files $LANDFILL) = "1" ] + +# Make sure the janitor thread cleans it up in a timely fashion. +EXPECT_WITHIN 20 "0" count_files $LANDFILL + +cleanup diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 4c93084ec82..b14f20bcc65 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3731,6 +3731,42 @@ err:          return 0;  } +int +dht_nuke_dir (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp) +{ +        if (!IA_ISDIR(loc->inode->ia_type)) { +                DHT_STACK_UNWIND (setxattr, frame, -1, ENOTSUP, NULL); +                return 0; +        } + +        /* Setxattr didn't need the parent, but rmdir does. 
*/ +        loc->parent = inode_parent (loc->inode, NULL, NULL); +        if (!loc->parent) { +                DHT_STACK_UNWIND (setxattr, frame, -1, ENOENT, NULL); +                return 0; +        } +        gf_uuid_copy (loc->pargfid, loc->parent->gfid); + +        if (!loc->name && loc->path) { +                loc->name = strrchr (loc->path, '/'); +                if (loc->name) { +                        ++(loc->name); +                } +        } + +        /* +         * We do this instead of calling dht_rmdir_do directly for two reasons. +         * The first is that we want to reuse all of the initialization that +         * dht_rmdir does, so if it ever changes we'll just follow along.  The +         * second (i.e. why we don't use STACK_WIND_TAIL) is so that we don't +         * obscure the fact that we came in via this path instead of a genuine +         * rmdir.  That makes debugging just a tiny bit easier. +         */ +        STACK_WIND (frame, default_rmdir_cbk, this, this->fops->rmdir, +                    loc, 1, NULL); + +        return 0; +}  int  dht_setxattr (call_frame_t *frame, xlator_t *this, @@ -3955,6 +3991,11 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,                  goto err;          } +        tmp = dict_get (xattr, "glusterfs.dht.nuke"); +        if (tmp) { +                return dht_nuke_dir (frame, this, loc, tmp); +        } +          if (IA_ISDIR (loc->inode->ia_type)) {                  for (i = 0; i < call_cnt; i++) { @@ -7645,6 +7686,10 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,                  goto err;          } +        if (flags) { +                return dht_rmdir_do (frame, this); +        } +          for (i = 0; i < conf->subvolume_cnt; i++) {                  STACK_WIND (frame, dht_rmdir_opendir_cbk,                              conf->subvolumes[i], diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index c6c38f3145b..aca1b3d14fd 
100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -3292,7 +3292,14 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)          }          if (fsi->size > 0) { -                dict_value = memdup (value, fsi->size); +                /* +                 * Many translators expect setxattr values to be strings, but +                 * neither dict_get_str nor data_to_str do any checking or +                 * fixups to make sure that's the case.  To avoid nasty +                 * surprises, allocate an extra byte and add a NUL here. +                 */ +                dict_value = memdup (value, fsi->size+1); +                dict_value[fsi->size] = '\0';          }          dict_set (state->xattr, newkey,                    data_from_dynptr ((void *)dict_value, fsi->size)); diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index b5df1d082ee..f72c13d9066 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1994,6 +1994,7 @@ posix_rmdir (call_frame_t *frame, xlator_t *this,                  } else {                          sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str);                          op_ret = sys_rename (real_path, tmp_path); +                        pthread_cond_signal (&priv->janitor_cond);                  }          } else {                  op_ret = sys_rmdir (real_path); @@ -6528,7 +6529,6 @@ init (xlator_t *this)          int                   ret           = 0;          int                   op_ret        = -1;          ssize_t               size          = -1; -        int32_t               janitor_sleep = 0;          uuid_t                old_uuid      = {0,};          uuid_t                dict_uuid     = {0,};          uuid_t                gfid          = {0,}; @@ -6857,16 +6857,9 @@ init (xlator_t *this)          }          ret = 0; -        _private->janitor_sleep_duration = 600; +        
GF_OPTION_INIT ("janitor-sleep-duration", +                        _private->janitor_sleep_duration, int32, out); -        dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration", -                                   &janitor_sleep); -        if (dict_ret == 0) { -                gf_msg_debug (this->name, 0, "Setting janitor sleep duration " -                              "to %d.", janitor_sleep); - -                _private->janitor_sleep_duration = janitor_sleep; -        }          /* performing open dir on brick dir locks the brick dir           * and prevents it from being unmounted           */ @@ -7105,7 +7098,13 @@ struct volume_options options[] = {          { .key  = {"background-unlink"},            .type = GF_OPTION_TYPE_BOOL },          { .key  = {"janitor-sleep-duration"}, -          .type = GF_OPTION_TYPE_INT }, +          .type = GF_OPTION_TYPE_INT, +          .min = 1, +          .validate = GF_OPT_VALIDATE_MIN, +          .default_value = "10", +          .description = "Interval (in seconds) between times the internal " +                         "'landfill' directory is emptied." +        },          { .key  = {"volume-id"},            .type = GF_OPTION_TYPE_ANY },          { .key  = {"glusterd-uuid"},  | 
