From e404e9b81f18c0a36a5c67ebf041ac0a00ca2eb1 Mon Sep 17 00:00:00 2001 From: shishir gowda Date: Tue, 7 Aug 2012 12:55:31 +0530 Subject: cluster/dht: Optimize readdirp calls in DHT Bring in option which is supported by posix xlator to filter out directory's entries from being returned. DHT would now request non-first subvols to filter out directory entries. dht xlator-option readdir-optimize will enable this optimization Change-Id: I35224bc81c9657f54f952efac02790276c35ded5 BUG: 838199 Signed-off-by: shishir gowda Reviewed-on: http://review.gluster.com/3772 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- libglusterfs/src/gf-dirent.h | 1 + libglusterfs/src/glusterfs.h | 2 ++ xlators/cluster/dht/src/dht-common.c | 23 +++++++++++++++++++ xlators/cluster/dht/src/dht-common.h | 4 ++++ xlators/cluster/dht/src/dht.c | 8 +++++++ xlators/mgmt/glusterd/src/glusterd-volgen.c | 1 + xlators/storage/posix/src/posix.c | 34 +++++++++++++++++++++++++++-- 7 files changed, 71 insertions(+), 2 deletions(-) diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h index 36a5a629c..26cb5a668 100644 --- a/libglusterfs/src/gf-dirent.h +++ b/libglusterfs/src/gf-dirent.h @@ -48,6 +48,7 @@ struct _gf_dirent_t { char d_name[0]; }; +#define DT_ISDIR(mode) (mode == DT_DIR) gf_dirent_t *gf_dirent_for_name (const char *name); void gf_dirent_free (gf_dirent_t *entries); diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 5c2843851..8fc49d3d2 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -82,6 +82,8 @@ #define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid" #define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id" +#define GF_READDIR_SKIP_DIRS "readdir-filter-directories" + #define XATTR_IS_PATHINFO(x) (strncmp (x, GF_XATTR_PATHINFO_KEY, \ strlen (GF_XATTR_PATHINFO_KEY)) == 0) #define XATTR_IS_NODE_UUID(x) (strncmp (x, GF_XATTR_NODE_UUID_KEY, \ diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 5b57552c3..fbe18959a 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2858,6 +2858,16 @@ done: goto unwind; } + if (conf->readdir_optimize == _gf_true) { + if (next_subvol != dht_first_up_subvol (this)) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "dict set failed"); + } + } + STACK_WIND (frame, dht_readdirp_cbk, next_subvol, next_subvol->fops->readdirp, local->fd, local->size, next_offset, @@ -2983,11 +2993,14 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, xlator_t *xvol = NULL; off_t xoff = 0; int ret = 0; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + conf = this->private; + local = dht_local_init (frame, NULL, NULL, whichop); if (!local) { op_errno = ENOMEM; @@ -3015,6 +3028,16 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, gf_log (this->name, GF_LOG_WARNING, "failed to set 'glusterfs.dht.linkto'" " key"); + if (conf->readdir_optimize == _gf_true) { + if (xvol != dht_first_up_subvol (this)) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, + GF_LOG_ERROR, + "Dict set failed"); + } + } } STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp, diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index d3ccacdb8..c02c0b83d 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -249,6 +249,10 @@ struct dht_conf { /* defrag related */ gf_defrag_info_t *defrag; + + /* Request to filter directory entries in readdir request */ + + gf_boolean_t readdir_optimize; }; typedef struct dht_conf dht_conf_t; diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 6b156c1d3..a802d895d 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -329,6 +329,8 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, options, uint32, out); + GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options, + bool, out); if (conf->defrag) { GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats, options, bool, out); @@ -435,6 +437,8 @@ init (xlator_t *this) GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down, bool, err); + GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err); + if (defrag) { GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err); } @@ -603,6 +607,10 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, .default_value = "off", }, + { .key = {"readdir-optimize"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 7444db0b5..1bdf5a19a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -121,6 +121,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"cluster.min-free-inodes", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.rebalance-stats", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.subvols-per-directory", "cluster/distribute", "directory-layout-spread", NULL, NO_DOC, 0 }, + {"cluster.readdir-optimize", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.entry-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.read-subvolume", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index e2f1bbd5f..6d0f8caa3 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3547,7 +3547,7 @@ posix_fentrylk (call_frame_t *frame, xlator_t *this, int posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, - gf_dirent_t *entries) + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) { off_t in_case = -1; size_t filled = 0; @@ -3557,6 +3557,18 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, int32_t this_size = -1; gf_dirent_t *this_entry = NULL; uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + struct stat stbuf = {0,}; + char *hpath = NULL; + int len = 0; + int ret = 0; + + if (skip_dirs) { + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; + } if (!off) { rewinddir (dir); @@ -3607,6 +3619,17 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, continue; } + if (skip_dirs) { + if (DT_ISDIR (entry->d_type)) { + continue; + } else if (hpath) { + strcpy (&hpath[len+1],entry->d_name); + ret = lstat (hpath, &stbuf); + if (!ret && S_ISDIR (stbuf.st_mode)) + continue; + } + } + this_size = max (sizeof (gf_dirent_t), sizeof (gfs3_dirplist)) + strlen (entry->d_name) + 1; @@ -3728,6 +3751,7 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, int32_t op_ret = -1; int32_t op_errno = 0; gf_dirent_t entries; + int32_t skip_dirs = 0; VALIDATE_OR_GOTO (frame, out); @@ -3753,7 +3777,13 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, goto out; } - count = posix_fill_readdir (fd, dir, off, size, &entries); + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + count = posix_fill_readdir (fd, dir, off, size, &entries, this, + skip_dirs); /* pick ENOENT to indicate EOF */ op_errno = errno; -- cgit