diff options
author | Sakshi <sabansal@redhat.com> | 2017-01-23 12:11:49 +0530 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2017-05-31 14:13:01 +0000 |
commit | b9406e210717621bc672a63c1cbd1b0183834056 (patch) | |
tree | 8bf3d1787d928c121e52f4b9820a27a47957361c /xlators/cluster | |
parent | 57b0a4a374a604079f37a9d9d9fffb09a718c010 (diff) |
cluster/dht: Make optimal usage of buffer provided with readdir(p)
dht_readdirp must unwind with the list of entries only after
the entire buffer requested by the kernel is filled, to avoid
the extra syscalls that occur when returning a partially filled
buffer. Also wind the readdir call to the next subvol on reaching
EOD for the directory on that subvol, to avoid an extra network call.
Change-Id: If2e1a2722f813d95457c7542bff25fef56c7a041
BUG: 1356453
Signed-off-by: Sakshi <sabansal@redhat.com>
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-on: https://review.gluster.org/12271
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Amar Tumballi <amarts@redhat.com>
Reviewed-by: Susant Palai <spalai@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 105 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 4 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 3 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-lock.c | 4 |
4 files changed, 62 insertions, 54 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index c4718997681..109106082e6 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -5002,7 +5002,6 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) { dht_local_t *local = NULL; - gf_dirent_t entries; gf_dirent_t *orig_entry = NULL; gf_dirent_t *entry = NULL; xlator_t *prev = NULL; @@ -5019,7 +5018,6 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, inode_table_t *itable = NULL; inode_t *inode = NULL; - INIT_LIST_HEAD (&entries.list); prev = cookie; local = frame->local; itable = local->fd ? local->fd->inode->table : NULL; @@ -5029,9 +5027,14 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, methods = &(conf->methods); + local->op_errno = op_errno; + if (op_ret < 0) goto done; + if (local->op_ret < 0) + local->op_ret = 0; + if (!local->layout) local->layout = dht_layout_get (this, local->fd->inode); @@ -5046,11 +5049,10 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, goto done; if (conf->readdir_optimize == _gf_true) - readdir_optimize = 1; + readdir_optimize = 1; list_for_each_entry (orig_entry, (&orig_entries->list), list) { next_offset = orig_entry->d_off; - if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { /*stat failed somewhere- ignore this entry*/ gf_msg_debug (this->name, EINVAL, @@ -5083,8 +5085,8 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, if (prev == hashed_subvol) goto list; if ((hashed_subvol - && dht_subvol_status (conf, hashed_subvol)) - || (prev != local->first_up_subvol)) + && dht_subvol_status (conf, hashed_subvol)) + || (prev != local->first_up_subvol)) continue; goto list; @@ -5095,10 +5097,10 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, 
conf->link_xattr_name)) { continue; } + list: entry = gf_dirent_for_name (orig_entry->d_name); if (!entry) { - goto unwind; } @@ -5170,26 +5172,17 @@ list: } } } - list_add_tail (&entry->list, &entries.list); + + list_add_tail (&entry->list, &local->entries.list); + local->filled += gf_dirent_size (entry->d_name); count++; + local->op_ret++; } - op_ret = count; - /* We need to ensure that only the last subvolume's end-of-directory - * notification is respected so that directory reading does not stop - * before all subvolumes have been read. That could happen because the - * posix for each subvolume sends a ENOENT on end-of-directory but in - * distribute we're not concerned only with a posix's view of the - * directory but the aggregated namespace' view of the directory. - */ - if (prev != dht_last_up_subvol (this)) - op_errno = 0; done: - if (count == 0) { - /* non-zero next_offset means that - EOF is not yet hit on the current subvol - */ - if (next_offset == 0) { + if ((count == 0) || (local && (local->filled < local->size))) { + if ((next_offset == 0) || (op_errno == ENOENT)) { + next_offset = 0; next_subvol = dht_subvol_next (this, prev); } else { next_subvol = prev; @@ -5223,25 +5216,30 @@ done: } unwind: - if (op_ret < 0) - op_ret = 0; + /* We need to ensure that only the last subvolume's end-of-directory + * notification is respected so that directory reading does not stop + * before all subvolumes have been read. That could happen because the + * posix for each subvolume sends a ENOENT on end-of-directory but in + * distribute we're not concerned only with a posix's view of the + * directory but the aggregated namespace' view of the directory. 
+ */ + if ((local->op_ret >= 0) && (prev != dht_last_up_subvol (this))) + local->op_errno = 0; - DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL); - gf_dirent_free (&entries); + DHT_STACK_UNWIND (readdirp, frame, local->op_ret, local->op_errno, + &local->entries, NULL); return 0; } - int dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) { dht_local_t *local = NULL; - gf_dirent_t entries; gf_dirent_t *orig_entry = NULL; gf_dirent_t *entry = NULL; xlator_t *prev = NULL; @@ -5253,7 +5251,6 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_conf_t *conf = NULL; dht_methods_t *methods = NULL; - INIT_LIST_HEAD (&entries.list); prev = cookie; local = frame->local; @@ -5262,8 +5259,14 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, methods = &(conf->methods); - if (op_ret < 0) + local->op_errno = op_errno; + + if (op_ret < 0) { goto done; + } + + if (local->op_ret < 0) + local->op_ret = 0; if (!local->layout) local->layout = dht_layout_get (this, local->fd->inode); @@ -5290,27 +5293,16 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, entry->d_type = orig_entry->d_type; entry->d_len = orig_entry->d_len; - list_add_tail (&entry->list, &entries.list); + list_add_tail (&entry->list, &local->entries.list); count++; + local->filled += gf_dirent_size (entry->d_name); + local->op_ret++; } } - op_ret = count; - /* We need to ensure that only the last subvolume's end-of-directory - * notification is respected so that directory reading does not stop - * before all subvolumes have been read. That could happen because the - * posix for each subvolume sends a ENOENT on end-of-directory but in - * distribute we're not concerned only with a posix's view of the - * directory but the aggregated namespace' view of the directory. 
- */ - if (prev != dht_last_up_subvol (this)) - op_errno = 0; done: - if (count == 0) { - /* non-zero next_offset means that - EOF is not yet hit on the current subvol - */ - if (next_offset == 0) { + if ((count == 0) || (local && (local->filled < local->size))) { + if ((op_ret <= 0) || (op_errno == ENOENT)) { next_subvol = dht_subvol_next (this, prev); } else { next_subvol = prev; @@ -5327,12 +5319,19 @@ done: } unwind: - if (op_ret < 0) - op_ret = 0; + /* We need to ensure that only the last subvolume's end-of-directory + * notification is respected so that directory reading does not stop + * before all subvolumes have been read. That could happen because the + * posix for each subvolume sends a ENOENT on end-of-directory but in + * distribute we're not concerned only with a posix's view of the + * directory but the aggregated namespace' view of the directory. + */ + if ((local->op_ret >= 0) && (prev != dht_last_up_subvol (this))) + local->op_errno = 0; - DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL); - gf_dirent_free (&entries); + DHT_STACK_UNWIND (readdir, frame, local->op_ret, local->op_errno, + &local->entries, NULL); return 0; } @@ -5365,6 +5364,7 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local->size = size; local->xattr_req = (dict)? 
dict_ref (dict) : NULL; local->first_up_subvol = dht_first_up_subvol (this); + local->op_ret = -1; dht_deitransform (this, yoff, &xvol); @@ -8372,7 +8372,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, goto err; } - lookup_local = mem_get0 (this->local_pool); + lookup_local = dht_local_init (lookup_frame, NULL, NULL, + GF_FOP_LOOKUP); if (!lookup_local) { goto err; } diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 786db020427..333fae8c894 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -346,6 +346,10 @@ struct dht_local { call_stub_t *stub; int32_t parent_disk_layout[4]; + /* To hold dentries of readdir spawning across subvols */ + gf_dirent_t entries; + size_t filled; + /* rename rollback */ int *ret_cache ; }; diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 38965298325..08825cf0b93 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -484,6 +484,8 @@ dht_local_wipe (xlator_t *this, dht_local_t *local) if (local->ret_cache) GF_FREE (local->ret_cache); + gf_dirent_free (&local->entries); + mem_put (local); } @@ -523,6 +525,7 @@ dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop) inode); } + INIT_LIST_HEAD (&local->entries.list); frame->local = local; out: diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c index 0a198a17db4..45ebeec99e8 100644 --- a/xlators/cluster/dht/src/dht-lock.c +++ b/xlators/cluster/dht/src/dht-lock.c @@ -455,7 +455,7 @@ dht_unlock_entrylk_wrapper (call_frame_t *frame, dht_elock_wrap_t *entrylk) goto done; } - lock_local = mem_get0 (THIS->local_pool); + lock_local = dht_local_init (lock_frame, NULL, NULL, 0); if (lock_local == NULL) { gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_PARENT_LAYOUT_CHANGED, @@ -848,7 +848,7 @@ dht_unlock_inodelk_wrapper (call_frame_t 
*frame, dht_ilock_wrap_t *inodelk) goto done; } - lock_local = mem_get0 (THIS->local_pool); + lock_local = dht_local_init (lock_frame, NULL, NULL, 0); if (lock_local == NULL) { gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_PARENT_LAYOUT_CHANGED, |