diff options
Diffstat (limited to 'xlators/performance')
48 files changed, 1869 insertions, 2909 deletions
diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am index 4ec0f78e1b4..e95725acb8c 100644 --- a/xlators/performance/Makefile.am +++ b/xlators/performance/Makefile.am @@ -1,4 +1,4 @@ SUBDIRS = write-behind read-ahead readdir-ahead io-threads io-cache \ - symlink-cache quick-read md-cache open-behind decompounder nl-cache + quick-read md-cache open-behind nl-cache CLEANFILES = diff --git a/xlators/performance/decompounder/Makefile.am b/xlators/performance/decompounder/Makefile.am deleted file mode 100644 index af437a64d6d..00000000000 --- a/xlators/performance/decompounder/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -SUBDIRS = src diff --git a/xlators/performance/decompounder/src/Makefile.am b/xlators/performance/decompounder/src/Makefile.am deleted file mode 100644 index 7823774c0d1..00000000000 --- a/xlators/performance/decompounder/src/Makefile.am +++ /dev/null @@ -1,19 +0,0 @@ -if WITH_SERVER -xlator_LTLIBRARIES = decompounder.la -endif -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance - -decompounder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) - -decompounder_la_SOURCES = decompounder.c -decompounder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -noinst_HEADERS = decompounder-mem-types.h decompounder-messages.h \ - decompounder.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -CLEANFILES = diff --git a/xlators/performance/decompounder/src/decompounder-mem-types.h b/xlators/performance/decompounder/src/decompounder-mem-types.h deleted file mode 100644 index 8b3049ffb18..00000000000 --- a/xlators/performance/decompounder/src/decompounder-mem-types.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __DC_MEM_TYPES_H__ -#define __DC_MEM_TYPES_H__ - -#include "mem-types.h" - -enum gf_dc_mem_types_ { gf_dc_mt_rsp_t = gf_common_mt_end + 1, gf_dc_mt_end }; -#endif diff --git a/xlators/performance/decompounder/src/decompounder-messages.h b/xlators/performance/decompounder/src/decompounder-messages.h deleted file mode 100644 index 60674f32e78..00000000000 --- a/xlators/performance/decompounder/src/decompounder-messages.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _DC_MESSAGES_H_ -#define _DC_MESSAGES_H_ - -#include "glfs-message-id.h" - -/* To add new message IDs, append new identifiers at the end of the list. - * - * Never remove a message ID. If it's not used anymore, you can rename it or - * leave it as it is, but not delete it. This is to prevent reutilization of - * IDs by other messages. - * - * The component name must match one of the entries defined in - * glfs-message-id.h. - */ - -GLFS_MSGID(DC, DC_MSG_VOL_MISCONFIGURED, DC_MSG_ERROR_RECEIVED); - -#endif /* !_DC_MESSAGES_H_ */ diff --git a/xlators/performance/decompounder/src/decompounder.c b/xlators/performance/decompounder/src/decompounder.c deleted file mode 100644 index 8bc390923e0..00000000000 --- a/xlators/performance/decompounder/src/decompounder.c +++ /dev/null @@ -1,833 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include "decompounder.h" -#include "mem-types.h" -#include "compound-fop-utils.h" - -void -dc_local_cleanup(dc_local_t *local) -{ - compound_args_cbk_cleanup(local->compound_rsp); - return; -} - -int32_t -dc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(stat, frame, op_ret, op_errno, buf, - xdata); - return 0; -} - -int32_t -dc_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *path, - struct iatt *buf, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(readlink, frame, op_ret, op_errno, path, - buf, xdata); - return 0; -} - -int32_t -dc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(mknod, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); - return 0; -} - -int32_t -dc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(mkdir, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); - return 0; -} - -int32_t -dc_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(unlink, frame, op_ret, op_errno, - preparent, postparent, xdata); - return 0; -} - -int32_t -dc_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(rmdir, frame, op_ret, op_errno, - preparent, postparent, xdata); - return 0; -} - -int32_t -dc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(symlink, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); - return 0; -} - -int32_t -dc_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf, struct iatt *preoldparent, - struct iatt *postoldparent, struct iatt *prenewparent, - struct iatt *postnewparent, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(rename, frame, op_ret, op_errno, buf, - preoldparent, postoldparent, - prenewparent, postnewparent, xdata); - return 0; -} - -int32_t -dc_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(link, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); - return 0; -} - -int32_t -dc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(truncate, frame, op_ret, op_errno, - prebuf, postbuf, xdata); - return 0; -} - -int32_t -dc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(open, frame, op_ret, op_errno, fd, - xdata); - return 0; -} - -int32_t -dc_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vector, int32_t count, - struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(readv, frame, op_ret, op_errno, vector, - count, stbuf, iobref, xdata); - return 0; -} - -int32_t -dc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(writev, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - return 0; -} - -int32_t -dc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct statvfs *buf, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(statfs, frame, op_ret, op_errno, buf, - xdata); - return 0; -} - -int32_t -dc_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(flush, frame, op_ret, op_errno, xdata); - return 0; -} - -int32_t -dc_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fsync, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - return 0; -} - -int32_t -dc_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(setxattr, frame, op_ret, op_errno, - xdata); - return 0; -} - -int32_t -dc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(getxattr, frame, op_ret, op_errno, dict, - xdata); - return 0; -} - -int32_t -dc_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(removexattr, frame, op_ret, op_errno, - xdata); - return 0; -} - -int32_t -dc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(opendir, frame, op_ret, op_errno, fd, - xdata); - return 0; -} - -int32_t -dc_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fsyncdir, frame, op_ret, op_errno, - xdata); - return 0; -} - -int32_t -dc_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(access, frame, op_ret, op_errno, xdata); - return 0; -} - -int32_t -dc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(create, frame, op_ret, op_errno, fd, - inode, buf, preparent, postparent, - xdata); - return 0; -} - -int32_t -dc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(ftruncate, frame, op_ret, op_errno, - prebuf, postbuf, xdata); - return 0; -} - -int32_t -dc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fstat, frame, op_ret, op_errno, buf, - xdata); - return 0; -} - -int32_t -dc_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct gf_flock *lock, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(lk, frame, op_ret, op_errno, lock, - xdata); - return 0; -} - -int32_t -dc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(lookup, frame, op_ret, op_errno, inode, - buf, xdata, postparent); - return 0; -} - -int32_t -dc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(readdir, frame, op_ret, op_errno, - entries, xdata); - return 0; -} - -int32_t -dc_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(inodelk, frame, op_ret, op_errno, - xdata); - return 0; -} - -int32_t -dc_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(finodelk, frame, op_ret, op_errno, - xdata); - return 0; -} - -int32_t -dc_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(entrylk, frame, op_ret, op_errno, - xdata); - return 0; -} - -int32_t -dc_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fentrylk, frame, op_ret, op_errno, - xdata); - return 0; -} - -int32_t -dc_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(xattrop, frame, op_ret, op_errno, dict, - xdata); - return 0; -} - -int32_t -dc_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fxattrop, frame, op_ret, op_errno, dict, - xdata); - return 0; -} - -int32_t -dc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fgetxattr, frame, op_ret, op_errno, - dict, xdata); - return 0; -} - -int32_t -dc_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fsetxattr, frame, op_ret, op_errno, - xdata); - return 0; -} - -int32_t -dc_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, uint32_t weak_cksum, - uint8_t *strong_cksum, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(rchecksum, frame, op_ret, op_errno, - weak_cksum, strong_cksum, xdata); - return 0; -} - -int32_t -dc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(setattr, frame, op_ret, op_errno, - statpre, statpost, xdata); - return 0; -} - -int32_t -dc_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fsetattr, frame, op_ret, op_errno, - statpre, statpost, xdata); - return 0; -} - -int32_t -dc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(readdirp, frame, op_ret, op_errno, - entries, xdata); - return 0; -} - -int32_t -dc_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fremovexattr, frame, op_ret, op_errno, - xdata); - return 0; -} - -int32_t -dc_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *pre, - struct iatt *post, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fallocate, frame, op_ret, op_errno, pre, - post, xdata); - return 0; -} - -int32_t -dc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *pre, - struct iatt *post, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(discard, frame, op_ret, op_errno, pre, - post, xdata); - return 0; -} - -int32_t -dc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *pre, - struct iatt *post, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(zerofill, frame, op_ret, op_errno, pre, - post, xdata); - return 0; -} - -int32_t -dc_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(ipc, frame, op_ret, op_errno, xdata); - return 0; -} - -int32_t -dc_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, off_t offset, dict_t *xdata) -{ - DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(seek, frame, op_ret, op_errno, offset, - xdata); - return 0; -} - -int32_t -dc_compound_fop_wind(call_frame_t *frame, xlator_t *this) -{ - dc_local_t *local = frame->local; - compound_args_t *c_req = local->compound_req; - compound_args_cbk_t *c_rsp = local->compound_rsp; - int counter = local->counter; - default_args_t *curr_fop = &c_req->req_list[counter]; - int op_ret = 0; - int op_errno = ENOMEM; - - if (local->counter == local->length) - goto done; - - c_rsp->enum_list[counter] = c_req->enum_list[counter]; - - switch (c_req->enum_list[counter]) { - case GF_FOP_STAT: - STACK_WIND(frame, dc_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, &curr_fop->loc, - curr_fop->xdata); - break; - case GF_FOP_READLINK: - STACK_WIND(frame, dc_readlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, &curr_fop->loc, - curr_fop->size, curr_fop->xdata); - break; - case GF_FOP_MKNOD: - STACK_WIND(frame, dc_mknod_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, &curr_fop->loc, - curr_fop->mode, curr_fop->rdev, curr_fop->umask, - curr_fop->xdata); - break; - case GF_FOP_MKDIR: - STACK_WIND(frame, dc_mkdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, &curr_fop->loc, - curr_fop->mode, curr_fop->umask, curr_fop->xdata); - break; - case GF_FOP_UNLINK: - STACK_WIND(frame, dc_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &curr_fop->loc, - curr_fop->xflag, curr_fop->xdata); - break; - case GF_FOP_RMDIR: - STACK_WIND(frame, dc_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, &curr_fop->loc, - curr_fop->flags, curr_fop->xdata); - break; - case GF_FOP_SYMLINK: - STACK_WIND(frame, dc_symlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, curr_fop->linkname, - &curr_fop->loc, curr_fop->umask, curr_fop->xdata); - break; - case GF_FOP_RENAME: - STACK_WIND(frame, dc_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, &curr_fop->loc, - &curr_fop->loc2, curr_fop->xdata); - break; - case GF_FOP_LINK: - STACK_WIND(frame, dc_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, &curr_fop->loc, - &curr_fop->loc2, curr_fop->xdata); - break; - case GF_FOP_TRUNCATE: - STACK_WIND(frame, dc_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, &curr_fop->loc, - curr_fop->offset, curr_fop->xdata); - break; - case GF_FOP_OPEN: - STACK_WIND(frame, dc_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &curr_fop->loc, - curr_fop->flags, curr_fop->fd, curr_fop->xdata); - break; - case GF_FOP_READ: - STACK_WIND(frame, dc_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, curr_fop->fd, - curr_fop->size, curr_fop->offset, curr_fop->flags, - curr_fop->xdata); - break; - case GF_FOP_WRITE: - STACK_WIND(frame, dc_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, curr_fop->fd, - curr_fop->vector, curr_fop->count, curr_fop->offset, - curr_fop->flags, curr_fop->iobref, curr_fop->xdata); - break; - case GF_FOP_STATFS: - STACK_WIND(frame, dc_statfs_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->statfs, &curr_fop->loc, - curr_fop->xdata); - break; - case GF_FOP_FLUSH: - STACK_WIND(frame, dc_flush_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, curr_fop->fd, - curr_fop->xdata); - break; - case GF_FOP_FSYNC: - STACK_WIND(frame, dc_fsync_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, curr_fop->fd, - curr_fop->datasync, curr_fop->xdata); - break; - case GF_FOP_SETXATTR: - STACK_WIND(frame, dc_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, &curr_fop->loc, - curr_fop->xattr, curr_fop->flags, curr_fop->xdata); - break; - case GF_FOP_GETXATTR: - STACK_WIND(frame, dc_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, &curr_fop->loc, - curr_fop->name, curr_fop->xdata); - break; - case GF_FOP_REMOVEXATTR: - STACK_WIND(frame, dc_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, &curr_fop->loc, - curr_fop->name, curr_fop->xdata); - break; - case GF_FOP_OPENDIR: - STACK_WIND(frame, dc_opendir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, &curr_fop->loc, - curr_fop->fd, curr_fop->xdata); - break; - case GF_FOP_FSYNCDIR: - STACK_WIND(frame, dc_fsyncdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsyncdir, curr_fop->fd, - curr_fop->datasync, curr_fop->xdata); - break; - case GF_FOP_ACCESS: - STACK_WIND(frame, dc_access_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->access, &curr_fop->loc, - curr_fop->mask, curr_fop->xdata); - break; - case GF_FOP_CREATE: - STACK_WIND(frame, dc_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, &curr_fop->loc, - curr_fop->flags, curr_fop->mode, curr_fop->umask, - curr_fop->fd, curr_fop->xdata); - break; - case GF_FOP_FTRUNCATE: - STACK_WIND(frame, dc_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, curr_fop->fd, - curr_fop->offset, curr_fop->xdata); - break; - case GF_FOP_FSTAT: - STACK_WIND(frame, dc_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, curr_fop->fd, - curr_fop->xdata); - break; - case GF_FOP_LK: - STACK_WIND(frame, dc_lk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lk, curr_fop->fd, curr_fop->cmd, - &curr_fop->lock, curr_fop->xdata); - break; - case GF_FOP_LOOKUP: - STACK_WIND(frame, dc_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, &curr_fop->loc, - curr_fop->xdata); - break; - case GF_FOP_READDIR: - STACK_WIND(frame, dc_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir, curr_fop->fd, - curr_fop->size, curr_fop->offset, curr_fop->xdata); - break; - case GF_FOP_INODELK: - STACK_WIND(frame, dc_inodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, curr_fop->volume, - &curr_fop->loc, curr_fop->cmd, &curr_fop->lock, - curr_fop->xdata); - break; - case GF_FOP_FINODELK: - STACK_WIND(frame, dc_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, curr_fop->volume, - curr_fop->fd, curr_fop->cmd, &curr_fop->lock, - curr_fop->xdata); - break; - case GF_FOP_ENTRYLK: - STACK_WIND(frame, dc_entrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->entrylk, curr_fop->volume, - &curr_fop->loc, curr_fop->name, curr_fop->entrylkcmd, - curr_fop->entrylktype, curr_fop->xdata); - break; - case GF_FOP_FENTRYLK: - STACK_WIND(frame, dc_fentrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fentrylk, curr_fop->volume, - curr_fop->fd, curr_fop->name, curr_fop->entrylkcmd, - curr_fop->entrylktype, curr_fop->xdata); - break; - case GF_FOP_XATTROP: - STACK_WIND(frame, dc_xattrop_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, &curr_fop->loc, - curr_fop->optype, curr_fop->xattr, curr_fop->xdata); - break; - case GF_FOP_FXATTROP: - STACK_WIND(frame, dc_fxattrop_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fxattrop, curr_fop->fd, - curr_fop->optype, curr_fop->xattr, curr_fop->xdata); - break; - case GF_FOP_FGETXATTR: - STACK_WIND(frame, dc_fgetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, curr_fop->fd, - curr_fop->name, curr_fop->xdata); - break; - case GF_FOP_FSETXATTR: - STACK_WIND(frame, dc_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, curr_fop->fd, - curr_fop->xattr, curr_fop->flags, curr_fop->xdata); - break; - case GF_FOP_RCHECKSUM: - STACK_WIND(frame, dc_rchecksum_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rchecksum, curr_fop->fd, - curr_fop->offset, curr_fop->size, curr_fop->xdata); - break; - case GF_FOP_SETATTR: - STACK_WIND(frame, dc_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, &curr_fop->loc, - &curr_fop->stat, curr_fop->valid, curr_fop->xdata); - break; - case GF_FOP_FSETATTR: - STACK_WIND(frame, dc_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, curr_fop->fd, - &curr_fop->stat, curr_fop->valid, curr_fop->xdata); - break; - case GF_FOP_READDIRP: - STACK_WIND(frame, dc_readdirp_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, curr_fop->fd, - curr_fop->size, curr_fop->offset, curr_fop->xdata); - break; - case GF_FOP_FREMOVEXATTR: - STACK_WIND(frame, dc_fremovexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, curr_fop->fd, - curr_fop->name, curr_fop->xdata); - break; - case GF_FOP_FALLOCATE: - STACK_WIND(frame, dc_fallocate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fallocate, curr_fop->fd, - curr_fop->flags, curr_fop->offset, curr_fop->size, - curr_fop->xdata); - break; - case GF_FOP_DISCARD: - STACK_WIND(frame, dc_discard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->discard, curr_fop->fd, - curr_fop->offset, curr_fop->size, curr_fop->xdata); - break; - case GF_FOP_ZEROFILL: - STACK_WIND(frame, dc_zerofill_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->zerofill, curr_fop->fd, - curr_fop->offset, curr_fop->size, curr_fop->xdata); - break; - case GF_FOP_IPC: - STACK_WIND(frame, dc_ipc_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, curr_fop->cmd, - curr_fop->xdata); - break; - case GF_FOP_SEEK: - STACK_WIND(frame, dc_seek_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->seek, curr_fop->fd, - curr_fop->offset, curr_fop->what, curr_fop->xdata); - break; - default: - return -ENOTSUP; - } - return 0; -done: - DC_STACK_UNWIND(frame, op_ret, op_errno, c_rsp, NULL); - return 0; -} - -int32_t -dc_compound(call_frame_t *frame, xlator_t *this, void *data, dict_t *xdata) -{ - compound_args_t *compound_req = NULL; - compound_args_cbk_t *compound_rsp = NULL; - int ret = 0; - int op_errno = ENOMEM; - dc_local_t *local = NULL; - - compound_req = data; - - GF_ASSERT_AND_GOTO_WITH_ERROR(this, compound_req, out, op_errno, EINVAL); - - local = mem_get0(this->local_pool); - if (!local) - goto out; - - frame->local = local; - - local->compound_rsp = compound_args_cbk_alloc(compound_req->fop_length, - NULL); - if (!local->compound_rsp) - goto out; - - compound_rsp = local->compound_rsp; - - local->length = compound_req->fop_length; - local->counter = 0; - local->compound_req = compound_req; - - if (!local->length) { - op_errno = EINVAL; - goto out; - } - - ret = dc_compound_fop_wind(frame, this); - if (ret < 0) { - op_errno = -ret; - goto out; - } - return 0; -out: - DC_STACK_UNWIND(frame, -1, op_errno, compound_rsp, NULL); - return 0; -} - -struct xlator_cbks cbks = {}; - -struct volume_options options[] = { - {.key = {NULL}}, -}; - -struct xlator_fops fops = { - .compound = dc_compound, -}; - -int32_t -mem_acct_init(xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init(this, gf_dc_mt_end + 1); - - return ret; -} - -int32_t -init(xlator_t *this) -{ - int ret = -1; - - if (!this->children) { - gf_msg(this->name, GF_LOG_WARNING, 0, DC_MSG_VOL_MISCONFIGURED, - "Decompounder must have" - " a subvol."); - goto out; - } - - if (!this->parents) { - gf_msg(this->name, GF_LOG_WARNING, 0, DC_MSG_VOL_MISCONFIGURED, - "Volume is dangling."); - goto out; - } - - this->local_pool = mem_pool_new(dc_local_t, 128); - if (!this->local_pool) { - goto out; - } - - ret = 0; -out: - return ret; -} - -int32_t -fini(xlator_t *this) -{ - if (!this) - return 0; - - if (this->local_pool) { - mem_pool_destroy(this->local_pool); - this->local_pool = NULL; - } - return 0; -} diff --git a/xlators/performance/decompounder/src/decompounder.h b/xlators/performance/decompounder/src/decompounder.h deleted file mode 100644 index 486d0caf369..00000000000 --- a/xlators/performance/decompounder/src/decompounder.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __DC_H__ -#define __DC_H__ - -#include "defaults.h" -#include "xlator.h" -#include "call-stub.h" -#include "decompounder-mem-types.h" -#include "decompounder-messages.h" - -typedef struct { - compound_args_t *compound_req; - compound_args_cbk_t *compound_rsp; - int counter; - int length; -} dc_local_t; - -#define DC_STACK_UNWIND(frame, op_ret, op_errno, rsp, xdata) \ - do { \ - dc_local_t *__local = NULL; \ - \ - if (frame) { \ - __local = frame->local; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT(compound, frame, op_ret, op_errno, (void *)rsp, \ - xdata); \ - if (__local) { \ - dc_local_cleanup(__local); \ - mem_put(__local); \ - } \ - } while (0) - -int32_t -dc_compound_fop_wind(call_frame_t *frame, xlator_t *this); - -void -dc_local_cleanup(dc_local_t *local); - -#define DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fop, frame, op_ret, op_errno, \ - params...) \ - do { \ - dc_local_t *__local = frame->local; \ - xlator_t *__this = frame->this; \ - int __ret = 0; \ - int __counter = __local->counter; \ - compound_args_cbk_t *__compound_rsp = __local->compound_rsp; \ - default_args_cbk_t *__fop_rsp = &__local->compound_rsp \ - ->rsp_list[__counter]; \ - \ - if (op_ret < 0) { \ - gf_msg(__this->name, GF_LOG_ERROR, op_errno, \ - DC_MSG_ERROR_RECEIVED, "fop number %d failed. Unwinding.", \ - __counter + 1); \ - args_##fop##_cbk_store(__fop_rsp, op_ret, op_errno, params); \ - /*TODO : Fill the rest of the responses to -1 or NULL*/ \ - DC_STACK_UNWIND(frame, op_ret, op_errno, (void *)__compound_rsp, \ - NULL); \ - } else { \ - args_##fop##_cbk_store(__fop_rsp, op_ret, op_errno, params); \ - __local->counter++; \ - __ret = dc_compound_fop_wind(frame, __this); \ - if (__ret < 0) { \ - DC_STACK_UNWIND(frame, -1, -__ret, (void *)__compound_rsp, \ - NULL); \ - } \ - } \ - } while (0) -#endif /* DC_H__ */ diff --git a/xlators/performance/io-cache/src/io-cache-messages.h b/xlators/performance/io-cache/src/io-cache-messages.h index 09c5439ca71..38ad0b14d0e 100644 --- a/xlators/performance/io-cache/src/io-cache-messages.h +++ b/xlators/performance/io-cache/src/io-cache-messages.h @@ -10,7 +10,7 @@ #ifndef _IO_CACHE_MESSAGES_H_ #define _IO_CACHE_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * @@ -27,6 +27,43 @@ GLFS_MSGID(IO_CACHE, IO_CACHE_MSG_ENFORCEMENT_FAILED, IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, IO_CACHE_MSG_NO_MEMORY, IO_CACHE_MSG_VOL_MISCONFIGURED, IO_CACHE_MSG_INODE_NULL, IO_CACHE_MSG_PAGE_WAIT_VALIDATE, IO_CACHE_MSG_STR_COVERSION_FAILED, - IO_CACHE_MSG_WASTED_COPY); + IO_CACHE_MSG_WASTED_COPY, IO_CACHE_MSG_SET_FD_FAILED, + IO_CACHE_MSG_TABLE_NULL, IO_CACHE_MSG_MEMORY_INIT_FAILED, + IO_CACHE_MSG_NO_CACHE_SIZE_OPT, IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, + IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, + IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, IO_CACHE_MSG_NULL_PAGE_WAIT, + IO_CACHE_MSG_FRAME_NULL, IO_CACHE_MSG_PAGE_FAULT, + IO_CACHE_MSG_SERVE_READ_REQUEST, IO_CACHE_MSG_LOCAL_NULL, + IO_CACHE_MSG_DEFAULTING_TO_OLD); +#define IO_CACHE_MSG_NO_MEMORY_STR "out of memory" +#define IO_CACHE_MSG_ENFORCEMENT_FAILED_STR "inode context is NULL" +#define IO_CACHE_MSG_SET_FD_FAILED_STR "failed to set fd ctx" +#define IO_CACHE_MSG_TABLE_NULL_STR "table is NULL" +#define IO_CACHE_MSG_MEMORY_INIT_FAILED_STR "Memory accounting init failed" +#define IO_CACHE_MSG_NO_CACHE_SIZE_OPT_STR "could not get cache-size option" +#define IO_CACHE_MSG_INVALID_ARGUMENT_STR \ + "file size is greater than the max size" +#define IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE_STR "Not reconfiguring cache-size" +#define IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED_STR \ + "FATAL: io-cache not configured with exactly one child" +#define IO_CACHE_MSG_VOL_MISCONFIGURED_STR "dangling volume. check volfile" +#define IO_CACHE_MSG_CREATE_MEM_POOL_FAILED_STR \ + "failed to create local_t's memory pool" +#define IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED_STR "Unable to allocate mem_pool" +#define IO_CACHE_MSG_STR_COVERSION_FAILED_STR \ + "asprintf failed while converting prt to str" +#define IO_CACHE_MSG_INODE_NULL_STR "ioc_inode is NULL" +#define IO_CACHE_MSG_PAGE_WAIT_VALIDATE_STR \ + "cache validate called without any page waiting to be validated" +#define IO_CACHE_MSG_NULL_PAGE_WAIT_STR "asked to wait on a NULL page" +#define IO_CACHE_MSG_WASTED_COPY_STR "wasted copy" +#define IO_CACHE_MSG_FRAME_NULL_STR "frame>root>rsp_refs is null" +#define IO_CACHE_MSG_PAGE_FAULT_STR "page fault on a NULL frame" +#define IO_CACHE_MSG_SERVE_READ_REQUEST_STR \ + "NULL page has been provided to serve read request" +#define IO_CACHE_MSG_LOCAL_NULL_STR "local is NULL" +#define IO_CACHE_MSG_DEFAULTING_TO_OLD_STR \ + "minimum size of file that can be cached is greater than maximum size. " \ + "Hence Defaulting to old value" #endif /* _IO_CACHE_MESSAGES_H_ */ diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 36d5c791278..9375d29c17f 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -9,13 +9,13 @@ */ #include <math.h> -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> #include "io-cache.h" #include "ioc-mem-types.h" -#include "statedump.h" +#include <glusterfs/statedump.h> #include <assert.h> #include <sys/time.h> #include "io-cache-messages.h" @@ -78,23 +78,72 @@ ioc_get_inode (dict_t *dict, char *name) } */ -int32_t +int +ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode, + struct iovec *vector, int32_t count, int op_ret, off_t offset) +{ + size_t size = 0; + off_t rounded_offset = 0, rounded_end = 0, trav_offset = 0, + write_offset = 0; + off_t page_offset = 0, page_end = 0; + ioc_page_t *trav = NULL; + + size = iov_length(vector, count); + size = min(size, op_ret); + + rounded_offset = gf_floor(offset, ioc_inode->table->page_size); + rounded_end = gf_roof(offset + size, ioc_inode->table->page_size); + + trav_offset = rounded_offset; + ioc_inode_lock(ioc_inode); + { + while (trav_offset < rounded_end) { + trav = __ioc_page_get(ioc_inode, trav_offset); + if (trav && trav->ready) { + if (trav_offset == rounded_offset) + page_offset = offset - rounded_offset; + else + page_offset = 0; + + if ((trav_offset + ioc_inode->table->page_size) >= + rounded_end) { + page_end = trav->size - (rounded_end - (offset + size)); + } else { + page_end = trav->size; + } + + iov_range_copy(trav->vector, trav->count, page_offset, vector, + count, write_offset, page_end - page_offset); + } else if (trav) { + if (!trav->waitq) + ioc_inode->table->cache_used -= __ioc_page_destroy(trav); + } + + if (trav_offset == rounded_offset) + write_offset += (ioc_inode->table->page_size - + (offset - rounded_offset)); + else + write_offset += ioc_inode->table->page_size; + + trav_offset += ioc_inode->table->page_size; + } + } + ioc_inode_unlock(ioc_inode); + + return 0; +} + +static gf_boolean_t ioc_inode_need_revalidate(ioc_inode_t *ioc_inode) { - int8_t need_revalidate = 0; - struct timeval tv = { - 0, - }; ioc_table_t *table = NULL; + GF_ASSERT(ioc_inode); table = ioc_inode->table; + GF_ASSERT(table); - gettimeofday(&tv, NULL); - - if (time_elapsed(&tv, &ioc_inode->cache.tv) >= table->cache_timeout) - need_revalidate = 1; - - return need_revalidate; + return (gf_time() - ioc_inode->cache.last_revalidate >= + table->cache_timeout); } /* @@ -273,16 +322,14 @@ ioc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) local = mem_get0(this->local_pool); if (local == NULL) { op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); goto unwind; } ret = loc_copy(&local->file_loc, loc); if (ret != 0) { op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); goto unwind; } @@ -328,14 +375,12 @@ ioc_forget(xlator_t *this, inode_t *inode) static int32_t ioc_invalidate(xlator_t *this, inode_t *inode) { - uint64_t ioc_addr = 0; - ioc_inode_t *ioc_inode = NULL; + uint64_t ioc_inode = 0; - inode_ctx_get(inode, this, (uint64_t *)&ioc_addr); - ioc_inode = (void *)ioc_addr; + inode_ctx_get(inode, this, &ioc_inode); if (ioc_inode) - ioc_inode_flush(ioc_inode); + ioc_inode_flush((ioc_inode_t *)(uintptr_t)ioc_inode); return 0; } @@ -399,7 +444,7 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ioc_inode_lock(ioc_inode); { - gettimeofday(&ioc_inode->cache.tv, NULL); + ioc_inode->cache.last_revalidate = gf_time(); } ioc_inode_unlock(ioc_inode); @@ -436,8 +481,8 @@ ioc_wait_on_inode(ioc_inode_t *ioc_inode, ioc_page_t *page) if (!page_found) { waiter = GF_CALLOC(1, sizeof(ioc_waitq_t), gf_ioc_mt_ioc_waitq_t); if (waiter == NULL) { - gf_msg(ioc_inode->table->xl->name, GF_LOG_ERROR, ENOMEM, - IO_CACHE_MSG_NO_MEMORY, "out of memory"); + gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, ENOMEM, + IO_CACHE_MSG_NO_MEMORY, NULL); ret = -ENOMEM; goto out; } @@ -474,8 +519,8 @@ ioc_cache_validate(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, ret = -1; local->op_ret = -1; local->op_errno = ENOMEM; - gf_msg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0, - IO_CACHE_MSG_NO_MEMORY, "out of memory"); + gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0, + IO_CACHE_MSG_NO_MEMORY, NULL); goto out; } @@ -485,8 +530,8 @@ ioc_cache_validate(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, local->op_ret = -1; local->op_errno = ENOMEM; mem_put(validate_local); - gf_msg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0, - IO_CACHE_MSG_NO_MEMORY, "out of memory"); + gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0, + IO_CACHE_MSG_NO_MEMORY, NULL); goto out; } @@ -568,9 +613,9 @@ ioc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, // TODO: see why inode context is NULL and handle it. if (!ioc_inode) { - gf_msg(this->name, GF_LOG_ERROR, EINVAL, - IO_CACHE_MSG_ENFORCEMENT_FAILED, - "inode context is NULL (%s)", uuid_utoa(fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, + IO_CACHE_MSG_ENFORCEMENT_FAILED, "inode-gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); goto out; } @@ -662,9 +707,9 @@ ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, (table->max_file_size < ioc_inode->ia_size))) { ret = fd_ctx_set(fd, this, 1); if (ret) - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, - IO_CACHE_MSG_NO_MEMORY, "%s: failed to set fd ctx", - local->file_loc.path); + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, + IO_CACHE_MSG_SET_FD_FAILED, "path=%s", + local->file_loc.path, NULL); } } ioc_inode_unlock(ioc_inode); @@ -678,9 +723,9 @@ ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * as a whole */ ret = fd_ctx_set(fd, this, 1); if (ret) - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, - IO_CACHE_MSG_NO_MEMORY, "%s: failed to set fd ctx", - local->file_loc.path); + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, + IO_CACHE_MSG_SET_FD_FAILED, "path=%s", + local->file_loc.path, NULL); } /* if weight == 0, we disable caching on it */ @@ -688,9 +733,9 @@ ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, /* we allow a pattern-matched cache disable this way */ ret = fd_ctx_set(fd, this, 1); if (ret) - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, - IO_CACHE_MSG_NO_MEMORY, "%s: failed to set fd ctx", - local->file_loc.path); + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, + IO_CACHE_MSG_SET_FD_FAILED, "path=%s", + local->file_loc.path, NULL); } } @@ -763,16 +808,14 @@ ioc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, local = mem_get0(this->local_pool); if (local == NULL) { op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); goto unwind; } ret = loc_copy(&local->file_loc, loc); if (ret != 0) { op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); goto unwind; } @@ -810,8 +853,7 @@ ioc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, local = mem_get0(this->local_pool); if (local == NULL) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL); return 0; } @@ -846,8 +888,7 @@ ioc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, local = mem_get0(this->local_pool); if (local == NULL) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL, NULL); return 0; @@ -926,8 +967,8 @@ ioc_dispatch_requests(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, local = frame->local; table = ioc_inode->table; - rounded_offset = floor(offset, table->page_size); - rounded_end = roof(offset + size, table->page_size); + rounded_offset = gf_floor(offset, table->page_size); + rounded_end = gf_roof(offset + size, table->page_size); trav_offset = rounded_offset; /* once a frame does read, it should be waiting on something */ @@ -958,8 +999,8 @@ ioc_dispatch_requests(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, trav = __ioc_page_create(ioc_inode, trav_offset); fault = 1; if (!trav) { - gf_msg(frame->this->name, GF_LOG_CRITICAL, ENOMEM, - IO_CACHE_MSG_NO_MEMORY, "out of memory"); + gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM, + IO_CACHE_MSG_NO_MEMORY, NULL); local->op_ret = -1; local->op_errno = ENOMEM; ioc_inode_unlock(ioc_inode); @@ -1095,8 +1136,8 @@ ioc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, table = this->private; if (!table) { - gf_msg(this->name, GF_LOG_ERROR, EINVAL, - IO_CACHE_MSG_ENFORCEMENT_FAILED, "table is null"); + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, IO_CACHE_MSG_TABLE_NULL, + NULL); op_errno = EINVAL; goto out; } @@ -1127,8 +1168,7 @@ ioc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local = mem_get0(this->local_pool); if (local == NULL) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); op_errno = ENOMEM; goto out; } @@ -1184,13 +1224,22 @@ ioc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, uint64_t ioc_inode = 0; local = frame->local; + frame->local = NULL; inode_ctx_get(local->fd->inode, this, &ioc_inode); - if (ioc_inode) - ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); + if (op_ret >= 0) { + ioc_update_pages(frame, (ioc_inode_t *)(long)ioc_inode, local->vector, + local->op_ret, op_ret, local->offset); + } STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); + if (local->iobref) { + iobref_unref(local->iobref); + GF_FREE(local->vector); + } + + mem_put(local); return 0; } @@ -1215,8 +1264,7 @@ ioc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, local = mem_get0(this->local_pool); if (local == NULL) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; @@ -1227,8 +1275,12 @@ ioc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, frame->local = local; inode_ctx_get(fd->inode, this, &ioc_inode); - if (ioc_inode) - ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); + if (ioc_inode) { + local->iobref = iobref_ref(iobref); + local->vector = iov_dup(vector, count); + local->op_ret = count; + local->offset = offset; + } STACK_WIND(frame, ioc_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, @@ -1355,7 +1407,7 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, ioc_inode_lock(ioc_inode); { - gettimeofday(&ioc_inode->cache.tv, NULL); + ioc_inode->cache.last_revalidate = gf_time(); } ioc_inode_unlock(ioc_inode); @@ -1560,8 +1612,8 @@ mem_acct_init(xlator_t *this) ret = xlator_mem_acct_init(this, gf_ioc_mt_end + 1); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, - "Memory accounting init failed"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + IO_CACHE_MSG_MEMORY_INIT_FAILED, NULL); return ret; } @@ -1580,9 +1632,8 @@ check_cache_size_ok(xlator_t *this, uint64_t cache_size) opt = xlator_volume_option_get(this, "cache-size"); if (!opt) { ret = _gf_false; - gf_msg(this->name, GF_LOG_ERROR, EINVAL, - IO_CACHE_MSG_ENFORCEMENT_FAILED, - "could not get cache-size option"); + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, + IO_CACHE_MSG_NO_CACHE_SIZE_OPT, NULL); goto out; } @@ -1596,10 +1647,9 @@ check_cache_size_ok(xlator_t *this, uint64_t cache_size) if (cache_size > max_cache_size) { ret = _gf_false; - gf_msg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, - "Cache size %" PRIu64 - " is greater than the max size of %" PRIu64, - cache_size, max_cache_size); + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, + "Cache-size=%" PRIu64, cache_size, "max-size=%" PRIu64, + max_cache_size, NULL); goto out; } out: @@ -1649,13 +1699,9 @@ reconfigure(xlator_t *this, dict_t *options) if ((table->max_file_size <= UINT64_MAX) && (table->min_file_size > table->max_file_size)) { - gf_msg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, - "minimum size (%" PRIu64 - ") of a file that can be cached is " - "greater than maximum size (%" PRIu64 - "). " - "Hence Defaulting to old value", - table->min_file_size, table->max_file_size); + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_DEFAULTING_TO_OLD, + "minimum-size=%" PRIu64, table->min_file_size, + "maximum-size=%" PRIu64, table->max_file_size, NULL); goto unlock; } @@ -1663,8 +1709,8 @@ reconfigure(xlator_t *this, dict_t *options) unlock); if (!check_cache_size_ok(this, cache_size_new)) { ret = -1; - gf_msg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, - "Not reconfiguring cache-size"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, NULL); goto unlock; } table->cache_size = cache_size_new; @@ -1696,22 +1742,19 @@ init(xlator_t *this) xl_options = this->options; if (!this->children || this->children->next) { - gf_msg(this->name, GF_LOG_ERROR, 0, - IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, - "FATAL: io-cache not configured with exactly " - "one child"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, NULL); goto out; } if (!this->parents) { - gf_msg(this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_VOL_MISCONFIGURED, - "dangling volume. check volfile "); + gf_smsg(this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_VOL_MISCONFIGURED, + NULL); } table = (void *)GF_CALLOC(1, sizeof(*table), gf_ioc_mt_ioc_table_t); if (table == NULL) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); goto out; } @@ -1753,11 +1796,9 @@ init(xlator_t *this) if ((table->max_file_size <= UINT64_MAX) && (table->min_file_size > table->max_file_size)) { - gf_msg("io-cache", GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, - "minimum size (%" PRIu64 - ") of a file that can be cached is " - "greater than maximum size (%" PRIu64 ")", - table->min_file_size, table->max_file_size); + gf_smsg("io-cache", GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, + "minimum-size=%" PRIu64, table->min_file_size, + "maximum-size=%" PRIu64, table->max_file_size, NULL); goto out; } @@ -1773,8 +1814,8 @@ init(xlator_t *this) this->local_pool = mem_pool_new(ioc_local_t, 64); if (!this->local_pool) { ret = -1; - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, - "failed to create local_t's memory pool"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, NULL); goto out; } @@ -1786,8 +1827,8 @@ init(xlator_t *this) table->mem_pool = mem_pool_new(rbthash_entry_t, num_pages); if (!table->mem_pool) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, - "Unable to allocate mem_pool"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, NULL); goto out; } @@ -1890,7 +1931,7 @@ __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix) char key[GF_DUMP_MAX_BUF_LEN] = { 0, }; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; @@ -1900,11 +1941,9 @@ __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix) table = ioc_inode->table; - if (ioc_inode->cache.tv.tv_sec) { - gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.tv.tv_sec, + if (ioc_inode->cache.last_revalidate) { + gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.last_revalidate, gf_timefmt_FT); - snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr), - ".%" GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); gf_proc_dump_write("last-cache-validation-time", "%s", timestr); } @@ -2013,9 +2052,9 @@ ioc_priv_dump(xlator_t *this) if (ret) goto out; { - gf_proc_dump_write("page_size", "%ld", priv->page_size); - gf_proc_dump_write("cache_size", "%ld", priv->cache_size); - gf_proc_dump_write("cache_used", "%ld", priv->cache_used); + gf_proc_dump_write("page_size", "%" PRIu64, priv->page_size); + gf_proc_dump_write("cache_size", "%" PRIu64, priv->cache_size); + gf_proc_dump_write("cache_used", "%" PRIu64, priv->cache_used); gf_proc_dump_write("inode_count", "%u", priv->inode_count); gf_proc_dump_write("cache_timeout", "%u", priv->cache_timeout); gf_proc_dump_write("min-file-size", "%" PRIu64, priv->min_file_size); @@ -2118,6 +2157,14 @@ struct xlator_cbks cbks = { }; struct volume_options options[] = { + { + .key = {"io-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable io-cache", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, {.key = {"priority"}, .type = GF_OPTION_TYPE_PRIORITY_LIST, .default_value = "", @@ -2168,3 +2215,17 @@ struct volume_options options[] = { .description = "Enable/Disable io cache translator"}, {.key = {NULL}}, }; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "io-cache", + .category = GF_MAINTAINED, +}; diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h index cc66fcea714..14923c75edc 100644 --- a/xlators/performance/io-cache/src/io-cache.h +++ b/xlators/performance/io-cache/src/io-cache.h @@ -12,16 +12,12 @@ #define __IO_CACHE_H #include <sys/types.h> -#include "compat-errno.h" - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "common-utils.h" -#include "call-stub.h" -#include "rbthash.h" -#include "hashfn.h" +#include <glusterfs/compat-errno.h> + +#include <glusterfs/glusterfs.h> +#include <glusterfs/dict.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/rbthash.h> #include <sys/time.h> #include <fnmatch.h> #include "io-cache-messages.h" @@ -91,6 +87,8 @@ struct ioc_local { struct ioc_waitq *waitq; void *stub; fd_t *fd; + struct iovec *vector; + struct iobref *iobref; int32_t need_xattr; dict_t *xattr_req; }; @@ -119,15 +117,13 @@ struct ioc_page { struct ioc_cache { rbthash_table_t *page_table; struct list_head page_lru; - time_t mtime; /* - * seconds component of file mtime - */ - time_t mtime_nsec; /* - * nanosecond component of file mtime - */ - struct timeval tv; /* - * time-stamp at last re-validate - */ + time_t mtime; /* + * seconds component of file mtime + */ + time_t mtime_nsec; /* + * nanosecond component of file mtime + */ + time_t last_revalidate; /* timestamp at last re-validate */ }; struct ioc_inode { @@ -272,17 +268,6 @@ ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size, pthread_mutex_unlock(&page->page_lock); \ } while (0) -static inline uint64_t -time_elapsed(struct timeval *now, struct timeval *then) -{ - uint64_t sec = now->tv_sec - then->tv_sec; - - if (sec) - return sec; - - return 0; -} - ioc_inode_t * ioc_inode_search(ioc_table_t *table, inode_t *inode); diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c index a26e6d35adb..97767d85285 100644 --- a/xlators/performance/io-cache/src/ioc-inode.c +++ b/xlators/performance/io-cache/src/ioc-inode.c @@ -46,8 +46,8 @@ ptr_to_str(void *ptr) ret = gf_asprintf(&str, "%p", ptr); if (-1 == ret) { - gf_msg("io-cache", GF_LOG_WARNING, 0, IO_CACHE_MSG_STR_COVERSION_FAILED, - "asprintf failed while converting ptr to str"); + gf_smsg("io-cache", GF_LOG_WARNING, 0, + IO_CACHE_MSG_STR_COVERSION_FAILED, NULL); str = NULL; goto out; } @@ -75,8 +75,8 @@ ioc_inode_wakeup(call_frame_t *frame, ioc_inode_t *ioc_inode, if (ioc_inode == NULL) { local->op_ret = -1; local->op_errno = EINVAL; - gf_msg(frame->this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_INODE_NULL, - "ioc_inode is NULL"); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_INODE_NULL, + NULL); goto out; } @@ -89,10 +89,8 @@ ioc_inode_wakeup(call_frame_t *frame, ioc_inode_t *ioc_inode, { waiter = ioc_inode->waitq; if (!waiter) { - gf_msg(frame->this->name, GF_LOG_WARNING, 0, - IO_CACHE_MSG_PAGE_WAIT_VALIDATE, - "cache validate called without any " - "page waiting to be validated"); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + IO_CACHE_MSG_PAGE_WAIT_VALIDATE, NULL); ioc_inode_unlock(ioc_inode); goto out; diff --git a/xlators/performance/io-cache/src/ioc-mem-types.h b/xlators/performance/io-cache/src/ioc-mem-types.h index 3271840bb43..20c9a12021e 100644 --- a/xlators/performance/io-cache/src/ioc-mem-types.h +++ b/xlators/performance/io-cache/src/ioc-mem-types.h @@ -11,7 +11,7 @@ #ifndef __IOC_MT_H__ #define __IOC_MT_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_ioc_mem_types_ { gf_ioc_mt_iovec = gf_common_mt_end + 1, diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index 66cb12bc569..84b1ae6cb20 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -8,10 +8,10 @@ cases as published by the Free Software Foundation. */ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> #include "io-cache.h" #include "ioc-mem-types.h" #include <assert.h> @@ -42,7 +42,7 @@ __ioc_page_get(ioc_inode_t *ioc_inode, off_t offset) table = ioc_inode->table; GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out); - rounded_offset = floor(offset, table->page_size); + rounded_offset = gf_floor(offset, table->page_size); page = rbthash_get(ioc_inode->cache.page_table, &rounded_offset, sizeof(rounded_offset)); @@ -253,7 +253,7 @@ __ioc_page_create(ioc_inode_t *ioc_inode, off_t offset) table = ioc_inode->table; GF_VALIDATE_OR_GOTO("io-cache", table, out); - rounded_offset = floor(offset, table->page_size); + rounded_offset = gf_floor(offset, table->page_size); newpage = GF_CALLOC(1, sizeof(*newpage), gf_ioc_mt_ioc_newpage_t); if (newpage == NULL) { @@ -307,8 +307,8 @@ __ioc_wait_on_page(ioc_page_t *page, call_frame_t *frame, off_t offset, if (page == NULL) { local->op_ret = -1; local->op_errno = ENOMEM; - gf_msg(frame->this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_NO_MEMORY, - "asked to wait on a NULL page"); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + IO_CACHE_MSG_NULL_PAGE_WAIT, NULL); goto out; } @@ -444,7 +444,7 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec; } - gettimeofday(&ioc_inode->cache.tv, NULL); + ioc_inode->cache.last_revalidate = gf_time(); if (op_ret < 0) { /* error, readv returned -1 */ @@ -457,12 +457,10 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, if (!page) { /* page was flushed */ /* some serious bug ? */ - gf_msg(frame->this->name, GF_LOG_WARNING, 0, - IO_CACHE_MSG_WASTED_COPY, - "wasted copy: %" PRId64 "[+%" PRId64 - "] " - "ioc_inode=%p", - offset, table->page_size, ioc_inode); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + IO_CACHE_MSG_WASTED_COPY, "offset=%" PRId64, offset, + "page-size=%" PRId64, table->page_size, "ioc_inode=%p", + ioc_inode, NULL); } else { if (page->vector) { iobref_unref(page->iobref); @@ -486,9 +484,8 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } else { /* TODO: we have got a response to * our request and no data */ - gf_msg(frame->this->name, GF_LOG_CRITICAL, ENOMEM, - IO_CACHE_MSG_NO_MEMORY, - "frame>root>rsp_refs is null"); + gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM, + IO_CACHE_MSG_FRAME_NULL, NULL); } /* if(frame->root->rsp_refs) */ /* page->size should indicate exactly how @@ -574,8 +571,8 @@ ioc_page_fault(ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd, if (frame == NULL) { op_ret = -1; op_errno = EINVAL; - gf_msg("io-cache", GF_LOG_WARNING, EINVAL, - IO_CACHE_MSG_ENFORCEMENT_FAILED, "page fault on a NULL frame"); + gf_smsg("io-cache", GF_LOG_WARNING, EINVAL, IO_CACHE_MSG_PAGE_FAULT, + NULL); goto err; } @@ -658,9 +655,8 @@ __ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, GF_VALIDATE_OR_GOTO(frame->this->name, local, out); if (page == NULL) { - gf_msg(frame->this->name, GF_LOG_WARNING, 0, - IO_CACHE_MSG_ENFORCEMENT_FAILED, - "NULL page has been provided to serve read request"); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + IO_CACHE_MSG_SERVE_READ_REQUEST, NULL); local->op_ret = -1; local->op_errno = EINVAL; goto out; @@ -723,11 +719,8 @@ __ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, new->size = copy_size; new->iobref = iobref_ref(page->iobref); new->count = iov_subset(page->vector, page->count, src_offset, - src_offset + copy_size, NULL); - - new->vector = GF_CALLOC(new->count, sizeof(struct iovec), - gf_ioc_mt_iovec); - if (new->vector == NULL) { + copy_size, &new->vector, 0); + if (new->count < 0) { local->op_ret = -1; local->op_errno = ENOMEM; @@ -736,9 +729,6 @@ __ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, goto out; } - new->count = iov_subset(page->vector, page->count, src_offset, - src_offset + copy_size, new->vector); - /* add the ioc_fill to fill_list for this frame */ if (list_empty(&local->fill_list)) { /* if list is empty, then this is the first @@ -801,8 +791,8 @@ ioc_frame_unwind(call_frame_t *frame) local = frame->local; if (local == NULL) { - gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, - IO_CACHE_MSG_NO_MEMORY, "local is NULL"); + gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM, + IO_CACHE_MSG_LOCAL_NULL, NULL); op_ret = -1; op_errno = ENOMEM; goto unwind; diff --git a/xlators/performance/io-threads/src/io-threads-messages.h b/xlators/performance/io-threads/src/io-threads-messages.h index 25e08f4b687..6229c353f96 100644 --- a/xlators/performance/io-threads/src/io-threads-messages.h +++ b/xlators/performance/io-threads/src/io-threads-messages.h @@ -10,7 +10,7 @@ #ifndef _IO_THREADS_MESSAGES_H_ #define _IO_THREADS_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * @@ -24,6 +24,18 @@ GLFS_MSGID(IO_THREADS, IO_THREADS_MSG_INIT_FAILED, IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED, IO_THREADS_MSG_NO_MEMORY, - IO_THREADS_MSG_VOL_MISCONFIGURED, IO_THREADS_MSG_SIZE_NOT_SET); + IO_THREADS_MSG_VOL_MISCONFIGURED, IO_THREADS_MSG_SIZE_NOT_SET, + IO_THREADS_MSG_OUT_OF_MEMORY, IO_THREADS_MSG_PTHREAD_INIT_FAILED, + IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED); +#define IO_THREADS_MSG_INIT_FAILED_STR "Thread attribute initialization failed" +#define IO_THREADS_MSG_SIZE_NOT_SET_STR "Using default thread stack size" +#define IO_THREADS_MSG_NO_MEMORY_STR "Memory accounting init failed" +#define IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED_STR \ + "FATAL: iot not configured with exactly one child" +#define IO_THREADS_MSG_VOL_MISCONFIGURED_STR "dangling volume. check volfile" +#define IO_THREADS_MSG_OUT_OF_MEMORY_STR "out of memory" +#define IO_THREADS_MSG_PTHREAD_INIT_FAILED_STR "init failed" +#define IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED_STR \ + "cannot initialize worker threads, exiting init" #endif /* _IO_THREADS_MESSAGES_H_ */ diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 8c51f09a01d..3d24cc97f4b 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -8,20 +8,20 @@ cases as published by the Free Software Foundation. */ -#include "call-stub.h" -#include "defaults.h" -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" +#include <glusterfs/call-stub.h> +#include <glusterfs/defaults.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> #include "io-threads.h" #include <signal.h> #include <stdlib.h> #include <sys/time.h> #include <time.h> -#include "locking.h" +#include <glusterfs/locking.h> #include "io-threads-messages.h" -#include "timespec.h" +#include <glusterfs/timespec.h> void * iot_worker(void *arg); @@ -61,7 +61,7 @@ iot_get_ctx(xlator_t *this, client_t *client) int i; if (client_ctx_get(client, this, (void **)&ctx) != 0) { - ctx = GF_CALLOC(GF_FOP_PRI_MAX, sizeof(*ctx), gf_iot_mt_client_ctx_t); + ctx = GF_MALLOC(GF_FOP_PRI_MAX * sizeof(*ctx), gf_iot_mt_client_ctx_t); if (ctx) { for (i = 0; i < GF_FOP_PRI_MAX; ++i) { INIT_LIST_HEAD(&ctx[i].clients); @@ -156,6 +156,7 @@ __iot_enqueue(iot_conf_t *conf, call_stub_t *stub, int pri) list_add_tail(&stub->list, &ctx->reqs); conf->queue_size++; + GF_ATOMIC_INC(conf->stub_cnt); conf->queue_sizes[pri]++; } @@ -230,6 +231,7 @@ iot_worker(void *data) } else { call_resume(stub); } + GF_ATOMIC_DEC(conf->stub_cnt); } stub = NULL; @@ -273,7 +275,7 @@ iot_get_pri_meaning(gf_fop_pri_t pri) name = "slow"; break; case GF_FOP_PRI_LEAST: - name = "least priority"; + name = "least"; break; case GF_FOP_PRI_MAX: name = "invalid"; @@ -292,7 +294,9 @@ iot_schedule(call_frame_t *frame, xlator_t *this, call_stub_t *stub) gf_fop_pri_t pri = GF_FOP_PRI_MAX - 1; iot_conf_t *conf = this->private; - if ((frame->root->pid < GF_CLIENT_PID_MAX) && conf->least_priority) { + if ((frame->root->pid < GF_CLIENT_PID_MAX) && + (frame->root->pid != GF_CLIENT_PID_NO_ROOT_SQUASH) && + conf->least_priority) { pri = GF_FOP_PRI_LEAST; goto out; } @@ -368,7 +372,7 @@ iot_schedule(call_frame_t *frame, xlator_t *this, call_stub_t *stub) return -EINVAL; } out: - gf_msg_debug(this->name, 0, "%s scheduled as %s fop", + gf_msg_debug(this->name, 0, "%s scheduled as %s priority fop", gf_fop_list[stub->fop], iot_get_pri_meaning(pri)); if (this->private) ret = do_iot_schedule(this->private, stub, pri); @@ -610,7 +614,7 @@ iot_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, conf = this->private; - if (conf && name && strcmp(name, IO_THREADS_QUEUE_SIZE_KEY) == 0) { + if (name && strcmp(name, IO_THREADS_QUEUE_SIZE_KEY) == 0) { /* * We explicitly do not want a reference count * for this dict in this translator @@ -811,9 +815,6 @@ __iot_workers_scale(iot_conf_t *conf) pthread_t thread; int ret = 0; int i = 0; - char thread_name[GF_THREAD_NAMEMAX] = { - 0, - }; for (i = 0; i < GF_FOP_PRI_MAX; i++) scale += min(conf->queue_sizes[i], conf->ac_iot_limit[i]); @@ -831,11 +832,10 @@ __iot_workers_scale(iot_conf_t *conf) while (diff) { diff--; - snprintf(thread_name, sizeof(thread_name), "iotwr%03hx", - (conf->curr_count & 0x3ff)); ret = gf_thread_create(&thread, &conf->w_attr, iot_worker, conf, - thread_name); + "iotwr%03hx", conf->curr_count & 0x3ff); if (ret == 0) { + pthread_detach(thread); conf->curr_count++; gf_msg_debug(conf->this->name, 0, "scaled threads to %d (queue_size=%d/%d)", @@ -879,8 +879,8 @@ set_stack_size(iot_conf_t *conf) err = pthread_attr_init(&conf->w_attr); if (err != 0) { - gf_msg(this->name, GF_LOG_ERROR, err, IO_THREADS_MSG_INIT_FAILED, - "Thread attribute initialization failed"); + gf_smsg(this->name, GF_LOG_ERROR, err, IO_THREADS_MSG_INIT_FAILED, + NULL); return err; } @@ -888,11 +888,11 @@ set_stack_size(iot_conf_t *conf) if (err == EINVAL) { err = pthread_attr_getstacksize(&conf->w_attr, &stacksize); if (!err) { - gf_msg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_SIZE_NOT_SET, - "Using default thread stack size %zd", stacksize); + gf_smsg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_SIZE_NOT_SET, + "size=%zd", stacksize, NULL); } else { - gf_msg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_SIZE_NOT_SET, - "Using default thread stack size"); + gf_smsg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_SIZE_NOT_SET, + NULL); err = 0; } } @@ -912,8 +912,8 @@ mem_acct_init(xlator_t *this) ret = xlator_mem_acct_init(this, gf_iot_mt_end + 1); if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_THREADS_MSG_NO_MEMORY, - "Memory accounting init failed"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_THREADS_MSG_NO_MEMORY, + NULL); return ret; } @@ -925,6 +925,8 @@ iot_priv_dump(xlator_t *this) { iot_conf_t *conf = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN]; + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 0; if (!this) return 0; @@ -942,14 +944,29 @@ iot_priv_dump(xlator_t *this) gf_proc_dump_write("sleep_count", "%d", conf->sleep_count); gf_proc_dump_write("idle_time", "%d", conf->idle_time); gf_proc_dump_write("stack_size", "%zd", conf->stack_size); - gf_proc_dump_write("high_priority_threads", "%d", + gf_proc_dump_write("max_high_priority_threads", "%d", conf->ac_iot_limit[GF_FOP_PRI_HI]); - gf_proc_dump_write("normal_priority_threads", "%d", + gf_proc_dump_write("max_normal_priority_threads", "%d", conf->ac_iot_limit[GF_FOP_PRI_NORMAL]); - gf_proc_dump_write("low_priority_threads", "%d", + gf_proc_dump_write("max_low_priority_threads", "%d", conf->ac_iot_limit[GF_FOP_PRI_LO]); - gf_proc_dump_write("least_priority_threads", "%d", + gf_proc_dump_write("max_least_priority_threads", "%d", conf->ac_iot_limit[GF_FOP_PRI_LEAST]); + gf_proc_dump_write("current_high_priority_threads", "%d", + conf->ac_iot_count[GF_FOP_PRI_HI]); + gf_proc_dump_write("current_normal_priority_threads", "%d", + conf->ac_iot_count[GF_FOP_PRI_NORMAL]); + gf_proc_dump_write("current_low_priority_threads", "%d", + conf->ac_iot_count[GF_FOP_PRI_LO]); + gf_proc_dump_write("current_least_priority_threads", "%d", + conf->ac_iot_count[GF_FOP_PRI_LEAST]); + for (i = 0; i < GF_FOP_PRI_MAX; i++) { + if (!conf->queue_sizes[i]) + continue; + snprintf(key, sizeof(key), "%s_priority_queue_length", + iot_get_pri_meaning(i)); + gf_proc_dump_write(key, "%d", conf->queue_sizes[i]); + } return 0; } @@ -985,8 +1002,8 @@ iot_priv_dump(xlator_t *this) */ typedef struct { - uint32_t value; time_t update_time; + uint32_t value; } threshold_t; /* * Variables so that I can hack these for testing. @@ -999,16 +1016,13 @@ static uint32_t THRESH_LIMIT = 1209600; /* SECONDS * (EVENTS-1) */ static void iot_apply_event(xlator_t *this, threshold_t *thresh) { - struct timespec now; - time_t delta; + time_t delta, now = gf_time(); /* Refresh for manual testing/debugging. It's cheap. */ THRESH_LIMIT = THRESH_SECONDS * (THRESH_EVENTS - 1); - timespec_now(&now); - if (thresh->value && thresh->update_time) { - delta = now.tv_sec - thresh->update_time; + delta = now - thresh->update_time; /* Be careful about underflow. */ if (thresh->value <= delta) { thresh->value = 0; @@ -1029,7 +1043,7 @@ iot_apply_event(xlator_t *this, threshold_t *thresh) kill(getpid(), SIGTRAP); } - thresh->update_time = now.tv_sec; + thresh->update_time = now; } static void * @@ -1169,35 +1183,33 @@ init(xlator_t *this) int i = 0; if (!this->children || this->children->next) { - gf_msg("io-threads", GF_LOG_ERROR, 0, - IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED, - "FATAL: iot not configured " - "with exactly one child"); + gf_smsg("io-threads", GF_LOG_ERROR, 0, + IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED, NULL); goto out; } if (!this->parents) { - gf_msg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_VOL_MISCONFIGURED, - "dangling volume. check volfile "); + gf_smsg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_VOL_MISCONFIGURED, + NULL); } conf = (void *)GF_CALLOC(1, sizeof(*conf), gf_iot_mt_iot_conf_t); if (conf == NULL) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, IO_THREADS_MSG_NO_MEMORY, - "out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_THREADS_MSG_OUT_OF_MEMORY, + NULL); goto out; } if ((ret = pthread_cond_init(&conf->cond, NULL)) != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, IO_THREADS_MSG_INIT_FAILED, - "pthread_cond_init failed (%d)", ret); + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_THREADS_MSG_PTHREAD_INIT_FAILED, + "pthread_cond_init ret=%d", ret, NULL); goto out; } conf->cond_inited = _gf_true; if ((ret = pthread_mutex_init(&conf->mutex, NULL)) != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, IO_THREADS_MSG_INIT_FAILED, - "pthread_mutex_init failed (%d)", ret); + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_THREADS_MSG_PTHREAD_INIT_FAILED, + "pthread_mutex_init ret=%d", ret, NULL); goto out; } conf->mutex_inited = _gf_true; @@ -1207,6 +1219,8 @@ init(xlator_t *this) if (ret != 0) goto out; + ret = -1; + GF_OPTION_INIT("thread-count", conf->max_count, int32, out); GF_OPTION_INIT("high-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_HI], @@ -1231,6 +1245,7 @@ init(xlator_t *this) GF_OPTION_INIT("pass-through", this->pass_through, bool, out); conf->this = this; + GF_ATOMIC_INIT(conf->stub_cnt, 0); for (i = 0; i < GF_FOP_PRI_MAX; i++) { INIT_LIST_HEAD(&conf->clients[i]); @@ -1238,12 +1253,14 @@ init(xlator_t *this) INIT_LIST_HEAD(&conf->no_client[i].reqs); } - ret = iot_workers_scale(conf); + if (!this->pass_through) { + ret = iot_workers_scale(conf); - if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, IO_THREADS_MSG_INIT_FAILED, - "cannot initialize worker threads, exiting init"); - goto out; + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED, NULL); + goto out; + } } this->private = conf; @@ -1280,9 +1297,44 @@ int notify(xlator_t *this, int32_t event, void *data, ...) { iot_conf_t *conf = this->private; + xlator_t *victim = data; + uint64_t stub_cnt = 0; + struct timespec sleep_till = { + 0, + }; - if (GF_EVENT_PARENT_DOWN == event) - iot_exit_threads(conf); + if (GF_EVENT_PARENT_DOWN == event) { + if (victim->cleanup_starting) { + /* Wait for draining stub from queue before notify PARENT_DOWN */ + stub_cnt = GF_ATOMIC_GET(conf->stub_cnt); + if (stub_cnt) { + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + pthread_mutex_lock(&conf->mutex); + { + while (stub_cnt) { + (void)pthread_cond_timedwait(&conf->cond, &conf->mutex, + &sleep_till); + stub_cnt = GF_ATOMIC_GET(conf->stub_cnt); + } + } + pthread_mutex_unlock(&conf->mutex); + } + + gf_log(this->name, GF_LOG_INFO, + "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); + } else { + iot_exit_threads(conf); + } + } + + if (GF_EVENT_CHILD_DOWN == event) { + if (victim->cleanup_starting) { + iot_exit_threads(conf); + gf_log(this->name, GF_LOG_INFO, + "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name); + } + } default_notify(this, event, data); @@ -1521,3 +1573,18 @@ struct volume_options options[] = { .key = {NULL}, }, }; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "io-threads", + .category = GF_MAINTAINED, +}; diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h index 949d1d1b1e0..f54d2f4912d 100644 --- a/xlators/performance/io-threads/src/io-threads.h +++ b/xlators/performance/io-threads/src/io-threads.h @@ -11,18 +11,18 @@ #ifndef __IOT_H #define __IOT_H -#include "compat-errno.h" -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "common-utils.h" -#include "list.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/list.h> #include <stdlib.h> -#include "locking.h" +#include <glusterfs/locking.h> #include "iot-mem-types.h" #include <semaphore.h> -#include "statedump.h" +#include <glusterfs/statedump.h> struct iot_conf; @@ -63,7 +63,8 @@ struct iot_conf { int32_t ac_iot_limit[GF_FOP_PRI_MAX]; int32_t ac_iot_count[GF_FOP_PRI_MAX]; int queue_sizes[GF_FOP_PRI_MAX]; - int queue_size; + int32_t queue_size; + gf_atomic_t stub_cnt; pthread_attr_t w_attr; gf_boolean_t least_priority; /*Enable/Disable least-priority */ diff --git a/xlators/performance/io-threads/src/iot-mem-types.h b/xlators/performance/io-threads/src/iot-mem-types.h index d6b5e7b6899..29565f34dd4 100644 --- a/xlators/performance/io-threads/src/iot-mem-types.h +++ b/xlators/performance/io-threads/src/iot-mem-types.h @@ -11,7 +11,7 @@ #ifndef __IOT_MEM_TYPES_H__ #define __IOT_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_iot_mem_types_ { gf_iot_mt_iot_conf_t = gf_common_mt_end + 1, diff --git a/xlators/performance/md-cache/src/md-cache-mem-types.h b/xlators/performance/md-cache/src/md-cache-mem-types.h index e6658577731..47a07005717 100644 --- a/xlators/performance/md-cache/src/md-cache-mem-types.h +++ b/xlators/performance/md-cache/src/md-cache-mem-types.h @@ -11,7 +11,7 @@ #ifndef __MDC_MEM_TYPES_H__ #define __MDC_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_mdc_mem_types_ { gf_mdc_mt_mdc_local_t = gf_common_mt_end + 1, diff --git a/xlators/performance/md-cache/src/md-cache-messages.h b/xlators/performance/md-cache/src/md-cache-messages.h index dfc321372ce..f367bad1991 100644 --- a/xlators/performance/md-cache/src/md-cache-messages.h +++ b/xlators/performance/md-cache/src/md-cache-messages.h @@ -10,7 +10,7 @@ #ifndef _MD_CACHE_MESSAGES_H_ #define _MD_CACHE_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 02fc79e2267..a405be51f02 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -8,23 +8,22 @@ cases as published by the Free Software Foundation. */ -#include "timespec.h" -#include "glusterfs.h" -#include "defaults.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "syncop.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/defaults.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/syncop.h> #include "md-cache-mem-types.h" -#include "compat-errno.h" -#include "glusterfs-acl.h" -#include "defaults.h" -#include "upcall-utils.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/glusterfs-acl.h> +#include <glusterfs/defaults.h> +#include <glusterfs/upcall-utils.h> #include <assert.h> #include <sys/time.h> #include "md-cache-messages.h" -#include "statedump.h" -#include "atomic.h" +#include <glusterfs/statedump.h> +#include <glusterfs/atomic.h> /* TODO: - cache symlink() link names and nuke symlink-cache @@ -33,8 +32,7 @@ struct mdc_statfs_cache { pthread_mutex_t lock; - gf_boolean_t initialized; - struct timespec last_refreshed; + time_t last_refreshed; /* (time_t)-1 if not yet initialized. */ struct statvfs buf; }; @@ -61,8 +59,9 @@ struct mdc_statistics { }; struct mdc_conf { - int timeout; + uint32_t timeout; gf_boolean_t cache_posix_acl; + gf_boolean_t cache_glusterfs_acl; gf_boolean_t cache_selinux; gf_boolean_t cache_capability; gf_boolean_t cache_ima; @@ -70,6 +69,8 @@ struct mdc_conf { gf_boolean_t cache_swift_metadata; gf_boolean_t cache_samba_metadata; gf_boolean_t mdc_invalidation; + gf_boolean_t global_invalidation; + time_t last_child_down; gf_lock_t lock; struct mdc_statistics mdc_counter; @@ -100,16 +101,15 @@ struct md_cache { uint32_t md_nlink; uint32_t md_uid; uint32_t md_gid; - uint32_t md_atime; uint32_t md_atime_nsec; - uint32_t md_mtime; uint32_t md_mtime_nsec; - uint32_t md_ctime; uint32_t md_ctime_nsec; + int64_t md_atime; + int64_t md_mtime; + int64_t md_ctime; uint64_t md_rdev; uint64_t md_size; uint64_t md_blocks; - uint64_t invalidation_time; uint64_t generation; dict_t *xattr; char *linkname; @@ -130,6 +130,7 @@ struct mdc_local { char *key; dict_t *xattr; uint64_t incident_time; + bool update_cache; }; int @@ -166,7 +167,7 @@ out: } uint64_t -__mdc_get_generation(xlator_t *this, struct md_cache *mdc) +__mdc_inc_generation(xlator_t *this, struct md_cache *mdc) { uint64_t gen = 0, rollover; struct mdc_conf *conf = NULL; @@ -179,7 +180,6 @@ __mdc_get_generation(xlator_t *this, struct md_cache *mdc) gen = GF_ATOMIC_INC(conf->generation); mdc->ia_time = 0; mdc->generation = 0; - mdc->invalidation_time = gen - 1; } rollover = mdc->gen_rollover; @@ -188,7 +188,7 @@ __mdc_get_generation(xlator_t *this, struct md_cache *mdc) } uint64_t -mdc_get_generation(xlator_t *this, inode_t *inode) +mdc_inc_generation(xlator_t *this, inode_t *inode) { struct mdc_conf *conf = NULL; uint64_t gen = 0; @@ -201,7 +201,7 @@ mdc_get_generation(xlator_t *this, inode_t *inode) if (mdc) { LOCK(&mdc->lock); { - gen = __mdc_get_generation(this, mdc); + gen = __mdc_inc_generation(this, mdc); } UNLOCK(&mdc->lock); } else { @@ -214,6 +214,29 @@ mdc_get_generation(xlator_t *this, inode_t *inode) return gen; } +uint64_t +mdc_get_generation(xlator_t *this, inode_t *inode) +{ + struct mdc_conf *conf = NULL; + uint64_t gen = 0; + struct md_cache *mdc = NULL; + + conf = this->private; + + mdc_inode_ctx_get(this, inode, &mdc); + + if (mdc) { + LOCK(&mdc->lock); + { + gen = mdc->generation; + } + UNLOCK(&mdc->lock); + } else + gen = GF_ATOMIC_GET(conf->generation); + + return gen; +} + int __mdc_inode_ctx_set(xlator_t *this, inode_t *inode, struct md_cache *mdc) { @@ -351,10 +374,9 @@ unlock: static gf_boolean_t __is_cache_valid(xlator_t *this, time_t mdc_time) { - time_t now = 0; gf_boolean_t ret = _gf_true; struct mdc_conf *conf = NULL; - int timeout = 0; + uint32_t timeout = 0; time_t last_child_down = 0; conf = this->private; @@ -368,15 +390,13 @@ __is_cache_valid(xlator_t *this, time_t mdc_time) last_child_down = conf->last_child_down; timeout = conf->timeout; - time(&now); - if ((mdc_time == 0) || ((last_child_down != 0) && (mdc_time < last_child_down))) { ret = _gf_false; goto out; } - if (now >= (mdc_time + timeout)) { + if (gf_time() >= (mdc_time + timeout)) { ret = _gf_false; } @@ -397,8 +417,7 @@ is_md_cache_iatt_valid(xlator_t *this, struct md_cache *mdc) ret = __is_cache_valid(this, mdc->ia_time); if (ret == _gf_false) { mdc->ia_time = 0; - mdc->invalidation_time = __mdc_get_generation(this, mdc) & - 0xffffffff; + mdc->generation = 0; } } } @@ -468,6 +487,8 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, struct md_cache *mdc = NULL; uint32_t rollover = 0; uint64_t gen = 0; + gf_boolean_t update_xa_time = _gf_false; + struct mdc_conf *conf = this->private; mdc = mdc_inode_prep(this, inode); if (!mdc) { @@ -488,8 +509,8 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, mdc->ia_time = 0; mdc->valid = 0; - gen = __mdc_get_generation(this, mdc); - mdc->invalidation_time = (gen & 0xffffffff); + gen = __mdc_inc_generation(this, mdc); + mdc->generation = (gen & 0xffffffff); goto unlock; } @@ -527,32 +548,38 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, * TODO: writev returns with a NULL iatt due to * performance/write-behind, causing invalidation on writes. */ - if (IA_ISREG(inode->ia_type) && - ((iatt->ia_mtime != mdc->md_mtime) || - (iatt->ia_mtime_nsec != mdc->md_mtime_nsec) || - (iatt->ia_ctime != mdc->md_ctime) || - (iatt->ia_ctime_nsec != mdc->md_ctime_nsec))) - if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) || - (prebuf->ia_ctime_nsec != mdc->md_ctime_nsec) || - (prebuf->ia_mtime != mdc->md_mtime) || - (prebuf->ia_mtime_nsec != mdc->md_mtime_nsec)) { - gf_msg_trace("md-cache", 0, - "prebuf doesn't " - "match the value we have cached," - " invalidate the inode(%s)", - uuid_utoa(inode->gfid)); - - inode_invalidate(inode); + if ((iatt->ia_mtime != mdc->md_mtime) || + (iatt->ia_mtime_nsec != mdc->md_mtime_nsec) || + (iatt->ia_ctime != mdc->md_ctime) || + (iatt->ia_ctime_nsec != mdc->md_ctime_nsec)) { + if (conf->global_invalidation && + (!prebuf || (prebuf->ia_mtime != mdc->md_mtime) || + (prebuf->ia_mtime_nsec != mdc->md_mtime_nsec) || + (prebuf->ia_ctime != mdc->md_ctime) || + (prebuf->ia_ctime_nsec != mdc->md_ctime_nsec))) { + if (IA_ISREG(inode->ia_type)) { + gf_msg("md-cache", GF_LOG_TRACE, 0, + MD_CACHE_MSG_DISCARD_UPDATE, + "prebuf doesn't match the value we have cached," + " invalidate the inode(%s)", + uuid_utoa(inode->gfid)); + + inode_invalidate(inode); + } + } else { + update_xa_time = _gf_true; } + } if ((mdc->gen_rollover == rollover) && - ((incident_time > mdc->generation) && - (mdc->valid || (incident_time > mdc->invalidation_time)))) { + (incident_time >= mdc->generation)) { mdc_from_iatt(mdc, iatt); - mdc->generation = incident_time; mdc->valid = _gf_true; - if (update_time) - time(&mdc->ia_time); + if (update_time) { + mdc->ia_time = gf_time(); + if (mdc->xa_time && update_xa_time) + mdc->xa_time = mdc->ia_time; + } gf_msg_callingfn( "md-cache", GF_LOG_TRACE, 0, MD_CACHE_MSG_CACHE_UPDATE, @@ -565,13 +592,11 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, "not updating cache (%s)" "mdc-rollover=%u rollover=%u " "mdc-generation=%llu " - "mdc-ia_time=%llu incident_time=%llu " - "mdc-invalidation-time=%llu", + "mdc-ia_time=%llu incident_time=%llu ", uuid_utoa(iatt->ia_gfid), mdc->gen_rollover, rollover, (unsigned long long)mdc->generation, (unsigned long long)mdc->ia_time, - (unsigned long long)incident_time, - (unsigned long long)mdc->invalidation_time); + (unsigned long long)incident_time); } } unlock: @@ -684,25 +709,6 @@ updatefn(dict_t *dict, char *key, data_t *value, void *data) } } - /* posix xlator as part of listxattr will send both names - * and values of the xattrs in the dict. But as per man page - * listxattr is mainly supposed to send names of the all the - * xattrs. gfapi, as of now will put all the keys it obtained - * in the dict (sent by posix) into a buffer provided by the - * caller (thus the values of those xattrs are lost). If some - * xlator makes gfapi based calls (ex: snapview-server), then - * it has to unwind the calls by putting those names it got - * in the buffer again into the dict. But now it would not be - * having the values for those xattrs. So it might just put - * a 0 byte value ("") into the dict for each xattr and unwind - * the call. So the xlators which cache the xattrs (as of now - * md-cache caches the acl and selinux related xattrs), should - * not update their cache if the value of a xattr is a 0 byte - * data (i.e. ""). - */ - if (value->len == 1 && value->data[0] == '\0') - return 0; - if (dict_set(u->dict, key, value) < 0) { u->ret = -1; return -1; @@ -773,7 +779,7 @@ mdc_inode_xatt_set(xlator_t *this, inode_t *inode, dict_t *dict) if (newdict) mdc->xattr = newdict; - time(&mdc->xa_time); + mdc->xa_time = gf_time(); gf_msg_trace("md-cache", 0, "xatt cache set for (%s) time:%lld", uuid_utoa(inode->gfid), (long long)mdc->xa_time); } @@ -922,13 +928,13 @@ mdc_inode_iatt_invalidate(xlator_t *this, inode_t *inode) if (mdc_inode_ctx_get(this, inode, &mdc) != 0) goto out; - gen = mdc_get_generation(this, inode) & 0xffffffff; + gen = mdc_inc_generation(this, inode) & 0xffffffff; LOCK(&mdc->lock); { mdc->ia_time = 0; mdc->valid = _gf_false; - mdc->invalidation_time = gen; + mdc->generation = gen; } UNLOCK(&mdc->lock); @@ -969,12 +975,12 @@ mdc_update_gfid_stat(xlator_t *this, struct iatt *iatt) goto out; } ret = mdc_inode_iatt_set_validate(this, inode, NULL, iatt, _gf_true, - mdc_get_generation(this, inode)); + mdc_inc_generation(this, inode)); out: return ret; } -void +static bool mdc_load_reqs(xlator_t *this, dict_t *dict) { struct mdc_conf *conf = this->private; @@ -983,6 +989,7 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) char *tmp = NULL; char *tmp1 = NULL; int ret = 0; + bool loaded = false; tmp1 = conf->mdc_xattr_str; if (!tmp1) @@ -1000,13 +1007,17 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) conf->mdc_xattr_str = NULL; gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE, "Disabled cache for xattrs, dict_set failed"); + goto out; } pattern = strtok_r(NULL, ",", &tmp); } - GF_FREE(mdc_xattr_str); + loaded = true; + out: - return; + GF_FREE(mdc_xattr_str); + + return loaded; } struct checkpair { @@ -1046,8 +1057,7 @@ mdc_cache_statfs(xlator_t *this, struct statvfs *buf) pthread_mutex_lock(&conf->statfs_cache.lock); { memcpy(&conf->statfs_cache.buf, buf, sizeof(struct statvfs)); - clock_gettime(CLOCK_MONOTONIC, &conf->statfs_cache.last_refreshed); - conf->statfs_cache.initialized = _gf_true; + conf->statfs_cache.last_refreshed = gf_time(); } pthread_mutex_unlock(&conf->statfs_cache.lock); } @@ -1056,8 +1066,7 @@ int mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf) { struct mdc_conf *conf = this->private; - struct timespec now; - double cache_age = 0.0; + uint32_t cache_age = 0; int ret = 0; if (!buf || !conf) { @@ -1065,25 +1074,24 @@ mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf) goto err; } + *buf = NULL; + pthread_mutex_lock(&conf->statfs_cache.lock); { - *buf = NULL; - - /* Skip if the cache is not initialized */ - if (!conf->statfs_cache.initialized) { + /* Skip if the cache is not initialized. */ + if (conf->statfs_cache.last_refreshed == (time_t)-1) { ret = -1; goto unlock; } - timespec_now(&now); + cache_age = (gf_time() - conf->statfs_cache.last_refreshed); - cache_age = (now.tv_sec - conf->statfs_cache.last_refreshed.tv_sec); - - gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %lf", cache_age); + gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %u secs", + cache_age); if (cache_age > conf->timeout) { - /* Expire the cache */ + /* Expire the cache. */ gf_log(this->name, GF_LOG_DEBUG, - "Cache age %lf exceeded timeout %d", cache_age, + "Cache age %u secs exceeded timeout %u secs", cache_age, conf->timeout); ret = -1; goto unlock; @@ -1097,6 +1105,31 @@ err: return ret; } +static dict_t * +mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata) +{ + if (xdata != NULL) { + dict_ref(xdata); + } + + if (local == NULL) { + return xdata; + } + + if (xdata == NULL) { + xdata = dict_new(); + if (xdata == NULL) { + local->update_cache = false; + + return NULL; + } + } + + local->update_cache = mdc_load_reqs(this, xdata); + + return xdata; +} + int mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct statvfs *buf, @@ -1165,7 +1198,7 @@ uncached: return 0; out: - STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata); + MDC_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata); return 0; } @@ -1179,6 +1212,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; + if (!local) + goto out; + if (op_ret != 0) { if (op_errno == ENOENT) GF_ATOMIC_INC(conf->mdc_counter.negative_lookup); @@ -1196,9 +1232,6 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - if (!local) - goto out; - if (local->loc.parent) { mdc_inode_iatt_set(this, local->loc.parent, postparent, local->incident_time); @@ -1206,7 +1239,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (local->loc.inode) { mdc_inode_iatt_set(this, local->loc.inode, stbuf, local->incident_time); - mdc_inode_xatt_set(this, local->loc.inode, dict); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->loc.inode, dict); + } } out: MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict, @@ -1225,7 +1260,6 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) 0, }; dict_t *xattr_rsp = NULL; - dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; struct mdc_conf *conf = this->private; @@ -1276,18 +1310,18 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) return 0; uncached: - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, xdata); if (xattr_rsp) dict_unref(xattr_rsp); - if (xattr_alloc) - dict_unref(xattr_alloc); + + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; } @@ -1310,6 +1344,9 @@ mdc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->loc.inode, xdata); + } out: MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); @@ -1346,9 +1383,16 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) return 0; uncached: + xdata = mdc_prepare_request(this, local, xdata); + GF_ATOMIC_INC(conf->mdc_counter.stat_miss); STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, loc, xdata); + + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; } @@ -1371,6 +1415,9 @@ mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } mdc_inode_iatt_set(this, local->fd->inode, buf, local->incident_time); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->fd->inode, xdata); + } out: MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata); @@ -1390,7 +1437,7 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) if (!local) goto uncached; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); ret = mdc_inode_iatt_get(this, fd->inode, &stbuf); if (ret != 0) @@ -1402,9 +1449,16 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) return 0; uncached: + xdata = mdc_prepare_request(this, local, xdata); + GF_ATOMIC_INC(conf->mdc_counter.stat_miss); STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, fd, xdata); + + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; } @@ -1443,8 +1497,9 @@ mdc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - local->loc.inode = inode_ref(loc->inode); + if (local != NULL) { + local->loc.inode = inode_ref(loc->inode); + } STACK_WIND(frame, mdc_truncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); @@ -1487,8 +1542,9 @@ mdc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_ftruncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); @@ -1522,7 +1578,6 @@ mdc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, if (local->loc.inode) { mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time); - mdc_inode_xatt_set(this, local->loc.inode, local->xattr); } out: MDC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, @@ -1537,9 +1592,10 @@ mdc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xdata); + } STACK_WIND(frame, mdc_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); @@ -1573,7 +1629,6 @@ mdc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, if (local->loc.inode) { mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time); - mdc_inode_xatt_set(this, local->loc.inode, local->xattr); } out: MDC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent, @@ -1588,9 +1643,10 @@ mdc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xdata); + } STACK_WIND(frame, mdc_mkdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); @@ -1647,8 +1703,9 @@ mdc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); + if (local != NULL) { + loc_copy(&local->loc, loc); + } STACK_WIND(frame, mdc_unlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); @@ -1701,8 +1758,9 @@ mdc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); + if (local != NULL) { + loc_copy(&local->loc, loc); + } STACK_WIND(frame, mdc_rmdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, loc, flag, xdata); @@ -1749,13 +1807,22 @@ mdc_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) { mdc_local_t *local = NULL; + char *name; + name = gf_strdup(linkname); + if (name == NULL) { + goto wind; + } local = mdc_local_get(frame, loc->inode); + if (local == NULL) { + GF_FREE(name); + goto wind; + } loc_copy(&local->loc, loc); + local->linkname = name; - local->linkname = gf_strdup(linkname); - +wind: STACK_WIND(frame, mdc_symlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); return 0; @@ -1813,9 +1880,10 @@ mdc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, mdc_local_t *local = NULL; local = mdc_local_get(frame, oldloc->inode); - - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); + if (local != NULL) { + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + } STACK_WIND(frame, mdc_rename_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); @@ -1864,9 +1932,10 @@ mdc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, mdc_local_t *local = NULL; local = mdc_local_get(frame, oldloc->inode); - - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); + if (local != NULL) { + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + } STACK_WIND(frame, mdc_link_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); @@ -1901,7 +1970,6 @@ mdc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (local->loc.inode) { mdc_inode_iatt_set(this, inode, buf, local->incident_time); - mdc_inode_xatt_set(this, local->loc.inode, local->xattr); } out: MDC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf, preparent, @@ -1916,9 +1984,10 @@ mdc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xdata); + } STACK_WIND(frame, mdc_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, @@ -1965,8 +2034,9 @@ mdc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, } local = mdc_local_get(frame, loc->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } out: STACK_WIND(frame, mdc_open_cbk, FIRST_CHILD(this), @@ -2007,8 +2077,9 @@ mdc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_readv_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); @@ -2049,8 +2120,9 @@ mdc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, @@ -2066,17 +2138,17 @@ mdc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, mdc_local_t *local = NULL; local = frame->local; + if (!local) + goto out; if (op_ret != 0) { mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time); goto out; } - if (!local) - goto out; - mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf, _gf_true, local->incident_time); + mdc_inode_xatt_update(this, local->loc.inode, xdata); out: MDC_STACK_UNWIND(setattr, frame, op_ret, op_errno, prebuf, postbuf, xdata); @@ -2089,13 +2161,47 @@ mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int valid, dict_t *xdata) { mdc_local_t *local = NULL; + dict_t *xattr_alloc = NULL; + int ret = 0; + struct mdc_conf *conf = this->private; local = mdc_local_get(frame, loc->inode); + if (local == NULL) { + goto wind; + } loc_copy(&local->loc, loc); + if ((valid & GF_SET_ATTR_MODE) && conf->cache_glusterfs_acl) { + if (!xdata) + xdata = xattr_alloc = dict_new(); + if (xdata) { + ret = dict_set_int8(xdata, GF_POSIX_ACL_ACCESS, 0); + if (!ret) + ret = dict_set_int8(xdata, GF_POSIX_ACL_DEFAULT, 0); + if (ret) + mdc_inode_xatt_invalidate(this, local->loc.inode); + } + } + + if ((valid & GF_SET_ATTR_MODE) && conf->cache_posix_acl) { + if (!xdata) + xdata = xattr_alloc = dict_new(); + if (xdata) { + ret = dict_set_int8(xdata, POSIX_ACL_ACCESS_XATTR, 0); + if (!ret) + ret = dict_set_int8(xdata, POSIX_ACL_DEFAULT_XATTR, 0); + if (ret) + mdc_inode_xatt_invalidate(this, local->loc.inode); + } + } + +wind: STACK_WIND(frame, mdc_setattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + + if (xattr_alloc) + dict_unref(xattr_alloc); return 0; } @@ -2118,6 +2224,7 @@ mdc_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf, _gf_true, local->incident_time); + mdc_inode_xatt_update(this, local->fd->inode, xdata); out: MDC_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, prebuf, postbuf, xdata); @@ -2130,13 +2237,47 @@ mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, int valid, dict_t *xdata) { mdc_local_t *local = NULL; + dict_t *xattr_alloc = NULL; + int ret = 0; + struct mdc_conf *conf = this->private; local = mdc_local_get(frame, fd->inode); + if (local == NULL) { + goto wind; + } - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); + if ((valid & GF_SET_ATTR_MODE) && conf->cache_glusterfs_acl) { + if (!xdata) + xdata = xattr_alloc = dict_new(); + if (xdata) { + ret = dict_set_int8(xdata, GF_POSIX_ACL_ACCESS, 0); + if (!ret) + ret = dict_set_int8(xdata, GF_POSIX_ACL_DEFAULT, 0); + if (ret) + mdc_inode_xatt_invalidate(this, local->fd->inode); + } + } + + if ((valid & GF_SET_ATTR_MODE) && conf->cache_posix_acl) { + if (!xdata) + xdata = xattr_alloc = dict_new(); + if (xdata) { + ret = dict_set_int8(xdata, POSIX_ACL_ACCESS_XATTR, 0); + if (!ret) + ret = dict_set_int8(xdata, POSIX_ACL_DEFAULT_XATTR, 0); + if (ret) + mdc_inode_xatt_invalidate(this, local->fd->inode); + } + } + +wind: STACK_WIND(frame, mdc_fsetattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + + if (xattr_alloc) + dict_unref(xattr_alloc); return 0; } @@ -2173,8 +2314,9 @@ mdc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_fsync_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); @@ -2229,9 +2371,10 @@ mdc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xattr); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xattr); + } STACK_WIND(frame, mdc_setxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata); @@ -2287,9 +2430,10 @@ mdc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); - local->xattr = dict_ref(xattr); + if (local != NULL) { + local->fd = __fd_ref(fd); + local->xattr = dict_ref(xattr); + } STACK_WIND(frame, mdc_fsetxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, fd, xattr, flags, xdata); @@ -2313,7 +2457,15 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - mdc_inode_xatt_update(this, local->loc.inode, xattr); + if (dict_get(xattr, "glusterfs.skip-cache")) { + gf_msg(this->name, GF_LOG_DEBUG, 0, 0, + "Skipping xattr update due to empty value"); + goto out; + } + + if (local->update_cache) { + mdc_inode_xatt_set(this, local->loc.inode, xdata); + } out: MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata); @@ -2330,15 +2482,19 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, mdc_local_t *local = NULL; dict_t *xattr = NULL; struct mdc_conf *conf = this->private; + gf_boolean_t key_satisfied = _gf_false; local = mdc_local_get(frame, loc->inode); - if (!local) + if (!local) { goto uncached; + } loc_copy(&local->loc, loc); - if (!is_mdc_key_satisfied(this, key)) + if (!is_mdc_key_satisfied(this, key)) { goto uncached; + } + key_satisfied = _gf_true; ret = mdc_inode_xatt_get(this, loc->inode, &xattr); if (ret != 0) @@ -2352,12 +2508,24 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, GF_ATOMIC_INC(conf->mdc_counter.xattr_hit); MDC_STACK_UNWIND(getxattr, frame, ret, op_errno, xattr, xdata); + if (xattr) + dict_unref(xattr); + return 0; uncached: + if (key_satisfied) { + xdata = mdc_prepare_request(this, local, xdata); + } + GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, loc, key, xdata); + + if (key_satisfied && (xdata != NULL)) { + dict_unref(xdata); + } + return 0; } @@ -2378,7 +2546,15 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - mdc_inode_xatt_update(this, local->fd->inode, xattr); + if (dict_get(xattr, "glusterfs.skip-cache")) { + gf_msg(this->name, GF_LOG_DEBUG, 0, 0, + "Skipping xattr update due to empty value"); + goto out; + } + + if (local->update_cache) { + mdc_inode_xatt_set(this, local->fd->inode, xdata); + } out: MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata); @@ -2395,15 +2571,18 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, dict_t *xattr = NULL; int op_errno = ENODATA; struct mdc_conf *conf = this->private; + gf_boolean_t key_satisfied = _gf_true; local = mdc_local_get(frame, fd->inode); if (!local) goto uncached; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); - if (!is_mdc_key_satisfied(this, key)) + if (!is_mdc_key_satisfied(this, key)) { + key_satisfied = _gf_false; goto uncached; + } ret = mdc_inode_xatt_get(this, fd->inode, &xattr); if (ret != 0) @@ -2417,12 +2596,24 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, GF_ATOMIC_INC(conf->mdc_counter.xattr_hit); MDC_STACK_UNWIND(fgetxattr, frame, ret, op_errno, xattr, xdata); + if (xattr) + dict_unref(xattr); + return 0; uncached: + if (key_satisfied) { + xdata = mdc_prepare_request(this, local, xdata); + } + GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); + + if (key_satisfied && (xdata != NULL)) { + dict_unref(xdata); + } + return 0; } @@ -2478,12 +2669,21 @@ mdc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, int ret = 0; dict_t *xattr = NULL; struct mdc_conf *conf = this->private; + char *name2; + + name2 = gf_strdup(name); + if (name2 == NULL) { + goto uncached; + } local = mdc_local_get(frame, loc->inode); + if (local == NULL) { + GF_FREE(name2); + goto uncached; + } loc_copy(&local->loc, loc); - - local->key = gf_strdup(name); + local->key = name2; if (!is_mdc_key_satisfied(this, name)) goto uncached; @@ -2492,13 +2692,20 @@ mdc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret != 0) goto uncached; + GF_ATOMIC_INC(conf->mdc_counter.xattr_hit); + if (!xattr || !dict_get(xattr, (char *)name)) { ret = -1; op_errno = ENODATA; + + MDC_STACK_UNWIND(removexattr, frame, ret, op_errno, xdata); + } else { + STACK_WIND(frame, mdc_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); } - GF_ATOMIC_INC(conf->mdc_counter.xattr_hit); - MDC_STACK_UNWIND(removexattr, frame, ret, op_errno, xdata); + if (xattr) + dict_unref(xattr); return 0; @@ -2562,12 +2769,21 @@ mdc_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, int ret = 0; dict_t *xattr = NULL; struct mdc_conf *conf = this->private; + char *name2; - local = mdc_local_get(frame, fd->inode); + name2 = gf_strdup(name); + if (name2 == NULL) { + goto uncached; + } - local->fd = fd_ref(fd); + local = mdc_local_get(frame, fd->inode); + if (local == NULL) { + GF_FREE(name2); + goto uncached; + } - local->key = gf_strdup(name); + local->fd = __fd_ref(fd); + local->key = name2; if (!is_mdc_key_satisfied(this, name)) goto uncached; @@ -2576,13 +2792,21 @@ mdc_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, if (ret != 0) goto uncached; + GF_ATOMIC_INC(conf->mdc_counter.xattr_hit); + if (!xattr || !dict_get(xattr, (char *)name)) { ret = -1; op_errno = ENODATA; + + MDC_STACK_UNWIND(fremovexattr, frame, ret, op_errno, xdata); + } else { + STACK_WIND(frame, mdc_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); } - GF_ATOMIC_INC(conf->mdc_counter.xattr_hit); - MDC_STACK_UNWIND(fremovexattr, frame, ret, op_errno, xdata); + if (xattr) + dict_unref(xattr); + return 0; uncached: @@ -2617,27 +2841,23 @@ int mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) { - dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - - if (!xdata) - xdata = xattr_alloc = dict_new(); - - if (xdata) { - /* Tell readdir-ahead to include these keys in xdata when it - * internally issues readdirp() in it's opendir_cbk */ - mdc_load_reqs(this, xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); } + /* Tell readdir-ahead to include these keys in xdata when it + * internally issues readdirp() in it's opendir_cbk */ + xdata = mdc_prepare_request(this, local, xdata); + STACK_WIND(frame, mdc_opendir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (xdata != NULL) { + dict_unref(xdata); + } return 0; } @@ -2665,7 +2885,9 @@ mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, continue; mdc_inode_iatt_set(this, entry->inode, &entry->d_stat, local->incident_time); - mdc_inode_xatt_set(this, entry->inode, entry->dict); + if (local->update_cache) { + mdc_inode_xatt_set(this, entry->inode, entry->dict); + } } unwind: @@ -2677,27 +2899,26 @@ int mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { - dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); if (!local) goto out; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; out: - STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL); + MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); return 0; } @@ -2725,7 +2946,6 @@ int mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { - int need_unref = 0; mdc_local_t *local = NULL; struct mdc_conf *conf = this->private; @@ -2733,7 +2953,7 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, if (!local) goto unwind; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); if (!conf->force_readdirp) { STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this), @@ -2741,19 +2961,14 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, return 0; } - if (!xdata) { - xdata = dict_new(); - need_unref = 1; - } - - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); - if (need_unref && xdata) + if (xdata != NULL) { dict_unref(xdata); + } return 0; unwind: @@ -2795,7 +3010,9 @@ mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, mdc_local_t *local; local = mdc_local_get(frame, fd->inode); - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, @@ -2837,7 +3054,9 @@ mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, mdc_local_t *local; local = mdc_local_get(frame, fd->inode); - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); @@ -2878,7 +3097,9 @@ mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, mdc_local_t *local; local = mdc_local_get(frame, fd->inode); - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); @@ -2960,7 +3181,7 @@ mdc_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, if (!local) goto unwind; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); STACK_WIND(frame, mdc_fsyncdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata); @@ -3134,9 +3355,11 @@ mdc_xattr_list_populate(struct mdc_conf *conf, char *tmp_str) strcat(mdc_xattr_str, "security.ima,"); if (conf->cache_posix_acl) - strcat(mdc_xattr_str, POSIX_ACL_ACCESS_XATTR - "," POSIX_ACL_DEFAULT_XATTR "," GF_POSIX_ACL_ACCESS - "," GF_POSIX_ACL_DEFAULT ","); + strcat(mdc_xattr_str, + POSIX_ACL_ACCESS_XATTR "," POSIX_ACL_DEFAULT_XATTR ","); + + if (conf->cache_glusterfs_acl) + strcat(mdc_xattr_str, GF_POSIX_ACL_ACCESS "," GF_POSIX_ACL_DEFAULT ","); if (conf->cache_swift_metadata) strcat(mdc_xattr_str, "user.swift.metadata,"); @@ -3227,7 +3450,7 @@ mdc_invalidate(xlator_t *this, void *data) } if (up_ci->flags & IATT_UPDATE_FLAGS) { - gen = mdc_get_generation(this, inode); + gen = mdc_inc_generation(this, inode); ret = mdc_inode_iatt_set_validate(this, inode, NULL, &up_ci->stat, _gf_false, gen); /* one of the scenarios where ret < 0 is when this invalidate @@ -3331,7 +3554,12 @@ mdc_register_xattr_inval(xlator_t *this) goto out; } - mdc_load_reqs(this, xattr); + if (!mdc_load_reqs(this, xattr)) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY, + "failed to populate cache entries"); + ret = -1; + goto out; + } frame = create_frame(this, this->ctx->pool); if (!frame) { @@ -3380,7 +3608,7 @@ int mdc_reconfigure(xlator_t *this, dict_t *options) { struct mdc_conf *conf = NULL; - int timeout = 0; + int timeout = 0, ret = 0; char *tmp_str = NULL; conf = this->private; @@ -3397,6 +3625,9 @@ mdc_reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("cache-posix-acl", conf->cache_posix_acl, options, bool, out); + GF_OPTION_RECONF("cache-glusterfs-acl", conf->cache_glusterfs_acl, options, + bool, out); + GF_OPTION_RECONF("cache-swift-metadata", conf->cache_swift_metadata, options, bool, out); @@ -3409,12 +3640,18 @@ mdc_reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("cache-invalidation", conf->mdc_invalidation, options, bool, out); + GF_OPTION_RECONF("global-cache-invalidation", conf->global_invalidation, + options, bool, out); + GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out); GF_OPTION_RECONF("md-cache-statfs", conf->cache_statfs, options, bool, out); GF_OPTION_RECONF("xattr-cache-list", tmp_str, options, str, out); - mdc_xattr_list_populate(conf, tmp_str); + + ret = mdc_xattr_list_populate(conf, tmp_str); + if (ret < 0) + goto out; /* If timeout is greater than 60s (default before the patch that added * cache invalidation support was added) then, cache invalidation @@ -3427,25 +3664,22 @@ mdc_reconfigure(xlator_t *this, dict_t *options) } conf->timeout = timeout; - (void)mdc_register_xattr_inval(this); + ret = mdc_register_xattr_inval(this); out: - return 0; + return ret; } int32_t mdc_mem_acct_init(xlator_t *this) { - int ret = -1; - - ret = xlator_mem_acct_init(this, gf_mdc_mt_end + 1); - return ret; + return xlator_mem_acct_init(this, gf_mdc_mt_end + 1); } int mdc_init(xlator_t *this) { struct mdc_conf *conf = NULL; - int timeout = 0; + uint32_t timeout = 0; char *tmp_str = NULL; conf = GF_CALLOC(sizeof(*conf), 1, gf_mdc_mt_mdc_conf_t); @@ -3457,7 +3691,7 @@ mdc_init(xlator_t *this) LOCK_INIT(&conf->lock); - GF_OPTION_INIT("md-cache-timeout", timeout, int32, out); + GF_OPTION_INIT("md-cache-timeout", timeout, uint32, out); GF_OPTION_INIT("cache-selinux", conf->cache_selinux, bool, out); @@ -3468,6 +3702,8 @@ mdc_init(xlator_t *this) GF_OPTION_INIT("cache-posix-acl", conf->cache_posix_acl, bool, out); + GF_OPTION_INIT("cache-glusterfs-acl", conf->cache_glusterfs_acl, bool, out); + GF_OPTION_INIT("cache-swift-metadata", conf->cache_swift_metadata, bool, out); @@ -3478,6 +3714,9 @@ mdc_init(xlator_t *this) GF_OPTION_INIT("cache-invalidation", conf->mdc_invalidation, bool, out); + GF_OPTION_INIT("global-cache-invalidation", conf->global_invalidation, bool, + out); + GF_OPTION_INIT("pass-through", this->pass_through, bool, out); pthread_mutex_init(&conf->statfs_cache.lock, NULL); @@ -3486,7 +3725,9 @@ mdc_init(xlator_t *this) GF_OPTION_INIT("xattr-cache-list", tmp_str, str, out); mdc_xattr_list_populate(conf, tmp_str); - time(&conf->last_child_down); + conf->last_child_down = gf_time(); + conf->statfs_cache.last_refreshed = (time_t)-1; + /* initialize gf_atomic_t counters */ GF_ATOMIC_INIT(conf->mdc_counter.stat_hit, 0); GF_ATOMIC_INIT(conf->mdc_counter.stat_miss, 0); @@ -3517,7 +3758,7 @@ out: } void -mdc_update_child_down_time(xlator_t *this, time_t *now) +mdc_update_child_down_time(xlator_t *this, time_t now) { struct mdc_conf *conf = NULL; @@ -3525,7 +3766,7 @@ mdc_update_child_down_time(xlator_t *this, time_t *now) LOCK(&conf->lock); { - conf->last_child_down = *now; + conf->last_child_down = now; } UNLOCK(&conf->lock); } @@ -3535,14 +3776,12 @@ mdc_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; struct mdc_conf *conf = NULL; - time_t now = 0; conf = this->private; switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: - time(&now); - mdc_update_child_down_time(this, &now); + mdc_update_child_down_time(this, gf_time()); break; case GF_EVENT_UPCALL: if (conf->mdc_invalidation) @@ -3616,6 +3855,14 @@ struct xlator_dumpops mdc_dumpops = { struct volume_options mdc_options[] = { { + .key = {"md-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable md-cache", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + { .key = {"cache-selinux"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "false", @@ -3644,7 +3891,7 @@ struct volume_options mdc_options[] = { { .key = {"cache-swift-metadata"}, .type = GF_OPTION_TYPE_BOOL, - .default_value = "true", + .default_value = "false", .op_version = {GD_OP_VERSION_3_7_10}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, .description = "Cache swift metadata (user.swift.metadata xattr)", @@ -3669,6 +3916,16 @@ struct volume_options mdc_options[] = { "system.posix_acl_default) on client side", }, { + .key = {"cache-glusterfs-acl"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .description = "Cache virtual glusterfs ACL xattrs " + "(glusterfs.posix.acl, glusterfs.posix.default_acl) " + "on client side", + }, + { .key = {"md-cache-timeout"}, .type = GF_OPTION_TYPE_INT, .min = 0, @@ -3697,6 +3954,29 @@ struct volume_options mdc_options[] = { " on receiving the cache-invalidation notifications", }, { + .key = {"global-cache-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .description = + "When \"on\", purges all read caches in kernel and glusterfs stack " + "whenever a stat change is detected. Stat changes can be detected " + "while processing responses to file operations (fop) or through " + "upcall notifications. Since purging caches can be an expensive " + "operation, it's advised to have this option \"on\" only when a " + "file " + "can be accessed from multiple different Glusterfs mounts and " + "caches across these different mounts are required to be coherent. " + "If a file is not accessed across different mounts " + "(simple example is having only one mount for a volume), its " + "advised to keep " + "this option \"off\" as all file modifications go through caches " + "keeping them " + "coherent. This option overrides value of " + "performance.cache-invalidation.", + }, + { .key = {"md-cache-statfs"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c index b057ed4b5d6..29b99b5b8ea 100644 --- a/xlators/performance/nl-cache/src/nl-cache-helper.c +++ b/xlators/performance/nl-cache/src/nl-cache-helper.c @@ -10,7 +10,7 @@ #include "nl-cache.h" #include "timer-wheel.h" -#include "statedump.h" +#include <glusterfs/statedump.h> /* Caching guidelines: * This xlator serves negative lookup(ENOENT lookups) from the cache, @@ -113,7 +113,7 @@ out: } void -nlc_update_child_down_time(xlator_t *this, time_t *now) +nlc_update_child_down_time(xlator_t *this, time_t now) { nlc_conf_t *conf = NULL; @@ -121,7 +121,7 @@ nlc_update_child_down_time(xlator_t *this, time_t *now) LOCK(&conf->lock); { - conf->last_child_down = *now; + conf->last_child_down = now; } UNLOCK(&conf->lock); @@ -145,12 +145,10 @@ nlc_disable_cache(xlator_t *this) } static int -__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, - nlc_pe_t **nlc_pe_p) +__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p) { int ret = 0; nlc_ctx_t *nlc_ctx = NULL; - nlc_pe_t *nlc_pe = NULL; uint64_t nlc_ctx_int = 0; uint64_t nlc_pe_int = 0; @@ -159,10 +157,6 @@ __nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, nlc_ctx = (void *)(long)(nlc_ctx_int); *nlc_ctx_p = nlc_ctx; } - if (ret == 0 && nlc_pe_p) { - nlc_pe = (void *)(long)(nlc_pe_int); - *nlc_pe_p = nlc_pe; - } return ret; } @@ -170,30 +164,32 @@ static int nlc_inode_ctx_set(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx, nlc_pe_t *nlc_pe_p) { + uint64_t ctx1, ctx2; int ret = -1; + ctx1 = (uint64_t)(uintptr_t)nlc_ctx; + ctx2 = (uint64_t)(uintptr_t)nlc_pe_p; + /* The caller may choose to set one of the ctxs, hence check * if the ctx1/2 is non zero and then send the address. If we * blindly send the address of both the ctxs, it may reset the * ctx the caller had sent NULL(intended as leave untouched) for.*/ LOCK(&inode->lock); { - ret = __inode_ctx_set2(inode, this, nlc_ctx ? (uint64_t *)&nlc_ctx : 0, - nlc_pe_p ? (uint64_t *)&nlc_pe_p : 0); + ret = __inode_ctx_set2(inode, this, ctx1 ? &ctx1 : 0, ctx2 ? &ctx2 : 0); } UNLOCK(&inode->lock); return ret; } static void -nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, - nlc_pe_t **nlc_pe_p) +nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p) { int ret = 0; LOCK(&inode->lock); { - ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p, nlc_pe_p); + ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p); if (ret < 0) gf_msg_debug(this->name, 0, "inode ctx get failed for " @@ -243,6 +239,8 @@ nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) int ret = -1; conf = this->private; + if (!nlc_ctx) + goto out; LOCK(&nlc_ctx->lock); { @@ -264,7 +262,7 @@ nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) if (nlc_ctx->timer) { gf_tw_mod_timer_pending(conf->timer_wheel, nlc_ctx->timer, conf->cache_timeout); - time(&nlc_ctx->cache_time); + nlc_ctx->cache_time = gf_time(); goto unlock; } @@ -283,14 +281,14 @@ nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) } unlock: UNLOCK(&nlc_ctx->lock); - +out: return; } static nlc_ctx_t * -nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, - nlc_pe_t **nlc_pe_p) +nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p) { + uint64_t ctx; int ret = 0; nlc_ctx_t *nlc_ctx = NULL; nlc_conf_t *conf = NULL; @@ -299,7 +297,7 @@ nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, LOCK(&inode->lock); { - ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx, nlc_pe_p); + ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx); if (nlc_ctx) goto unlock; @@ -321,7 +319,8 @@ nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, goto unlock; } - ret = __inode_ctx_set2(inode, this, (uint64_t *)&nlc_ctx, NULL); + ctx = (uint64_t)(uintptr_t)nlc_ctx; + ret = __inode_ctx_set2(inode, this, &ctx, NULL); if (ret) { gf_msg(this->name, GF_LOG_ERROR, ENOMEM, NLC_MSG_NO_MEMORY, "inode ctx set failed"); @@ -408,7 +407,7 @@ nlc_set_dir_state(xlator_t *this, inode_t *inode, uint64_t state) goto out; } - nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL); + nlc_inode_ctx_get_set(this, inode, &nlc_ctx); if (!nlc_ctx) goto out; @@ -428,7 +427,7 @@ nlc_cache_timeout_handler(struct gf_tw_timer_list *timer, void *data, nlc_timer_data_t *tmp = data; nlc_ctx_t *nlc_ctx = NULL; - nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx, NULL); + nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx); if (!nlc_ctx) goto out; @@ -497,7 +496,7 @@ __nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) nlc_ctx->timer_data = tmp; gf_tw_add_timer(conf->timer_wheel, timer); - time(&nlc_ctx->cache_time); + nlc_ctx->cache_time = gf_time(); gf_msg_trace(this->name, 0, "Registering timer:%p, inode:%p, " "gfid:%s", @@ -694,7 +693,7 @@ nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason) { nlc_ctx_t *nlc_ctx = NULL; - nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); + nlc_inode_ctx_get(this, inode, &nlc_ctx); if (!nlc_ctx) goto out; @@ -881,7 +880,7 @@ nlc_dir_add_ne(xlator_t *this, inode_t *inode, const char *name) goto out; } - nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL); + nlc_inode_ctx_get_set(this, inode, &nlc_ctx); if (!nlc_ctx) goto out; @@ -912,7 +911,7 @@ nlc_dir_remove_pe(xlator_t *this, inode_t *parent, inode_t *entry_ino, goto out; } - nlc_inode_ctx_get(this, parent, &nlc_ctx, NULL); + nlc_inode_ctx_get(this, parent, &nlc_ctx); if (!nlc_ctx) goto out; @@ -943,7 +942,7 @@ nlc_dir_add_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino, goto out; } - nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL); + nlc_inode_ctx_get_set(this, inode, &nlc_ctx); if (!nlc_ctx) goto out; @@ -1049,7 +1048,7 @@ nlc_is_negative_lookup(xlator_t *this, loc_t *loc) goto out; } - nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); + nlc_inode_ctx_get(this, inode, &nlc_ctx); if (!nlc_ctx) goto out; @@ -1100,7 +1099,7 @@ nlc_get_real_file_name(xlator_t *this, loc_t *loc, const char *fname, goto out; } - nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); + nlc_inode_ctx_get(this, inode, &nlc_ctx); if (!nlc_ctx) goto out; @@ -1150,7 +1149,7 @@ nlc_dump_inodectx(xlator_t *this, inode_t *inode) nlc_ne_t *ne = NULL; nlc_ne_t *tmp1 = NULL; - nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); + nlc_inode_ctx_get(this, inode, &nlc_ctx); if (!nlc_ctx) goto out; @@ -1174,7 +1173,7 @@ nlc_dump_inodectx(xlator_t *this, inode_t *inode) gf_proc_dump_write("state", "%" PRIu64, nlc_ctx->state); gf_proc_dump_write("timer", "%p", nlc_ctx->timer); - gf_proc_dump_write("cache-time", "%" GF_PRI_TIME, nlc_ctx->cache_time); + gf_proc_dump_write("cache-time", "%ld", nlc_ctx->cache_time); gf_proc_dump_write("cache-size", "%zu", nlc_ctx->cache_size); gf_proc_dump_write("refd-inodes", "%" PRIu64, nlc_ctx->refd_inodes); diff --git a/xlators/performance/nl-cache/src/nl-cache-mem-types.h b/xlators/performance/nl-cache/src/nl-cache-mem-types.h index bc61eeca9da..93a17b3fd5a 100644 --- a/xlators/performance/nl-cache/src/nl-cache-mem-types.h +++ b/xlators/performance/nl-cache/src/nl-cache-mem-types.h @@ -11,11 +11,10 @@ #ifndef __NL_CACHE_MEM_TYPES_H__ #define __NL_CACHE_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_nlc_mem_types_ { - gf_nlc_mt_conf_t = gf_common_mt_end + 1, - gf_nlc_mt_nlc_conf_t, + gf_nlc_mt_nlc_conf_t = gf_common_mt_end + 1, gf_nlc_mt_nlc_ctx_t, gf_nlc_mt_nlc_local_t, gf_nlc_mt_nlc_pe_t, diff --git a/xlators/performance/nl-cache/src/nl-cache-messages.h b/xlators/performance/nl-cache/src/nl-cache-messages.h index 17fd96de17c..222d709e133 100644 --- a/xlators/performance/nl-cache/src/nl-cache-messages.h +++ b/xlators/performance/nl-cache/src/nl-cache-messages.h @@ -11,7 +11,7 @@ #ifndef __NL_CACHE_MESSAGES_H__ #define __NL_CACHE_MESSAGES_H__ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c index dc66341728e..33a7c471663 100644 --- a/xlators/performance/nl-cache/src/nl-cache.c +++ b/xlators/performance/nl-cache/src/nl-cache.c @@ -9,8 +9,8 @@ */ #include "nl-cache.h" -#include "statedump.h" -#include "upcall-utils.h" +#include <glusterfs/statedump.h> +#include <glusterfs/upcall-utils.h> static void nlc_dentry_op(call_frame_t *frame, xlator_t *this, gf_boolean_t multilink) @@ -520,15 +520,13 @@ int nlc_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; - time_t now = 0; switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: case GF_EVENT_CHILD_UP: case GF_EVENT_SOME_DESCENDENT_UP: - time(&now); - nlc_update_child_down_time(this, &now); + nlc_update_child_down_time(this, gf_time()); /* TODO: nlc_clear_all_cache (this); else lru prune will lazily clear it*/ break; @@ -731,7 +729,7 @@ nlc_init(xlator_t *this) GF_ATOMIC_INIT(conf->nlc_counter.nlc_invals, 0); INIT_LIST_HEAD(&conf->lru); - time(&conf->last_child_down); + conf->last_child_down = gf_time(); conf->timer_wheel = glusterfs_ctx_tw_get(this->ctx); if (!conf->timer_wheel) { @@ -779,6 +777,14 @@ struct xlator_dumpops nlc_dumpops = { struct volume_options nlc_options[] = { { + .key = {"nl-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable nl-cache", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + { .key = {"nl-cache-positive-entry"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "false", diff --git a/xlators/performance/nl-cache/src/nl-cache.h b/xlators/performance/nl-cache/src/nl-cache.h index 9c9682f6396..85fcc176342 100644 --- a/xlators/performance/nl-cache/src/nl-cache.h +++ b/xlators/performance/nl-cache/src/nl-cache.h @@ -13,10 +13,10 @@ #include "nl-cache-mem-types.h" #include "nl-cache-messages.h" -#include "glusterfs.h" -#include "xlator.h" -#include "defaults.h" -#include "atomic.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/atomic.h> #define NLC_INVALID 0x0000 #define NLC_PE_FULL 0x0001 @@ -155,7 +155,7 @@ nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, loc_t *loc, loc_t *loc2); void -nlc_update_child_down_time(xlator_t *this, time_t *now); +nlc_update_child_down_time(xlator_t *this, time_t now); void nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason); diff --git a/xlators/performance/open-behind/src/open-behind-mem-types.h b/xlators/performance/open-behind/src/open-behind-mem-types.h index b16a678cbf4..6c1ab2e19d2 100644 --- a/xlators/performance/open-behind/src/open-behind-mem-types.h +++ b/xlators/performance/open-behind/src/open-behind-mem-types.h @@ -11,7 +11,7 @@ #ifndef __OB_MEM_TYPES_H__ #define __OB_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_ob_mem_types_ { gf_ob_mt_fd_t = gf_common_mt_end + 1, diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h index 09b4e8f0c28..0e789177684 100644 --- a/xlators/performance/open-behind/src/open-behind-messages.h +++ b/xlators/performance/open-behind/src/open-behind-messages.h @@ -10,7 +10,7 @@ #ifndef _OPEN_BEHIND_MESSAGES_H_ #define _OPEN_BEHIND_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * @@ -23,6 +23,10 @@ */ GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED, - OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY); + OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY, + OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE); + +#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop" +#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state" #endif /* _OPEN_BEHIND_MESSAGES_H_ */ diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index fdfbca450d6..600c3b62ffe 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -9,12 +9,24 @@ */ #include "open-behind-mem-types.h" -#include "xlator.h" -#include "statedump.h" -#include "call-stub.h" -#include "defaults.h" +#include <glusterfs/xlator.h> +#include <glusterfs/statedump.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/defaults.h> #include "open-behind-messages.h" -#include "glusterfs-acl.h" +#include <glusterfs/glusterfs-acl.h> + +/* Note: The initial design of open-behind was made to cover the simple case + * of open, read, close for small files. This pattern combined with + * quick-read can do the whole operation without a single request to the + * bricks (except the initial lookup). + * + * The way to do this has been improved, but the logic remains the same. + * Basically, this means that any operation sent to the fd or the inode + * that it's not a read, causes the open request to be sent to the + * bricks, and all future operations will be executed synchronously, + * including opens (it's reset once all fd's are closed). + */ typedef struct ob_conf { gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe @@ -32,1085 +44,811 @@ typedef struct ob_conf { */ } ob_conf_t; -typedef struct ob_inode { - inode_t *inode; - struct list_head resume_fops; - struct list_head ob_fds; - int count; - int op_ret; - int op_errno; - gf_boolean_t open_in_progress; - int unlinked; -} ob_inode_t; +/* A negative state represents an errno value negated. In this case the + * current operation cannot be processed. */ +typedef enum _ob_state { + /* There are no opens on the inode or the first open is already + * completed. The current operation can be sent directly. */ + OB_STATE_READY = 0, -typedef struct ob_fd { - call_frame_t *open_frame; - loc_t loc; - dict_t *xdata; - int flags; - int op_errno; - ob_inode_t *ob_inode; - fd_t *fd; - gf_boolean_t opened; - gf_boolean_t ob_inode_fops_waiting; - struct list_head list; - struct list_head ob_fds_on_inode; -} ob_fd_t; + /* There's an open pending and it has been triggered. The current + * operation should be "stubbified" and processed with + * ob_stub_dispatch(). */ + OB_STATE_OPEN_TRIGGERED, -ob_inode_t * -ob_inode_alloc(inode_t *inode) -{ - ob_inode_t *ob_inode = NULL; - - ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); - if (ob_inode == NULL) - goto out; + /* There's an open pending but it has not been triggered. The current + * operation can be processed directly but using an anonymous fd. */ + OB_STATE_OPEN_PENDING, - ob_inode->inode = inode; - INIT_LIST_HEAD(&ob_inode->resume_fops); - INIT_LIST_HEAD(&ob_inode->ob_fds); -out: - return ob_inode; -} + /* The current operation is the first open on the inode. */ + OB_STATE_FIRST_OPEN +} ob_state_t; -void -ob_inode_free(ob_inode_t *ob_inode) -{ - if (ob_inode == NULL) - goto out; +typedef struct ob_inode { + /* List of stubs pending on the first open. Once the first open is + * complete, all these stubs will be resubmitted, and dependencies + * will be checked again. */ + struct list_head resume_fops; - list_del_init(&ob_inode->resume_fops); - list_del_init(&ob_inode->ob_fds); + /* The inode this object references. */ + inode_t *inode; - GF_FREE(ob_inode); -out: - return; -} + /* The fd from the first open sent to this inode. It will be set + * from the moment the open is processed until the open if fully + * executed or closed before actually opened. It's NULL in all + * other cases. */ + fd_t *first_fd; + + /* The stub from the first open operation. When open fop starts + * being processed, it's assigned the OB_OPEN_PREPARING value + * until the actual stub is created. This is necessary to avoid + * creating the stub inside a locked region. Once the stub is + * successfully created, it's assigned here. This value is set + * to NULL once the stub is resumed. */ + call_stub_t *first_open; + + /* The total number of currently open fd's on this inode. */ + int32_t open_count; + + /* This flag is set as soon as we know that the open will be + * sent to the bricks, even before the stub is ready. */ + bool triggered; +} ob_inode_t; -ob_inode_t * -ob_inode_get(xlator_t *this, inode_t *inode) +/* Dummy pointer used temporarily while the actual open stub is being created */ +#define OB_OPEN_PREPARING ((call_stub_t *)-1) + +#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) \ + case OB_STATE_FIRST_OPEN: \ + gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \ + "fop=%s", #_fop, "state=%d", __ob_state, NULL); \ + default_##_fop##_failure_cbk(_frame, EINVAL); \ + break; \ + case OB_STATE_READY: \ + default_##_fop(_frame, _xl, ##_args); \ + break; \ + case OB_STATE_OPEN_TRIGGERED: { \ + call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \ + ##_args); \ + if (__ob_stub != NULL) { \ + ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \ + break; \ + } \ + __ob_state = -ENOMEM; \ + } \ + default: \ + gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \ + OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \ + default_##_fop##_failure_cbk(_frame, -__ob_state) + +#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) \ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_fd( \ + _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + if (!(_trigger)) { \ + fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \ + (_fd)->flags); \ + if (__ob_fd != NULL) { \ + default_##_fop(_frame, _xl, ##_args); \ + fd_unref(__ob_fd); \ + break; \ + } \ + __ob_state = -ENOMEM; \ + } \ + OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_fd( \ + _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \ + break; \ + OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) \ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_inode( \ + _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +static ob_inode_t * +ob_inode_get_locked(xlator_t *this, inode_t *inode) { ob_inode_t *ob_inode = NULL; uint64_t value = 0; - int ret = 0; - if (!inode) - goto out; + if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) { + return (ob_inode_t *)(uintptr_t)value; + } - LOCK(&inode->lock); - { - __inode_ctx_get(inode, this, &value); - if (value == 0) { - ob_inode = ob_inode_alloc(inode); - if (ob_inode == NULL) - goto unlock; - - value = (uint64_t)((void *)ob_inode); - ret = __inode_ctx_set(inode, this, &value); - if (ret < 0) { - ob_inode_free(ob_inode); - ob_inode = NULL; - } - } else { - ob_inode = (ob_inode_t *)value; + ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); + if (ob_inode != NULL) { + ob_inode->inode = inode; + INIT_LIST_HEAD(&ob_inode->resume_fops); + + value = (uint64_t)(uintptr_t)ob_inode; + if (__inode_ctx_set(inode, this, &value) < 0) { + GF_FREE(ob_inode); + ob_inode = NULL; } } -unlock: - UNLOCK(&inode->lock); -out: return ob_inode; } -ob_fd_t * -__ob_fd_ctx_get(xlator_t *this, fd_t *fd) +static ob_state_t +ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd, + int32_t open_count, bool synchronous, bool trigger, + ob_inode_t **pob_inode, fd_t **pfd) { - uint64_t value = 0; - int ret = -1; - ob_fd_t *ob_fd = NULL; - - ret = __fd_ctx_get(fd, this, &value); - if (ret) - return NULL; + ob_conf_t *conf; + ob_inode_t *ob_inode; + call_stub_t *open_stub; - ob_fd = (void *)((long)value); + if (inode == NULL) { + return OB_STATE_READY; + } - return ob_fd; -} + conf = xl->private; -ob_fd_t * -ob_fd_ctx_get(xlator_t *this, fd_t *fd) -{ - ob_fd_t *ob_fd = NULL; + *pfd = NULL; - LOCK(&fd->lock); + LOCK(&inode->lock); { - ob_fd = __ob_fd_ctx_get(this, fd); - } - UNLOCK(&fd->lock); + ob_inode = ob_inode_get_locked(xl, inode); + if (ob_inode == NULL) { + UNLOCK(&inode->lock); - return ob_fd; -} - -int -__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) -{ - uint64_t value = 0; - int ret = -1; + return -ENOMEM; + } + *pob_inode = ob_inode; + + ob_inode->open_count += open_count; + + /* If first_fd is not NULL, it means that there's a previous open not + * yet completed. */ + if (ob_inode->first_fd != NULL) { + *pfd = ob_inode->first_fd; + /* If the current request doesn't trigger the open and it hasn't + * been triggered yet, we can continue without issuing the open + * only if the current request belongs to the same fd as the + * first one. */ + if (!trigger && !ob_inode->triggered && + (ob_inode->first_fd == fd)) { + UNLOCK(&inode->lock); + + return OB_STATE_OPEN_PENDING; + } - value = (long)((void *)ob_fd); + /* We need to issue the open. It could have already been triggered + * before. In this case open_stub will be NULL. Or the initial open + * may not be completely ready yet. In this case open_stub will be + * OB_OPEN_PREPARING. */ + open_stub = ob_inode->first_open; + ob_inode->first_open = NULL; + ob_inode->triggered = true; - ret = __fd_ctx_set(fd, this, value); + UNLOCK(&inode->lock); - return ret; -} + if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) { + call_resume(open_stub); + } -int -ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) -{ - int ret = -1; + return OB_STATE_OPEN_TRIGGERED; + } - LOCK(&fd->lock); - { - ret = __ob_fd_ctx_set(this, fd, ob_fd); - } - UNLOCK(&fd->lock); + /* There's no pending open. Only opens can be non synchronous, so all + * regular fops will be processed directly. For non synchronous opens, + * we'll still process them normally (i.e. synchornous) if there are + * more file descriptors open. */ + if (synchronous || (ob_inode->open_count > open_count)) { + UNLOCK(&inode->lock); - return ret; -} + return OB_STATE_READY; + } -ob_fd_t * -ob_fd_new(void) -{ - ob_fd_t *ob_fd = NULL; + *pfd = fd; - ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t); + /* This is the first open. We keep a reference on the fd and set + * first_open stub to OB_OPEN_PREPARING until the actual stub can + * be assigned (we don't create the stub here to avoid doing memory + * allocations inside the mutex). */ + ob_inode->first_fd = __fd_ref(fd); + ob_inode->first_open = OB_OPEN_PREPARING; - INIT_LIST_HEAD(&ob_fd->list); - INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode); + /* If lazy_open is not set, we'll need to immediately send the open, + * so we set triggered right now. */ + ob_inode->triggered = !conf->lazy_open; + } + UNLOCK(&inode->lock); - return ob_fd; + return OB_STATE_FIRST_OPEN; } -void -ob_fd_free(ob_fd_t *ob_fd) +static ob_state_t +ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count, + bool synchronous, bool trigger, ob_inode_t **pob_inode, + fd_t **pfd) { - LOCK(&ob_fd->fd->inode->lock); - { - list_del_init(&ob_fd->ob_fds_on_inode); - } - UNLOCK(&ob_fd->fd->inode->lock); + uint64_t err; - loc_wipe(&ob_fd->loc); - - if (ob_fd->xdata) - dict_unref(ob_fd->xdata); - - if (ob_fd->open_frame) - STACK_DESTROY(ob_fd->open_frame->root); + if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) { + return (ob_state_t)-err; + } - GF_FREE(ob_fd); + return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous, + trigger, pob_inode, pfd); } -int -ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, fd_t *fd_ret, dict_t *xdata) +static ob_state_t +ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode, + fd_t **pfd) { - fd_t *fd = NULL; - int count = 0; - int ob_inode_op_ret = 0; - int ob_inode_op_errno = 0; - ob_fd_t *ob_fd = NULL; - call_stub_t *stub = NULL, *tmp = NULL; - ob_inode_t *ob_inode = NULL; - gf_boolean_t ob_inode_fops_waiting = _gf_false; - struct list_head fops_waiting_on_fd, fops_waiting_on_inode; - - fd = frame->local; - frame->local = NULL; + bool synchronous; - INIT_LIST_HEAD(&fops_waiting_on_fd); - INIT_LIST_HEAD(&fops_waiting_on_inode); + /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't + * we also execute this open synchronously ? */ + synchronous = (flags & O_TRUNC) != 0; - ob_inode = ob_inode_get(this, fd->inode); + return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd); +} - LOCK(&fd->lock); +static int32_t +ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + call_stub_t *stub) +{ + LOCK(&ob_inode->inode->lock); { - ob_fd = __ob_fd_ctx_get(this, fd); - ob_fd->opened = _gf_true; - - ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting; - - list_splice_init(&ob_fd->list, &fops_waiting_on_fd); - - if (op_ret < 0) { - /* mark fd BAD for ever */ - ob_fd->op_errno = op_errno; - ob_fd = NULL; /*shouldn't be freed*/ - } else { - __fd_ctx_del(fd, this, NULL); - } - } - UNLOCK(&fd->lock); - - if (ob_inode_fops_waiting) { - LOCK(&fd->inode->lock); - { - count = --ob_inode->count; - if (op_ret < 0) { - /* TODO: when to reset the error? */ - ob_inode->op_ret = -1; - ob_inode->op_errno = op_errno; - } - - if (count == 0) { - ob_inode->open_in_progress = _gf_false; - ob_inode_op_ret = ob_inode->op_ret; - ob_inode_op_errno = ob_inode->op_errno; - list_splice_init(&ob_inode->resume_fops, - &fops_waiting_on_inode); - } + /* We only queue a stub if the open has not been completed or + * cancelled. */ + if (ob_inode->first_fd == fd) { + list_add_tail(&stub->list, &ob_inode->resume_fops); + stub = NULL; } - UNLOCK(&fd->inode->lock); - } - - if (ob_fd) - ob_fd_free(ob_fd); - - list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list) - { - list_del_init(&stub->list); - - if (op_ret < 0) - call_unwind_error(stub, -1, op_errno); - else - call_resume(stub); } + UNLOCK(&ob_inode->inode->lock); - list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list) - { - list_del_init(&stub->list); - - if (ob_inode_op_ret < 0) - call_unwind_error(stub, -1, ob_inode_op_errno); - else - call_resume(stub); + if (stub != NULL) { + call_resume(stub); } - fd_unref(fd); - - STACK_DESTROY(frame->root); - return 0; } -int -ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +static void +ob_open_destroy(call_stub_t *stub, fd_t *fd) { - call_frame_t *frame = NULL; + stub->frame->local = NULL; + STACK_DESTROY(stub->frame->root); + call_stub_destroy(stub); + fd_unref(fd); +} - if (ob_fd == NULL) { - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) - goto unlock; +static int32_t +ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + call_stub_t *stub) +{ + bool closed; - frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; - } - unlock: - UNLOCK(&fd->lock); - } else { - LOCK(&fd->lock); - { - frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; + LOCK(&ob_inode->inode->lock); + { + closed = ob_inode->first_fd != fd; + if (!closed) { + if (ob_inode->triggered) { + ob_inode->first_open = NULL; + } else { + ob_inode->first_open = stub; + stub = NULL; + } } - UNLOCK(&fd->lock); } + UNLOCK(&ob_inode->inode->lock); - if (frame) { - frame->local = fd_ref(fd); - - STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, - ob_fd->xdata); + if (stub != NULL) { + if (closed) { + ob_open_destroy(stub, fd); + } else { + call_resume(stub); + } } return 0; } -void -ob_inode_wake(xlator_t *this, struct list_head *ob_fds) +static void +ob_resume_pending(struct list_head *list) { - ob_fd_t *ob_fd = NULL, *tmp = NULL; - fd_t *fd = NULL; + call_stub_t *stub; - list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) - { - ob_fd_wake(this, ob_fd->fd, ob_fd); - fd = ob_fd->fd; - ob_fd_free(ob_fd); - fd_unref(fd); - } -} + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); -/* called holding inode->lock and fd->lock */ -void -ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) -{ - if (!src || !dst) - goto out; - - dst->fd = __fd_ref(src->fd); - dst->loc.inode = inode_ref(src->loc.inode); - gf_uuid_copy(dst->loc.gfid, src->loc.gfid); - dst->flags = src->flags; - dst->xdata = dict_ref(src->xdata); - dst->ob_inode = src->ob_inode; -out: - return; + call_resume(stub); + } } -int -open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode, - call_stub_t *stub) +static void +ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret, + int32_t op_errno) { - ob_inode_t *ob_inode = NULL; - ob_fd_t *ob_fd = NULL, *tmp = NULL; - gf_boolean_t was_open_in_progress = _gf_false; - gf_boolean_t wait_for_open = _gf_false; - struct list_head ob_fds = { - 0, - }; + struct list_head list; - ob_inode = ob_inode_get(this, inode); - if (ob_inode == NULL) - goto out; + INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&ob_fds); + if (op_ret < 0) { + fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno); + } - LOCK(&inode->lock); + LOCK(&ob_inode->inode->lock); { - was_open_in_progress = ob_inode->open_in_progress; - ob_inode->unlinked = 1; - - if (was_open_in_progress) { - list_add_tail(&stub->list, &ob_inode->resume_fops); - goto inode_unlock; - } - - list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode) - { - LOCK(&ob_fd->fd->lock); - { - if (ob_fd->opened) - goto fd_unlock; - - ob_inode->count++; - ob_fd->ob_inode_fops_waiting = _gf_true; - - if (ob_fd->open_frame == NULL) { - /* open in progress no need of wake */ - } else { - tmp = ob_fd_new(); - tmp->open_frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; - - ob_fd_copy(ob_fd, tmp); - list_add_tail(&tmp->ob_fds_on_inode, &ob_fds); - } - } - fd_unlock: - UNLOCK(&ob_fd->fd->lock); - } - - if (ob_inode->count) { - wait_for_open = ob_inode->open_in_progress = _gf_true; - list_add_tail(&stub->list, &ob_inode->resume_fops); + /* Only update the fields if the file has not been closed before + * getting here. */ + if (ob_inode->first_fd == fd) { + list_splice_init(&ob_inode->resume_fops, &list); + ob_inode->first_fd = NULL; + ob_inode->first_open = NULL; + ob_inode->triggered = false; } } -inode_unlock: - UNLOCK(&inode->lock); + UNLOCK(&ob_inode->inode->lock); -out: - if (!was_open_in_progress) { - if (!wait_for_open) { - call_resume(stub); - } else { - ob_inode_wake(this, &ob_fds); - } - } + ob_resume_pending(&list); - return 0; + fd_unref(fd); } -int -open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub) +static int32_t +ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - ob_fd_t *ob_fd = NULL; - int op_errno = 0; - - if (!fd) - goto nofd; - - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) - goto unlock; + ob_inode_t *ob_inode; - if (ob_fd->op_errno) { - op_errno = ob_fd->op_errno; - goto unlock; - } + ob_inode = frame->local; + frame->local = NULL; - list_add_tail(&stub->list, &ob_fd->list); - } -unlock: - UNLOCK(&fd->lock); + ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno); -nofd: - if (op_errno) - call_unwind_error(stub, -1, op_errno); - else if (ob_fd) - ob_fd_wake(this, fd, NULL); - else - call_resume(stub); + STACK_DESTROY(frame->root); return 0; } -int -ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, +static int32_t +ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, dict_t *xdata) { - ob_fd_t *ob_fd = NULL; - int ret = -1; - ob_conf_t *conf = NULL; - ob_inode_t *ob_inode = NULL; - gf_boolean_t open_in_progress = _gf_false; - int unlinked = 0; + STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - conf = this->private; + return 0; +} - if (flags & O_TRUNC) { - STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; +static int32_t +ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + dict_t *xdata) +{ + ob_inode_t *ob_inode; + call_frame_t *open_frame; + call_stub_t *stub; + fd_t *first_fd; + ob_state_t state; + + state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd); + if (state == OB_STATE_READY) { + /* There's no pending open, but there are other file descriptors opened + * or the current flags require a synchronous open. */ + return default_open(frame, this, loc, flags, fd, xdata); } - ob_inode = ob_inode_get(this, fd->inode); - - ob_fd = ob_fd_new(); - if (!ob_fd) - goto enomem; - - ob_fd->ob_inode = ob_inode; + if (state == OB_STATE_OPEN_TRIGGERED) { + /* The first open is in progress (either because it was already issued + * or because this request triggered it). We try to create a new stub + * to retry the operation once the initial open completes. */ + stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata); + if (stub != NULL) { + return ob_stub_dispatch(this, ob_inode, first_fd, stub); + } - /* don't do fd_ref, it'll cause leaks */ - ob_fd->fd = fd; + state = -ENOMEM; + } - ob_fd->open_frame = copy_frame(frame); - if (!ob_fd->open_frame) - goto enomem; - ret = loc_copy(&ob_fd->loc, loc); - if (ret) - goto enomem; + if (state == OB_STATE_FIRST_OPEN) { + /* We try to create a stub for the new open. A new frame needs to be + * used because the current one may be destroyed soon after sending + * the open's reply. */ + open_frame = copy_frame(frame); + if (open_frame != NULL) { + stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd, + xdata); + if (stub != NULL) { + open_frame->local = ob_inode; - ob_fd->flags = flags; - if (xdata) - ob_fd->xdata = dict_ref(xdata); + /* TODO: Previous version passed xdata back to the caller, but + * probably this doesn't make sense since it won't contain + * any requested data. I think it would be better to pass + * NULL for xdata. */ + default_open_cbk(frame, NULL, this, 0, 0, fd, xdata); - LOCK(&fd->inode->lock); - { - open_in_progress = ob_inode->open_in_progress; - unlinked = ob_inode->unlinked; - if (!open_in_progress && !unlinked) { - ret = ob_fd_ctx_set(this, fd, ob_fd); - if (ret) { - UNLOCK(&fd->inode->lock); - goto enomem; + return ob_open_dispatch(this, ob_inode, first_fd, stub); } - list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds); + STACK_DESTROY(open_frame->root); } - } - UNLOCK(&fd->inode->lock); - - if (!open_in_progress && !unlinked) { - fd_ref(fd); - STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); + /* In case of error, simulate a regular completion but with an error + * code. */ + ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM); - if (!conf->lazy_open) - ob_fd_wake(this, fd, NULL); - - fd_unref(fd); - } else { - ob_fd_free(ob_fd); - STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + state = -ENOMEM; } - return 0; -enomem: - if (ob_fd) { - if (ob_fd->open_frame) - STACK_DESTROY(ob_fd->open_frame->root); - - loc_wipe(&ob_fd->loc); - if (ob_fd->xdata) - dict_unref(ob_fd->xdata); + /* In case of failure we need to decrement the number of open files because + * ob_fdclose() won't be called. */ - GF_FREE(ob_fd); + LOCK(&fd->inode->lock); + { + ob_inode->open_count--; } + UNLOCK(&fd->inode->lock); - return -1; + gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", + "open", "path=%s", loc->path, NULL); + + return default_open_failure_cbk(frame, -state); } -int -ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, - dict_t *xdata) +static int32_t +ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - fd_t *old_fd = NULL; - int ret = -1; - int op_errno = 0; - call_stub_t *stub = NULL; - - old_fd = fd_lookup(fd->inode, 0); - if (old_fd) { - /* open-behind only when this is the first FD */ - stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); - if (!stub) { - op_errno = ENOMEM; - fd_unref(old_fd); - goto err; - } - - open_and_resume(this, old_fd, stub); - - fd_unref(old_fd); - - return 0; - } - - ret = ob_open_behind(frame, this, loc, flags, fd, xdata); - if (ret) { - op_errno = ENOMEM; - goto err; + ob_inode_t *ob_inode; + call_stub_t *stub; + fd_t *first_fd; + ob_state_t state; + + /* Create requests are never delayed. We always send them synchronously. */ + state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode, + &first_fd); + if (state == OB_STATE_READY) { + /* There's no pending open, but there are other file descriptors opened + * so we simply forward the request synchronously. */ + return default_create(frame, this, loc, flags, mode, umask, fd, xdata); } - return 0; -err: - gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s", - loc->path); - - STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0); - - return 0; -} + if (state == OB_STATE_OPEN_TRIGGERED) { + /* The first open is in progress (either because it was already issued + * or because this request triggered it). We try to create a new stub + * to retry the operation once the initial open completes. */ + stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd, + xdata); + if (stub != NULL) { + return ob_stub_dispatch(this, ob_inode, first_fd, stub); + } -fd_t * -ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag) -{ - fd_t *wind_fd = NULL; - ob_fd_t *ob_fd = NULL; - ob_conf_t *conf = NULL; + state = -ENOMEM; + } - conf = this->private; + /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never + * be returned by ob_open_and_resume_fd(). If we are here it can only be + * because there has been a problem. */ - ob_fd = ob_fd_ctx_get(this, fd); + /* In case of failure we need to decrement the number of open files because + * ob_fdclose() won't be called. */ - if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) { - wind_fd = fd_anonymous(fd->inode); - if ((ob_fd->flags & O_DIRECT) && (flag)) - *flag = *flag | O_DIRECT; - } else { - wind_fd = fd_ref(fd); + LOCK(&fd->inode->lock); + { + ob_inode->open_count--; } + UNLOCK(&fd->inode->lock); + + gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", + "create", "path=%s", loc->path, NULL); - return wind_fd; + return default_create_failure_cbk(frame, -state); } -int +static int32_t ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - ob_conf_t *conf = NULL; - - conf = this->private; - - if (!conf->read_after_open) - wind_fd = ob_get_wind_fd(this, fd, &flags); - else - wind_fd = fd_ref(fd); - - stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset, - flags, xdata); - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); + ob_conf_t *conf = this->private; + bool trigger = conf->read_after_open || !conf->use_anonymous_fd; - return 0; -err: - STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); + OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata); return 0; } -int +static int32_t ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, int count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset, - flags, iobref, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags, + iobref, xdata); return 0; } -int +static int32_t ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - - wind_fd = ob_get_wind_fd(this, fd, NULL); + ob_conf_t *conf = this->private; + bool trigger = !conf->use_anonymous_fd; - stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata); - - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata); return 0; } -int -ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +static int32_t +ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) { - call_stub_t *stub = NULL; - ob_fd_t *ob_fd = NULL; - gf_boolean_t unwind = _gf_false; - - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (ob_fd && ob_fd->open_frame) - /* if open() was never wound to backend, - no need to wind flush() either. - */ - unwind = _gf_true; - } - UNLOCK(&fd->lock); + ob_conf_t *conf = this->private; + bool trigger = !conf->use_anonymous_fd; - if (unwind) - goto unwind; - - stub = fop_flush_stub(frame, default_flush_resume, fd, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0); + OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata); return 0; +} -unwind: - STACK_UNWIND_STRICT(flush, frame, 0, 0, 0); +static int32_t +ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + OB_POST_FLUSH(this, frame, fd, fd, xdata); return 0; } -int +static int32_t ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata); return 0; } -int +static int32_t ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata); return 0; } -int +static int32_t ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata); return 0; } -int +static int32_t ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, int flags, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0); + OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata); return 0; } -int +static int32_t ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata); return 0; } -int +static int32_t ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0); + OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata); return 0; } -int +static int32_t ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_finodelk_stub(frame, default_finodelk_resume, volume, fd, cmd, - flock, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); + OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata); return 0; } -int +static int32_t ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fentrylk_stub(frame, default_fentrylk_resume, volume, fd, - basename, cmd, type, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); + OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type, + xdata); return 0; } -int +static int32_t ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, optype, xattr, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata); return 0; } -int +static int32_t ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt, int valid, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata); return 0; } -int +static int32_t ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, off_t offset, size_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset, - len, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); + OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata); return 0; -err: - STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; } -int +static int32_t ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len, - xdata); - if (!stub) - goto err; + OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata); - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int +static int32_t ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len, - xdata); - if (!stub) - goto err; + OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata); - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int +static int32_t ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, loc->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata); return 0; } -int +static int32_t ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, dst->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0); + OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata); return 0; } -int32_t +static int32_t ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid, - xdata); - if (!stub) - goto err; + OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid, + xdata); - open_all_pending_fds_and_resume(this, loc->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int32_t +static int32_t ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) { - call_stub_t *stub = NULL; - gf_boolean_t access_xattr = _gf_false; - if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) || dict_get(dict, POSIX_ACL_ACCESS_XATTR) || - dict_get(dict, GF_SELINUX_XATTR_KEY)) - access_xattr = _gf_true; - - if (!access_xattr) + dict_get(dict, GF_SELINUX_XATTR_KEY)) { return default_setxattr(frame, this, loc, dict, flags, xdata); + } - stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags, - xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, loc->inode, stub); + OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags, + xdata); return 0; -err: - STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL); - return 0; } -int -ob_release(xlator_t *this, fd_t *fd) +static void +ob_fdclose(xlator_t *this, fd_t *fd) { - ob_fd_t *ob_fd = NULL; + struct list_head list; + ob_inode_t *ob_inode; + call_stub_t *stub; - ob_fd = ob_fd_ctx_get(this, fd); + INIT_LIST_HEAD(&list); + stub = NULL; - ob_fd_free(ob_fd); + LOCK(&fd->inode->lock); + { + ob_inode = ob_inode_get_locked(this, fd->inode); + if (ob_inode != NULL) { + ob_inode->open_count--; + + /* If this fd is the same as ob_inode->first_fd, it means that + * the initial open has not fully completed. We'll try to cancel + * it. */ + if (ob_inode->first_fd == fd) { + if (ob_inode->first_open == OB_OPEN_PREPARING) { + /* In this case ob_open_dispatch() has not been called yet. + * We clear first_fd and first_open to allow that function + * to know that the open is not really needed. This also + * allows other requests to work as expected if they + * arrive before the dispatch function is called. If there + * are pending fops, we can directly process them here. + * (note that there shouldn't be any fd related fops, but + * if there are, it's fine if they fail). */ + ob_inode->first_fd = NULL; + ob_inode->first_open = NULL; + ob_inode->triggered = false; + list_splice_init(&ob_inode->resume_fops, &list); + } else if (!ob_inode->triggered) { + /* If the open has already been dispatched, we can only + * cancel it if it has not been triggered. Otherwise we + * simply wait until it completes. While it's not triggered, + * first_open must be a valid stub and there can't be any + * pending fops. */ + GF_ASSERT((ob_inode->first_open != NULL) && + list_empty(&ob_inode->resume_fops)); + + ob_inode->first_fd = NULL; + stub = ob_inode->first_open; + ob_inode->first_open = NULL; + } + } + } + } + UNLOCK(&fd->inode->lock); - return 0; + if (stub != NULL) { + ob_open_destroy(stub, fd); + } + + ob_resume_pending(&list); } int ob_forget(xlator_t *this, inode_t *inode) { - ob_inode_t *ob_inode = NULL; + ob_inode_t *ob_inode; uint64_t value = 0; - inode_ctx_del(inode, this, &value); - - if (value) { - ob_inode = (ob_inode_t *)((void *)value); - ob_inode_free(ob_inode); + if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) { + ob_inode = (ob_inode_t *)(uintptr_t)value; + GF_FREE(ob_inode); } return 0; @@ -1142,20 +880,18 @@ ob_priv_dump(xlator_t *this) int ob_fdctx_dump(xlator_t *this, fd_t *fd) { - ob_fd_t *ob_fd = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; - int ret = 0; + uint64_t value = 0; + int ret = 0, error = 0; ret = TRY_LOCK(&fd->lock); if (ret) return 0; - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) { - UNLOCK(&fd->lock); - return 0; + if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) { + error = (int32_t)value; } gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind", @@ -1164,17 +900,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd) gf_proc_dump_write("fd", "%p", fd); - gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame); - - if (ob_fd->open_frame) - gf_proc_dump_write("open_frame.root.unique", "%" PRIu64, - ob_fd->open_frame->root->unique); - - gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path); - - gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid)); - - gf_proc_dump_write("flags", "%d", ob_fd->flags); + gf_proc_dump_write("error", "%d", error); UNLOCK(&fd->lock); @@ -1271,11 +997,13 @@ fini(xlator_t *this) struct xlator_fops fops = { .open = ob_open, + .create = ob_create, .readv = ob_readv, .writev = ob_writev, .flush = ob_flush, .fsync = ob_fsync, .fstat = ob_fstat, + .seek = ob_seek, .ftruncate = ob_ftruncate, .fsetxattr = ob_fsetxattr, .setxattr = ob_setxattr, @@ -1295,7 +1023,7 @@ struct xlator_fops fops = { }; struct xlator_cbks cbks = { - .release = ob_release, + .fdclose = ob_fdclose, .forget = ob_forget, }; @@ -1306,6 +1034,14 @@ struct xlator_dumpops dumpops = { struct volume_options options[] = { { + .key = {"open-behind"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable open-behind", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + { .key = {"use-anonymous-fd"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "no", @@ -1349,3 +1085,17 @@ struct volume_options options[] = { {.key = {NULL}} }; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "open-behind", + .category = GF_MAINTAINED, +}; diff --git a/xlators/performance/quick-read/src/quick-read-mem-types.h b/xlators/performance/quick-read/src/quick-read-mem-types.h index 0ebd7e81c3a..e4aef8549ff 100644 --- a/xlators/performance/quick-read/src/quick-read-mem-types.h +++ b/xlators/performance/quick-read/src/quick-read-mem-types.h @@ -11,17 +11,13 @@ #ifndef __QR_MEM_TYPES_H__ #define __QR_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_qr_mem_types_ { gf_qr_mt_qr_inode_t = gf_common_mt_end + 1, gf_qr_mt_content_t, - gf_qr_mt_qr_fd_ctx_t, - gf_qr_mt_iovec, - gf_qr_mt_qr_conf_t, gf_qr_mt_qr_priority_t, gf_qr_mt_qr_private_t, - gf_qr_mt_qr_unlink_ctx_t, gf_qr_mt_end }; #endif diff --git a/xlators/performance/quick-read/src/quick-read-messages.h b/xlators/performance/quick-read/src/quick-read-messages.h index 745eabbc664..da9724a3c9c 100644 --- a/xlators/performance/quick-read/src/quick-read-messages.h +++ b/xlators/performance/quick-read/src/quick-read-messages.h @@ -10,7 +10,7 @@ #ifndef _QUICK_READ_MESSAGES_H_ #define _QUICK_READ_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index 265abc8f3b1..7fe4b3c3a4b 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -10,10 +10,10 @@ #include <math.h> #include "quick-read.h" -#include "statedump.h" +#include <glusterfs/statedump.h> #include "quick-read-messages.h" -#include "upcall-utils.h" -#include "atomic.h" +#include <glusterfs/upcall-utils.h> +#include <glusterfs/atomic.h> typedef struct qr_local { inode_t *inode; @@ -449,8 +449,7 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec; qr_inode->buf = *buf; - - gettimeofday(&qr_inode->last_refresh, NULL); + qr_inode->last_refresh = gf_time(); __qr_inode_register(this, table, qr_inode); } @@ -520,9 +519,7 @@ __qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf, if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) { qr_inode->buf = *buf; - - gettimeofday(&qr_inode->last_refresh, NULL); - + qr_inode->last_refresh = gf_time(); __qr_inode_register(this, table, qr_inode); } else { __qr_inode_prune(this, table, qr_inode, gen); @@ -554,20 +551,14 @@ __qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode) { qr_conf_t *conf = NULL; qr_private_t *priv = NULL; - struct timeval now; - struct timeval diff; priv = this->private; conf = &priv->conf; - gettimeofday(&now, NULL); - - timersub(&now, &qr_inode->last_refresh, &diff); - - if (qr_inode->last_refresh.tv_sec < priv->last_child_down) + if (qr_inode->last_refresh < priv->last_child_down) return _gf_false; - if (diff.tv_sec >= conf->cache_timeout) + if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout) return _gf_false; return _gf_true; @@ -1030,7 +1021,7 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode) char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; - char buf[256] = { + char buf[GF_TIMESTR_SIZE] = { 0, }; @@ -1045,12 +1036,8 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode) gf_proc_dump_write("entire-file-cached", "%s", qr_inode->data ? "yes" : "no"); - if (qr_inode->last_refresh.tv_sec) { - gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh.tv_sec, - gf_timefmt_FT); - snprintf(buf + strlen(buf), sizeof buf - strlen(buf), - ".%" GF_PRI_SUSECONDS, qr_inode->last_refresh.tv_usec); - + if (qr_inode->last_refresh) { + gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT); gf_proc_dump_write("last-cache-validation-time", "%s", buf); } @@ -1214,8 +1201,8 @@ qr_reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("cache-timeout", conf->cache_timeout, options, int32, out); - GF_OPTION_RECONF("cache-invalidation", conf->qr_invalidation, options, bool, - out); + GF_OPTION_RECONF("quick-read-cache-invalidation", conf->qr_invalidation, + options, bool, out); GF_OPTION_RECONF("ctime-invalidation", conf->ctime_invalidation, options, bool, out); @@ -1365,7 +1352,8 @@ qr_init(xlator_t *this) GF_OPTION_INIT("cache-timeout", conf->cache_timeout, int32, out); - GF_OPTION_INIT("cache-invalidation", conf->qr_invalidation, bool, out); + GF_OPTION_INIT("quick-read-cache-invalidation", conf->qr_invalidation, bool, + out); GF_OPTION_INIT("cache-size", conf->cache_size, size_uint64, out); if (!check_cache_size_ok(this, conf->cache_size)) { @@ -1402,7 +1390,7 @@ qr_init(xlator_t *this) ret = 0; - time(&priv->last_child_down); + priv->last_child_down = gf_time(); GF_ATOMIC_INIT(priv->generation, 0); this->private = priv; out: @@ -1452,7 +1440,7 @@ qr_conf_destroy(qr_conf_t *conf) } void -qr_update_child_down_time(xlator_t *this, time_t *now) +qr_update_child_down_time(xlator_t *this, time_t now) { qr_private_t *priv = NULL; @@ -1460,7 +1448,7 @@ qr_update_child_down_time(xlator_t *this, time_t *now) LOCK(&priv->lock); { - priv->last_child_down = *now; + priv->last_child_down = now; } UNLOCK(&priv->lock); } @@ -1506,7 +1494,6 @@ qr_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; qr_private_t *priv = NULL; - time_t now = 0; qr_conf_t *conf = NULL; priv = this->private; @@ -1515,8 +1502,7 @@ qr_notify(xlator_t *this, int event, void *data, ...) switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: - time(&now); - qr_update_child_down_time(this, &now); + qr_update_child_down_time(this, gf_time()); break; case GF_EVENT_UPCALL: if (conf->qr_invalidation) @@ -1577,6 +1563,14 @@ struct xlator_dumpops qr_dumpops = { }; struct volume_options qr_options[] = { + { + .key = {"quick-read"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable quick-read", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, {.key = {"priority"}, .type = GF_OPTION_TYPE_ANY}, {.key = {"cache-size"}, .type = GF_OPTION_TYPE_SIZET, @@ -1603,7 +1597,7 @@ struct volume_options qr_options[] = { .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, }, { - .key = {"cache-invalidation"}, + .key = {"quick-read-cache-invalidation"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "false", .op_version = {GD_OP_VERSION_4_0_0}, diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h index ae99863b426..20fcc70b3a7 100644 --- a/xlators/performance/quick-read/src/quick-read.h +++ b/xlators/performance/quick-read/src/quick-read.h @@ -11,16 +11,16 @@ #ifndef __QUICK_READ_H #define __QUICK_READ_H -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "list.h" -#include "compat.h" -#include "compat-errno.h" -#include "common-utils.h" -#include "call-stub.h" -#include "defaults.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/list.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/defaults.h> #include <libgen.h> #include <sys/time.h> #include <sys/types.h> @@ -39,7 +39,7 @@ struct qr_inode { uint32_t ia_ctime_nsec; uint32_t gen_rollover; struct iatt buf; - struct timeval last_refresh; + time_t last_refresh; struct list_head lru; uint64_t gen; uint64_t invalidation_time; diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c index c4071393c95..8a58ad8bb7a 100644 --- a/xlators/performance/read-ahead/src/page.c +++ b/xlators/performance/read-ahead/src/page.c @@ -8,10 +8,10 @@ cases as published by the Free Software Foundation. */ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> #include "read-ahead.h" #include <assert.h> #include "read-ahead-messages.h" @@ -25,7 +25,7 @@ ra_page_get(ra_file_t *file, off_t offset) GF_VALIDATE_OR_GOTO("read-ahead", file, out); page = file->pages.next; - rounded_offset = floor(offset, file->page_size); + rounded_offset = gf_floor(offset, file->page_size); while (page != &file->pages && page->offset < rounded_offset) page = page->next; @@ -47,7 +47,7 @@ ra_page_create(ra_file_t *file, off_t offset) GF_VALIDATE_OR_GOTO("read-ahead", file, out); page = file->pages.next; - rounded_offset = floor(offset, file->page_size); + rounded_offset = gf_floor(offset, file->page_size); while (page != &file->pages && page->offset < rounded_offset) page = page->next; @@ -347,19 +347,15 @@ ra_frame_fill(ra_page_t *page, call_frame_t *frame) new->size = copy_size; new->iobref = iobref_ref(page->iobref); new->count = iov_subset(page->vector, page->count, src_offset, - src_offset + copy_size, NULL); - new->vector = GF_CALLOC(new->count, sizeof(struct iovec), - gf_ra_mt_iovec); - if (new->vector == NULL) { + copy_size, &new->vector, 0); + if (new->count < 0) { local->op_ret = -1; local->op_errno = ENOMEM; + iobref_unref(new->iobref); GF_FREE(new); goto out; } - new->count = iov_subset(page->vector, page->count, src_offset, - src_offset + copy_size, new->vector); - new->next = fill; new->prev = new->next->prev; new->next->prev = new; diff --git a/xlators/performance/read-ahead/src/read-ahead-mem-types.h b/xlators/performance/read-ahead/src/read-ahead-mem-types.h index 239e574506a..f07cfc5bba5 100644 --- a/xlators/performance/read-ahead/src/read-ahead-mem-types.h +++ b/xlators/performance/read-ahead/src/read-ahead-mem-types.h @@ -11,7 +11,7 @@ #ifndef __RA_MEM_TYPES_H__ #define __RA_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_ra_mem_types_ { gf_ra_mt_ra_file_t = gf_common_mt_end + 1, diff --git a/xlators/performance/read-ahead/src/read-ahead-messages.h b/xlators/performance/read-ahead/src/read-ahead-messages.h index 500d3246946..0302b7a7122 100644 --- a/xlators/performance/read-ahead/src/read-ahead-messages.h +++ b/xlators/performance/read-ahead/src/read-ahead-messages.h @@ -10,7 +10,7 @@ #ifndef _READ_AHEAD_MESSAGES_H_ #define _READ_AHEAD_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c index c62bd1bb172..5246e1317d2 100644 --- a/xlators/performance/read-ahead/src/read-ahead.c +++ b/xlators/performance/read-ahead/src/read-ahead.c @@ -15,12 +15,12 @@ - ensure efficient memory management in case of random seek */ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> #include "read-ahead.h" -#include "statedump.h" +#include <glusterfs/statedump.h> #include <assert.h> #include <sys/time.h> #include "read-ahead-messages.h" @@ -268,7 +268,7 @@ read_ahead(call_frame_t *frame, ra_file_t *file) } ra_size = file->page_size * file->page_count; - ra_offset = floor(file->offset, file->page_size); + ra_offset = gf_floor(file->offset, file->page_size); cap = file->size ? file->size : file->offset + ra_size; while (ra_offset < min(file->offset + ra_size, cap)) { @@ -354,8 +354,8 @@ dispatch_requests(call_frame_t *frame, ra_file_t *file) local = frame->local; conf = file->conf; - rounded_offset = floor(local->offset, file->page_size); - rounded_end = roof(local->offset + local->size, file->page_size); + rounded_offset = gf_floor(local->offset, file->page_size); + rounded_end = gf_roof(local->offset + local->size, file->page_size); trav_offset = rounded_offset; @@ -509,14 +509,14 @@ ra_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, dispatch_requests(frame, file); - flush_region(frame, file, 0, floor(offset, file->page_size), 0); + flush_region(frame, file, 0, gf_floor(offset, file->page_size), 0); read_ahead(frame, file); - ra_frame_return(frame); - file->offset = offset + size; + ra_frame_return(frame); + return 0; unwind: @@ -1021,7 +1021,6 @@ ra_priv_dump(xlator_t *this) char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; - gf_boolean_t add_section = _gf_false; if (!this) { goto out; @@ -1037,7 +1036,6 @@ ra_priv_dump(xlator_t *this) gf_proc_dump_build_key(key_prefix, "xlator.performance.read-ahead", "priv"); gf_proc_dump_add_section("%s", key_prefix); - add_section = _gf_true; ret = pthread_mutex_trylock(&conf->conf_lock); if (ret) @@ -1053,9 +1051,6 @@ ra_priv_dump(xlator_t *this) ret = 0; out: if (ret && conf) { - if (add_section == _gf_false) - gf_proc_dump_add_section("%s", key_prefix); - gf_proc_dump_write("Unable to dump priv", "(Lock acquisition failed) %s", this->name); } @@ -1222,6 +1217,14 @@ struct xlator_dumpops dumpops = { }; struct volume_options options[] = { + { + .key = {"read-ahead"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable read-ahead", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, {.key = {"force-atime-update"}, .type = GF_OPTION_TYPE_BOOL, .op_version = {1}, @@ -1253,3 +1256,17 @@ struct volume_options options[] = { .description = "Enable/Disable read ahead translator"}, {.key = {NULL}}, }; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "read-ahead", + .category = GF_MAINTAINED, +}; diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h index 4e99853cc32..e9432fb47cc 100644 --- a/xlators/performance/read-ahead/src/read-ahead.h +++ b/xlators/performance/read-ahead/src/read-ahead.h @@ -11,11 +11,11 @@ #ifndef __READ_AHEAD_H #define __READ_AHEAD_H -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/common-utils.h> #include "read-ahead-mem-types.h" struct ra_conf; diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h index e2462b48833..498ffae7f64 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h +++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h @@ -11,7 +11,7 @@ #ifndef __RDA_MEM_TYPES_H__ #define __RDA_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_rda_mem_types_ { gf_rda_mt_rda_local = gf_common_mt_end + 1, diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h index c9ce16307e6..28ec14dd845 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h +++ b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h @@ -10,7 +10,7 @@ #ifndef _READDIR_AHEAD_MESSAGES_H_ #define _READDIR_AHEAD_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c index 98c1a3f7259..4ba7ee7077a 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.c +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c @@ -24,12 +24,12 @@ */ #include <math.h> -#include "glusterfs.h" -#include "xlator.h" -#include "call-stub.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/call-stub.h> #include "readdir-ahead.h" #include "readdir-ahead-mem-types.h" -#include "defaults.h" +#include <glusterfs/defaults.h> #include "readdir-ahead-messages.h" static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *); @@ -68,13 +68,13 @@ get_rda_fd_ctx(fd_t *fd, xlator_t *this) /* ctx offset values initialized to 0 */ ctx->xattrs = NULL; - if (__fd_ctx_set(fd, this, (uint64_t)ctx) < 0) { + if (__fd_ctx_set(fd, this, (uint64_t)(uintptr_t)ctx) < 0) { GF_FREE(ctx); ctx = NULL; goto out; } } else { - ctx = (struct rda_fd_ctx *)val; + ctx = (struct rda_fd_ctx *)(uintptr_t)val; } out: UNLOCK(&fd->lock); @@ -90,7 +90,7 @@ __rda_inode_ctx_get(inode_t *inode, xlator_t *this) ret = __inode_ctx_get1(inode, this, &ctx_uint); if (ret == 0) - return (rda_inode_ctx_t *)ctx_uint; + return (rda_inode_ctx_t *)(uintptr_t)ctx_uint; ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_rda_mt_inode_ctx_t); if (!ctx_p) @@ -98,7 +98,8 @@ __rda_inode_ctx_get(inode_t *inode, xlator_t *this) GF_ATOMIC_INIT(ctx_p->generation, 0); - ret = __inode_ctx_set1(inode, this, (uint64_t *)&ctx_p); + ctx_uint = (uint64_t)(uintptr_t)ctx_p; + ret = __inode_ctx_set1(inode, this, &ctx_uint); if (ret < 0) { GF_FREE(ctx_p); return NULL; @@ -129,7 +130,10 @@ __rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this, * An example of this case can be response of write request * that is cached in write-behind. */ - tmp_stat = ctx_p->statbuf; + if (stbuf_in) + tmp_stat = *stbuf_in; + else + tmp_stat = ctx_p->statbuf; memset(&ctx_p->statbuf, 0, sizeof(ctx_p->statbuf)); gf_uuid_copy(ctx_p->statbuf.ia_gfid, tmp_stat.ia_gfid); ctx_p->statbuf.ia_type = tmp_stat.ia_type; @@ -145,7 +149,8 @@ __rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this, goto out; } } else { - if (generation != GF_ATOMIC_GET(ctx_p->generation)) + if ((generation != -1) && + (generation != GF_ATOMIC_GET(ctx_p->generation))) goto out; } @@ -201,6 +206,61 @@ rda_reset_ctx(xlator_t *this, struct rda_fd_ctx *ctx) } } +static void +rda_mark_inode_dirty(xlator_t *this, inode_t *inode) +{ + inode_t *parent = NULL; + fd_t *fd = NULL; + uint64_t val = 0; + int32_t ret = 0; + struct rda_fd_ctx *fd_ctx = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + parent = inode_parent(inode, NULL, NULL); + if (parent) { + LOCK(&parent->lock); + { + list_for_each_entry(fd, &parent->fd_list, inode_list) + { + val = 0; + fd_ctx_get(fd, this, &val); + if (val == 0) + continue; + + fd_ctx = (void *)(uintptr_t)val; + uuid_utoa_r(inode->gfid, gfid); + if (!GF_ATOMIC_GET(fd_ctx->prefetching)) + continue; + + LOCK(&fd_ctx->lock); + { + if (GF_ATOMIC_GET(fd_ctx->prefetching)) { + if (fd_ctx->writes_during_prefetch == NULL) + fd_ctx->writes_during_prefetch = dict_new(); + + ret = dict_set_int8(fd_ctx->writes_during_prefetch, + gfid, 1); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "marking to invalidate stats of %s from an " + "in progress " + "prefetching has failed, might result in " + "stale stat to " + "application", + gfid); + } + } + } + UNLOCK(&fd_ctx->lock); + } + } + UNLOCK(&parent->lock); + inode_unref(parent); + } + + return; +} + /* * Check whether we can handle a request. Offset verification is done by the * caller, so we only check whether the preload buffer has completion status @@ -264,7 +324,8 @@ __rda_fill_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size, memset(&tmp_stat, 0, sizeof(tmp_stat)); - if (dirent->inode) { + if (dirent->inode && (!((strcmp(dirent->d_name, ".") == 0) || + (strcmp(dirent->d_name, "..") == 0)))) { rda_inode_ctx_get_iatt(dirent->inode, this, &tmp_stat); dirent->d_stat = tmp_stat; } @@ -433,6 +494,11 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int ret = 0; gf_boolean_t serve = _gf_false; call_stub_t *stub = NULL; + char gfid[GF_UUID_BUF_SIZE] = { + 0, + }; + uint64_t generation = 0; + call_frame_t *fill_frame = NULL; INIT_LIST_HEAD(&serve_entries.list); LOCK(&ctx->lock); @@ -451,6 +517,7 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, list_for_each_entry_safe(dirent, tmp, &entries->list, list) { list_del_init(&dirent->list); + /* must preserve entry order */ list_add_tail(&dirent->list, &ctx->entries.list); if (dirent->inode) { @@ -460,8 +527,21 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * request was initiated. So, we pass 0 for * generation number */ - rda_inode_ctx_update_iatts(dirent->inode, this, &dirent->d_stat, - &dirent->d_stat, 0); + + generation = -1; + if (ctx->writes_during_prefetch) { + memset(gfid, 0, sizeof(gfid)); + uuid_utoa_r(dirent->inode->gfid, gfid); + if (dict_get(ctx->writes_during_prefetch, gfid)) + generation = 0; + } + + if (!((strcmp(dirent->d_name, ".") == 0) || + (strcmp(dirent->d_name, "..") == 0))) { + rda_inode_ctx_update_iatts(dirent->inode, this, + &dirent->d_stat, &dirent->d_stat, + generation); + } } dirent_size = gf_dirent_size(dirent->d_name); @@ -474,6 +554,13 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } } + if (ctx->writes_during_prefetch) { + dict_unref(ctx->writes_during_prefetch); + ctx->writes_during_prefetch = NULL; + } + + GF_ATOMIC_DEC(ctx->prefetching); + if (ctx->cur_size >= priv->rda_high_wmark) ctx->state &= ~RDA_FD_PLUGGED; @@ -522,8 +609,7 @@ out: ctx->xattrs = NULL; } - rda_local_wipe(ctx->fill_frame->local); - STACK_DESTROY(ctx->fill_frame->root); + fill_frame = ctx->fill_frame; ctx->fill_frame = NULL; } @@ -532,6 +618,10 @@ out: op_errno = 0; UNLOCK(&ctx->lock); + if (fill_frame) { + rda_local_wipe(fill_frame->local); + STACK_DESTROY(fill_frame->root); + } if (serve) { STACK_UNWIND_STRICT(readdirp, stub->frame, ret, op_errno, @@ -603,6 +693,7 @@ rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd) } local->offset = offset; + GF_ATOMIC_INC(ctx->prefetching); UNLOCK(&ctx->lock); @@ -677,11 +768,12 @@ rda_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; + + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, local->generation); - if (postbuf_out.ia_ctime == 0) - memset(&postbuf_out, 0, sizeof(postbuf_out)); unwind: RDA_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, &postbuf_out, xdata); @@ -712,12 +804,10 @@ rda_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, local->generation); - if (postbuf_out.ia_ctime == 0) - memset(&postbuf_out, 0, sizeof(postbuf_out)); - unwind: RDA_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, &postbuf_out, xdata); @@ -747,12 +837,10 @@ rda_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, local->generation); - if (postbuf_out.ia_ctime == 0) - memset(&postbuf_out, 0, sizeof(postbuf_out)); - unwind: RDA_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, &postbuf_out, xdata); @@ -782,11 +870,10 @@ rda_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, local->generation); - if (postbuf_out.ia_ctime == 0) - memset(&postbuf_out, 0, sizeof(postbuf_out)); unwind: RDA_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, &postbuf_out, xdata); @@ -816,12 +903,10 @@ rda_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, local->generation); - if (postbuf_out.ia_ctime == 0) - memset(&postbuf_out, 0, sizeof(postbuf_out)); - unwind: RDA_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, &postbuf_out, xdata); @@ -851,10 +936,9 @@ rda_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, local->generation); - if (postbuf_out.ia_ctime == 0) - memset(&postbuf_out, 0, sizeof(postbuf_out)); unwind: RDA_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, &postbuf_out, @@ -881,7 +965,7 @@ rda_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; - + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL, local->generation); unwind: @@ -908,7 +992,7 @@ rda_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; - + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL, local->generation); unwind: @@ -939,10 +1023,9 @@ rda_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out, local->generation); - if (postbuf_out.ia_ctime == 0) - memset(&postbuf_out, 0, sizeof(postbuf_out)); unwind: RDA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, &postbuf_out, @@ -973,10 +1056,9 @@ rda_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out, local->generation); - if (postbuf_out.ia_ctime == 0) - memset(&postbuf_out, 0, sizeof(postbuf_out)); unwind: RDA_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, statpre, &postbuf_out, @@ -1003,7 +1085,7 @@ rda_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; - + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL, local->generation); unwind: @@ -1030,7 +1112,7 @@ rda_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; local = frame->local; - + rda_mark_inode_dirty(this, local->inode); rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL, local->generation); unwind: @@ -1056,7 +1138,7 @@ rda_releasedir(xlator_t *this, fd_t *fd) if (fd_ctx_del(fd, this, &val) < 0) return -1; - ctx = (struct rda_fd_ctx *)val; + ctx = (struct rda_fd_ctx *)(uintptr_t)val; if (!ctx) return 0; @@ -1084,7 +1166,7 @@ rda_forget(xlator_t *this, inode_t *inode) if (!ctx_uint) return 0; - ctx = (rda_inode_ctx_t *)ctx_uint; + ctx = (rda_inode_ctx_t *)(uintptr_t)ctx_uint; GF_FREE(ctx); @@ -1222,6 +1304,14 @@ struct xlator_cbks cbks = { struct volume_options options[] = { { + .key = {"readdir-ahead"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable readdir-ahead", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + { .key = {"rda-request-size"}, .type = GF_OPTION_TYPE_SIZET, .min = 4096, @@ -1277,3 +1367,16 @@ struct volume_options options[] = { .description = "Enable/Disable readdir ahead translator"}, {.key = {NULL}}, }; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "readdir-ahead", + .category = GF_MAINTAINED, +}; diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h index b5e13800826..619c41059ff 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.h +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h @@ -67,6 +67,8 @@ struct rda_fd_ctx { call_stub_t *stub; int op_errno; dict_t *xattrs; /* md-cache keys to be sent in readdirp() */ + dict_t *writes_during_prefetch; + gf_atomic_t prefetching; }; struct rda_local { diff --git a/xlators/performance/symlink-cache/Makefile.am b/xlators/performance/symlink-cache/Makefile.am deleted file mode 100644 index d471a3f9243..00000000000 --- a/xlators/performance/symlink-cache/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/performance/symlink-cache/src/Makefile.am b/xlators/performance/symlink-cache/src/Makefile.am deleted file mode 100644 index 0bfb03a68de..00000000000 --- a/xlators/performance/symlink-cache/src/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -xlator_LTLIBRARIES = symlink-cache.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/performance - -symlink_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) - -symlink_cache_la_SOURCES = symlink-cache.c -symlink_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -noinst_HEADERS = symlink-cache-messages.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -CLEANFILES = diff --git a/xlators/performance/symlink-cache/src/symlink-cache-messages.h b/xlators/performance/symlink-cache/src/symlink-cache-messages.h deleted file mode 100644 index c1ef1a67f9b..00000000000 --- a/xlators/performance/symlink-cache/src/symlink-cache-messages.h +++ /dev/null @@ -1,30 +0,0 @@ -/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _SYMLINK_CACHE_MESSAGES_H_ -#define _SYMLINK_CACHE_MESSAGES_H_ - -#include "glfs-message-id.h" - -/* To add new message IDs, append new identifiers at the end of the list. - * - * Never remove a message ID. If it's not used anymore, you can rename it or - * leave it as it is, but not delete it. This is to prevent reutilization of - * IDs by other messages. - * - * The component name must match one of the entries defined in - * glfs-message-id.h. - */ - -GLFS_MSGID(SYMLINK_CACHE, SYMLINK_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, - SYMLINK_CACHE_MSG_VOL_MISCONFIGURED, SYMLINK_CACHE_MSG_NO_MEMORY, - SYMLINK_CACHE_MSG_DICT_GET_FAILED, - SYMLINK_CACHE_MSG_DICT_SET_FAILED); - -#endif /* _SYMLINK_CACHE_MESSAGES_H_ */ diff --git a/xlators/performance/symlink-cache/src/symlink-cache.c b/xlators/performance/symlink-cache/src/symlink-cache.c deleted file mode 100644 index 2a686dcb87e..00000000000 --- a/xlators/performance/symlink-cache/src/symlink-cache.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "list.h" -#include "compat.h" -#include "compat-errno.h" -#include "common-utils.h" -#include "symlink-cache-messages.h" - -struct symlink_cache { - time_t ctime; - char *readlink; -}; - -static int -symlink_inode_ctx_get(inode_t *inode, xlator_t *this, void **ctx) -{ - int ret = 0; - uint64_t tmp_ctx = 0; - ret = inode_ctx_get(inode, this, &tmp_ctx); - if (-1 == ret) - gf_msg(this->name, GF_LOG_ERROR, EINVAL, - SYMLINK_CACHE_MSG_DICT_GET_FAILED, "dict get failed"); - else - *ctx = (void *)(long)tmp_ctx; - - return 0; -} - -static int -symlink_inode_ctx_set(inode_t *inode, xlator_t *this, void *ctx) -{ - int ret = 0; - ret = inode_ctx_put(inode, this, (uint64_t)(long)ctx); - if (-1 == ret) - gf_msg(this->name, GF_LOG_ERROR, EINVAL, - SYMLINK_CACHE_MSG_DICT_SET_FAILED, "dict set failed"); - - return 0; -} - -int -sc_cache_update(xlator_t *this, inode_t *inode, const char *link) -{ - struct symlink_cache *sc = NULL; - - symlink_inode_ctx_get(inode, this, VOID(&sc)); - if (!sc) - return 0; - - if (!sc->readlink) { - gf_msg_debug(this->name, 0, "updating cache: %s", link); - - sc->readlink = strdup(link); - } else - gf_msg_debug(this->name, 0, "not updating existing cache: %s with %s", - sc->readlink, link); - - return 0; -} - -int -sc_cache_set(xlator_t *this, inode_t *inode, struct iatt *buf, const char *link) -{ - struct symlink_cache *sc = NULL; - int ret = -1; - int need_set = 0; - - symlink_inode_ctx_get(inode, this, VOID(&sc)); - if (!sc) { - need_set = 1; - sc = CALLOC(1, sizeof(*sc)); - if (!sc) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, - SYMLINK_CACHE_MSG_NO_MEMORY, "out of memory :("); - goto err; - } - } - - if (sc->readlink) { - gf_msg_debug(this->name, 0, - "replacing old cache: %s with new cache: %s", sc->readlink, - link); - FREE(sc->readlink); - sc->readlink = NULL; - } - - if (link) { - sc->readlink = strdup(link); - if (!sc->readlink) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, - SYMLINK_CACHE_MSG_NO_MEMORY, "out of memory :("); - goto err; - } - } - - sc->ctime = buf->ia_ctime; - - gf_msg_debug(this->name, 0, "setting symlink cache: %s", link); - - if (need_set) { - ret = symlink_inode_ctx_set(inode, this, sc); - - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, SYMLINK_CACHE_MSG_NO_MEMORY, - "could not set inode context "); - goto err; - } - } - - return 0; -err: - - if (sc) { - FREE(sc->readlink); - sc->readlink = NULL; - FREE(sc); - } - - return -1; -} - -int -sc_cache_flush(xlator_t *this, inode_t *inode) -{ - struct symlink_cache *sc = NULL; - - symlink_inode_ctx_get(inode, this, VOID(&sc)); - if (!sc) - return 0; - - if (sc->readlink) { - gf_msg_debug(this->name, 0, "flushing cache: %s", sc->readlink); - - FREE(sc->readlink); - sc->readlink = NULL; - } - - FREE(sc); - - return 0; -} - -int -sc_cache_validate(xlator_t *this, inode_t *inode, struct iatt *buf) -{ - struct symlink_cache *sc = NULL; - uint64_t tmp_sc = 0; - - if (!IA_ISLNK(buf->ia_type)) { - sc_cache_flush(this, inode); - return 0; - } - - symlink_inode_ctx_get(inode, this, VOID(&sc)); - - if (!sc) { - sc_cache_set(this, inode, buf, NULL); - inode_ctx_get(inode, this, &tmp_sc); - - if (!tmp_sc) { - gf_msg(this->name, GF_LOG_ERROR, 0, SYMLINK_CACHE_MSG_NO_MEMORY, - "out of memory :("); - return 0; - } - sc = (struct symlink_cache *)(long)tmp_sc; - } - - if (sc->ctime == buf->ia_ctime) - return 0; - - /* STALE */ - if (sc->readlink) { - gf_msg_debug(this->name, 0, "flushing cache: %s", sc->readlink); - - FREE(sc->readlink); - sc->readlink = NULL; - } - - sc->ctime = buf->ia_ctime; - - return 0; -} - -int -sc_cache_get(xlator_t *this, inode_t *inode, char **link) -{ - struct symlink_cache *sc = NULL; - - symlink_inode_ctx_get(inode, this, VOID(&sc)); - - if (!sc) - return 0; - - if (link && sc->readlink) - *link = strdup(sc->readlink); - return 0; -} - -int -sc_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, const char *link, struct iatt *sbuf, - dict_t *xdata) -{ - if (op_ret > 0) - sc_cache_update(this, frame->local, link); - - inode_unref(frame->local); - frame->local = NULL; - - STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, link, sbuf, xdata); - return 0; -} - -int -sc_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, - dict_t *xdata) -{ - char *link = NULL; - struct iatt buf = { - 0, - }; - - sc_cache_get(this, loc->inode, &link); - - if (link) { - /* cache hit */ - gf_msg_debug(this->name, 0, "cache hit %s -> %s", loc->path, link); - - /* - libglusterfsclient, nfs or any other translators - using buf in readlink_cbk should be aware that @buf - is 0 filled - */ - STACK_UNWIND_STRICT(readlink, frame, strlen(link), 0, link, &buf, NULL); - FREE(link); - return 0; - } - - frame->local = inode_ref(loc->inode); - - STACK_WIND(frame, sc_readlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, loc, size, xdata); - - return 0; -} - -int -sc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) -{ - if (op_ret == 0) { - if (frame->local) { - sc_cache_set(this, inode, buf, frame->local); - } - } - - STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent, - postparent, xdata); - return 0; -} - -int -sc_symlink(call_frame_t *frame, xlator_t *this, const char *dst, loc_t *src, - mode_t umask, dict_t *xdata) -{ - frame->local = strdup(dst); - - STACK_WIND(frame, sc_symlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, dst, src, umask, xdata); - - return 0; -} - -int -sc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - if (op_ret == 0) - sc_cache_validate(this, inode, buf); - else - sc_cache_flush(this, inode); - - STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, - postparent); - return 0; -} - -int -sc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - STACK_WIND(frame, sc_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); - - return 0; -} - -int -sc_forget(xlator_t *this, inode_t *inode) -{ - sc_cache_flush(this, inode); - - return 0; -} - -int32_t -init(xlator_t *this) -{ - if (!this->children || this->children->next) { - gf_msg(this->name, GF_LOG_ERROR, 0, - SYMLINK_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, - "FATAL: volume (%s) not configured with exactly one " - "child", - this->name); - return -1; - } - - if (!this->parents) { - gf_msg(this->name, GF_LOG_WARNING, 0, - SYMLINK_CACHE_MSG_VOL_MISCONFIGURED, - "dangling volume. check volfile "); - } - - return 0; -} - -void -fini(xlator_t *this) -{ - return; -} - -struct xlator_fops fops = { - .lookup = sc_lookup, - .symlink = sc_symlink, - .readlink = sc_readlink, -}; - -struct xlator_cbks cbks = { - .forget = sc_forget, -}; - -struct volume_options options[] = { - {.key = {NULL}}, -}; diff --git a/xlators/performance/write-behind/src/write-behind-mem-types.h b/xlators/performance/write-behind/src/write-behind-mem-types.h index c92a7d4c1f4..a0647299150 100644 --- a/xlators/performance/write-behind/src/write-behind-mem-types.h +++ b/xlators/performance/write-behind/src/write-behind-mem-types.h @@ -11,7 +11,7 @@ #ifndef __WB_MEM_TYPES_H__ #define __WB_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_wb_mem_types_ { gf_wb_mt_wb_file_t = gf_common_mt_end + 1, diff --git a/xlators/performance/write-behind/src/write-behind-messages.h b/xlators/performance/write-behind/src/write-behind-messages.h index 914fc63dda6..e9ea474879b 100644 --- a/xlators/performance/write-behind/src/write-behind-messages.h +++ b/xlators/performance/write-behind/src/write-behind-messages.h @@ -10,7 +10,7 @@ #ifndef _WRITE_BEHIND_MESSAGES_H_ #define _WRITE_BEHIND_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 62974db93de..00cfca016e6 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -8,17 +8,17 @@ cases as published by the Free Software Foundation. */ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "list.h" -#include "compat.h" -#include "compat-errno.h" -#include "common-utils.h" -#include "call-stub.h" -#include "statedump.h" -#include "defaults.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/list.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/statedump.h> +#include <glusterfs/defaults.h> #include "write-behind-mem-types.h" #include "write-behind-messages.h" @@ -226,7 +226,7 @@ out: } static void -wb_set_invalidate(wb_inode_t *wb_inode, int set) +wb_set_invalidate(wb_inode_t *wb_inode) { int readdirps = 0; inode_t *parent_inode = NULL; @@ -240,21 +240,21 @@ wb_set_invalidate(wb_inode_t *wb_inode, int set) LOCK(&wb_parent_inode->lock); { readdirps = GF_ATOMIC_GET(wb_parent_inode->readdirps); - if (readdirps && set) { - GF_ATOMIC_SWAP(wb_inode->invalidate, 1); - list_del_init(&wb_inode->invalidate_list); + if (readdirps && list_empty(&wb_inode->invalidate_list)) { + inode_ref(wb_inode->inode); + GF_ATOMIC_INIT(wb_inode->invalidate, 1); list_add(&wb_inode->invalidate_list, &wb_parent_inode->invalidate_list); - } else if (readdirps == 0) { - GF_ATOMIC_SWAP(wb_inode->invalidate, 0); - list_del_init(&wb_inode->invalidate_list); } } UNLOCK(&wb_parent_inode->lock); } else { - GF_ATOMIC_SWAP(wb_inode->invalidate, 0); + GF_ATOMIC_INIT(wb_inode->invalidate, 0); } + if (parent_inode) + inode_unref(parent_inode); + return; } @@ -718,6 +718,10 @@ wb_inode_destroy(wb_inode_t *wb_inode) { GF_VALIDATE_OR_GOTO("write-behind", wb_inode, out); + GF_ASSERT(list_empty(&wb_inode->todo)); + GF_ASSERT(list_empty(&wb_inode->liability)); + GF_ASSERT(list_empty(&wb_inode->temptation)); + LOCK_DESTROY(&wb_inode->lock); GF_FREE(wb_inode); out: @@ -967,8 +971,7 @@ __wb_modify_write_request(wb_request_t *req, int synced_size) vector = req->stub->args.vector; count = req->stub->args.count; - req->stub->args.count = iov_subset(vector, count, synced_size, - iov_length(vector, count), vector); + req->stub->args.count = iov_skip(vector, count, synced_size); out: return; @@ -1092,7 +1095,7 @@ wb_fulfill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * In the above scenario, stat for the file is sent back in readdirp * response but it is stale. * </comment> */ - wb_set_invalidate(wb_inode, 1); + wb_set_invalidate(wb_inode); if (op_ret == -1) { wb_fulfill_err(head, op_errno); @@ -1280,14 +1283,14 @@ __wb_pick_unwinds(wb_inode_t *wb_inode, list_head_t *lies) wb_inode->window_current += req->orig_size; + wb_inode->gen++; + if (!req->ordering.fulfilled) { /* burden increased */ list_add_tail(&req->lie, &wb_inode->liability); req->ordering.lied = 1; - wb_inode->gen++; - uuid_utoa_r(req->gfid, gfid); gf_msg_debug(wb_inode->this->name, 0, "(unique=%" PRIu64 @@ -1519,6 +1522,10 @@ __wb_handle_failed_conflict(wb_request_t *req, wb_request_t *conflict, */ req->op_ret = -1; req->op_errno = conflict->op_errno; + if ((req->stub->fop == GF_FOP_TRUNCATE) || + (req->stub->fop == GF_FOP_FTRUNCATE)) { + req->stub->frame->local = NULL; + } list_del_init(&req->todo); list_add_tail(&req->winds, tasks); @@ -1744,15 +1751,9 @@ wb_do_winds(wb_inode_t *wb_inode, list_head_t *tasks) void wb_process_queue(wb_inode_t *wb_inode) { - list_head_t tasks = { - 0, - }; - list_head_t lies = { - 0, - }; - list_head_t liabilities = { - 0, - }; + list_head_t tasks; + list_head_t lies; + list_head_t liabilities; int wind_failure = 0; INIT_LIST_HEAD(&tasks); @@ -1773,15 +1774,18 @@ wb_process_queue(wb_inode_t *wb_inode) } UNLOCK(&wb_inode->lock); - wb_do_unwinds(wb_inode, &lies); + if (!list_empty(&lies)) + wb_do_unwinds(wb_inode, &lies); - wb_do_winds(wb_inode, &tasks); + if (!list_empty(&tasks)) + wb_do_winds(wb_inode, &tasks); /* If there is an error in wb_fulfill before winding write * requests, we would miss invocation of wb_process_queue * from wb_fulfill_cbk. So, retry processing again. */ - wind_failure = wb_fulfill(wb_inode, &liabilities); + if (!list_empty(&liabilities)) + wind_failure = wb_fulfill(wb_inode, &liabilities); } while (wind_failure); return; @@ -1812,6 +1816,12 @@ wb_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, frame->local = NULL; wb_inode = req->wb_inode; + LOCK(&req->wb_inode->lock); + { + list_del_init(&req->wip); + } + UNLOCK(&req->wb_inode->lock); + wb_request_unref(req); /* requests could be pending while this was in progress */ @@ -1931,6 +1941,9 @@ wb_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, unwind: STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, NULL); + + if (stub) + call_stub_destroy(stub); return 0; noqueue: @@ -2013,6 +2026,9 @@ wb_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) unwind: STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, NULL); + if (stub) + call_stub_destroy(stub); + return 0; noqueue: @@ -2056,6 +2072,8 @@ wb_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, unwind: STACK_UNWIND_STRICT(fsync, frame, -1, op_errno, NULL, NULL, NULL); + if (stub) + call_stub_destroy(stub); return 0; noqueue: @@ -2471,6 +2489,9 @@ wb_mark_readdirp_start(xlator_t *this, inode_t *directory) wb_directory_inode = wb_inode_create(this, directory); + if (!wb_directory_inode) + return; + LOCK(&wb_directory_inode->lock); { GF_ATOMIC_INC(wb_directory_inode->readdirps); @@ -2488,6 +2509,9 @@ wb_mark_readdirp_end(xlator_t *this, inode_t *directory) wb_directory_inode = wb_inode_ctx_get(this, directory); + if (!wb_directory_inode) + return; + LOCK(&wb_directory_inode->lock); { readdirps = GF_ATOMIC_DEC(wb_directory_inode->readdirps); @@ -2499,7 +2523,8 @@ wb_mark_readdirp_end(xlator_t *this, inode_t *directory) invalidate_list) { list_del_init(&wb_inode->invalidate_list); - GF_ATOMIC_SWAP(wb_inode->invalidate, 0); + GF_ATOMIC_INIT(wb_inode->invalidate, 0); + inode_unref(wb_inode->inode); } } unlock: @@ -2541,16 +2566,19 @@ wb_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, entry->inode = NULL; memset(&entry->d_stat, 0, sizeof(entry->d_stat)); - - inode_unref(inode); } } UNLOCK(&wb_inode->lock); + + if (inode) { + inode_unref(inode); + inode = NULL; + } } +unwind: wb_mark_readdirp_end(this, fd->inode); -unwind: frame->local = NULL; STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); return 0; @@ -2801,11 +2829,7 @@ wb_forget(xlator_t *this, inode_t *inode) if (!wb_inode) return 0; - GF_ASSERT(list_empty(&wb_inode->todo)); - GF_ASSERT(list_empty(&wb_inode->liability)); - GF_ASSERT(list_empty(&wb_inode->temptation)); - - GF_FREE(wb_inode); + wb_inode_destroy(wb_inode); return 0; } @@ -3148,6 +3172,14 @@ struct xlator_dumpops dumpops = { }; struct volume_options options[] = { + { + .key = {"write-behind"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable write-behind", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, {.key = {"flush-behind"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "on", @@ -3230,3 +3262,17 @@ struct volume_options options[] = { }, {.key = {NULL}}, }; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "write-behind", + .category = GF_MAINTAINED, +}; |
