diff options
Diffstat (limited to 'xlators/features')
121 files changed, 34069 insertions, 4641 deletions
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am index 303767d35..d2f5ef192 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -1,3 +1,4 @@ -SUBDIRS = locks trash quota read-only access-control #path-converter # filter +SUBDIRS = locks quota read-only mac-compat quiesce marker index \ + protect compress changelog gfid-access $(GLUPY_SUBDIR) qemu-block # trash path-converter # filter -CLEANFILES = +CLEANFILES = diff --git a/xlators/features/access-control/src/Makefile.am b/xlators/features/access-control/src/Makefile.am deleted file mode 100644 index 6ab8cc4ec..000000000 --- a/xlators/features/access-control/src/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -xlator_LTLIBRARIES = access-control.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -access_control_la_LDFLAGS = -module -avoidversion -access_control_la_SOURCES = access-control.c -access_control_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -noinst_HEADERS = access-control.h - -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)\ - -L$(xlatordir)/ - -CLEANFILES = diff --git a/xlators/features/access-control/src/access-control.c b/xlators/features/access-control/src/access-control.c deleted file mode 100644 index f90184d1a..000000000 --- a/xlators/features/access-control/src/access-control.c +++ /dev/null @@ -1,1844 +0,0 @@ -/* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. 
- - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "access-control.h" -#include "xlator.h" -#include "call-stub.h" -#include "defaults.h" -#include "iatt.h" - -/* Careful, this function erases the stub from frame->local. Dont call this if - * a subsequent callback requires retaining access to the stub. This should be - * called at the end of all access-control related operations, i.e. once the - * frame will be handed off to the actual fop and the next callback that will - * be called is the default callback. IOW, the function where call_resume is - * called. - * NOTE: this is required because FRAME_DESTROY tries to free frame->local if - * it finds it to be non-NULL. - */ -call_stub_t * -__get_frame_stub (call_frame_t *fr) -{ - call_stub_t *st = NULL; - - if (!fr) - return NULL; - - st = fr->local; - fr->local = NULL; - - return st; -} - - -int -ac_test_owner_access (struct iatt *ia, uid_t uid, int accesstest) -{ - int ret = -1; - - if (!ia) - return -1; - - /* First test permissions using the uid. */ - if (ia->ia_uid != uid) { - ret = -1; - goto out; - } - - /* At this point we know, the uid matches that of the stat structure, so - * if the caller does not care, we should return success. 
- */ - if (ac_test_dontcare (accesstest)) { - ret = 0; - goto out; - } - - if (ac_test_read (accesstest)) - ret = IA_PROT_RUSR (ia->ia_prot); - - if (ac_test_write (accesstest)) - ret = IA_PROT_WUSR (ia->ia_prot); - - if (ac_test_exec (accesstest)) - ret = IA_PROT_XUSR (ia->ia_prot); - - /* For failed access test for owner, we need to return EACCES */ - if (!ret) - ret = -1; - else - ret = 0; -out: - return ret; -} - - -int -ac_test_group_access (struct iatt *ia, gid_t gid, gid_t *auxgids, int auxcount, - int accesstest) -{ - int ret = -1; - int testgid = -1; - int x = 0; - - if (!ia) - return -1; - /* First, determine which gid to test against. This will be determined - * by first checking which of the gids given to us match the gid in the - * stat. If none match, then we go to checking with others as the user. - */ - - /* If we are only given the primary gid. Dont depend on @auxgids - * being NULL since I know users of this function can pass statically - * allocated arrays which cant be NULL and yet contain no valid gids. - */ - - if ((ia->ia_gid != gid) && (auxcount == 0)) { - ret = -1; - goto out; - } - - if (ia->ia_gid == gid) - testgid = gid; - else { - for (; x < auxcount; ++x) { - if (ia->ia_gid == auxgids[x]) { - testgid = ia->ia_gid; - break; - } - } - } - - /* None of the gids match with the gid in the stat. */ - if (testgid == -1) { - ret = -1; - goto out; - } - - /* At this point, at least one gid matches that in the stat, now we must - * check whether the caller is interested in the access check at all. 
- */ - if (ac_test_dontcare (accesstest)) { - ret = 0; - goto out; - } - - if (ac_test_read (accesstest)) - ret = IA_PROT_RGRP (ia->ia_prot); - - if (ac_test_write (accesstest)) - ret = IA_PROT_WGRP (ia->ia_prot); - - if (ac_test_exec (accesstest)) - ret = IA_PROT_XGRP (ia->ia_prot); - - if (!ret) - ret = -1; - else - ret = 0; - -out: - return ret; -} - - -int -ac_test_other_access (struct iatt *ia, int accesstest) -{ - int ret = 0; - - if (!ia) - return -1; - - if (ac_test_read (accesstest)) - ret = IA_PROT_ROTH (ia->ia_prot); - - if (ac_test_write (accesstest)) - ret = IA_PROT_WOTH (ia->ia_prot); - - if (ac_test_exec (accesstest)) - ret = IA_PROT_XOTH (ia->ia_prot); - - if (!ret) - ret = -1; - else - ret = 0; - - return ret; -} - - -/* Returns -1 on a failed access test with @operrno set to the relevant error - * number. - */ -int -ac_test_access (struct iatt *ia, uid_t uid, gid_t gid, gid_t *auxgids, - int auxcount, int accesstest, int testwho, int *operrno) -{ - int ret = -1; - - if ((!ia) || (!operrno)) - return -1; - - if ((uid == 0) && (gid == 0)) { - gf_log (ACTRL, GF_LOG_TRACE, "Root has access"); - return 0; - } - - if (ac_test_owner (testwho)) { - gf_log (ACTRL, GF_LOG_TRACE, "Testing owner access"); - ret = ac_test_owner_access (ia, uid, accesstest); - } - - if (ret == 0) { - gf_log (ACTRL, GF_LOG_TRACE, "Owner has access"); - goto out; - } - - if (ac_test_group (testwho)) { - gf_log (ACTRL, GF_LOG_TRACE, "Testing group access"); - ret = ac_test_group_access (ia, gid, auxgids, auxcount, - accesstest); - } - - if (ret == 0) { - gf_log (ACTRL, GF_LOG_TRACE, "Group has access"); - goto out; - } - - if (ac_test_other (testwho)) { - gf_log (ACTRL, GF_LOG_TRACE, "Testing other access"); - ret = ac_test_other_access (ia, accesstest); - } - - if (ret == 0) - gf_log (ACTRL, GF_LOG_TRACE, "Other has access"); -out: - if (ret == -1) { - gf_log (ACTRL, GF_LOG_TRACE, "No access allowed"); - *operrno = EPERM; - } - - return ret; -} - - -int -ac_loc_fill (loc_t *loc, 
inode_t *inode, inode_t *parent, char *path) -{ - int ret = -EFAULT; - - if (!loc) - return ret; - - if (inode) { - loc->inode = inode_ref (inode); - loc->ino = inode->ino; - } - - if (parent) - loc->parent = inode_ref (parent); - - loc->path = strdup (path); - if (!loc->path) { - gf_log (ACTRL, GF_LOG_ERROR, "strdup failed"); - goto loc_wipe; - } - - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; - else - goto loc_wipe; - - ret = 0; -loc_wipe: - if (ret < 0) - loc_wipe (loc); - - return ret; -} - - -int -ac_inode_loc_fill (inode_t *inode, loc_t *loc) -{ - char *resolvedpath = NULL; - inode_t *parent = NULL; - int ret = -EFAULT; - - if ((!inode) || (!loc)) - return ret; - - if ((inode) && (inode->ino == 1)) - goto ignore_parent; - - parent = inode_parent (inode, 0, NULL); - if (!parent) - goto err; - -ignore_parent: - ret = inode_path (inode, NULL, &resolvedpath); - if (ret < 0) - goto err; - - ret = ac_loc_fill (loc, inode, parent, resolvedpath); - if (ret < 0) - goto err; - -err: - if (parent) - inode_unref (parent); - - if (resolvedpath) - FREE (resolvedpath); - - return ret; -} - - -int -ac_parent_loc_fill (loc_t *parentloc, loc_t *childloc) -{ - if ((!parentloc) || (!childloc)) - return -1; - - return ac_inode_loc_fill (childloc->parent, parentloc); -} - - -int32_t -ac_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, - off_t offset) -{ - STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); - return 0; -} - - -int32_t -ac_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) - goto out; - - call_resume (stub); -out: - if 
(op_ret < 0) { - STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - - stub = fop_truncate_stub (frame, ac_truncate_resume, loc, offset); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - STACK_WIND (frame, ac_truncate_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc); - - ret = 0; -out: - if (ret < 0) - STACK_UNWIND_STRICT (truncate, frame, -1, -ret, NULL, NULL); - - return 0; -} - - -int32_t -ac_access_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) -{ - STACK_WIND (frame, default_access_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->access, loc, mask); - return 0; -} - - -int32_t -ac_access_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - int32_t mask = 0; - int acctest = 0; - - stub = __get_frame_stub (frame); - mask = stub->args.access.mask; - - /* If mask requests test for file existence then do not - * return a failure with ENOENT, instead return a failed - * access test. 
- */ - if (op_ret == -1) { - if (mask & F_OK) - op_errno = EACCES; - else - op_errno = errno; - - goto out; - } - - if (R_OK & mask) - acctest |= ACCTEST_READ; - else if (W_OK & mask) - acctest |= ACCTEST_WRITE; - else if (X_OK & mask) - acctest |= ACCTEST_EXEC; - else - acctest = 0; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - acctest, ACCTEST_ANY, &op_errno); - if (op_ret == -1) - goto out; - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (access, frame, -1, op_errno); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - - stub = fop_access_stub (frame, ac_access_resume, loc, mask); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - STACK_WIND (frame, ac_access_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc); - ret = 0; - -out: - if (ret < 0) - STACK_UNWIND_STRICT (access, frame, -1, -ret); - - return 0; -} - - -int32_t -ac_readlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, - size_t size) -{ - STACK_WIND (frame, default_readlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, loc, size); - return 0; -} - - -int32_t -ac_readlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_READ, ACCTEST_ANY, &op_errno); - if (op_ret == -1) - goto out; - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL); - if (stub) - call_stub_destroy 
(stub); - } - - return 0; -} - - -int32_t -ac_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - - stub = fop_readlink_stub (frame, ac_readlink_resume, loc, size); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - STACK_WIND (frame, ac_readlink_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc); - ret = 0; - -out: - if (ret < 0) - STACK_UNWIND_STRICT (readlink, frame, -1, -ret, NULL, NULL); - - return 0; -} - - -int32_t -ac_mknod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev) -{ - STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev); - return 0; -} - - -int32_t -ac_mknod_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL, - NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - loc_t parentloc = {0, }; - - stub = fop_mknod_stub (frame, ac_mknod_resume, loc, mode, rdev); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - ret = ac_parent_loc_fill (&parentloc, loc); - if (ret < 0) - goto out; - - STACK_WIND (frame, 
ac_mknod_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - ret = 0; - -out: - if (ret < 0) { - /* Erase any stored frame before unwinding. */ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (mknod, frame, -1, -ret, NULL, NULL, NULL, - NULL); - } - - return 0; -} - - -int32_t -ac_mkdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode) -{ - STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, loc, mode); - return 0; -} - - -int32_t -ac_mkdir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - /* On a failed write test on parent dir, we need to return - * EACCES, not EPERM that is returned by default by - * ac_test_access. 
- */ - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL, - NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - loc_t parentloc = {0, }; - - stub = fop_mkdir_stub (frame, ac_mkdir_resume, loc, mode); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - ret = ac_parent_loc_fill (&parentloc, loc); - if (ret < 0) - goto out; - - STACK_WIND (frame, ac_mkdir_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - ret = 0; - -out: - if (ret < 0) { - /* Erase the stored stub before unwinding. */ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (mkdir, frame, -1, -ret, NULL, NULL, NULL, - NULL); - } - - return 0; -} - - -int32_t -ac_unlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc); - return 0; -} - - -int32_t -ac_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - call_stub_t 
*stub = NULL; - int ret = -EFAULT; - loc_t parentloc = {0, }; - - stub = fop_unlink_stub (frame, ac_unlink_resume, loc); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - ret = ac_parent_loc_fill (&parentloc, loc); - if (ret < 0) - goto out; - - STACK_WIND (frame, ac_unlink_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - ret = 0; - -out: - if (ret < 0) { - /* Erase the stored stub before unwinding. */ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (unlink, frame, -1, -ret, NULL, NULL); - } - - return 0; -} - - -int32_t -ac_rmdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - STACK_WIND (frame, default_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc); - return 0; -} - - -int32_t -ac_rmdir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - loc_t parentloc = {0, }; - - stub = fop_rmdir_stub (frame, ac_rmdir_resume, loc); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - ret = ac_parent_loc_fill (&parentloc, loc); - if (ret < 0) - goto out; - - 
STACK_WIND (frame, ac_rmdir_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - ret = 0; - -out: - if (ret < 0) { - /* Erase the stored stub before unwinding. */ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (rmdir, frame, -1, -ret, NULL, NULL); - } - - return 0; -} - - -int32_t -ac_symlink_resume (call_frame_t *frame, xlator_t *this, const char *linkname, - loc_t *loc) -{ - STACK_WIND (frame, default_symlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, linkname, loc); - return 0; -} - - -int32_t -ac_symlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL, - NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_symlink (call_frame_t *frame, xlator_t *this, const char *linkname, - loc_t *loc) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - loc_t parentloc = {0, }; - - stub = fop_symlink_stub (frame, ac_symlink_resume, linkname, loc); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - ret = ac_parent_loc_fill (&parentloc, loc); - if (ret < 0) - goto out; - - STACK_WIND (frame, ac_symlink_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - ret = 0; - -out: - if (ret < 0) { - /* Erase the stored stub before unwinding. 
*/ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (symlink, frame, -1, -ret, NULL, NULL, NULL, - NULL); - } - - return 0; -} - - -int32_t -ac_rename_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc) -{ - STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc); - return 0; -} - - -int32_t -ac_rename_dst_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, - frame->root->gid, frame->root->groups, - frame->root->ngrps, ACCTEST_WRITE, - ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_rename_src_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - loc_t parentloc = {0, }; - - stub = frame->local; - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, - frame->root->gid, frame->root->groups, - frame->root->ngrps, ACCTEST_WRITE, - ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - op_ret = ac_parent_loc_fill (&parentloc, &stub->args.rename.new); - if (op_ret == -1) { - op_errno = -EFAULT; - goto out; - } - - STACK_WIND (frame, ac_rename_dst_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - -out: - if (op_ret < 0) { - /* Erase the stored stub before unwinding. 
*/ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL); - } - - return 0; -} - - -int32_t -ac_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - loc_t parentloc = {0, }; - - stub = fop_rename_stub (frame, ac_rename_resume, oldloc, newloc); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - ret = ac_parent_loc_fill (&parentloc, oldloc); - if (ret < 0) - goto out; - - STACK_WIND (frame, ac_rename_src_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - ret = 0; - -out: - if (ret < 0) { - /* Erase the stored stub before unwinding. */ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (rename, frame, -1, -ret, NULL, NULL, NULL, - NULL, NULL); - } - - return 0; -} - - -int32_t -ac_link_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc) -{ - STACK_WIND (frame, default_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc); - return 0; -} - - -int32_t -ac_link_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - /* By default ac_test_access sets the op_errno to EPERM - * but in the case of link, we need to return EACCES to meet - * posix requirements when a write permission is not available - * for the new directory. 
- */ - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL, - NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - loc_t parentloc = {0, }; - - stub = fop_link_stub (frame, ac_link_resume, oldloc, newloc); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - ret = ac_parent_loc_fill (&parentloc, newloc); - if (ret < 0) - goto out; - - STACK_WIND (frame, ac_link_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - ret = 0; - -out: - if (ret < 0) { - /* Erase the stored stub before unwinding. */ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (link, frame, -1, -ret, NULL, NULL, NULL, - NULL); - } - - return 0; -} - - -int32_t -ac_create_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, mode_t mode, fd_t *fd) -{ - STACK_WIND (frame, default_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, fd); - return 0; -} - - -int32_t -ac_create_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - 
-int32_t -ac_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - loc_t parentloc = {0, }; - - stub = fop_create_stub (frame, ac_create_resume, loc, flags, mode, fd); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - ret = ac_parent_loc_fill (&parentloc, loc); - if (ret < 0) - goto out; - - STACK_WIND (frame, ac_create_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - ret = 0; - -out: - if (ret < 0) { - /* Erase the stored stub before unwinding. */ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (create, frame, -1, -ret, NULL, NULL, NULL, - NULL, NULL); - } - - return 0; -} - - -int32_t -ac_open_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) -{ - STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags); - return 0; -} - - -int32_t -ac_open_create_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int -ac_open_create (call_stub_t *stub) -{ - int ret = -EFAULT; - loc_t parentloc = {0, }; - xlator_t *this = NULL; - - if (!stub) - return ret; - - ret = ac_parent_loc_fill (&parentloc, &stub->args.open.loc); - if 
(ret < 0) - goto out; - - this = stub->frame->this; - STACK_WIND (stub->frame, ac_open_create_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &parentloc); - loc_wipe (&parentloc); - ret = 0; - -out: - return ret; -} - - -int32_t -ac_open_only_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - int acctest = 0; - int32_t flags = 0; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - flags = stub->args.open.flags; - /* The permissions we test for depend on how the open needs to be - * performed. */ - if ((flags & O_ACCMODE) == O_RDONLY) - acctest = ACCTEST_READ; - else if (((flags & O_ACCMODE) == O_RDWR) || - ((flags & O_ACCMODE) == O_WRONLY)) - acctest = ACCTEST_WRITE; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - acctest, ACCTEST_ANY, &op_errno); - if (op_ret == -1) - goto out; - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int -ac_open_only (call_stub_t *stub) -{ - int ret = -EFAULT; - xlator_t *this = NULL; - - if (!stub) - return ret; - - this = stub->frame->this; - STACK_WIND (stub->frame, ac_open_only_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, &stub->args.open.loc); - return 0; -} - -int32_t -ac_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - - stub = fop_open_stub (frame, ac_open_resume, loc, flags, fd, wbflags); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - /* If we are not supposed to create the file then there is no need to - * check the parent dir permissions. 
*/ - if (!(flags & O_CREAT)) - ret = ac_open_create (stub); - else - ret = ac_open_only (stub); - -out: - if (ret < 0) { - /* Erase the stored stub before unwinding. */ - stub = __get_frame_stub (frame); - if (stub) - call_stub_destroy (stub); - STACK_UNWIND_STRICT (open, frame, -1, -ret, NULL); - } - - return 0; -} - - -int32_t -ac_readv_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) -{ - STACK_WIND (frame, default_readv_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readv, fd, size, offset); - return 0; -} - - -int32_t -ac_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_READ, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, - NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - - stub = fop_readv_stub (frame, ac_readv_resume, fd, size, offset); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - STACK_WIND (frame, ac_readv_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - ret = 0; - -out: - if (ret < 0) - STACK_UNWIND_STRICT (readv, frame, -1, -ret, NULL, 0, NULL, - NULL); - - return 0; -} - - -int32_t -ac_writev_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) -{ - STACK_WIND (frame, default_writev_cbk, 
FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, fd, vector, count, offset, - iobref); - return 0; -} - - -int32_t -ac_writev_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_WRITE, ACCTEST_ANY, &op_errno); - if (op_ret == -1) { - op_errno = EACCES; - goto out; - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t offset, struct iobref *iobref) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - - stub = fop_writev_stub (frame, ac_writev_resume, fd, vector, count, - offset, iobref); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - STACK_WIND (frame, ac_writev_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - ret = 0; - -out: - if (ret < 0) - STACK_UNWIND_STRICT (writev, frame, -1, -ret, NULL, NULL); - - return 0; -} - - -int32_t -ac_opendir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) -{ - STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, loc, fd); - return 0; -} - - -int32_t -ac_opendir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_READ, 
ACCTEST_ANY, &op_errno); - if (op_ret == -1) - goto out; - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - -int32_t -ac_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - - stub = fop_opendir_stub (frame, ac_opendir_resume, loc, fd); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - STACK_WIND (frame, ac_opendir_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc); - ret = 0; - -out: - if (ret < 0) - STACK_UNWIND_STRICT (opendir, frame, -1, -ret, NULL); - - return 0; -} - - -int32_t -ac_setattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *buf, int32_t valid) -{ - STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, buf, valid); - return 0; -} - - -int32_t -ac_setattr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - int32_t valid = 0; - struct iatt *setbuf = NULL; - - stub = __get_frame_stub (frame); - if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_DONTCARE, ACCTEST_OWNER, - &op_errno); - if (op_ret == -1) - goto out; - - valid = stub->args.setattr.valid; - setbuf = &stub->args.setattr.stbuf; - if (gf_attr_uid_set (valid) || gf_attr_gid_set (valid)) { - /* chown returns EPERM if the operation would change the - * ownership, but the effective user ID is not the - * super-user and the process is not an owner of the file. 
- * Ref: posix-testsuite/chown/07.t - */ - if ((frame->root->uid != 0) && (gf_attr_uid_set (valid))) { - if (buf->ia_uid != setbuf->ia_uid) { - op_ret = -1; - op_errno = EPERM; - goto out; - } - } - - /* non-super-user can modify file group if he is owner of a - * file and gid he is setting is in his groups list. - * Ref: posix-testsuite/chown/00.t - */ - if ((frame->root->uid != 0) && (gf_attr_gid_set (valid))) { - if (frame->root->uid != buf->ia_uid) { - op_ret = -1; - op_errno = EPERM; - goto out; - } - - op_ret = ac_test_access (setbuf, 0, frame->root->gid, - frame->root->groups, - frame->root->ngrps, - ACCTEST_DONTCARE, - ACCTEST_GROUP, &op_errno); - if (op_ret == -1) - goto out; - } - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - -int32_t -ac_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf, - int32_t valid) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - - stub = fop_setattr_stub (frame, ac_setattr_resume, loc, buf, valid); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - STACK_WIND (frame, ac_setattr_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc); - ret = 0; - -out: - if (ret < 0) - STACK_UNWIND_STRICT (setattr, frame, -1, -ret, NULL, NULL); - - return 0; -} - - -int32_t -ac_fsetattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *buf, int32_t valid) -{ - STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, buf, valid); - return 0; -} - - -int32_t -ac_fsetattr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - call_stub_t *stub = NULL; - int32_t valid = 0; - struct iatt *setbuf = NULL; - - stub = __get_frame_stub (frame); - 
if (op_ret == -1) - goto out; - - op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid, - frame->root->groups, frame->root->ngrps, - ACCTEST_DONTCARE, ACCTEST_OWNER, - &op_errno); - if (op_ret == -1) - goto out; - - valid = stub->args.fsetattr.valid; - setbuf = &stub->args.fsetattr.stbuf; - if (gf_attr_uid_set (valid) && gf_attr_gid_set (valid)) { - /* chown returns EPERM if the operation would change the - * ownership, but the effective user ID is not the - * super-user and the process is not an owner of the file. - * Ref: posix-testsuite/chown/07.t - */ - if ((frame->root->uid != 0) && (gf_attr_uid_set (valid))) { - if (buf->ia_uid != setbuf->ia_uid) { - op_ret = -1; - op_errno = EPERM; - goto out; - } - } - - /* non-super-user can modify file group if he is owner of a - * file and gid he is setting is in his groups list. - * Ref: posix-testsuite/chown/00.t - */ - if ((frame->root->uid != 0) && (gf_attr_gid_set (valid))) { - if (frame->root->uid != buf->ia_uid) { - op_ret = -1; - op_errno = EPERM; - goto out; - } - - op_ret = ac_test_access (buf, 0, frame->root->gid, - frame->root->groups, - frame->root->ngrps, - ACCTEST_DONTCARE, - ACCTEST_GROUP, &op_errno); - if (op_ret == -1) - goto out; - } - } - - call_resume (stub); -out: - if (op_ret < 0) { - STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL); - if (stub) - call_stub_destroy (stub); - } - - return 0; -} - - -int32_t -ac_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf, - int32_t valid) -{ - call_stub_t *stub = NULL; - int ret = -EFAULT; - - stub = fop_fsetattr_stub (frame, ac_fsetattr_resume, fd, buf, valid); - if (!stub) { - gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: " - "(out of memory)"); - ret = -ENOMEM; - goto out; - } - - frame->local = stub; - STACK_WIND (frame, ac_fsetattr_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - ret = 0; - -out: - if (ret < 0) - STACK_UNWIND_STRICT (fsetattr, frame, -1, -ret, NULL, 
NULL); - - return 0; -} - - -struct xlator_fops fops = { - .truncate = ac_truncate, - .access = ac_access, - .readlink = ac_readlink, - .mknod = ac_mknod, - .mkdir = ac_mkdir, - .unlink = ac_unlink, - .rmdir = ac_rmdir, - .symlink = ac_symlink, - .rename = ac_rename, - .link = ac_link, - .create = ac_create, - .open = ac_open, - .readv = ac_readv, - .writev = ac_writev, - .opendir = ac_opendir, - .setattr = ac_setattr, - .fsetattr = ac_fsetattr, -}; - -int -init (xlator_t *this) -{ - return 0; -} - -void -fini (xlator_t *this) -{ - return; -} - -struct xlator_mops mops = { -}; - -struct xlator_cbks cbks = { -}; diff --git a/xlators/features/access-control/src/access-control.h b/xlators/features/access-control/src/access-control.h deleted file mode 100644 index bfc0d7752..000000000 --- a/xlators/features/access-control/src/access-control.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - - -#ifndef __ACCESS_CONTROL_H_ -#define __ACCESS_CONTROL_H_ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#define ACTRL "access-control" -#define ACCTEST_READ 0x1 -#define ACCTEST_WRITE 0x2 -#define ACCTEST_EXEC 0x4 -#define ACCTEST_DONTCARE 0x8 - -/* Note if the caller is only interested in ownership test i.e. 
one of the below -+ * in combination with GF_ACCTEST_DONTCARE, then only one type of user's owner -+ * ship can be tested with one call to gf_test_access, i.e. we can only -+ * check of either owner and group, if both need to be tested for a specific -+ * (uid, gid) pair then two calls will be needed. -+ */ -#define ACCTEST_OWNER 0x1 -#define ACCTEST_GROUP 0x2 -#define ACCTEST_OTHER 0x4 - -/* Signifies any user, as long as we get access. */ -#define ACCTEST_ANY (ACCTEST_OWNER | ACCTEST_GROUP | ACCTEST_OTHER) - -#define ac_test_owner(acc) ((acc) & ACCTEST_OWNER) -#define ac_test_group(acc) ((acc) & ACCTEST_GROUP) -#define ac_test_other(acc) ((acc) & ACCTEST_OTHER) -#define ac_test_dontcare(acc) ((acc) & ACCTEST_DONTCARE) -#define ac_test_read(acc) ((acc) & ACCTEST_READ) -#define ac_test_write(acc) ((acc) & ACCTEST_WRITE) -#define ac_test_exec(acc) ((acc) & ACCTEST_EXEC) -#endif diff --git a/xlators/features/changelog/Makefile.am b/xlators/features/changelog/Makefile.am new file mode 100644 index 000000000..153bb6850 --- /dev/null +++ b/xlators/features/changelog/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src lib + +CLEANFILES = diff --git a/xlators/features/access-control/Makefile.am b/xlators/features/changelog/lib/Makefile.am index a985f42a8..a985f42a8 100644 --- a/xlators/features/access-control/Makefile.am +++ b/xlators/features/changelog/lib/Makefile.am diff --git a/xlators/features/changelog/lib/examples/c/get-changes.c b/xlators/features/changelog/lib/examples/c/get-changes.c new file mode 100644 index 000000000..14562585a --- /dev/null +++ b/xlators/features/changelog/lib/examples/c/get-changes.c @@ -0,0 +1,87 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +/** + * get set of new changes every 10 seconds (just print the file names) + * + * Compile it using: + * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \ + * `pkg-config --libs libgfchangelog` + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/un.h> +#include <limits.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <errno.h> + +#include "changelog.h" + +#define handle_error(fn) \ + printf ("%s (reason: %s)\n", fn, strerror (errno)) + +int +main (int argc, char ** argv) +{ + int i = 0; + int ret = 0; + ssize_t nr_changes = 0; + ssize_t changes = 0; + char fbuf[PATH_MAX] = {0,}; + + /* get changes for brick "/home/vshankar/export/yow/yow-1" */ + ret = gf_changelog_register ("/home/vshankar/export/yow/yow-1", + "/tmp/scratch", "/tmp/change.log", 9, 5); + if (ret) { + handle_error ("register failed"); + goto out; + } + + while (1) { + i = 0; + nr_changes = gf_changelog_scan (); + if (nr_changes < 0) { + handle_error ("scan(): "); + break; + } + + if (nr_changes == 0) + goto next; + + printf ("Got %ld changelog files\n", nr_changes); + + while ( (changes = + gf_changelog_next_change (fbuf, PATH_MAX)) > 0) { + printf ("changelog file [%d]: %s\n", ++i, fbuf); + + /* process changelog */ + /* ... */ + /* ... */ + /* ... 
*/ + /* done processing */ + + ret = gf_changelog_done (fbuf); + if (ret) + handle_error ("gf_changelog_done"); + } + + if (changes == -1) + handle_error ("gf_changelog_next_change"); + + next: + sleep (10); + } + + out: + return ret; +} diff --git a/xlators/features/changelog/lib/examples/python/changes.py b/xlators/features/changelog/lib/examples/python/changes.py new file mode 100644 index 000000000..d21db8eab --- /dev/null +++ b/xlators/features/changelog/lib/examples/python/changes.py @@ -0,0 +1,32 @@ +#!/usr/bin/python + +import os +import sys +import time +import libgfchangelog + +cl = libgfchangelog.Changes() + +def get_changes(brick, scratch_dir, log_file, log_level, interval): + change_list = [] + try: + cl.cl_register(brick, scratch_dir, log_file, log_level) + while True: + cl.cl_scan() + change_list = cl.cl_getchanges() + if change_list: + print change_list + for change in change_list: + print('done with %s' % (change)) + cl.cl_done(change) + time.sleep(interval) + except OSError: + ex = sys.exc_info()[1] + print ex + +if __name__ == '__main__': + if len(sys.argv) != 5: + print("usage: %s <brick> <scratch-dir> <log-file> <fetch-interval>" + % (sys.argv[0])) + sys.exit(1) + get_changes(sys.argv[1], sys.argv[2], sys.argv[3], 9, int(sys.argv[4])) diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py new file mode 100644 index 000000000..68ec3baf1 --- /dev/null +++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py @@ -0,0 +1,64 @@ +import os +from ctypes import * +from ctypes.util import find_library + +class Changes(object): + libgfc = CDLL(find_library("gfchangelog"), use_errno=True) + + @classmethod + def geterrno(cls): + return get_errno() + + @classmethod + def raise_oserr(cls): + errn = cls.geterrno() + raise OSError(errn, os.strerror(errn)) + + @classmethod + def _get_api(cls, call): + return getattr(cls.libgfc, call) + + @classmethod + def 
cl_register(cls, brick, path, log_file, log_level, retries = 0): + ret = cls._get_api('gf_changelog_register')(brick, path, + log_file, log_level, retries) + if ret == -1: + cls.raise_oserr() + + @classmethod + def cl_scan(cls): + ret = cls._get_api('gf_changelog_scan')() + if ret == -1: + cls.raise_oserr() + + @classmethod + def cl_startfresh(cls): + ret = cls._get_api('gf_changelog_start_fresh')() + if ret == -1: + cls.raise_oserr() + + @classmethod + def cl_getchanges(cls): + """ remove hardcoding for path name length """ + def clsort(f): + return f.split('.')[-1] + changes = [] + buf = create_string_buffer('\0', 4096) + call = cls._get_api('gf_changelog_next_change') + + while True: + ret = call(buf, 4096) + if ret in (0, -1): + break; + changes.append(buf.raw[:ret-1]) + if ret == -1: + cls.raise_oserr() + # cleanup tracker + cls.cl_startfresh() + return sorted(changes, key=clsort) + + @classmethod + def cl_done(cls, clfile): + ret = cls._get_api('gf_changelog_done')(clfile) + if ret == -1: + cls.raise_oserr() diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am new file mode 100644 index 000000000..fbaaea628 --- /dev/null +++ b/xlators/features/changelog/lib/src/Makefile.am @@ -0,0 +1,37 @@ +libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \ + -DDATADIR=\"$(localstatedir)\" + +libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \ + -I../../../src/ -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/xlators/features/changelog/src \ + -DDATADIR=\"$(localstatedir)\" + +libgfchangelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(GF_GLUSTERFS_LIBS) + +libgfchangelog_la_LDFLAGS = $(GF_LDFLAGS) + +libgfchangelogdir = $(includedir)/glusterfs/gfchangelog +lib_LTLIBRARIES = libgfchangelog.la + +CONTRIB_BUILDDIR = $(top_builddir)/contrib + +libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-process.c \ + gf-changelog-helpers.c 
$(CONTRIBDIR)/uuid/clear.c \ + $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c \ + $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/parse.c \ + $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c \ + $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c \ + $(CONTRIBDIR)/uuid/unpack.c + +noinst_HEADERS = gf-changelog-helpers.h $(CONTRIBDIR)/uuid/uuidd.h \ + $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \ + $(CONTRIB_BUILDDIR)/uuid/uuid_types.h + +libgfchangelog_HEADERS = changelog.h + +CLEANFILES = +CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h + +$(top_builddir)/libglusterfs/src/libglusterfs.la: + $(MAKE) -C $(top_builddir)/libglusterfs/src/ all diff --git a/xlators/features/changelog/lib/src/changelog.h b/xlators/features/changelog/lib/src/changelog.h new file mode 100644 index 000000000..5cddfb583 --- /dev/null +++ b/xlators/features/changelog/lib/src/changelog.h @@ -0,0 +1,31 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GF_CHANGELOG_H +#define _GF_CHANGELOG_H + +/* API set */ + +int +gf_changelog_register (char *brick_path, char *scratch_dir, + char *log_file, int log_levl, int max_reconnects); +ssize_t +gf_changelog_scan (); + +int +gf_changelog_start_fresh (); + +ssize_t +gf_changelog_next_change (char *bufptr, size_t maxlen); + +int +gf_changelog_done (char *file); + +#endif diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c new file mode 100644 index 000000000..1eef8bf04 --- /dev/null +++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c @@ -0,0 +1,180 @@ +/* + Copyright (c) 2013 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "changelog-mem-types.h" +#include "gf-changelog-helpers.h" + +ssize_t gf_changelog_read_path (int fd, char *buffer, size_t bufsize) +{ + return read (fd, buffer, bufsize); +} + +size_t +gf_changelog_write (int fd, char *buffer, size_t len) +{ + ssize_t size = 0; + size_t writen = 0; + + while (writen < len) { + size = write (fd, + buffer + writen, len - writen); + if (size <= 0) + break; + + writen += size; + } + + return writen; +} + +void +gf_rfc3986_encode (unsigned char *s, char *enc, char *estr) +{ + for (; *s; s++) { + if (estr[*s]) + sprintf(enc, "%c", estr[*s]); + else + sprintf(enc, "%%%02X", *s); + while (*++enc); + } +} + +/** + * thread safe version of readline with buffering + * (taken from Unix Network Programming Volume I, W.R. Stevens) + * + * This is favoured over fgets() as we'd need to ftruncate() + * (see gf_changelog_scan() API) to record new changelog files. + * stream open functions does have a truncate like api (although + * that can be done via @fflush(fp), @ftruncate(fd) and @fseek(fp), + * but this involves mixing POSIX file descriptors and stream FILE *). + * + * NOTE: This implmentation still does work with more than one fd's + * used to perform gf_readline(). For this very reason it's not + * made a part of libglusterfs. 
+ */ + +static pthread_key_t rl_key; +static pthread_once_t rl_once = PTHREAD_ONCE_INIT; + +static void +readline_destructor (void *ptr) +{ + GF_FREE (ptr); +} + +static void +readline_once (void) +{ + pthread_key_create (&rl_key, readline_destructor); +} + +static ssize_t +my_read (read_line_t *tsd, int fd, char *ptr) +{ + if (tsd->rl_cnt <= 0) { + if ( (tsd->rl_cnt = read (fd, tsd->rl_buf, MAXLINE)) < 0 ) + return -1; + else if (tsd->rl_cnt == 0) + return 0; + tsd->rl_bufptr = tsd->rl_buf; + } + + tsd->rl_cnt--; + *ptr = *tsd->rl_bufptr++; + return 1; +} + +static int +gf_readline_init_once (read_line_t **tsd) +{ + if (pthread_once (&rl_once, readline_once) != 0) + return -1; + + *tsd = pthread_getspecific (rl_key); + if (*tsd) + goto out; + + *tsd = GF_CALLOC (1, sizeof (**tsd), + gf_changelog_mt_libgfchangelog_rl_t); + if (!*tsd) + return -1; + + if (pthread_setspecific (rl_key, *tsd) != 0) + return -1; + + out: + return 0; +} + +ssize_t +gf_readline (int fd, void *vptr, size_t maxlen) +{ + size_t n = 0; + size_t rc = 0; + char c = ' '; + char *ptr = NULL; + read_line_t *tsd = NULL; + + if (gf_readline_init_once (&tsd)) + return -1; + + ptr = vptr; + for (n = 1; n < maxlen; n++) { + if ( (rc = my_read (tsd, fd, &c)) == 1 ) { + *ptr++ = c; + if (c == '\n') + break; + } else if (rc == 0) { + *ptr = '\0'; + return (n - 1); + } else + return -1; + } + + *ptr = '\0'; + return n; + +} + +off_t +gf_lseek (int fd, off_t offset, int whence) +{ + off_t off = 0; + read_line_t *tsd = NULL; + + if (gf_readline_init_once (&tsd)) + return -1; + + if ( (off = lseek (fd, offset, whence)) == -1) + return -1; + + tsd->rl_cnt = 0; + tsd->rl_bufptr = tsd->rl_buf; + + return off; +} + +int +gf_ftruncate (int fd, off_t length) +{ + read_line_t *tsd = NULL; + + if (gf_readline_init_once (&tsd)) + return -1; + + if (ftruncate (fd, 0)) + return -1; + + tsd->rl_cnt = 0; + tsd->rl_bufptr = tsd->rl_buf; + + return 0; +} diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h 
b/xlators/features/changelog/lib/src/gf-changelog-helpers.h new file mode 100644 index 000000000..3aa6ed7b8 --- /dev/null +++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h @@ -0,0 +1,97 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GF_CHANGELOG_HELPERS_H +#define _GF_CHANGELOG_HELPERS_H + +#include <unistd.h> +#include <dirent.h> +#include <limits.h> +#include <pthread.h> + +#include <xlator.h> + +#define GF_CHANGELOG_TRACKER "tracker" + +#define GF_CHANGELOG_CURRENT_DIR ".current" +#define GF_CHANGELOG_PROCESSED_DIR ".processed" +#define GF_CHANGELOG_PROCESSING_DIR ".processing" + +#ifndef MAXLINE +#define MAXLINE 4096 +#endif + +#define GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, len) do { \ + memcpy (ascii + off, ptr, len); \ + off += len; \ + } while (0) + +typedef struct read_line { + int rl_cnt; + char *rl_bufptr; + char rl_buf[MAXLINE]; +} read_line_t; + +typedef struct gf_changelog { + xlator_t *this; + + /* 'processing' directory stream */ + DIR *gfc_dir; + + /* fd to the tracker file */ + int gfc_fd; + + /* connection retries */ + int gfc_connretries; + + char gfc_sockpath[PATH_MAX]; + + char gfc_brickpath[PATH_MAX]; + + /* socket for recieving notifications */ + int gfc_sockfd; + + char *gfc_working_dir; + + /* RFC 3986 string encoding */ + char rfc3986[256]; + + char gfc_current_dir[PATH_MAX]; + char gfc_processed_dir[PATH_MAX]; + char gfc_processing_dir[PATH_MAX]; + + pthread_t gfc_changelog_processor; +} gf_changelog_t; + +int +gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc); + +void * +gf_changelog_process (void *data); + +ssize_t +gf_changelog_read_path (int fd, char *buffer, size_t 
bufsize); + +void +gf_rfc3986_encode (unsigned char *s, char *enc, char *estr); + +size_t +gf_changelog_write (int fd, char *buffer, size_t len); + +ssize_t +gf_readline (int fd, void *vptr, size_t maxlen); + +int +gf_ftruncate (int fd, off_t length); + +off_t +gf_lseek (int fd, off_t offset, int whence); + +#endif diff --git a/xlators/features/changelog/lib/src/gf-changelog-process.c b/xlators/features/changelog/lib/src/gf-changelog-process.c new file mode 100644 index 000000000..df7204931 --- /dev/null +++ b/xlators/features/changelog/lib/src/gf-changelog-process.c @@ -0,0 +1,571 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <unistd.h> +#include <pthread.h> + +#include "uuid.h" +#include "globals.h" +#include "glusterfs.h" + +#include "gf-changelog-helpers.h" + +/* from the changelog translator */ +#include "changelog-misc.h" + +extern int byebye; + +/** + * number of gfid records after fop number + */ +int nr_gfids[] = { + [GF_FOP_MKNOD] = 1, + [GF_FOP_MKDIR] = 1, + [GF_FOP_UNLINK] = 1, + [GF_FOP_RMDIR] = 1, + [GF_FOP_SYMLINK] = 1, + [GF_FOP_RENAME] = 2, + [GF_FOP_LINK] = 1, + [GF_FOP_CREATE] = 1, +}; + +static char * +binary_to_ascii (uuid_t uuid) +{ + return uuid_utoa (uuid); +} + +static char * +conv_noop (char *ptr) { return ptr; } + +#define VERIFY_SEPARATOR(ptr, plen, perr) \ + { \ + if (*(ptr + plen) != '\0') { \ + perr = 1; \ + break; \ + } \ + } + +#define MOVER_MOVE(mover, nleft, bytes) \ + { \ + mover += bytes; \ + nleft -= bytes; \ + } \ + +#define PARSE_GFID(mov, ptr, le, fn, perr) \ + { \ + VERIFY_SEPARATOR (mov, le, perr); \ + ptr = fn (mov); \ + if (!ptr) { \ + perr = 1; \ + break; \ + } \ + } + +#define 
FILL_AND_MOVE(pt, buf, of, mo, nl, le) \ + { \ + GF_CHANGELOG_FILL_BUFFER (pt, buf, of, strlen (pt)); \ + MOVER_MOVE (mo, nl, le); \ + } + + +#define PARSE_GFID_MOVE(ptr, uuid, mover, nleft, perr) \ + { \ + memcpy (uuid, mover, sizeof (uuid_t)); \ + ptr = binary_to_ascii (uuid); \ + if (!ptr) { \ + perr = 1; \ + break; \ + } \ + MOVER_MOVE (mover, nleft, sizeof (uuid_t)); \ + } \ + +#define LINE_BUFSIZE 3*PATH_MAX /* enough buffer for extra chars too */ + +/** + * using mmap() makes parsing easy. fgets() cannot be used here as + * the binary gfid could contain a line-feed (0x0A), in that case fgets() + * would read an incomplete line and parsing would fail. using POSIX fds + * would result is additional code to maintain state in case of partial + * reads of data (where multiple entries do not fit extirely in the buffer). + * + * mmap() gives the flexibility of pointing to an offset in the file + * without us worrying about reading it in memory (VM does that for us for + * free). + */ + +static int +gf_changelog_parse_binary (xlator_t *this, + gf_changelog_t *gfc, int from_fd, int to_fd, + size_t start_offset, struct stat *stbuf) + +{ + int ret = -1; + off_t off = 0; + off_t nleft = 0; + uuid_t uuid = {0,}; + char *ptr = NULL; + char *bname_start = NULL; + char *bname_end = NULL; + char *mover = NULL; + char *start = NULL; + char current_mover = ' '; + size_t blen = 0; + int parse_err = 0; + char ascii[LINE_BUFSIZE] = {0,}; + + nleft = stbuf->st_size; + + start = (char *) mmap (NULL, nleft, + PROT_READ, MAP_PRIVATE, from_fd, 0); + if (!start) { + gf_log (this->name, GF_LOG_ERROR, + "mmap() error (reason: %s)", strerror (errno)); + goto out; + } + + mover = start; + + MOVER_MOVE (mover, nleft, start_offset); + + while (nleft > 0) { + + off = blen = 0; + ptr = bname_start = bname_end = NULL; + + current_mover = *mover; + + switch (current_mover) { + case 'D': + case 'M': + MOVER_MOVE (mover, nleft, 1); + PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err); + + break; 
+ + case 'E': + MOVER_MOVE (mover, nleft, 1); + PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err); + + bname_start = mover; + if ( (bname_end = strchr (mover, '\n')) == NULL ) { + parse_err = 1; + break; + } + + blen = bname_end - bname_start; + MOVER_MOVE (mover, nleft, blen); + + break; + + default: + parse_err = 1; + } + + if (parse_err) + break; + + GF_CHANGELOG_FILL_BUFFER (¤t_mover, ascii, off, 1); + GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1); + GF_CHANGELOG_FILL_BUFFER (ptr, ascii, off, strlen (ptr)); + if (blen) + GF_CHANGELOG_FILL_BUFFER (bname_start, + ascii, off, blen); + GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1); + + if (gf_changelog_write (to_fd, ascii, off) != off) { + gf_log (this->name, GF_LOG_ERROR, + "processing binary changelog failed due to " + " error in writing ascii change (reason: %s)", + strerror (errno)); + break; + } + + MOVER_MOVE (mover, nleft, 1); + } + + if ( (nleft == 0) && (!parse_err)) + ret = 0; + + if (munmap (start, stbuf->st_size)) + gf_log (this->name, GF_LOG_ERROR, + "munmap() error (reason: %s)", strerror (errno)); + out: + return ret; +} + +/** + * ascii decoder: + * - separate out one entry from another + * - use fop name rather than fop number + */ +static int +gf_changelog_parse_ascii (xlator_t *this, + gf_changelog_t *gfc, int from_fd, int to_fd, + size_t start_offset, struct stat *stbuf) +{ + int ng = 0; + int ret = -1; + int fop = 0; + int len = 0; + off_t off = 0; + off_t nleft = 0; + char *ptr = NULL; + char *eptr = NULL; + char *start = NULL; + char *mover = NULL; + int parse_err = 0; + char current_mover = ' '; + char ascii[LINE_BUFSIZE] = {0,}; + const char *fopname = NULL; + + nleft = stbuf->st_size; + + start = (char *) mmap (NULL, nleft, + PROT_READ, MAP_PRIVATE, from_fd, 0); + if (!start) { + gf_log (this->name, GF_LOG_ERROR, + "mmap() error (reason: %s)", strerror (errno)); + goto out; + } + + mover = start; + + MOVER_MOVE (mover, nleft, start_offset); + + while (nleft > 0) { + off = 0; + 
current_mover = *mover; + + GF_CHANGELOG_FILL_BUFFER (¤t_mover, ascii, off, 1); + GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1); + + switch (current_mover) { + case 'D': + case 'M': + MOVER_MOVE (mover, nleft, 1); + + /* target gfid */ + PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN, + conv_noop, parse_err); + FILL_AND_MOVE(ptr, ascii, off, + mover, nleft, UUID_CANONICAL_FORM_LEN); + break; + + case 'E': + MOVER_MOVE (mover, nleft, 1); + + /* target gfid */ + PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN, + conv_noop, parse_err); + FILL_AND_MOVE (ptr, ascii, off, + mover, nleft, UUID_CANONICAL_FORM_LEN); + FILL_AND_MOVE (" ", ascii, off, + mover, nleft, 1); + + /* fop */ + len = strlen (mover); + VERIFY_SEPARATOR (mover, len, parse_err); + + fop = atoi (mover); + if ( (fopname = gf_fop_list[fop]) == NULL) { + parse_err = 1; + break; + } + + MOVER_MOVE (mover, nleft, len); + + len = strlen (fopname); + GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len); + + /* pargfid + bname */ + ng = nr_gfids[fop]; + while (ng-- > 0) { + MOVER_MOVE (mover, nleft, 1); + len = strlen (mover); + GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1); + + PARSE_GFID (mover, ptr, len, + conv_noop, parse_err); + eptr = calloc (3, strlen (ptr)); + if (!eptr) { + parse_err = 1; + break; + } + + gf_rfc3986_encode ((unsigned char *) ptr, + eptr, gfc->rfc3986); + FILL_AND_MOVE (eptr, ascii, off, + mover, nleft, len); + free (eptr); + } + + break; + default: + parse_err = 1; + } + + if (parse_err) + break; + + GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1); + + if (gf_changelog_write (to_fd, ascii, off) != off) { + gf_log (this->name, GF_LOG_ERROR, + "processing ascii changelog failed due to " + " wrror in writing change (reason: %s)", + strerror (errno)); + break; + } + + MOVER_MOVE (mover, nleft, 1); + + } + + if ( (nleft == 0) && (!parse_err)) + ret = 0; + + if (munmap (start, stbuf->st_size)) + gf_log (this->name, GF_LOG_ERROR, + "munmap() error (reason: %s)", strerror (errno)); + + out: + 
return ret; +} + +#define COPY_BUFSIZE 8192 +static int +gf_changelog_copy (xlator_t *this, int from_fd, int to_fd) +{ + ssize_t size = 0; + char buffer[COPY_BUFSIZE+1] = {0,}; + + while (1) { + size = read (from_fd, buffer, COPY_BUFSIZE); + if (size <= 0) + break; + + if (gf_changelog_write (to_fd, + buffer, size) != size) { + gf_log (this->name, GF_LOG_ERROR, + "error processing ascii changlog"); + size = -1; + break; + } + } + + return (size < 0 ? -1 : 0); +} + +static int +gf_changelog_decode (xlator_t *this, gf_changelog_t *gfc, int from_fd, + int to_fd, struct stat *stbuf, int *zerob) +{ + int ret = -1; + int encoding = -1; + size_t elen = 0; + char buffer[1024] = {0,}; + + CHANGELOG_GET_ENCODING (from_fd, buffer, 1024, encoding, elen); + if (encoding == -1) /* unknown encoding */ + goto out; + + if (!CHANGELOG_VALID_ENCODING (encoding)) + goto out; + + if (elen == stbuf->st_size) { + *zerob = 1; + goto out; + } + + /** + * start processing after the header + */ + lseek (from_fd, elen, SEEK_SET); + + switch (encoding) { + case CHANGELOG_ENCODE_BINARY: + /** + * this ideally should have been a part of changelog-encoders.c + * (ie. part of the changelog translator). 
+ */ + ret = gf_changelog_parse_binary (this, gfc, from_fd, + to_fd, elen, stbuf); + break; + + case CHANGELOG_ENCODE_ASCII: + ret = gf_changelog_parse_ascii (this, gfc, from_fd, + to_fd, elen, stbuf); + break; + default: + ret = gf_changelog_copy (this, from_fd, to_fd); + } + + out: + return ret; +} + +static int +gf_changelog_consume (xlator_t *this, gf_changelog_t *gfc, char *from_path) +{ + int ret = -1; + int fd1 = 0; + int fd2 = 0; + int zerob = 0; + struct stat stbuf = {0,}; + char dest[PATH_MAX] = {0,}; + char to_path[PATH_MAX] = {0,}; + + ret = stat (from_path, &stbuf); + if (ret || !S_ISREG(stbuf.st_mode)) { + gf_log (this->name, GF_LOG_ERROR, + "stat failed on changelog file: %s", from_path); + goto out; + } + + fd1 = open (from_path, O_RDONLY); + if (fd1 < 0) { + gf_log (this->name, GF_LOG_ERROR, + "cannot open changelog file: %s (reason: %s)", + from_path, strerror (errno)); + goto out; + } + + (void) snprintf (to_path, PATH_MAX, "%s%s", + gfc->gfc_current_dir, basename (from_path)); + (void) snprintf (dest, PATH_MAX, "%s%s", + gfc->gfc_processing_dir, basename (from_path)); + + fd2 = open (to_path, O_CREAT | O_TRUNC | O_RDWR, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd2 < 0) { + gf_log (this->name, GF_LOG_ERROR, + "cannot create ascii changelog file %s (reason %s)", + to_path, strerror (errno)); + goto close_fd; + } else { + ret = gf_changelog_decode (this, gfc, fd1, + fd2, &stbuf, &zerob); + + close (fd2); + + if (!ret) { + /* move it to processing on a successfull + decode */ + ret = rename (to_path, dest); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "error moving %s to processing dir" + " (reason: %s)", to_path, + strerror (errno)); + } + + /* remove it from .current if it's an empty file */ + if (zerob) { + ret = unlink (to_path); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "could not unlink %s (reason: %s", + to_path, strerror (errno)); + } + } + + close_fd: + close (fd1); + + out: + return ret; +} + +static char * 
+gf_changelog_ext_change (xlator_t *this, + gf_changelog_t *gfc, char *path, size_t readlen) +{ + int alo = 0; + int ret = 0; + size_t len = 0; + char *buf = NULL; + + buf = path; + while (len < readlen) { + if (*buf == '\0') { + alo = 1; + gf_log (this->name, GF_LOG_DEBUG, + "processing changelog: %s", path); + ret = gf_changelog_consume (this, gfc, path); + } + + if (ret) + break; + + len++; buf++; + if (alo) { + alo = 0; + path = buf; + } + } + + return (ret) ? NULL : path; +} + +void * +gf_changelog_process (void *data) +{ + ssize_t len = 0; + ssize_t offlen = 0; + xlator_t *this = NULL; + char *sbuf = NULL; + gf_changelog_t *gfc = NULL; + char from_path[PATH_MAX] = {0,}; + + gfc = (gf_changelog_t *) data; + this = gfc->this; + + pthread_detach (pthread_self()); + + for (;;) { + len = gf_changelog_read_path (gfc->gfc_sockfd, + from_path + offlen, + PATH_MAX - offlen); + if (len < 0) + continue; /* ignore it for now */ + + if (len == 0) { /* close() from the changelog translator */ + gf_log (this->name, GF_LOG_INFO, "close from changelog" + " notification translator."); + + if (gfc->gfc_connretries != 1) { + if (!gf_changelog_notification_init(this, gfc)) + continue; + } + + byebye = 1; + break; + } + + len += offlen; + sbuf = gf_changelog_ext_change (this, gfc, from_path, len); + if (!sbuf) { + gf_log (this->name, GF_LOG_ERROR, + "could not extract changelog filename"); + continue; + } + + offlen = 0; + if (sbuf != (from_path + len)) { + offlen = from_path + len - sbuf; + memmove (from_path, sbuf, offlen); + } + } + + gf_log (this->name, GF_LOG_DEBUG, + "byebye (%d) from processing thread...", byebye); + return NULL; +} diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c new file mode 100644 index 000000000..ca8e373e7 --- /dev/null +++ b/xlators/features/changelog/lib/src/gf-changelog.c @@ -0,0 +1,515 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <errno.h> +#include <dirent.h> +#include <stddef.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <string.h> + +#include "globals.h" +#include "glusterfs.h" +#include "logging.h" + +#include "gf-changelog-helpers.h" + +/* from the changelog translator */ +#include "changelog-misc.h" +#include "changelog-mem-types.h" + +int byebye = 0; + +static void +gf_changelog_cleanup (gf_changelog_t *gfc) +{ + /* socket */ + if (gfc->gfc_sockfd != -1) + close (gfc->gfc_sockfd); + /* tracker fd */ + if (gfc->gfc_fd != -1) + close (gfc->gfc_fd); + /* processing dir */ + if (gfc->gfc_dir) + closedir (gfc->gfc_dir); + + if (gfc->gfc_working_dir) + free (gfc->gfc_working_dir); /* allocated by realpath */ +} + +void +__attribute__ ((constructor)) gf_changelog_ctor (void) +{ + glusterfs_ctx_t *ctx = NULL; + + ctx = glusterfs_ctx_new (); + if (!ctx) + return; + + if (glusterfs_globals_init (ctx)) { + free (ctx); + ctx = NULL; + return; + } + + THIS->ctx = ctx; +} + +void +__attribute__ ((destructor)) gf_changelog_dtor (void) +{ + xlator_t *this = NULL; + glusterfs_ctx_t *ctx = NULL; + gf_changelog_t *gfc = NULL; + + this = THIS; + if (!this) + return; + + ctx = this->ctx; + gfc = this->private; + + if (gfc) { + gf_changelog_cleanup (gfc); + GF_FREE (gfc); + } + + if (ctx) { + pthread_mutex_destroy (&ctx->lock); + free (ctx); + ctx = NULL; + } +} + + +static int +gf_changelog_open_dirs (gf_changelog_t *gfc) +{ + int ret = -1; + DIR *dir = NULL; + int tracker_fd = 0; + char tracker_path[PATH_MAX] = {0,}; + + (void) snprintf (gfc->gfc_current_dir, PATH_MAX, + "%s/"GF_CHANGELOG_CURRENT_DIR"/", + gfc->gfc_working_dir); + ret = 
mkdir_p (gfc->gfc_current_dir, 0600, _gf_false); + if (ret) + goto out; + + (void) snprintf (gfc->gfc_processed_dir, PATH_MAX, + "%s/"GF_CHANGELOG_PROCESSED_DIR"/", + gfc->gfc_working_dir); + ret = mkdir_p (gfc->gfc_processed_dir, 0600, _gf_false); + if (ret) + goto out; + + (void) snprintf (gfc->gfc_processing_dir, PATH_MAX, + "%s/"GF_CHANGELOG_PROCESSING_DIR"/", + gfc->gfc_working_dir); + ret = mkdir_p (gfc->gfc_processing_dir, 0600, _gf_false); + if (ret) + goto out; + + dir = opendir (gfc->gfc_processing_dir); + if (!dir) { + gf_log ("", GF_LOG_ERROR, + "opendir() error [reason: %s]", strerror (errno)); + goto out; + } + + gfc->gfc_dir = dir; + + (void) snprintf (tracker_path, PATH_MAX, + "%s/"GF_CHANGELOG_TRACKER, gfc->gfc_working_dir); + + tracker_fd = open (tracker_path, O_CREAT | O_APPEND | O_RDWR, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (tracker_fd < 0) { + closedir (gfc->gfc_dir); + ret = -1; + goto out; + } + + gfc->gfc_fd = tracker_fd; + ret = 0; + out: + return ret; +} + +int +gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc) +{ + int ret = 0; + int len = 0; + int tries = 0; + int sockfd = 0; + struct sockaddr_un remote; + + this = gfc->this; + + if (gfc->gfc_sockfd != -1) { + gf_log (this->name, GF_LOG_INFO, + "Reconnecting..."); + close (gfc->gfc_sockfd); + } + + sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) { + ret = -1; + goto out; + } + + CHANGELOG_MAKE_SOCKET_PATH (gfc->gfc_brickpath, + gfc->gfc_sockpath, PATH_MAX); + gf_log (this->name, GF_LOG_INFO, + "connecting to changelog socket: %s (brick: %s)", + gfc->gfc_sockpath, gfc->gfc_brickpath); + + remote.sun_family = AF_UNIX; + strcpy (remote.sun_path, gfc->gfc_sockpath); + + len = strlen (remote.sun_path) + sizeof (remote.sun_family); + + while (tries < gfc->gfc_connretries) { + gf_log (this->name, GF_LOG_WARNING, + "connection attempt %d/%d...", + tries + 1, gfc->gfc_connretries); + + /* initiate a connect */ + if (connect (sockfd, (struct sockaddr *) 
&remote, len) == 0) { + gfc->gfc_sockfd = sockfd; + break; + } + + tries++; + sleep (2); + } + + if (tries == gfc->gfc_connretries) { + gf_log (this->name, GF_LOG_ERROR, + "could not connect to changelog socket!" + " bailing out..."); + ret = -1; + } else + gf_log (this->name, GF_LOG_INFO, + "connection successful"); + + out: + return ret; +} + +int +gf_changelog_done (char *file) +{ + int ret = -1; + char *buffer = NULL; + xlator_t *this = NULL; + gf_changelog_t *gfc = NULL; + char to_path[PATH_MAX] = {0,}; + + errno = EINVAL; + + this = THIS; + if (!this) + goto out; + + gfc = (gf_changelog_t *) this->private; + if (!gfc) + goto out; + + if (!file || !strlen (file)) + goto out; + + /* make sure 'file' is inside ->gfc_working_dir */ + buffer = realpath (file, NULL); + if (!buffer) + goto out; + + if (strncmp (gfc->gfc_working_dir, + buffer, strlen (gfc->gfc_working_dir))) + goto out; + + (void) snprintf (to_path, PATH_MAX, "%s%s", + gfc->gfc_processed_dir, basename (buffer)); + gf_log (this->name, GF_LOG_DEBUG, + "moving %s to processed directory", file); + ret = rename (buffer, to_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "cannot move %s to %s (reason: %s)", + file, to_path, strerror (errno)); + goto out; + } + + ret = 0; + + out: + if (buffer) + free (buffer); /* allocated by realpath() */ + return ret; +} + +/** + * @API + * for a set of changelogs, start from the begining + */ +int +gf_changelog_start_fresh () +{ + xlator_t *this = NULL; + gf_changelog_t *gfc = NULL; + + this = THIS; + if (!this) + goto out; + + errno = EINVAL; + + gfc = (gf_changelog_t *) this->private; + if (!gfc) + goto out; + + if (gf_ftruncate (gfc->gfc_fd, 0)) + goto out; + + return 0; + + out: + return -1; +} + +/** + * @API + * return the next changelog file entry. zero means all chanelogs + * consumed. 
+ */ +ssize_t +gf_changelog_next_change (char *bufptr, size_t maxlen) +{ + ssize_t size = 0; + int tracker_fd = 0; + xlator_t *this = NULL; + gf_changelog_t *gfc = NULL; + char buffer[PATH_MAX] = {0,}; + + errno = EINVAL; + + this = THIS; + if (!this) + goto out; + + gfc = (gf_changelog_t *) this->private; + if (!gfc) + goto out; + + tracker_fd = gfc->gfc_fd; + + size = gf_readline (tracker_fd, buffer, maxlen); + if (size < 0) + goto out; + if (size == 0) + return 0; + + memcpy (bufptr, buffer, size - 1); + *(buffer + size) = '\0'; + + return size; + + out: + return -1; +} + +/** + * @API + * gf_changelog_scan() - scan and generate a list of change entries + * + * calling this api multiple times (without calling gf_changlog_done()) + * would result new changelogs(s) being refreshed in the tracker file. + * This call also acts as a cancellation point for the consumer. + */ +ssize_t +gf_changelog_scan () +{ + int ret = 0; + int tracker_fd = 0; + size_t len = 0; + size_t off = 0; + xlator_t *this = NULL; + size_t nr_entries = 0; + gf_changelog_t *gfc = NULL; + struct dirent *entryp = NULL; + struct dirent *result = NULL; + char buffer[PATH_MAX] = {0,}; + + this = THIS; + if (!this) + goto out; + + gfc = (gf_changelog_t *) this->private; + if (!gfc) + goto out; + + /** + * do we need to protect 'byebye' with locks? worst, the + * consumer would get notified during next scan(). 
+ */ + if (byebye) { + errno = ECONNREFUSED; + goto out; + } + + errno = EINVAL; + + tracker_fd = gfc->gfc_fd; + + if (gf_ftruncate (tracker_fd, 0)) + goto out; + + len = offsetof(struct dirent, d_name) + + pathconf(gfc->gfc_processing_dir, _PC_NAME_MAX) + 1; + entryp = GF_CALLOC (1, len, + gf_changelog_mt_libgfchangelog_dirent_t); + if (!entryp) + goto out; + + rewinddir (gfc->gfc_dir); + while (1) { + ret = readdir_r (gfc->gfc_dir, entryp, &result); + if (ret || !result) + break; + + if ( !strcmp (basename (entryp->d_name), ".") + || !strcmp (basename (entryp->d_name), "..") ) + continue; + + nr_entries++; + + GF_CHANGELOG_FILL_BUFFER (gfc->gfc_processing_dir, + buffer, off, + strlen (gfc->gfc_processing_dir)); + GF_CHANGELOG_FILL_BUFFER (entryp->d_name, buffer, + off, strlen (entryp->d_name)); + GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1); + + if (gf_changelog_write (tracker_fd, buffer, off) != off) { + gf_log (this->name, GF_LOG_ERROR, + "error writing changelog filename" + " to tracker file"); + break; + } + off = 0; + } + + GF_FREE (entryp); + + if (!result) { + if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1) + return nr_entries; + } + out: + return -1; +} + +/** + * @API + * gf_changelog_register() - register a client for updates. 
+ */ +int +gf_changelog_register (char *brick_path, char *scratch_dir, + char *log_file, int log_level, int max_reconnects) +{ + int i = 0; + int ret = -1; + int errn = 0; + xlator_t *this = NULL; + gf_changelog_t *gfc = NULL; + + this = THIS; + if (!this->ctx) + goto out; + + errno = ENOMEM; + + gfc = GF_CALLOC (1, sizeof (*gfc), + gf_changelog_mt_libgfchangelog_t); + if (!gfc) + goto out; + + gfc->this = this; + + gfc->gfc_dir = NULL; + gfc->gfc_fd = gfc->gfc_sockfd = -1; + + gfc->gfc_working_dir = realpath (scratch_dir, NULL); + if (!gfc->gfc_working_dir) { + errn = errno; + goto cleanup; + } + + ret = gf_changelog_open_dirs (gfc); + if (ret) { + errn = errno; + gf_log (this->name, GF_LOG_ERROR, + "could not create entries in scratch dir"); + goto cleanup; + } + + /* passing ident as NULL means to use default ident for syslog */ + if (gf_log_init (this->ctx, log_file, NULL)) + goto cleanup; + + gf_log_set_loglevel ((log_level == -1) ? GF_LOG_INFO : + log_level); + + gfc->gfc_connretries = (max_reconnects <= 0) ? 1 : max_reconnects; + (void) strncpy (gfc->gfc_brickpath, brick_path, PATH_MAX); + + ret = gf_changelog_notification_init (this, gfc); + if (ret) { + errn = errno; + goto cleanup; + } + + ret = gf_thread_create (&gfc->gfc_changelog_processor, + NULL, gf_changelog_process, gfc); + if (ret) { + errn = errno; + gf_log (this->name, GF_LOG_ERROR, + "error creating changelog processor thread" + " new changes won't be recorded!!!"); + goto cleanup; + } + + for (; i < 256; i++) { + gfc->rfc3986[i] = + (isalnum(i) || i == '~' || + i == '-' || i == '.' || i == '_') ? 
i : 0; + } + + ret = 0; + this->private = gfc; + + goto out; + + cleanup: + gf_changelog_cleanup (gfc); + GF_FREE (gfc); + this->private = NULL; + errno = errn; + + out: + return ret; +} diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am new file mode 100644 index 000000000..e85031ad4 --- /dev/null +++ b/xlators/features/changelog/src/Makefile.am @@ -0,0 +1,19 @@ +xlator_LTLIBRARIES = changelog.la + +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \ + changelog-misc.h changelog-encoders.h changelog-notifier.h + +changelog_la_LDFLAGS = -module -avoidversion + +changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \ + changelog-encoders.c changelog-notifier.c +changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 \ + -D_GNU_SOURCE -D$(GF_HOST_OS) -shared -nostartfiles -DDATADIR=\"$(localstatedir)\" + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c new file mode 100644 index 000000000..553eec85c --- /dev/null +++ b/xlators/features/changelog/src/changelog-encoders.c @@ -0,0 +1,176 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "changelog-encoders.h" + +size_t +entry_fn (void *data, char *buffer, gf_boolean_t encode) +{ + char *tmpbuf = NULL; + size_t bufsz = 0; + struct changelog_entry_fields *ce = NULL; + + ce = (struct changelog_entry_fields *) data; + + if (encode) { + tmpbuf = uuid_utoa (ce->cef_uuid); + CHANGELOG_FILL_BUFFER (buffer, bufsz, tmpbuf, strlen (tmpbuf)); + } else { + CHANGELOG_FILL_BUFFER (buffer, bufsz, + ce->cef_uuid, sizeof (uuid_t)); + } + + CHANGELOG_FILL_BUFFER (buffer, bufsz, "/", 1); + CHANGELOG_FILL_BUFFER (buffer, bufsz, + ce->cef_bname, strlen (ce->cef_bname)); + return bufsz; +} + +size_t +fop_fn (void *data, char *buffer, gf_boolean_t encode) +{ + char buf[10] = {0,}; + size_t bufsz = 0; + glusterfs_fop_t fop = 0; + + fop = *(glusterfs_fop_t *) data; + + if (encode) { + (void) snprintf (buf, sizeof (buf), "%d", fop); + CHANGELOG_FILL_BUFFER (buffer, bufsz, buf, strlen (buf)); + } else + CHANGELOG_FILL_BUFFER (buffer, bufsz, &fop, sizeof (fop)); + + return bufsz; +} + +void +entry_free_fn (void *data) +{ + changelog_opt_t *co = data; + + if (!co) + return; + + GF_FREE (co->co_entry.cef_bname); +} + +/** + * try to write all data in one shot + */ + +static inline void +changelog_encode_write_xtra (changelog_log_data_t *cld, + char *buffer, size_t *off, gf_boolean_t encode) +{ + int i = 0; + size_t offset = 0; + void *data = NULL; + changelog_opt_t *co = NULL; + + offset = *off; + + co = (changelog_opt_t *) cld->cld_ptr; + + for (; i < cld->cld_xtra_records; i++, co++) { + CHANGELOG_FILL_BUFFER (buffer, offset, "\0", 1); + + switch (co->co_type) { + case CHANGELOG_OPT_REC_FOP: + data = &co->co_fop; + break; + case CHANGELOG_OPT_REC_ENTRY: + data = &co->co_entry; + break; + } + + if (co->co_convert) + offset += co->co_convert (data, + buffer + offset, encode); + else /* no coversion: write it out as it is */ + CHANGELOG_FILL_BUFFER (buffer, offset, + data, co->co_len); + } + + 
*off = offset; +} + +int +changelog_encode_ascii (xlator_t *this, changelog_log_data_t *cld) +{ + size_t off = 0; + size_t gfid_len = 0; + char *gfid_str = NULL; + char *buffer = NULL; + changelog_priv_t *priv = NULL; + + priv = this->private; + + gfid_str = uuid_utoa (cld->cld_gfid); + gfid_len = strlen (gfid_str); + + /* extra bytes for decorations */ + buffer = alloca (gfid_len + cld->cld_ptr_len + 10); + CHANGELOG_STORE_ASCII (priv, buffer, + off, gfid_str, gfid_len, cld); + + if (cld->cld_xtra_records) + changelog_encode_write_xtra (cld, buffer, &off, _gf_true); + + CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1); + + return changelog_write_change (priv, buffer, off); +} + +int +changelog_encode_binary (xlator_t *this, changelog_log_data_t *cld) +{ + size_t off = 0; + char *buffer = NULL; + changelog_priv_t *priv = NULL; + + priv = this->private; + + /* extra bytes for decorations */ + buffer = alloca (sizeof (uuid_t) + cld->cld_ptr_len + 10); + CHANGELOG_STORE_BINARY (priv, buffer, off, cld->cld_gfid, cld); + + if (cld->cld_xtra_records) + changelog_encode_write_xtra (cld, buffer, &off, _gf_false); + + CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1); + + return changelog_write_change (priv, buffer, off); +} + +static struct changelog_encoder +cb_encoder[] = { + [CHANGELOG_ENCODE_BINARY] = + { + .encoder = CHANGELOG_ENCODE_BINARY, + .encode = changelog_encode_binary, + }, + [CHANGELOG_ENCODE_ASCII] = + { + .encoder = CHANGELOG_ENCODE_ASCII, + .encode = changelog_encode_ascii, + }, +}; + +void +changelog_encode_change( changelog_priv_t * priv) +{ + priv->ce = &cb_encoder[priv->encode_mode]; +} diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h new file mode 100644 index 000000000..a3efbee05 --- /dev/null +++ b/xlators/features/changelog/src/changelog-encoders.h @@ -0,0 +1,46 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CHANGELOG_ENCODERS_H +#define _CHANGELOG_ENCODERS_H + +#include "xlator.h" +#include "defaults.h" + +#include "changelog-helpers.h" + +#define CHANGELOG_STORE_ASCII(priv, buf, off, gfid, gfid_len, cld) do { \ + CHANGELOG_FILL_BUFFER (buffer, off, \ + priv->maps[cld->cld_type], 1); \ + CHANGELOG_FILL_BUFFER (buffer, \ + off, gfid, gfid_len); \ + } while (0) + +#define CHANGELOG_STORE_BINARY(priv, buf, off, gfid, cld) do { \ + CHANGELOG_FILL_BUFFER (buffer, off, \ + priv->maps[cld->cld_type], 1); \ + CHANGELOG_FILL_BUFFER (buffer, \ + off, gfid, sizeof (uuid_t)); \ + } while (0) + +size_t +entry_fn (void *data, char *buffer, gf_boolean_t encode); +size_t +fop_fn (void *data, char *buffer, gf_boolean_t encode); +void +entry_free_fn (void *data); +int +changelog_encode_binary (xlator_t *, changelog_log_data_t *); +int +changelog_encode_ascii (xlator_t *, changelog_log_data_t *); +void +changelog_encode_change(changelog_priv_t *); + +#endif /* _CHANGELOG_ENCODERS_H */ diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c new file mode 100644 index 000000000..7ab0091b5 --- /dev/null +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -0,0 +1,693 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" +#include "logging.h" +#include "iobuf.h" + +#include "changelog-helpers.h" +#include "changelog-mem-types.h" + +#include "changelog-encoders.h" +#include <pthread.h> + +void +changelog_thread_cleanup (xlator_t *this, pthread_t thr_id) +{ + int ret = 0; + void *retval = NULL; + + /* send a cancel request to the thread */ + ret = pthread_cancel (thr_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "could not cancel thread (reason: %s)", + strerror (errno)); + goto out; + } + + ret = pthread_join (thr_id, &retval); + if (ret || (retval != PTHREAD_CANCELED)) { + gf_log (this->name, GF_LOG_ERROR, + "cancel request not adhered as expected" + " (reason: %s)", strerror (errno)); + } + + out: + return; +} + +inline void * +changelog_get_usable_buffer (changelog_local_t *local) +{ + changelog_log_data_t *cld = NULL; + + cld = &local->cld; + if (!cld->cld_iobuf) + return NULL; + + return cld->cld_iobuf->ptr; +} + +inline void +changelog_set_usable_record_and_length (changelog_local_t *local, + size_t len, int xr) +{ + changelog_log_data_t *cld = NULL; + + cld = &local->cld; + + cld->cld_ptr_len = len; + cld->cld_xtra_records = xr; +} + +void +changelog_local_cleanup (xlator_t *xl, changelog_local_t *local) +{ + int i = 0; + changelog_opt_t *co = NULL; + changelog_log_data_t *cld = NULL; + + if (!local) + return; + + cld = &local->cld; + + /* cleanup dynamic allocation for extra records */ + if (cld->cld_xtra_records) { + co = (changelog_opt_t *) cld->cld_ptr; + for (; i < cld->cld_xtra_records; i++, co++) + if (co->co_free) + co->co_free (co); + } + + CHANGELOG_IOBUF_UNREF (cld->cld_iobuf); + + if (local->inode) + inode_unref (local->inode); + + mem_put (local); +} + +inline int +changelog_write (int fd, char *buffer, size_t len) +{ + ssize_t size = 0; + size_t writen = 0; + + while (writen < len) { + size = write (fd, + buffer + writen, len - 
writen); + if (size <= 0) + break; + + writen += size; + } + + return (writen != len); +} + +static int +changelog_rollover_changelog (xlator_t *this, + changelog_priv_t *priv, unsigned long ts) +{ + int ret = -1; + int notify = 0; + char *bname = NULL; + char ofile[PATH_MAX] = {0,}; + char nfile[PATH_MAX] = {0,}; + + if (priv->changelog_fd != -1) { + close (priv->changelog_fd); + priv->changelog_fd = -1; + } + + (void) snprintf (ofile, PATH_MAX, + "%s/"CHANGELOG_FILE_NAME, priv->changelog_dir); + (void) snprintf (nfile, PATH_MAX, + "%s/"CHANGELOG_FILE_NAME".%lu", + priv->changelog_dir, ts); + + ret = rename (ofile, nfile); + if (!ret) + notify = 1; + + if (ret && (errno == ENOENT)) { + ret = 0; + } + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "error renaming %s -> %s (reason %s)", + ofile, nfile, strerror (errno)); + } + + if (notify) { + bname = basename (nfile); + gf_log (this->name, GF_LOG_DEBUG, "notifying: %s", bname); + ret = changelog_write (priv->wfd, bname, strlen (bname) + 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send file name to notify thread" + " (reason: %s)", strerror (errno)); + } + } + + return ret; +} + +int +changelog_open (xlator_t *this, + changelog_priv_t *priv) +{ + int fd = 0; + int ret = -1; + int flags = 0; + char buffer[1024] = {0,}; + char changelog_path[PATH_MAX] = {0,}; + + (void) snprintf (changelog_path, PATH_MAX, + "%s/"CHANGELOG_FILE_NAME, + priv->changelog_dir); + + flags |= (O_CREAT | O_RDWR); + if (priv->fsync_interval == 0) + flags |= O_SYNC; + + fd = open (changelog_path, flags, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + gf_log (this->name, GF_LOG_ERROR, + "unable to open/create changelog file %s" + " (reason: %s). 
change-logging will be" + " inactive", changelog_path, strerror (errno)); + goto out; + } + + priv->changelog_fd = fd; + + (void) snprintf (buffer, 1024, CHANGELOG_HEADER, + CHANGELOG_VERSION_MAJOR, + CHANGELOG_VERSION_MINOR, + priv->ce->encoder); + ret = changelog_write_change (priv, buffer, strlen (buffer)); + if (ret) { + close (priv->changelog_fd); + priv->changelog_fd = -1; + goto out; + } + + ret = 0; + + out: + return ret; +} + +int +changelog_start_next_change (xlator_t *this, + changelog_priv_t *priv, + unsigned long ts, gf_boolean_t finale) +{ + int ret = -1; + + ret = changelog_rollover_changelog (this, priv, ts); + + if (!ret && !finale) + ret = changelog_open (this, priv); + + return ret; +} + +/** + * return the length of entry + */ +inline size_t +changelog_entry_length () +{ + return sizeof (changelog_log_data_t); +} + +int +changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last) +{ + struct timeval tv = {0,}; + + cld->cld_type = CHANGELOG_TYPE_ROLLOVER; + + if (gettimeofday (&tv, NULL)) + return -1; + + cld->cld_roll_time = (unsigned long) tv.tv_sec; + cld->cld_finale = is_last; + return 0; +} + +int +changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len) +{ + return changelog_write (priv->changelog_fd, buffer, len); +} + +inline int +changelog_handle_change (xlator_t *this, + changelog_priv_t *priv, changelog_log_data_t *cld) +{ + int ret = 0; + + if (CHANGELOG_TYPE_IS_ROLLOVER (cld->cld_type)) { + changelog_encode_change(priv); + ret = changelog_start_next_change (this, priv, + cld->cld_roll_time, + cld->cld_finale); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Problem rolling over changelog(s)"); + goto out; + } + + /** + * case when there is reconfigure done (disabling changelog) and there + * are still fops that have updates in prgress. 
+ */ + if (priv->changelog_fd == -1) + return 0; + + if (CHANGELOG_TYPE_IS_FSYNC (cld->cld_type)) { + ret = fsync (priv->changelog_fd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "fsync failed (reason: %s)", + strerror (errno)); + } + goto out; + } + + ret = priv->ce->encode (this, cld); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "error writing changelog to disk"); + } + + out: + return ret; +} + +changelog_local_t * +changelog_local_init (xlator_t *this, inode_t *inode, + uuid_t gfid, int xtra_records, + gf_boolean_t update_flag) +{ + changelog_local_t *local = NULL; + struct iobuf *iobuf = NULL; + + /** + * We relax the presence of inode if @update_flag is true. + * The caller (implmentation of the fop) needs to be careful to + * not blindly use local->inode. + */ + if (!update_flag && !inode) { + gf_log_callingfn (this->name, GF_LOG_WARNING, + "inode needed for version checking !!!"); + goto out; + } + + if (xtra_records) { + iobuf = iobuf_get2 (this->ctx->iobuf_pool, + xtra_records * CHANGELOG_OPT_RECORD_LEN); + if (!iobuf) + goto out; + } + + local = mem_get0 (this->local_pool); + if (!local) { + CHANGELOG_IOBUF_UNREF (iobuf); + goto out; + } + + local->update_no_check = update_flag; + + uuid_copy (local->cld.cld_gfid, gfid); + + local->cld.cld_iobuf = iobuf; + local->cld.cld_xtra_records = 0; /* set by the caller */ + + if (inode) + local->inode = inode_ref (inode); + + out: + return local; +} + +int +changelog_forget (xlator_t *this, inode_t *inode) +{ + uint64_t ctx_addr = 0; + changelog_inode_ctx_t *ctx = NULL; + + inode_ctx_del (inode, this, &ctx_addr); + if (!ctx_addr) + return 0; + + ctx = (changelog_inode_ctx_t *) (long) ctx_addr; + GF_FREE (ctx); + + return 0; +} + +int +changelog_inject_single_event (xlator_t *this, + changelog_priv_t *priv, + changelog_log_data_t *cld) +{ + return priv->cd.dispatchfn (this, priv, priv->cd.cd_data, cld, NULL); +} + +/** + * TODO: these threads have many thing in common (wake up after + * a certain 
time etc..). move them into separate routine. + */ +void * +changelog_rollover (void *data) +{ + int ret = 0; + xlator_t *this = NULL; + struct timeval tv = {0,}; + changelog_log_data_t cld = {0,}; + changelog_time_slice_t *slice = NULL; + changelog_priv_t *priv = data; + + this = priv->cr.this; + slice = &priv->slice; + + while (1) { + tv.tv_sec = priv->rollover_time; + tv.tv_usec = 0; + + ret = select (0, NULL, NULL, NULL, &tv); + if (ret) + continue; + + ret = changelog_fill_rollover_data (&cld, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to fill rollover data"); + continue; + } + + LOCK (&priv->lock); + { + ret = changelog_inject_single_event (this, priv, &cld); + if (!ret) + SLICE_VERSION_UPDATE (slice); + } + UNLOCK (&priv->lock); + } + + return NULL; +} + +void * +changelog_fsync_thread (void *data) +{ + int ret = 0; + xlator_t *this = NULL; + struct timeval tv = {0,}; + changelog_log_data_t cld = {0,}; + changelog_priv_t *priv = data; + + this = priv->cf.this; + cld.cld_type = CHANGELOG_TYPE_FSYNC; + + while (1) { + tv.tv_sec = priv->fsync_interval; + tv.tv_usec = 0; + + ret = select (0, NULL, NULL, NULL, &tv); + if (ret) + continue; + + ret = changelog_inject_single_event (this, priv, &cld); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to inject fsync event"); + } + + return NULL; +} + +/* macros for inode/changelog version checks */ + +#define INODE_VERSION_UPDATE(priv, inode, iver, slice, type) do { \ + LOCK (&inode->lock); \ + { \ + LOCK (&priv->lock); \ + { \ + *iver = slice->changelog_version[type]; \ + } \ + UNLOCK (&priv->lock); \ + } \ + UNLOCK (&inode->lock); \ + } while (0) + +#define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd) do { \ + LOCK (&priv->lock); \ + { \ + upd = (ver == slice->changelog_version[type]) \ + ? 
_gf_false : _gf_true; \ + } \ + UNLOCK (&priv->lock); \ + } while (0) + +static int +__changelog_inode_ctx_set (xlator_t *this, + inode_t *inode, changelog_inode_ctx_t *ctx) +{ + uint64_t ctx_addr = (uint64_t) ctx; + return __inode_ctx_set (inode, this, &ctx_addr); +} + +/** + * one shot routine to get the address and the value of a inode version + * for a particular type. + */ +static changelog_inode_ctx_t * +__changelog_inode_ctx_get (xlator_t *this, + inode_t *inode, unsigned long **iver, + unsigned long *version, changelog_log_type type) +{ + int ret = 0; + uint64_t ctx_addr = 0; + changelog_inode_ctx_t *ctx = NULL; + + ret = __inode_ctx_get (inode, this, &ctx_addr); + if (ret < 0) + ctx_addr = 0; + if (ctx_addr != 0) { + ctx = (changelog_inode_ctx_t *) (long)ctx_addr; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_changelog_mt_inode_ctx_t); + if (!ctx) + goto out; + + ret = __changelog_inode_ctx_set (this, inode, ctx); + if (ret) { + GF_FREE (ctx); + ctx = NULL; + } + + out: + if (ctx && iver && version) { + *iver = CHANGELOG_INODE_VERSION_TYPE (ctx, type); + *version = **iver; + } + + return ctx; +} + +static changelog_inode_ctx_t * +changelog_inode_ctx_get (xlator_t *this, + inode_t *inode, unsigned long **iver, + unsigned long *version, changelog_log_type type) +{ + changelog_inode_ctx_t *ctx = NULL; + + LOCK (&inode->lock); + { + ctx = __changelog_inode_ctx_get (this, + inode, iver, version, type); + } + UNLOCK (&inode->lock); + + return ctx; +} + +/** + * This is the main update routine. Locking has been made granular so as to + * maximize parallelism of fops - I'll try to explain it below using execution + * timelines. + * + * Basically, the contention is between multiple execution threads of this + * routine and the roll-over thread. So, instead of having a big lock, we hold + * granular locks: inode->lock and priv->lock. Now I'll explain what happens + * when there is an update and a roll-over at just about the same time. 
+ * NOTE: + * - the dispatcher itself synchronizes updates via its own lock + * - the slice version is incremented by the roll-over thread + * + * Case 1: When the rollover thread wins before the inode version can be + * compared with the slice version. + * + * [updater] | [rollover] + * | + * | <SLICE: 1, 1, 1> + * <changelog_update> | + * <changelog_inode_ctx_get> | + * <CTX: 1, 1, 1> | + * | <dispatch-rollover-event> + * | LOCK (&priv->lock) + * | <SLICE_VERSION_UPDATE> + * | <SLICE: 2, 2, 2> + * | UNLOCK (&priv->lock) + * | + * LOCK (&priv->lock) | + * <INODE_VERSION_EQUALS_SLICE> | + * I: 1 <-> S: 2 | + * update: true | + * UNLOCK (&priv->lock) | + * | + * <if update == true> | + * <dispatch-update-event> | + * <INODE_VERSION_UPDATE> | + * LOCK (&inode->lock) | + * LOCK (&priv->lock) | + * <CTX: 2, 1, 1> | + * UNLOCK (&priv->lock) | + * UNLOCK (&inode->lock) | + * + * Therefore, the change gets recorded in the next change (no lost change). If + * the slice version was ahead of the inode version (say I:1, S: 2), then + * anyway the comparison would result in an update (I: 1, S: 3). + * + * If the rollover time is too short, then there is another contention when the + * updater tries to bring up inode version to the slice version (this is also + * the case when the roll-over thread wakes up during INODE_VERSION_UPDATE). 
+ * + * <CTX: 1, 1, 1> | <SLICE: 2, 2, 2> + * | + * | + * <dispatch-update-event> | + * <INODE_VERSION_UPDATE> | + * LOCK (&inode->lock) | + * LOCK (&priv->lock) | + * <CTX: 2, 1, 1> | + * UNLOCK (&priv->lock) | + * UNLOCK (&inode->lock) | + * | <dispatch-rollover-event> + * | LOCK (&priv->lock) + * | <SLICE_VERSION_UPDATE> + * | <SLICE: 3, 3, 3> + * | UNLOCK (&priv->lock) + * + * + * Case 2: When the fop thread wins + * + * [updater] | [rollover] + * | + * | <SLICE: 1, 1, 1> + * <changelog_update> | + * <changelog_inode_ctx_get> | + * <CTX: 0, 0, 0> | + * | + * LOCK (&priv->lock) | + * <INODE_VERSION_EQUALS_SLICE> | + * I: 0 <-> S: 1 | + * update: true | + * UNLOCK (&priv->lock) | + * | <dispatch-rollover-event> + * | LOCK (&priv->lock) + * | <SLICE_VERSION_UPDATE> + * | <SLICE: 2, 2, 2> + * | UNLOCK (&priv->lock) + * <if update == true> | + * <dispatch-update-event> | + * <INODE_VERSION_UPDATE> | + * LOCK (&inode->lock) | + * LOCK (&priv->lock) | + * <CTX: 2, 0, 0> | + * UNLOCK (&priv->lock) | + * UNLOCK (&inode->lock) | + * + * Here again, if the inode version was equal to the slice version (I: 1, S: 1) + * then there is no need to record an update (as the equality of the two versions + * signifies an update was recorded in the current time slice). 
+ */ +inline void +changelog_update (xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local, changelog_log_type type) +{ + int ret = 0; + unsigned long *iver = NULL; + unsigned long version = 0; + inode_t *inode = NULL; + changelog_time_slice_t *slice = NULL; + changelog_inode_ctx_t *ctx = NULL; + changelog_log_data_t *cld_0 = NULL; + changelog_log_data_t *cld_1 = NULL; + changelog_local_t *next_local = NULL; + gf_boolean_t need_upd = _gf_true; + + slice = &priv->slice; + + /** + * for fops that do not require inode version checking + */ + if (local->update_no_check) + goto update; + + inode = local->inode; + + ctx = changelog_inode_ctx_get (this, + inode, &iver, &version, type); + if (!ctx) + goto update; + + INODE_VERSION_EQUALS_SLICE (priv, version, slice, type, need_upd); + + update: + if (need_upd) { + cld_0 = &local->cld; + cld_0->cld_type = type; + + if ( (next_local = local->prev_entry) != NULL ) { + cld_1 = &next_local->cld; + cld_1->cld_type = type; + } + + ret = priv->cd.dispatchfn (this, priv, + priv->cd.cd_data, cld_0, cld_1); + + /** + * update after the dispatcher has successfully done + * it's job. + */ + if (!local->update_no_check && iver && !ret) + INODE_VERSION_UPDATE (priv, inode, iver, slice, type); + } + + return; +} diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h new file mode 100644 index 000000000..ad79636b0 --- /dev/null +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -0,0 +1,395 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CHANGELOG_HELPERS_H +#define _CHANGELOG_HELPERS_H + +#include "locking.h" +#include "timer.h" +#include "pthread.h" +#include "iobuf.h" + +#include "changelog-misc.h" + +/** + * the changelog entry + */ +typedef struct changelog_log_data { + /* rollover related */ + unsigned long cld_roll_time; + + /* reopen changelog? */ + gf_boolean_t cld_finale; + + changelog_log_type cld_type; + + /** + * sincd gfid is _always_ a necessity, it's not a part + * of the iobuf. by doing this we do not add any overhead + * for data and metadata related fops. + */ + uuid_t cld_gfid; + + /** + * iobufs are used for optionals records: pargfid, path, + * write offsets etc.. It's the fop implementers job + * to allocate (iobuf_get() in the fop) and get unref'ed + * in the callback (CHANGELOG_STACK_UNWIND). + */ + struct iobuf *cld_iobuf; + +#define cld_ptr cld_iobuf->ptr + + /** + * after allocation you can point this to the length of + * usable data, but make sure it does not exceed the + * the size of the requested iobuf. + */ + size_t cld_iobuf_len; + +#define cld_ptr_len cld_iobuf_len + + /** + * number of optional records + */ + int cld_xtra_records; +} changelog_log_data_t; + +/** + * holder for dispatch function and private data + */ + +typedef struct changelog_priv changelog_priv_t; + +typedef struct changelog_dispatcher { + void *cd_data; + int (*dispatchfn) (xlator_t *, changelog_priv_t *, void *, + changelog_log_data_t *, changelog_log_data_t *); +} changelog_dispatcher_t; + +struct changelog_bootstrap { + changelog_mode_t mode; + int (*ctor) (xlator_t *, changelog_dispatcher_t *); + int (*dtor) (xlator_t *, changelog_dispatcher_t *); +}; + +struct changelog_encoder { + changelog_encoder_t encoder; + int (*encode) (xlator_t *, changelog_log_data_t *); +}; + + +/* xlator private */ + +typedef struct changelog_time_slice { + /** + * just in case we need nanosecond granularity some day. + * field is unused as of now (maybe we'd need it later). 
+ */ + struct timeval tv_start; + + /** + * version of changelog file, incremented each time changes + * rollover. + */ + unsigned long changelog_version[CHANGELOG_MAX_TYPE]; +} changelog_time_slice_t; + +typedef struct changelog_rollover { + /* rollover thread */ + pthread_t rollover_th; + + xlator_t *this; +} changelog_rollover_t; + +typedef struct changelog_fsync { + /* fsync() thread */ + pthread_t fsync_th; + + xlator_t *this; +} changelog_fsync_t; + +# define CHANGELOG_MAX_CLIENTS 5 +typedef struct changelog_notify { + /* reader end of the pipe */ + int rfd; + + /* notifier thread */ + pthread_t notify_th; + + /* unique socket path */ + char sockpath[PATH_MAX]; + + int socket_fd; + + /** + * simple array of accept()'ed fds. Not scalable at all + * for large number of clients, but it's okay as we have + * a ahrd limit in this version (@CHANGELOG_MAX_CLIENTS). + */ + int client_fd[CHANGELOG_MAX_CLIENTS]; + + xlator_t *this; +} changelog_notify_t; + +struct changelog_priv { + gf_boolean_t active; + + /* to generate unique socket file per brick */ + char *changelog_brick; + + /* logging directory */ + char *changelog_dir; + + /* one file for all changelog types */ + int changelog_fd; + + gf_lock_t lock; + + /* writen end of the pipe */ + int wfd; + + /* rollover time */ + int32_t rollover_time; + + /* fsync() interval */ + int32_t fsync_interval; + + /* changelog type maps */ + const char *maps[CHANGELOG_MAX_TYPE]; + + /* time slicer */ + changelog_time_slice_t slice; + + /* context of the updater */ + changelog_dispatcher_t cd; + + /* context of the rollover thread */ + changelog_rollover_t cr; + + /* context of fsync thread */ + changelog_fsync_t cf; + + /* context of the notifier thread */ + changelog_notify_t cn; + + /* operation mode */ + changelog_mode_t op_mode; + + /* bootstrap routine for 'current' logger */ + struct changelog_bootstrap *cb; + + /* encoder mode */ + changelog_encoder_t encode_mode; + + /* encoder */ + struct changelog_encoder *ce; +}; + 
+struct changelog_local { + inode_t *inode; + gf_boolean_t update_no_check; + + changelog_log_data_t cld; + + /** + * ->prev_entry is used in cases when there needs to be + * additional changelog entry for the parent (eg. rename) + * It's analogous to ->next in single linked list world, + * but we call it as ->prev_entry... ha ha ha + */ + struct changelog_local *prev_entry; +}; + +typedef struct changelog_local changelog_local_t; + +/* inode version is stored in inode ctx */ +typedef struct changelog_inode_ctx { + unsigned long iversion[CHANGELOG_MAX_TYPE]; +} changelog_inode_ctx_t; + +#define CHANGELOG_INODE_VERSION_TYPE(ctx, type) &(ctx->iversion[type]) + +/** + * Optional Records: + * fops that need to save additional information request a array of + * @changelog_opt_t struct. The array is allocated via @iobufs. + */ +typedef enum { + CHANGELOG_OPT_REC_FOP, + CHANGELOG_OPT_REC_ENTRY, +} changelog_optional_rec_type_t; + +struct changelog_entry_fields { + uuid_t cef_uuid; + char *cef_bname; +}; + +typedef struct { + /** + * @co_covert can be used to do post-processing of the record before + * it's persisted to the CHANGELOG. If this is NULL, then the record + * is persisted as per it's in memory format. + */ + size_t (*co_convert) (void *data, char *buffer, gf_boolean_t encode); + + /* release routines */ + void (*co_free) (void *data); + + /* type of the field */ + changelog_optional_rec_type_t co_type; + + /** + * sizeof of the 'valid' field in the union. This field is not used if + * @co_convert is specified. 
+ */ + size_t co_len; + + union { + glusterfs_fop_t co_fop; + struct changelog_entry_fields co_entry; + }; +} changelog_opt_t; + +#define CHANGELOG_OPT_RECORD_LEN sizeof (changelog_opt_t) + +/** + * helpers routines + */ + +void +changelog_thread_cleanup (xlator_t *this, pthread_t thr_id); +inline void * +changelog_get_usable_buffer (changelog_local_t *local); +inline void +changelog_set_usable_record_and_length (changelog_local_t *local, + size_t len, int xr); +void +changelog_local_cleanup (xlator_t *xl, changelog_local_t *local); +changelog_local_t * +changelog_local_init (xlator_t *this, inode_t *inode, uuid_t gfid, + int xtra_records, gf_boolean_t update_flag); +int +changelog_start_next_change (xlator_t *this, + changelog_priv_t *priv, + unsigned long ts, gf_boolean_t finale); +int +changelog_open (xlator_t *this, changelog_priv_t *priv); +int +changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last); +int +changelog_inject_single_event (xlator_t *this, + changelog_priv_t *priv, + changelog_log_data_t *cld); +inline size_t +changelog_entry_length (); +inline int +changelog_write (int fd, char *buffer, size_t len); +int +changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len); +inline int +changelog_handle_change (xlator_t *this, + changelog_priv_t *priv, changelog_log_data_t *cld); +inline void +changelog_update (xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local, changelog_log_type type); +void * +changelog_rollover (void *data); +void * +changelog_fsync_thread (void *data); +int +changelog_forget (xlator_t *this, inode_t *inode); + +/* macros */ + +#define CHANGELOG_STACK_UNWIND(fop, frame, params ...) 
do { \ + changelog_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + if (frame) { \ + __local = frame->local; \ + __xl = frame->this; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT (fop, frame, params); \ + /* release the chained entry first: it is only reachable \ + * through __local, which must still be valid here */ \ + if (__local && __local->prev_entry) \ + changelog_local_cleanup (__xl, \ + __local->prev_entry); \ + changelog_local_cleanup (__xl, __local); \ + } while (0) + +#define CHANGELOG_IOBUF_REF(iobuf) do { \ + if (iobuf) \ + iobuf_ref (iobuf); \ + } while (0) + +#define CHANGELOG_IOBUF_UNREF(iobuf) do { \ + if (iobuf) \ + iobuf_unref (iobuf); \ + } while (0) + +#define CHANGELOG_FILL_BUFFER(buffer, off, val, len) do { \ + memcpy (buffer + off, val, len); \ + off += len; \ + } while (0) + +#define SLICE_VERSION_UPDATE(slice) do { \ + int i = 0; \ + for (; i < CHANGELOG_MAX_TYPE; i++) { \ + slice->changelog_version[i]++; \ + } \ + } while (0) + +#define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) do { \ + co->co_convert = converter; \ + co->co_free = NULL; \ + co->co_type = CHANGELOG_OPT_REC_FOP; \ + co->co_fop = fop; \ + xlen += sizeof (fop); \ + } while (0) + +#define CHANGELOG_FILL_ENTRY(co, pargfid, bname, \ + converter, freefn, xlen, label) \ + do { \ + co->co_convert = converter; \ + co->co_free = freefn; \ + co->co_type = CHANGELOG_OPT_REC_ENTRY; \ + uuid_copy (co->co_entry.cef_uuid, pargfid); \ + co->co_entry.cef_bname = gf_strdup(bname); \ + if (!co->co_entry.cef_bname) \ + goto label; \ + xlen += (UUID_CANONICAL_FORM_LEN + strlen (bname)); \ + } while (0) + +#define CHANGELOG_INIT(this, local, inode, gfid, xrec) \ + local = changelog_local_init (this, inode, gfid, xrec, _gf_false) + +#define CHANGELOG_INIT_NOCHECK(this, local, inode, gfid, xrec) \ + local = changelog_local_init (this, inode, gfid, xrec, _gf_true) + +#define CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, label) do { \ + if (!priv->active) \ + goto label; \ + /* ignore rebalance process's activity. 
*/ \ + if (frame->root->pid == GF_CLIENT_PID_DEFRAG) \ + goto label; \ + } while (0) + +/* ignore internal fops */ +#define CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(dict, label) do { \ + if (dict && dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY)) \ + goto label; \ + } while (0) + +#define CHANGELOG_COND_GOTO(priv, cond, label) do { \ + if (!priv->active || cond) \ + goto label; \ + } while (0) + +#endif /* _CHANGELOG_HELPERS_H */ diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h new file mode 100644 index 000000000..d72464eab --- /dev/null +++ b/xlators/features/changelog/src/changelog-mem-types.h @@ -0,0 +1,29 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CHANGELOG_MEM_TYPES_H +#define _CHANGELOG_MEM_TYPES_H + +#include "mem-types.h" + +enum gf_changelog_mem_types { + gf_changelog_mt_priv_t = gf_common_mt_end + 1, + gf_changelog_mt_str_t = gf_common_mt_end + 2, + gf_changelog_mt_batch_t = gf_common_mt_end + 3, + gf_changelog_mt_rt_t = gf_common_mt_end + 4, + gf_changelog_mt_inode_ctx_t = gf_common_mt_end + 5, + gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 6, + gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 7, + gf_changelog_mt_libgfchangelog_dirent_t = gf_common_mt_end + 8, + gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 9, + gf_changelog_mt_end +}; + +#endif diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h new file mode 100644 index 000000000..0712a3771 --- /dev/null +++ b/xlators/features/changelog/src/changelog-misc.h @@ -0,0 +1,101 @@ +/* + Copyright (c) 2013 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CHANGELOG_MISC_H +#define _CHANGELOG_MISC_H + +#include "glusterfs.h" +#include "common-utils.h" + +#define CHANGELOG_MAX_TYPE 3 +#define CHANGELOG_FILE_NAME "CHANGELOG" + +#define CHANGELOG_VERSION_MAJOR 1 +#define CHANGELOG_VERSION_MINOR 0 + +#define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/changelog-%s.sock" + +/** + * header starts with the version and the format of the changelog. + * 'version' not much of a use now. + */ +#define CHANGELOG_HEADER \ + "GlusterFS Changelog | version: v%d.%d | encoding : %d\n" + +#define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) do { \ + char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \ + md5_wrapper((unsigned char *) brick_path, \ + strlen(brick_path), \ + md5_sum); \ + (void) snprintf (sockpath, len, \ + CHANGELOG_UNIX_SOCK, md5_sum); \ + } while (0) + +/** + * ... used by libgfchangelog. + * + * Reads the header line of @fd into @buffer (buffer size @len). On success + * the length of that line is stored in @enc_len and the encoding number + * parsed from it in @enc; @enc is left at -1 when the header cannot be read + * or parsed. The stream is opened on a dup() of @fd, so the caller's + * descriptor stays open after fclose(). + */ +#define CHANGELOG_GET_ENCODING(fd, buffer, len, enc, enc_len) do { \ + FILE *fp; \ + int fd_dup, maj, min; \ + \ + enc = -1; \ + fd_dup = dup (fd); \ + \ + if (fd_dup != -1) { \ + fp = fdopen (fd_dup, "r"); \ + if (fp) { \ + if (fgets (buffer, len, fp)) { \ + enc_len = strlen (buffer); \ + sscanf (buffer, \ + CHANGELOG_HEADER, \ + &maj, &min, &enc); \ + } \ + fclose (fp); \ + } else { \ + close (fd_dup); \ + } \ + } \ + } while (0) + +/** + * everything after 'CHANGELOG_TYPE_ENTRY' are internal types + * (ie. 
none of the fops trigger this type of event), hence + * CHANGELOG_MAX_TYPE = 3 + */ +typedef enum { + CHANGELOG_TYPE_DATA = 0, + CHANGELOG_TYPE_METADATA, + CHANGELOG_TYPE_ENTRY, + CHANGELOG_TYPE_ROLLOVER, + CHANGELOG_TYPE_FSYNC, +} changelog_log_type; + +/* operation modes - RT for now */ +typedef enum { + CHANGELOG_MODE_RT = 0, +} changelog_mode_t; + +/* encoder types */ + +typedef enum { + CHANGELOG_ENCODE_MIN = 0, + CHANGELOG_ENCODE_BINARY, + CHANGELOG_ENCODE_ASCII, + CHANGELOG_ENCODE_MAX, +} changelog_encoder_t; + +#define CHANGELOG_VALID_ENCODING(enc) \ + (enc > CHANGELOG_ENCODE_MIN && enc < CHANGELOG_ENCODE_MAX) + +#define CHANGELOG_TYPE_IS_ENTRY(type) (type == CHANGELOG_TYPE_ENTRY) +#define CHANGELOG_TYPE_IS_ROLLOVER(type) (type == CHANGELOG_TYPE_ROLLOVER) +#define CHANGELOG_TYPE_IS_FSYNC(type) (type == CHANGELOG_TYPE_FSYNC) + +#endif /* _CHANGELOG_MISC_H */ diff --git a/xlators/features/changelog/src/changelog-notifier.c b/xlators/features/changelog/src/changelog-notifier.c new file mode 100644 index 000000000..1f8b31253 --- /dev/null +++ b/xlators/features/changelog/src/changelog-notifier.c @@ -0,0 +1,314 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include "changelog-notifier.h" + +#include <pthread.h> + +inline static void +changelog_notify_clear_fd (changelog_notify_t *cn, int i) +{ + cn->client_fd[i] = -1; +} + +inline static void +changelog_notify_save_fd (changelog_notify_t *cn, int i, int fd) +{ + cn->client_fd[i] = fd; +} + +static int +changelog_notify_insert_fd (xlator_t *this, changelog_notify_t *cn, int fd) +{ + int i = 0; + int ret = 0; + + for (; i < CHANGELOG_MAX_CLIENTS; i++) { + if (cn->client_fd[i] == -1) + break; + } + + if (i == CHANGELOG_MAX_CLIENTS) { + /** + * this case should not be hit as listen() would limit + * the number of completely established connections. + */ + gf_log (this->name, GF_LOG_WARNING, + "hit max client limit (%d)", CHANGELOG_MAX_CLIENTS); + ret = -1; + } + else + changelog_notify_save_fd (cn, i, fd); + + return ret; +} + +static void +changelog_notify_fill_rset (changelog_notify_t *cn, fd_set *rset, int *maxfd) +{ + int i = 0; + + FD_ZERO (rset); + + FD_SET (cn->socket_fd, rset); + *maxfd = cn->socket_fd; + + FD_SET (cn->rfd, rset); + *maxfd = max (*maxfd, cn->rfd); + + for (; i < CHANGELOG_MAX_CLIENTS; i++) { + if (cn->client_fd[i] != -1) { + FD_SET (cn->client_fd[i], rset); + *maxfd = max (*maxfd, cn->client_fd[i]); + } + } + + *maxfd = *maxfd + 1; +} + +static int +changelog_notify_client (changelog_notify_t *cn, char *path, ssize_t len) +{ + int i = 0; + int ret = 0; + + for (; i < CHANGELOG_MAX_CLIENTS; i++) { + if (cn->client_fd[i] == -1) + continue; + + if (changelog_write (cn->client_fd[i], + path, len)) { + ret = -1; + + close (cn->client_fd[i]); + changelog_notify_clear_fd (cn, i); + } + } + + return ret; +} + +static void +changelog_notifier_init (changelog_notify_t *cn) +{ + int i = 0; + + cn->socket_fd = -1; + + for (; i < CHANGELOG_MAX_CLIENTS; i++) { + changelog_notify_clear_fd (cn, i); + } +} + +static void +changelog_close_client_conn (changelog_notify_t *cn) +{ + int i = 0; + + for (; i < CHANGELOG_MAX_CLIENTS; i++) { + if (cn->client_fd[i] 
== -1) + continue; + + close (cn->client_fd[i]); + changelog_notify_clear_fd (cn, i); + } +} + +static void +changelog_notifier_cleanup (void *arg) +{ + changelog_notify_t *cn = NULL; + + cn = (changelog_notify_t *) arg; + + changelog_close_client_conn (cn); + + if (cn->socket_fd != -1) + close (cn->socket_fd); + + if (cn->rfd) + close (cn->rfd); + + if (unlink (cn->sockpath)) + gf_log ("", GF_LOG_WARNING, + "could not unlink changelog socket file" + " %s (reason: %s", cn->sockpath, strerror (errno)); +} + +void * +changelog_notifier (void *data) +{ + int i = 0; + int fd = 0; + int max_fd = 0; + int len = 0; + ssize_t readlen = 0; + xlator_t *this = NULL; + changelog_priv_t *priv = NULL; + changelog_notify_t *cn = NULL; + struct sockaddr_un local = {0,}; + char path[PATH_MAX] = {0,}; + char abspath[PATH_MAX] = {0,}; + + char buffer; + fd_set rset; + + priv = (changelog_priv_t *) data; + + cn = &priv->cn; + this = cn->this; + + pthread_cleanup_push (changelog_notifier_cleanup, cn); + + changelog_notifier_init (cn); + + cn->socket_fd = socket (AF_UNIX, SOCK_STREAM, 0); + if (cn->socket_fd < 0) { + gf_log (this->name, GF_LOG_ERROR, + "changelog socket error (reason: %s)", + strerror (errno)); + goto out; + } + + CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick, + cn->sockpath, PATH_MAX); + if (unlink (cn->sockpath) < 0) { + if (errno != ENOENT) { + gf_log (this->name, GF_LOG_ERROR, + "Could not unlink changelog socket file (%s)" + " (reason: %s)", + CHANGELOG_UNIX_SOCK, strerror (errno)); + goto cleanup; + } + } + + local.sun_family = AF_UNIX; + strcpy (local.sun_path, cn->sockpath); + + len = strlen (local.sun_path) + sizeof (local.sun_family); + + /* bind to the unix domain socket */ + if (bind (cn->socket_fd, (struct sockaddr *) &local, len) < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Could not bind to changelog socket (reason: %s)", + strerror (errno)); + goto cleanup; + } + + /* listen for incoming connections */ + if (listen (cn->socket_fd, 
CHANGELOG_MAX_CLIENTS) < 0) { + gf_log (this->name, GF_LOG_ERROR, + "listen() error on changelog socket (reason: %s)", + strerror (errno)); + goto cleanup; + } + + /** + * simple select() on all to-be-read file descriptors. This method + * though old school works pretty well when you have a handfull of + * fd's to be watched (clients). + * + * Future TODO: move this to epoll based notification facility if + * number of clients increase. + */ + for (;;) { + changelog_notify_fill_rset (cn, &rset, &max_fd); + + if (select (max_fd, &rset, NULL, NULL, NULL) < 0) { + gf_log (this->name, GF_LOG_ERROR, + "select() returned -1 (reason: %s)", + strerror (errno)); + sleep (2); + continue; + } + + if (FD_ISSET (cn->socket_fd, &rset)) { + fd = accept (cn->socket_fd, NULL, NULL); + if (fd < 0) { + gf_log (this->name, GF_LOG_ERROR, + "accept error on changelog socket" + " (reason: %s)", strerror (errno)); + } else if (changelog_notify_insert_fd (this, cn, fd)) { + gf_log (this->name, GF_LOG_ERROR, + "hit max client limit"); + } + } + + if (FD_ISSET (cn->rfd, &rset)) { + /** + * read changelog filename and notify all connected + * clients. + */ + readlen = 0; + while (readlen < PATH_MAX) { + len = read (cn->rfd, &path[readlen++], 1); + if (len == -1) { + break; + } + + if (len == 0) { + gf_log (this->name, GF_LOG_ERROR, + "rollover thread sent EOF" + " on pipe - possibly a crash."); + /* be blunt and close all connections */ + pthread_exit(NULL); + } + + if (path[readlen - 1] == '\0') + break; + } + + /* should we close all client connections here too? 
*/ + if (len < 0 || readlen == PATH_MAX) { + gf_log (this->name, GF_LOG_ERROR, + "Could not get pathname from rollover" + " thread or pathname too long"); + goto process_rest; + } + + (void) snprintf (abspath, PATH_MAX, + "%s/%s", priv->changelog_dir, path); + if (changelog_notify_client (cn, abspath, + strlen (abspath) + 1)) + gf_log (this->name, GF_LOG_ERROR, + "could not notify some clients with new" + " changelogs"); + } + + process_rest: + for (i = 0; i < CHANGELOG_MAX_CLIENTS; i++) { + if ( (fd = cn->client_fd[i]) == -1 ) + continue; + + if (FD_ISSET (fd, &rset)) { + /** + * the only data we accept from the client is a + * disconnect. Anything else is treated as bogus + * and is silently discarded (also warned!!!). + */ + if ( (readlen = read (fd, &buffer, 1)) <= 0 ) { + close (fd); + changelog_notify_clear_fd (cn, i); + } else { + /* silently discard data and log */ + gf_log (this->name, GF_LOG_WARNING, + "misbehaving changelog client"); + } + } + } + + } + + cleanup:; + pthread_cleanup_pop (1); + + out: + return NULL; +} diff --git a/xlators/features/changelog/src/changelog-notifier.h b/xlators/features/changelog/src/changelog-notifier.h new file mode 100644 index 000000000..55e728356 --- /dev/null +++ b/xlators/features/changelog/src/changelog-notifier.h @@ -0,0 +1,19 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CHANGELOG_NOTIFIER_H +#define _CHANGELOG_NOTIFIER_H + +#include "changelog-helpers.h" + +void * +changelog_notifier (void *data); + +#endif diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c new file mode 100644 index 000000000..c147f68ca --- /dev/null +++ b/xlators/features/changelog/src/changelog-rt.c @@ -0,0 +1,72 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" +#include "logging.h" + +#include "changelog-rt.h" +#include "changelog-mem-types.h" + +int +changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd) +{ + changelog_rt_t *crt = NULL; + + crt = GF_CALLOC (1, sizeof (*crt), + gf_changelog_mt_rt_t); + if (!crt) + return -1; + + LOCK_INIT (&crt->lock); + + cd->cd_data = crt; + cd->dispatchfn = &changelog_rt_enqueue; + + return 0; +} + +int +changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd) +{ + changelog_rt_t *crt = NULL; + + crt = cd->cd_data; + + LOCK_DESTROY (&crt->lock); + GF_FREE (crt); + + return 0; +} + +int +changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch, + changelog_log_data_t *cld_0, changelog_log_data_t *cld_1) +{ + int ret = 0; + changelog_rt_t *crt = NULL; + + crt = (changelog_rt_t *) cbatch; + + LOCK (&crt->lock); + { + ret = changelog_handle_change (this, priv, cld_0); + if (!ret && cld_1) + ret = changelog_handle_change (this, priv, cld_1); + } + UNLOCK (&crt->lock); + + return ret; +} diff --git a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h new file 
mode 100644 index 000000000..1fc2bbc5b --- /dev/null +++ b/xlators/features/changelog/src/changelog-rt.h @@ -0,0 +1,33 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CHANGELOG_RT_H +#define _CHANGELOG_RT_H + +#include "locking.h" +#include "timer.h" +#include "pthread.h" + +#include "changelog-helpers.h" + +/* unused as of now - may be you would need it later */ +typedef struct changelog_rt { + gf_lock_t lock; +} changelog_rt_t; + +int +changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd); +int +changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd); +int +changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch, + changelog_log_data_t *cld_0, changelog_log_data_t *cld_1); + +#endif /* _CHANGELOG_RT_H */ diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c new file mode 100644 index 000000000..cea0e8c70 --- /dev/null +++ b/xlators/features/changelog/src/changelog.c @@ -0,0 +1,1477 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" +#include "logging.h" +#include "iobuf.h" + +#include "changelog-rt.h" + +#include "changelog-encoders.h" +#include "changelog-mem-types.h" + +#include <pthread.h> + +#include "changelog-notifier.h" + +static struct changelog_bootstrap +cb_bootstrap[] = { + { + .mode = CHANGELOG_MODE_RT, + .ctor = changelog_rt_init, + .dtor = changelog_rt_fini, + }, +}; + +/* Entry operations - TYPE III */ + +/** + * entry operations do not undergo inode version checking. + */ + +/* {{{ */ + +/* rmdir */ + +int32_t +changelog_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + CHANGELOG_STACK_UNWIND (rmdir, frame, op_ret, op_errno, + preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_rmdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, int xflags, dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT_NOCHECK (this, frame->local, + NULL, loc->inode->gfid, 2); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, + entry_fn, entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + + wind: + STACK_WIND (frame, changelog_rmdir_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->rmdir, + loc, xflags, xdata); + return 0; +} + +/* 
unlink */ + +int32_t +changelog_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + CHANGELOG_STACK_UNWIND (unlink, frame, op_ret, op_errno, + preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_unlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, int xflags, dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (xdata, wind); + + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, loc->inode->gfid, 2); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, + entry_fn, entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + + wind: + STACK_WIND (frame, changelog_unlink_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink, + loc, xflags, xdata); + return 0; +} + +/* rename */ + +int32_t +changelog_rename_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *buf, struct iatt *preoldparent, + struct iatt *postoldparent, struct iatt *prenewparent, + struct iatt *postnewparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + 
CHANGELOG_STACK_UNWIND (rename, frame, op_ret, op_errno, + buf, preoldparent, postoldparent, + prenewparent, postnewparent, xdata); + return 0; +} + + +int32_t +changelog_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + /* 3 == fop + oldloc + newloc */ + CHANGELOG_INIT_NOCHECK (this, frame->local, + NULL, oldloc->inode->gfid, 3); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY (co, oldloc->pargfid, oldloc->name, + entry_fn, entry_free_fn, xtra_len, wind); + + co++; + CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name, + entry_fn, entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 3); + + wind: + STACK_WIND (frame, changelog_rename_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->rename, + oldloc, newloc, xdata); + return 0; +} + +/* link */ + +int32_t +changelog_link_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + CHANGELOG_STACK_UNWIND (link, frame, op_ret, op_errno, + inode, buf, preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_link (call_frame_t *frame, + xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + + priv = this->private; + + 
CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (xdata, wind); + + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, oldloc->gfid, 2); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name, + entry_fn, entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + + wind: + STACK_WIND (frame, changelog_link_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->link, + oldloc, newloc, xdata); + return 0; +} + +/* mkdir */ + +int32_t +changelog_mkdir_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + CHANGELOG_STACK_UNWIND (mkdir, frame, op_ret, op_errno, + inode, buf, preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) +{ + int ret = -1; + uuid_t gfid = {0,}; + void *uuid_req = NULL; + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get gfid from dict"); + goto wind; + } + uuid_copy (gfid, uuid_req); + + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, 
frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, + entry_fn, entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + + wind: + STACK_WIND (frame, changelog_mkdir_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->mkdir, + loc, mode, umask, xdata); + return 0; +} + +/* symlink */ + +int32_t +changelog_symlink_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + CHANGELOG_STACK_UNWIND (symlink, frame, op_ret, op_errno, + inode, buf, preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_symlink (call_frame_t *frame, xlator_t *this, + const char *linkname, loc_t *loc, + mode_t umask, dict_t *xdata) +{ + int ret = -1; + size_t xtra_len = 0; + uuid_t gfid = {0,}; + void *uuid_req = NULL; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get gfid from dict"); + goto wind; + } + uuid_copy (gfid, uuid_req); + + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, + entry_fn, entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + + wind: + STACK_WIND (frame, changelog_symlink_cbk, + 
FIRST_CHILD (this), FIRST_CHILD (this)->fops->symlink, + linkname, loc, umask, xdata); + return 0; +} + +/* mknod */ + +int32_t +changelog_mknod_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + CHANGELOG_STACK_UNWIND (mknod, frame, op_ret, op_errno, + inode, buf, preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_mknod (call_frame_t *frame, + xlator_t *this, loc_t *loc, + mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) +{ + int ret = -1; + uuid_t gfid = {0,}; + void *uuid_req = NULL; + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get gfid from dict"); + goto wind; + } + uuid_copy (gfid, uuid_req); + + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, + entry_fn, entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + + wind: + STACK_WIND (frame, changelog_mknod_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod, + loc, mode, dev, umask, xdata); + return 0; +} + +/* creat */ + +int32_t +changelog_create_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + fd_t *fd, inode_t *inode, struct iatt 
*buf, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + CHANGELOG_STACK_UNWIND (create, frame, + op_ret, op_errno, fd, inode, + buf, preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, + mode_t umask, fd_t *fd, dict_t *xdata) +{ + int ret = -1; + uuid_t gfid = {0,}; + void *uuid_req = NULL; + changelog_opt_t *co = NULL; + changelog_priv_t *priv = NULL; + size_t xtra_len = 0; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get gfid from dict"); + goto wind; + } + uuid_copy (gfid, uuid_req); + + /* init with two extra records */ + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2); + if (!frame->local) + goto wind; + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, + entry_fn, entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + + wind: + STACK_WIND (frame, changelog_create_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, + loc, flags, mode, umask, fd, xdata); + return 0; +} + +/* }}} */ + + +/* Metadata modification fops - TYPE II */ + +/* {{{ */ + +/* {f}setattr */ + +int32_t +changelog_fsetattr_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preop_stbuf, + struct iatt *postop_stbuf, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t 
*local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + + unwind: + CHANGELOG_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, + preop_stbuf, postop_stbuf, xdata); + + return 0; + + +} + +int32_t +changelog_fsetattr (call_frame_t *frame, + xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT (this, frame->local, + fd->inode, fd->inode->gfid, 0); + + wind: + STACK_WIND (frame, changelog_fsetattr_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr, + fd, stbuf, valid, xdata); + return 0; + + +} + +int32_t +changelog_setattr_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preop_stbuf, + struct iatt *postop_stbuf, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + + unwind: + CHANGELOG_STACK_UNWIND (setattr, frame, op_ret, op_errno, + preop_stbuf, postop_stbuf, xdata); + + return 0; +} + +int32_t +changelog_setattr (call_frame_t *frame, + xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT (this, frame->local, + loc->inode, loc->inode->gfid, 0); + + wind: + STACK_WIND (frame, changelog_setattr_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->setattr, + loc, stbuf, valid, xdata); + return 0; +} + +/* {f}removexattr */ + +int32_t +changelog_fremovexattr_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, + int32_t 
op_ret, int32_t op_errno, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + + unwind: + CHANGELOG_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +int32_t +changelog_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT (this, frame->local, + fd->inode, fd->inode->gfid, 0); + + wind: + STACK_WIND (frame, changelog_fremovexattr_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->fremovexattr, + fd, name, xdata); + return 0; +} + +int32_t +changelog_removexattr_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + + unwind: + CHANGELOG_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +int32_t +changelog_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT (this, frame->local, + loc->inode, loc->inode->gfid, 0); + + wind: + STACK_WIND (frame, changelog_removexattr_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->removexattr, + loc, name, xdata); + return 0; +} + +/* {f}setxattr */ + +int32_t +changelog_setxattr_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + 
changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + + unwind: + CHANGELOG_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +int32_t +changelog_setxattr (call_frame_t *frame, + xlator_t *this, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT (this, frame->local, + loc->inode, loc->inode->gfid, 0); + + wind: + STACK_WIND (frame, changelog_setxattr_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->setxattr, + loc, dict, flags, xdata); + return 0; +} + +int32_t +changelog_fsetxattr_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + + unwind: + CHANGELOG_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +int32_t +changelog_fsetxattr (call_frame_t *frame, + xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT (this, frame->local, + fd->inode, fd->inode->gfid, 0); + + wind: + STACK_WIND (frame, changelog_fsetxattr_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, + fd, dict, flags, xdata); + return 0; +} + +/* }}} */ + + +/* Data modification fops - TYPE I */ + +/* {{{ */ + +/* {f}truncate() */ + +int32_t +changelog_truncate_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, int32_t op_ret, + int32_t 
op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_DATA); + + unwind: + CHANGELOG_STACK_UNWIND (truncate, frame, + op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +changelog_truncate (call_frame_t *frame, + xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT (this, frame->local, + loc->inode, loc->inode->gfid, 0); + + wind: + STACK_WIND (frame, changelog_truncate_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate, + loc, offset, xdata); + return 0; +} + +int32_t +changelog_ftruncate_cbk (call_frame_t *frame, + void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_DATA); + + unwind: + CHANGELOG_STACK_UNWIND (ftruncate, frame, + op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +changelog_ftruncate (call_frame_t *frame, + xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT (this, frame->local, + fd->inode, fd->inode->gfid, 0); + + wind: + STACK_WIND (frame, changelog_ftruncate_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate, + fd, offset, xdata); + return 0; +} + +/* writev() */ + +int32_t +changelog_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + 
int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO (priv, ((op_ret <= 0) || !local), unwind); + + changelog_update (this, priv, local, CHANGELOG_TYPE_DATA); + + unwind: + CHANGELOG_STACK_UNWIND (writev, frame, + op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +changelog_writev (call_frame_t *frame, + xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + CHANGELOG_INIT (this, frame->local, + fd->inode, fd->inode->gfid, 0); + + wind: + STACK_WIND (frame, changelog_writev_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->writev, fd, vector, + count, offset, flags, iobref, xdata); + return 0; +} + +/* }}} */ + +/** + * The + * - @init () + * - @fini () + * - @reconfigure () + * ... and helper routines + */ + +/** + * needed if there are more operation modes in the future. 
+ */ +static void +changelog_assign_opmode (changelog_priv_t *priv, char *mode) +{ + if ( strncmp (mode, "realtime", 8) == 0 ) { + priv->op_mode = CHANGELOG_MODE_RT; + } +} + +static void +changelog_assign_encoding (changelog_priv_t *priv, char *enc) +{ + if ( strncmp (enc, "binary", 6) == 0 ) { + priv->encode_mode = CHANGELOG_ENCODE_BINARY; + } else if ( strncmp (enc, "ascii", 5) == 0 ) { + priv->encode_mode = CHANGELOG_ENCODE_ASCII; + } +} + +/* cleanup any helper threads that are running */ +static void +changelog_cleanup_helper_threads (xlator_t *this, changelog_priv_t *priv) +{ + if (priv->cr.rollover_th) { + changelog_thread_cleanup (this, priv->cr.rollover_th); + priv->cr.rollover_th = 0; + } + + if (priv->cf.fsync_th) { + changelog_thread_cleanup (this, priv->cf.fsync_th); + priv->cf.fsync_th = 0; + } +} + +/* spawn helper thread; cleaning up in case of errors */ +static int +changelog_spawn_helper_threads (xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + + priv->cr.this = this; + ret = gf_thread_create (&priv->cr.rollover_th, + NULL, changelog_rollover, priv); + if (ret) + goto out; + + if (priv->fsync_interval) { + priv->cf.this = this; + ret = gf_thread_create (&priv->cf.fsync_th, + NULL, changelog_fsync_thread, priv); + } + + if (ret) + changelog_cleanup_helper_threads (this, priv); + + out: + return ret; +} + +/* cleanup the notifier thread */ +static int +changelog_cleanup_notifier (xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + + if (priv->cn.notify_th) { + changelog_thread_cleanup (this, priv->cn.notify_th); + priv->cn.notify_th = 0; + + ret = close (priv->wfd); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "error closing writer end of notifier pipe" + " (reason: %s)", strerror (errno)); + } + + return ret; +} + +/* spawn the notifier thread - nop if already running */ +static int +changelog_spawn_notifier (xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + int flags = 0; + int pipe_fd[2] = {0, 0}; + + if 
(priv->cn.notify_th) + goto out; /* notifier thread already running */ + + ret = pipe (pipe_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Cannot create pipe (reason: %s)", strerror (errno)); + goto out; + } + + /* writer is non-blocking */ + flags = fcntl (pipe_fd[1], F_GETFL); + flags |= O_NONBLOCK; + + ret = fcntl (pipe_fd[1], F_SETFL, flags); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set O_NONBLOCK flag"); + goto out; + } + + priv->wfd = pipe_fd[1]; + + priv->cn.this = this; + priv->cn.rfd = pipe_fd[0]; + + ret = gf_thread_create (&priv->cn.notify_th, + NULL, changelog_notifier, priv); + + out: + return ret; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_changelog_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_WARNING, "Memory accounting" + " init failed"); + return ret; + } + + return ret; +} + +static int +changelog_init (xlator_t *this, changelog_priv_t *priv) +{ + int i = 0; + int ret = -1; + struct timeval tv = {0,}; + changelog_log_data_t cld = {0,}; + + ret = gettimeofday (&tv, NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "gettimeofday() failure"); + goto out; + } + + priv->slice.tv_start = tv; + + priv->maps[CHANGELOG_TYPE_DATA] = "D "; + priv->maps[CHANGELOG_TYPE_METADATA] = "M "; + priv->maps[CHANGELOG_TYPE_ENTRY] = "E "; + + for (; i < CHANGELOG_MAX_TYPE; i++) { + /* start with version 1 */ + priv->slice.changelog_version[i] = 1; + } + + if (!priv->active) + return ret; + + /* spawn the notifier thread */ + ret = changelog_spawn_notifier (this, priv); + if (ret) + goto out; + + /** + * start with a fresh changelog file every time. this is done + * in case there was an encoding change. so... things are kept + * simple here. 
+ */ + ret = changelog_fill_rollover_data (&cld, _gf_false); + if (ret) + goto out; + + LOCK (&priv->lock); + { + ret = changelog_inject_single_event (this, priv, &cld); + } + UNLOCK (&priv->lock); + + /* ... and finally spawn the helpers threads */ + ret = changelog_spawn_helper_threads (this, priv); + + out: + return ret; +} + +int +reconfigure (xlator_t *this, dict_t *options) +{ + int ret = 0; + char *tmp = NULL; + changelog_priv_t *priv = NULL; + gf_boolean_t active_earlier = _gf_true; + gf_boolean_t active_now = _gf_true; + changelog_time_slice_t *slice = NULL; + changelog_log_data_t cld = {0,}; + + priv = this->private; + if (!priv) + goto out; + + ret = -1; + active_earlier = priv->active; + + /* first stop the rollover and the fsync thread */ + changelog_cleanup_helper_threads (this, priv); + + GF_OPTION_RECONF ("changelog-dir", tmp, options, str, out); + if (!tmp) { + gf_log (this->name, GF_LOG_ERROR, + "\"changelog-dir\" option is not set"); + goto out; + } + + GF_FREE (priv->changelog_dir); + priv->changelog_dir = gf_strdup (tmp); + if (!priv->changelog_dir) + goto out; + + ret = mkdir_p (priv->changelog_dir, 0600, _gf_true); + if (ret) + goto out; + + GF_OPTION_RECONF ("changelog", active_now, options, bool, out); + + /** + * changelog_handle_change() handles changes that could possibly + * have been submit changes before changelog deactivation. 
+ */ + if (!active_now) + priv->active = _gf_false; + + GF_OPTION_RECONF ("op-mode", tmp, options, str, out); + changelog_assign_opmode (priv, tmp); + + tmp = NULL; + + GF_OPTION_RECONF ("encoding", tmp, options, str, out); + changelog_assign_encoding (priv, tmp); + + GF_OPTION_RECONF ("rollover-time", + priv->rollover_time, options, int32, out); + GF_OPTION_RECONF ("fsync-interval", + priv->fsync_interval, options, int32, out); + + if (active_now || active_earlier) { + ret = changelog_fill_rollover_data (&cld, !active_now); + if (ret) + goto out; + + slice = &priv->slice; + + LOCK (&priv->lock); + { + ret = changelog_inject_single_event (this, priv, &cld); + if (!ret && active_now) + SLICE_VERSION_UPDATE (slice); + } + UNLOCK (&priv->lock); + + if (ret) + goto out; + + if (active_now) { + ret = changelog_spawn_notifier (this, priv); + if (!ret) + ret = changelog_spawn_helper_threads (this, + priv); + } else + ret = changelog_cleanup_notifier (this, priv); + } + + out: + if (ret) { + ret = changelog_cleanup_notifier (this, priv); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "changelog reconfigured"); + if (active_now) + priv->active = _gf_true; + } + + return ret; +} + +int32_t +init (xlator_t *this) +{ + int ret = -1; + char *tmp = NULL; + changelog_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("changelog", this, out); + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "translator needs a single subvolume"); + goto out; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_ERROR, + "dangling volume. 
please check volfile"); + goto out; + } + + priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t); + if (!priv) + goto out; + + this->local_pool = mem_pool_new (changelog_local_t, 64); + if (!this->local_pool) { + gf_log (this->name, GF_LOG_ERROR, + "failed to create local memory pool"); + goto out; + } + + LOCK_INIT (&priv->lock); + + GF_OPTION_INIT ("changelog-brick", tmp, str, out); + if (!tmp) { + gf_log (this->name, GF_LOG_ERROR, + "\"changelog-brick\" option is not set"); + goto out; + } + + priv->changelog_brick = gf_strdup (tmp); + if (!priv->changelog_brick) + goto out; + tmp = NULL; + + GF_OPTION_INIT ("changelog-dir", tmp, str, out); + if (!tmp) { + gf_log (this->name, GF_LOG_ERROR, + "\"changelog-dir\" option is not set"); + goto out; + } + + priv->changelog_dir = gf_strdup (tmp); + if (!priv->changelog_dir) + goto out; + tmp = NULL; + + /** + * create the directory even if change-logging would be inactive + * so that consumers can _look_ into it (finding nothing...) + */ + ret = mkdir_p (priv->changelog_dir, 0600, _gf_true); + if (ret) + goto out; + + GF_OPTION_INIT ("changelog", priv->active, bool, out); + + GF_OPTION_INIT ("op-mode", tmp, str, out); + changelog_assign_opmode (priv, tmp); + + tmp = NULL; + + GF_OPTION_INIT ("encoding", tmp, str, out); + changelog_assign_encoding (priv, tmp); + + GF_OPTION_INIT ("rollover-time", priv->rollover_time, int32, out); + + GF_OPTION_INIT ("fsync-interval", priv->fsync_interval, int32, out); + + changelog_encode_change(priv); + + GF_ASSERT (cb_bootstrap[priv->op_mode].mode == priv->op_mode); + priv->cb = &cb_bootstrap[priv->op_mode]; + + /* ... 
now bootstrap the logger */ + ret = priv->cb->ctor (this, &priv->cd); + if (ret) + goto out; + + priv->changelog_fd = -1; + ret = changelog_init (this, priv); + if (ret) + goto out; + + gf_log (this->name, GF_LOG_DEBUG, "changelog translator loaded"); + + out: + if (ret) { + if (this->local_pool) + mem_pool_destroy (this->local_pool); + if (priv->cb) { + ret = priv->cb->dtor (this, &priv->cd); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "error in cleanup during init()"); + } + GF_FREE (priv->changelog_brick); + GF_FREE (priv->changelog_dir); + GF_FREE (priv); + this->private = NULL; + } else + this->private = priv; + + return ret; +} + +void +fini (xlator_t *this) +{ + int ret = -1; + changelog_priv_t *priv = NULL; + + priv = this->private; + + if (priv) { + ret = priv->cb->dtor (this, &priv->cd); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "error in fini"); + mem_pool_destroy (this->local_pool); + GF_FREE (priv->changelog_brick); + GF_FREE (priv->changelog_dir); + GF_FREE (priv); + } + + this->private = NULL; + + return; +} + +struct xlator_fops fops = { + .mknod = changelog_mknod, + .mkdir = changelog_mkdir, + .create = changelog_create, + .symlink = changelog_symlink, + .writev = changelog_writev, + .truncate = changelog_truncate, + .ftruncate = changelog_ftruncate, + .link = changelog_link, + .rename = changelog_rename, + .unlink = changelog_unlink, + .rmdir = changelog_rmdir, + .setattr = changelog_setattr, + .fsetattr = changelog_fsetattr, + .setxattr = changelog_setxattr, + .fsetxattr = changelog_fsetxattr, + .removexattr = changelog_removexattr, + .fremovexattr = changelog_fremovexattr, +}; + +struct xlator_cbks cbks = { + .forget = changelog_forget, +}; + +struct volume_options options[] = { + {.key = {"changelog"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable change-logging" + }, + {.key = {"changelog-brick"}, + .type = GF_OPTION_TYPE_PATH, + .description = "brick path to generate unique socket file 
name." + " should be the export directory of the volume strictly." + }, + {.key = {"changelog-dir"}, + .type = GF_OPTION_TYPE_PATH, + .description = "directory for the changelog files" + }, + {.key = {"op-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "realtime", + .value = {"realtime"}, + .description = "operation mode - futuristic operation modes" + }, + {.key = {"encoding"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "ascii", + .value = {"binary", "ascii"}, + .description = "encoding type for changelogs" + }, + {.key = {"rollover-time"}, + .default_value = "60", + .type = GF_OPTION_TYPE_TIME, + .description = "time to switch to a new changelog file (in seconds)" + }, + {.key = {"fsync-interval"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = "0", + .description = "do not open CHANGELOG file with O_SYNC mode." + " instead perform fsync() at specified intervals" + }, + {.key = {NULL} + }, +}; diff --git a/xlators/features/compress/Makefile.am b/xlators/features/compress/Makefile.am new file mode 100644 index 000000000..a985f42a8 --- /dev/null +++ b/xlators/features/compress/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/compress/src/Makefile.am b/xlators/features/compress/src/Makefile.am new file mode 100644 index 000000000..4a64b52a9 --- /dev/null +++ b/xlators/features/compress/src/Makefile.am @@ -0,0 +1,17 @@ +xlator_LTLIBRARIES = cdc.la + +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +noinst_HEADERS = cdc.h cdc-mem-types.h + +cdc_la_LDFLAGS = -module -avoidversion $(LIBZ_LIBS) + +cdc_la_SOURCES = cdc.c cdc-helper.c +cdc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ +-shared -nostartfiles $(LIBZ_CFLAGS) + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/compress/src/cdc-helper.c 
b/xlators/features/compress/src/cdc-helper.c new file mode 100644 index 000000000..54432ff45 --- /dev/null +++ b/xlators/features/compress/src/cdc-helper.c @@ -0,0 +1,547 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" + +#include "cdc.h" +#include "cdc-mem-types.h" + +#ifdef HAVE_LIB_Z +#include "zlib.h" +#endif + +#ifdef HAVE_LIB_Z +/* gzip header looks something like this + * (RFC 1950) + * + * +---+---+---+---+---+---+---+---+---+---+ + * |ID1|ID2|CM |FLG| MTIME |XFL|OS | + * +---+---+---+---+---+---+---+---+---+---+ + * + * Data is usually sent without this header i.e + * Data sent = <compressed-data> + trailer(8) + * The trailer contains the checksum. + * + * gzip_header is added only during debugging. 
+ * Refer to the function cdc_dump_iovec_to_disk + */ +static const char gzip_header[10] = + { + '\037', '\213', Z_DEFLATED, 0, + 0, 0, 0, 0, + 0, GF_CDC_OS_ID + }; + +static int32_t +cdc_next_iovec (xlator_t *this, cdc_info_t *ci) +{ + int ret = -1; + + ci->ncount++; + /* check for iovec overflow -- should not happen */ + if (ci->ncount == MAX_IOVEC) { + gf_log (this->name, GF_LOG_ERROR, + "Zlib output buffer overflow" + " ->ncount (%d) | ->MAX_IOVEC (%d)", + ci->ncount, MAX_IOVEC); + goto out; + } + + ret = 0; + + out: + return ret; +} + +static void +cdc_put_long (unsigned char *string, unsigned long x) +{ + string[0] = (unsigned char) (x & 0xff); + string[1] = (unsigned char) ((x & 0xff00) >> 8); + string[2] = (unsigned char) ((x & 0xff0000) >> 16); + string[3] = (unsigned char) ((x & 0xff000000) >> 24); +} + +static unsigned long +cdc_get_long (unsigned char *buf) +{ + return ((unsigned long) buf[0]) + | (((unsigned long) buf[1]) << 8) + | (((unsigned long) buf[2]) << 16) + | (((unsigned long) buf[3]) << 24); +} + +static int32_t +cdc_init_gzip_trailer (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci) +{ + int ret = -1; + char *buf = NULL; + + ret = cdc_next_iovec (this, ci); + if (ret) + goto out; + + buf = CURR_VEC(ci).iov_base = + (char *) GF_CALLOC (1, GF_CDC_VALIDATION_SIZE, + gf_cdc_mt_gzip_trailer_t); + + if (!CURR_VEC(ci).iov_base) + goto out; + + CURR_VEC(ci).iov_len = GF_CDC_VALIDATION_SIZE; + + cdc_put_long ((unsigned char *)&buf[0], ci->crc); + cdc_put_long ((unsigned char *)&buf[4], ci->stream.total_in); + + ret = 0; + + out: + return ret; +} + +static int32_t +cdc_alloc_iobuf_and_init_vec (xlator_t *this, + cdc_priv_t *priv, cdc_info_t *ci, + int size) +{ + int ret = -1; + int alloc_len = 0; + struct iobuf *iobuf = NULL; + + ret = cdc_next_iovec (this, ci); + if (ret) + goto out; + + alloc_len = size ? 
size : ci->buffer_size; + + iobuf = iobuf_get2 (this->ctx->iobuf_pool, alloc_len); + if (!iobuf) + goto out; + + ret = iobref_add (ci->iobref, iobuf); + if (ret) + goto out; + + /* Initialize this iovec */ + CURR_VEC(ci).iov_base = iobuf->ptr; + CURR_VEC(ci).iov_len = alloc_len; + + ret = 0; + + out: + return ret; +} + +static void +cdc_init_zlib_output_stream (cdc_priv_t *priv, cdc_info_t *ci, int size) +{ + ci->stream.next_out = (unsigned char *) CURR_VEC(ci).iov_base; + ci->stream.avail_out = size ? size : ci->buffer_size; +} + +/* This routine is for testing and debugging only. + * Data written = header(10) + <compressed-data> + trailer(8) + * So each gzip dump file is at least 18 bytes in size. + */ +void +cdc_dump_iovec_to_disk (xlator_t *this, cdc_info_t *ci, const char *file) +{ + int i = 0; + int fd = 0; + size_t writen = 0; + size_t total_writen = 0; + + fd = open (file, O_WRONLY|O_CREAT|O_TRUNC, 0777 ); + if (fd < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Cannot open file: %s", file); + return; + } + + writen = write (fd, (char *) gzip_header, 10); + total_writen += writen; + for (i = 0; i < ci->ncount; i++) { + writen = write (fd, (char *) ci->vec[i].iov_base, ci->vec[i].iov_len); + total_writen += writen; + } + + gf_log (this->name, GF_LOG_DEBUG, + "dump'd %zu bytes to %s", total_writen, GF_CDC_DEBUG_DUMP_FILE ); + + close (fd); +} + +static int32_t +cdc_flush_libz_buffer (cdc_priv_t *priv, xlator_t *this, cdc_info_t *ci, + int (*libz_func)(z_streamp, int), + int flush) +{ + int32_t ret = Z_OK; + int done = 0; + unsigned int deflate_len = 0; + + for (;;) { + deflate_len = ci->buffer_size - ci->stream.avail_out; + + if (deflate_len != 0) { + CURR_VEC(ci).iov_len = deflate_len; + + ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0); + if (ret) { + ret = Z_MEM_ERROR; + break; + } + + /* Re-position Zlib output buffer */ + cdc_init_zlib_output_stream (priv, ci, 0); + } + + if (done) { + ci->ncount--; + break; + } + + ret = libz_func (&ci->stream, 
flush); + + if (ret == Z_BUF_ERROR) { + ret = Z_OK; + ci->ncount--; + break; + } + + done = (ci->stream.avail_out != 0 || ret == Z_STREAM_END); + + if (ret != Z_OK && ret != Z_STREAM_END) + break; + } + + return ret; +} + +static int32_t +do_cdc_compress (struct iovec *vec, xlator_t *this, cdc_priv_t *priv, + cdc_info_t *ci) +{ + int ret = -1; + + /* Initialize defalte */ + ret = deflateInit2 (&ci->stream, priv->cdc_level, Z_DEFLATED, + priv->window_size, priv->mem_level, + Z_DEFAULT_STRATEGY); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "unable to init Zlib (retval: %d)", ret); + goto out; + } + + ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0); + if (ret) + goto out; + + /* setup output buffer */ + cdc_init_zlib_output_stream (priv, ci, 0); + + /* setup input buffer */ + ci->stream.next_in = (unsigned char *) vec->iov_base; + ci->stream.avail_in = vec->iov_len; + + ci->crc = crc32 (ci->crc, (const Bytef *) vec->iov_base, vec->iov_len); + + gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%d buffer_size=%d", + ci->crc, ci->stream.avail_in, ci->buffer_size); + + /* compress !! 
*/ + while (ci->stream.avail_in != 0) { + if (ci->stream.avail_out == 0) { + + CURR_VEC(ci).iov_len = ci->buffer_size; + + ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0); + if (ret) + break; + + /* Re-position Zlib output buffer */ + cdc_init_zlib_output_stream (priv, ci, 0); + } + + ret = deflate (&ci->stream, Z_NO_FLUSH); + if (ret != Z_OK) + break; + } + + out: + return ret; +} + +int32_t +cdc_compress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, + dict_t **xdata) +{ + int ret = -1; + int i = 0; + + ci->iobref = iobref_new (); + if (!ci->iobref) + goto out; + + if (!*xdata) { + *xdata = dict_new (); + if (!*xdata) { + gf_log (this->name, GF_LOG_ERROR, "Cannot allocate xdata" + " dict"); + goto out; + } + } + + /* data */ + for (i = 0; i < ci->count; i++) { + ret = do_cdc_compress (&ci->vector[i], this, priv, ci); + if (ret != Z_OK) + goto deflate_cleanup_out; + } + + /* flush zlib buffer */ + ret = cdc_flush_libz_buffer (priv, this, ci, deflate, Z_FINISH); + if (!(ret == Z_OK || ret == Z_STREAM_END)) { + gf_log (this->name, GF_LOG_ERROR, + "Compression Error: ret (%d)", ret); + ret = -1; + goto deflate_cleanup_out; + } + + /* trailer */ + ret = cdc_init_gzip_trailer (this, priv, ci); + if (ret) + goto deflate_cleanup_out; + + gf_log (this->name, GF_LOG_DEBUG, + "Compressed %ld to %ld bytes", + ci->stream.total_in, ci->stream.total_out); + + ci->nbytes = ci->stream.total_out + GF_CDC_VALIDATION_SIZE; + + /* set deflated canary value for identification */ + ret = dict_set_int32 (*xdata, GF_CDC_DEFLATE_CANARY_VAL, 1); + if (ret) { + /* Send uncompressed data if we can't _tell_ the client + * that deflated data is on it's way. So, we just log + * the faliure and continue as usual. 
+ */ + gf_log (this->name, GF_LOG_ERROR, + "Data deflated, but could not set canary" + " value in dict for identification"); + } + + /* This is to be used in testing */ + if ( priv->debug ) { + cdc_dump_iovec_to_disk (this, ci, GF_CDC_DEBUG_DUMP_FILE ); + } + + deflate_cleanup_out: + (void) deflateEnd(&ci->stream); + + out: + return ret; +} + + +/* deflate content is checked by the presence of a canary + * value in the dict as the key + */ +static int32_t +cdc_check_content_for_deflate (dict_t *xdata) +{ + return dict_get (xdata, GF_CDC_DEFLATE_CANARY_VAL) ? -1 : 0; +} + +static unsigned long +cdc_extract_crc (char *trailer) +{ + return cdc_get_long ((unsigned char *) &trailer[0]); +} + +static unsigned long +cdc_extract_size (char *trailer) +{ + return cdc_get_long ((unsigned char *) &trailer[4]); +} + +static int32_t +cdc_validate_inflate (cdc_info_t *ci, unsigned long crc, + unsigned long len) +{ + return !((crc == ci->crc) + /* inflated length is hidden inside + * Zlib stream struct */ + && (len == ci->stream.total_out)); +} + +static int32_t +do_cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci) +{ + int ret = -1; + int i = 0; + int len = 0; + char *inflte = NULL; + char *trailer = NULL; + struct iovec vec = {0,}; + unsigned long computed_crc = 0; + unsigned long computed_len = 0; + + ret = inflateInit2 (&ci->stream, priv->window_size); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Zlib: Unable to initialize inflate"); + goto out; + } + + vec = THIS_VEC(ci, 0); + + trailer = (char *) (((char *) vec.iov_base) + vec.iov_len + - GF_CDC_VALIDATION_SIZE); + + /* CRC of uncompressed data */ + computed_crc = cdc_extract_crc (trailer); + + /* size of uncomrpessed data */ + computed_len = cdc_extract_size (trailer); + + gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%lu buffer_size=%d", + computed_crc, computed_len, ci->buffer_size); + + inflte = vec.iov_base ; + len = vec.iov_len - GF_CDC_VALIDATION_SIZE; + + /* allocate buffer of the original 
length of the data */ + ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0); + if (ret) + goto out; + + /* setup output buffer */ + cdc_init_zlib_output_stream (priv, ci, 0); + + /* setup input buffer */ + ci->stream.next_in = (unsigned char *) inflte; + ci->stream.avail_in = len; + + while (ci->stream.avail_in != 0) { + if (ci->stream.avail_out == 0) { + CURR_VEC(ci).iov_len = ci->buffer_size; + + ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0); + if (ret) + break; + + /* Re-position Zlib output buffer */ + cdc_init_zlib_output_stream (priv, ci, 0); + } + + ret = inflate (&ci->stream, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR) + break; + } + + /* flush zlib buffer */ + ret = cdc_flush_libz_buffer (priv, this, ci, inflate, Z_SYNC_FLUSH); + if (!(ret == Z_OK || ret == Z_STREAM_END)) { + gf_log (this->name, GF_LOG_ERROR, + "Decompression Error: ret (%d)", ret); + ret = -1; + goto out; + } + + /* compute CRC of the uncompresses data to check for + * correctness */ + + for (i = 0; i < ci->ncount; i++) { + ci->crc = crc32 (ci->crc, + (const Bytef *) ci->vec[i].iov_base, + ci->vec[i].iov_len); + } + + /* validate inflated data */ + ret = cdc_validate_inflate (ci, computed_crc, computed_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Checksum or length mismatched in inflated data"); + } + + out: + return ret; +} + +int32_t +cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, + dict_t *xdata) +{ + int32_t ret = -1; + + /* check for deflate content */ + if (!cdc_check_content_for_deflate (xdata)) { + gf_log (this->name, GF_LOG_DEBUG, + "Content not deflated, passing through ..."); + goto passthrough_out; + } + + ci->iobref = iobref_new (); + if (!ci->iobref) + goto passthrough_out; + + /* do we need to do this? can we assume that one iovec + * will hold per request data everytime? + * + * server/client protocol seems to deal with a single + * iovec even if op_ret > 1M. 
So, it looks ok to + * assume that a single iovec will contain all the + * data (This saves us a lot from finding the trailer + * and the data since it could have been split-up onto + * two adjacent iovec's. + * + * But, in case this translator is loaded above quick-read + * for some reason, then it's entirely possible that we get + * multiple iovec's... + * + * This case (handled below) is not tested. (by loading the + * xlator below quick-read) + */ + + /* @@ I_HOPE_THIS_IS_NEVER_HIT */ + if (ci->count > 1) { + gf_log (this->name, GF_LOG_WARNING, "unable to handle" + " multiple iovecs (%d in number)", ci->count); + goto inflate_cleanup_out; + /* TODO: coallate all iovecs in one */ + } + + ret = do_cdc_decompress (this, priv, ci); + if (ret) + goto inflate_cleanup_out; + + ci->nbytes = ci->stream.total_out; + + gf_log (this->name, GF_LOG_DEBUG, + "Inflated %ld to %ld bytes", + ci->stream.total_in, ci->stream.total_out); + + inflate_cleanup_out: + (void) inflateEnd (&ci->stream); + + passthrough_out: + return ret; +} + +#endif diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h new file mode 100644 index 000000000..efa008059 --- /dev/null +++ b/xlators/features/compress/src/cdc-mem-types.h @@ -0,0 +1,22 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __CDC_MEM_TYPES_H +#define __CDC_MEM_TYPES_H + +#include "mem-types.h" + +enum gf_cdc_mem_types { + gf_cdc_mt_priv_t = gf_common_mt_end + 1, + gf_cdc_mt_vec_t = gf_common_mt_end + 2, + gf_cdc_mt_gzip_trailer_t = gf_common_mt_end + 3, +}; + +#endif diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c new file mode 100644 index 000000000..eb7d87c56 --- /dev/null +++ b/xlators/features/compress/src/cdc.c @@ -0,0 +1,342 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <sys/uio.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" +#include "logging.h" + +#include "cdc.h" +#include "cdc-mem-types.h" + +static void +cdc_cleanup_iobref (cdc_info_t *ci) +{ + assert(ci->iobref != NULL); + iobref_clear (ci->iobref); +} + +int32_t +cdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) +{ + int ret = -1; + cdc_priv_t *priv = NULL; + cdc_info_t ci = {0,}; + + GF_VALIDATE_OR_GOTO ("cdc", this, default_out); + GF_VALIDATE_OR_GOTO (this->name, frame, default_out); + + priv = this->private; + + if (op_ret <= 0) + goto default_out; + + if ( (priv->min_file_size != 0) + && (op_ret < priv->min_file_size) ) + goto default_out; + + ci.count = count; + ci.ibytes = op_ret; + ci.vector = vector; + ci.buf = NULL; + ci.iobref = NULL; + ci.ncount = 0; + ci.crc = 0; + ci.buffer_size = GF_CDC_DEF_BUFFERSIZE; + +/* A readv compresses on the server side and decompresses on the client side + */ + if (priv->op_mode == 
GF_CDC_MODE_SERVER) { + ret = cdc_compress (this, priv, &ci, &xdata); + } else if (priv->op_mode == GF_CDC_MODE_CLIENT) { + ret = cdc_decompress (this, priv, &ci, xdata); + } else { + gf_log (this->name, GF_LOG_ERROR, + "Invalid operation mode (%d)", priv->op_mode); + } + + if (ret) + goto default_out; + + STACK_UNWIND_STRICT (readv, frame, ci.nbytes, op_errno, + ci.vec, ci.ncount, stbuf, iobref, + xdata); + cdc_cleanup_iobref (&ci); + return 0; + + default_out: + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, + vector, count, stbuf, iobref, xdata); + return 0; +} + +int32_t +cdc_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset, uint32_t flags, + dict_t *xdata) +{ + fop_readv_cbk_t cbk = NULL; + +#ifdef HAVE_LIB_Z + cbk = cdc_readv_cbk; +#else + cbk = default_readv_cbk; +#endif + STACK_WIND (frame, cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, + fd, size, offset, flags, xdata); + return 0; +} + +int32_t +cdc_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +cdc_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset, + uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + int ret = -1; + cdc_priv_t *priv = NULL; + cdc_info_t ci = {0,}; + size_t isize = 0; + + GF_VALIDATE_OR_GOTO ("cdc", this, default_out); + GF_VALIDATE_OR_GOTO (this->name, frame, default_out); + + priv = this->private; + + isize = iov_length(vector, count); + + if (isize <= 0) + goto default_out; + + if ( (priv->min_file_size != 0) + && (isize < priv->min_file_size) ) + goto default_out; + + ci.count = count; + ci.ibytes = isize; + ci.vector = vector; + ci.buf = NULL; + ci.iobref = NULL; + ci.ncount = 0; + ci.crc = 0; + ci.buffer_size = 
GF_CDC_DEF_BUFFERSIZE; + +/* A writev compresses on the client side and decompresses on the server side + */ + if (priv->op_mode == GF_CDC_MODE_CLIENT) { + ret = cdc_compress (this, priv, &ci, &xdata); + } else if (priv->op_mode == GF_CDC_MODE_SERVER) { + ret = cdc_decompress (this, priv, &ci, xdata); + } else { + gf_log (this->name, GF_LOG_ERROR, "Invalid operation mode (%d) ", priv->op_mode); + } + + if (ret) + goto default_out; + + STACK_WIND (frame, + cdc_writev_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->writev, + fd, ci.vec, ci.ncount, offset, flags, + iobref, xdata); + + cdc_cleanup_iobref (&ci); + return 0; + + default_out: + STACK_WIND (frame, + cdc_writev_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->writev, + fd, vector, count, offset, flags, + iobref, xdata); + return 0; +} + +int32_t +init (xlator_t *this) +{ + int ret = -1; + char *temp_str = NULL; + cdc_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("cdc", this, err); + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "Need subvolume == 1"); + goto err; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "Dangling volume. 
Check volfile"); + } + + priv = GF_CALLOC (1, sizeof (*priv), gf_cdc_mt_priv_t); + if (!priv) { + goto err; + } + + /* Check if debug mode is turned on */ + GF_OPTION_INIT ("debug", priv->debug, bool, err); + if( priv->debug ) { + gf_log (this->name, GF_LOG_DEBUG, "CDC debug option turned on"); + } + + /* Set Gzip Window Size */ + GF_OPTION_INIT ("window-size", priv->window_size, int32, err); + if ( (priv->window_size > GF_CDC_MAX_WINDOWSIZE) + || (priv->window_size < GF_CDC_DEF_WINDOWSIZE) ) { + gf_log (this->name, GF_LOG_WARNING, + "Invalid gzip window size (%d), using default", + priv->window_size); + priv->window_size = GF_CDC_DEF_WINDOWSIZE; + } + + /* Set Gzip (De)Compression Level */ + GF_OPTION_INIT ("compression-level", priv->cdc_level, int32, err); + if ( ((priv->cdc_level < 1) || (priv->cdc_level > 9)) + && (priv->cdc_level != GF_CDC_DEF_COMPRESSION) ) { + gf_log (this->name, GF_LOG_WARNING, + "Invalid gzip (de)compression level (%d)," + " using default", priv->cdc_level); + priv->cdc_level = GF_CDC_DEF_COMPRESSION; + } + + /* Set Gzip Memory Level */ + GF_OPTION_INIT ("mem-level", priv->mem_level, int32, err); + if ( (priv->mem_level < 1) || (priv->mem_level > 9) ) { + gf_log (this->name, GF_LOG_WARNING, + "Invalid gzip memory level, using the default"); + priv->mem_level = GF_CDC_DEF_MEMLEVEL; + } + + /* Set min file size to enable compression */ + GF_OPTION_INIT ("min-size", priv->min_file_size, int32, err); + + /* Mode of operation - Server/Client */ + ret = dict_get_str (this->options, "mode", &temp_str); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Operation mode not specified !!"); + goto err; + } + + if (GF_CDC_MODE_IS_CLIENT (temp_str)) { + priv->op_mode = GF_CDC_MODE_CLIENT; + } else if (GF_CDC_MODE_IS_SERVER (temp_str)) { + priv->op_mode = GF_CDC_MODE_SERVER; + } else { + gf_log (this->name, GF_LOG_CRITICAL, + "Bogus operation mode (%s) specified", temp_str); + goto err; + } + + this->private = priv; + gf_log (this->name, 
GF_LOG_DEBUG, "CDC xlator loaded in (%s) mode",temp_str); + return 0; + + err: + if (priv) + GF_FREE (priv); + + return -1; +} + +void +fini (xlator_t *this) +{ + cdc_priv_t *priv = this->private; + + if (priv) + GF_FREE (priv); + this->private = NULL; + return; +} + +struct xlator_fops fops = { + .readv = cdc_readv, + .writev = cdc_writev, +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = {"window-size"}, + .default_value = "-15", + .type = GF_OPTION_TYPE_INT, + .description = "Size of the zlib history buffer." + }, + { .key = {"mem-level"}, + .default_value = "8", + .type = GF_OPTION_TYPE_INT, + .description = "Memory allocated for internal compression state.\ + 1 uses minimum memory but is slow and reduces \ + compression ratio; memLevel=9 uses maximum memory \ + for optimal speed. The default value is 8." + }, + { .key = {"compression-level"}, + .default_value = "-1", + .type = GF_OPTION_TYPE_INT, + .description = "Compression levels \ + 0 : no compression, 1 : best speed, \ + 9 : best compression, -1 : default compression " + }, + { .key = {"min-size"}, + .default_value = "0", + .type = GF_OPTION_TYPE_INT, + .description = "Data is compressed only when its size exceeds this." + }, + { .key = {"mode"}, + .value = {"server", "client"}, + .type = GF_OPTION_TYPE_STR, + .description = "Set on the basis of where the xlator is loaded." + }, + { .key = {"debug"}, + .default_value = "false", + .type = GF_OPTION_TYPE_BOOL, + .description = "This is used in testing. Will dump compressed data \ + to disk as a gzip file." + }, + { .key = {NULL} + }, +}; diff --git a/xlators/features/compress/src/cdc.h b/xlators/features/compress/src/cdc.h new file mode 100644 index 000000000..71f4d2317 --- /dev/null +++ b/xlators/features/compress/src/cdc.h @@ -0,0 +1,107 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __CDC_H +#define __CDC_H + +#ifdef HAVE_LIB_Z +#include "zlib.h" +#endif + +#include "xlator.h" + +#ifndef MAX_IOVEC +#define MAX_IOVEC 16 +#endif + +typedef struct cdc_priv { + int window_size; + int mem_level; + int cdc_level; + int min_file_size; + int op_mode; + gf_boolean_t debug; + gf_lock_t lock; +} cdc_priv_t; + +typedef struct cdc_info { + /* input bits */ + int count; + int32_t ibytes; + struct iovec *vector; + struct iatt *buf; + + /* output bits */ + int ncount; + int nbytes; + int buffer_size; + struct iovec vec[MAX_IOVEC]; + struct iobref *iobref; + + /* zlib bits */ +#ifdef HAVE_LIB_Z + z_stream stream; +#endif + unsigned long crc; +} cdc_info_t; + +#define NVEC(ci) (ci->ncount - 1) +#define CURR_VEC(ci) ci->vec[ci->ncount - 1] +#define THIS_VEC(ci, i) ci->vector[i] + +/* Gzip defaults */ +#define GF_CDC_DEF_WINDOWSIZE -15 /* default value */ +#define GF_CDC_MAX_WINDOWSIZE -8 /* max value */ + +#ifdef HAVE_LIB_Z +#define GF_CDC_DEF_COMPRESSION Z_DEFAULT_COMPRESSION +#else +#define GF_CDC_DEF_COMPRESSION -1 +#endif + +#define GF_CDC_DEF_MEMLEVEL 8 +#define GF_CDC_DEF_BUFFERSIZE 262144 // 256K - default compression buffer size + +/* Operation mode + * If xlator is loaded on client, readv decompresses and writev compresses + * If xlator is loaded on server, readv compresses and writev decompresses + */ +#define GF_CDC_MODE_CLIENT 0 +#define GF_CDC_MODE_SERVER 1 + +/* min size of data to do cmpression + * 0 == compress even 1byte + */ +#define GF_CDC_MIN_CHUNK_SIZE 0 + +#define GF_CDC_VALIDATION_SIZE 8 + +#define GF_CDC_OS_ID 0xFF +#define GF_CDC_DEFLATE_CANARY_VAL "deflate" +#define GF_CDC_DEBUG_DUMP_FILE "/tmp/cdcdump.gz" + +#define GF_CDC_MODE_IS_CLIENT(m) \ + (strcmp (m, 
"client") == 0) + +#define GF_CDC_MODE_IS_SERVER(m) \ + (strcmp (m, "server") == 0) + +int32_t +cdc_compress (xlator_t *this, + cdc_priv_t *priv, + cdc_info_t *ci, + dict_t **xdata); +int32_t +cdc_decompress (xlator_t *this, + cdc_priv_t *priv, + cdc_info_t *ci, + dict_t *xdata); + +#endif diff --git a/xlators/features/filter/src/Makefile.am b/xlators/features/filter/src/Makefile.am index cda5f0767..d1fda8b0a 100644 --- a/xlators/features/filter/src/Makefile.am +++ b/xlators/features/filter/src/Makefile.am @@ -1,13 +1,16 @@ xlator_LTLIBRARIES = filter.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features -filter_la_LDFLAGS = -module -avoidversion +filter_la_LDFLAGS = -module -avoid-version filter_la_SOURCES = filter.c filter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +noinst_HEADERS = filter-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/features/filter/src/filter-mem-types.h b/xlators/features/filter/src/filter-mem-types.h new file mode 100644 index 000000000..47a17249b --- /dev/null +++ b/xlators/features/filter/src/filter-mem-types.h @@ -0,0 +1,20 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef __FILTER_MEM_TYPES_H__ +#define __FILTER_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_filter_mem_types_ { + gf_filter_mt_gf_filter = gf_common_mt_end + 1, + gf_filter_mt_end +}; +#endif + diff --git a/xlators/features/filter/src/filter.c b/xlators/features/filter/src/filter.c index 0f5e67fb0..1d4887b71 100644 --- a/xlators/features/filter/src/filter.c +++ b/xlators/features/filter/src/filter.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -30,6 +20,7 @@ #include "logging.h" #include "dict.h" #include "xlator.h" +#include "filter-mem-types.h" #define GF_FILTER_NOBODY_UID 65534 #define GF_FILTER_NOBODY_GID 65534 @@ -1355,6 +1346,25 @@ filter_removexattr (call_frame_t *frame, return 0; } +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_filter_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + int32_t init (xlator_t *this) { @@ -1384,7 +1394,7 @@ init (xlator_t *this) "dangling volume. check volfile "); } - filter = CALLOC (sizeof (*filter), 1); + filter = GF_CALLOC (sizeof (*filter), 1, gf_filter_mt_gf_filter); ERR_ABORT (filter); if (dict_get (this->options, "read-only")) { @@ -1419,11 +1429,11 @@ init (xlator_t *this) option_data = dict_get (this->options, "translate-uid"); value = strtok_r (option_data->data, ",", &tmp_str); while (value) { - dup_str = strdup (value); + dup_str = gf_strdup (value); input_value_str1 = strtok_r (dup_str, "=", &tmp_str1); if (input_value_str1) { /* Check for n-m */ - char *temp_string = strdup (input_value_str1); + char *temp_string = gf_strdup (input_value_str1); input_value_str2 = strtok_r (temp_string, "-", &tmp_str2); if (gf_string2int (input_value_str2, &input_value) != 0) { gf_log (this->name, GF_LOG_ERROR, @@ -1442,7 +1452,7 @@ init (xlator_t *this) } } filter->translate_input_uid[filter->translate_num_uid_entries][1] = input_value; - FREE (temp_string); + GF_FREE (temp_string); output_value_str = strtok_r (NULL, "=", &tmp_str1); if (output_value_str) { if (gf_string2int (output_value_str, &output_value) != 0) { @@ -1471,7 +1481,7 @@ init (xlator_t *this) if (filter->translate_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED) break; value = strtok_r (NULL, ",", &tmp_str); - FREE (dup_str); + GF_FREE (dup_str); } } @@ -1483,11 
+1493,11 @@ init (xlator_t *this) option_data = dict_get (this->options, "translate-gid"); value = strtok_r (option_data->data, ",", &tmp_str); while (value) { - dup_str = strdup (value); + dup_str = gf_strdup (value); input_value_str1 = strtok_r (dup_str, "=", &tmp_str1); if (input_value_str1) { /* Check for n-m */ - char *temp_string = strdup (input_value_str1); + char *temp_string = gf_strdup (input_value_str1); input_value_str2 = strtok_r (temp_string, "-", &tmp_str2); if (gf_string2int (input_value_str2, &input_value) != 0) { gf_log (this->name, GF_LOG_ERROR, @@ -1506,7 +1516,7 @@ init (xlator_t *this) } } filter->translate_input_gid[filter->translate_num_gid_entries][1] = input_value; - FREE (temp_string); + GF_FREE (temp_string); output_value_str = strtok_r (NULL, "=", &tmp_str1); if (output_value_str) { if (gf_string2int (output_value_str, &output_value) != 0) { @@ -1536,7 +1546,7 @@ init (xlator_t *this) if (filter->translate_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED) break; value = strtok_r (NULL, ",", &tmp_str); - FREE (dup_str); + GF_FREE (dup_str); } } @@ -1547,7 +1557,7 @@ init (xlator_t *this) option_data = dict_get (this->options, "filter-uid"); value = strtok_r (option_data->data, ",", &tmp_str); while (value) { - dup_str = strdup (value); + dup_str = gf_strdup (value); /* Check for n-m */ input_value_str1 = strtok_r (dup_str, "-", &tmp_str1); if (gf_string2int (input_value_str1, &input_value) != 0) { @@ -1577,7 +1587,7 @@ init (xlator_t *this) if (filter->filter_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED) break; value = strtok_r (NULL, ",", &tmp_str); - FREE (dup_str); + GF_FREE (dup_str); } filter->partial_filter = 1; } @@ -1589,7 +1599,7 @@ init (xlator_t *this) option_data = dict_get (this->options, "filter-gid"); value = strtok_r (option_data->data, ",", &tmp_str); while (value) { - dup_str = strdup (value); + dup_str = gf_strdup (value); /* Check for n-m */ input_value_str1 = strtok_r (dup_str, "-", &tmp_str1); if (gf_string2int 
(input_value_str1, &input_value) != 0) { @@ -1619,7 +1629,7 @@ init (xlator_t *this) if (filter->filter_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED) break; value = strtok_r (NULL, ",", &tmp_str); - FREE (dup_str); + GF_FREE (dup_str); } gf_log (this->name, GF_LOG_ERROR, "this option is not supported currently.. exiting"); return -1; @@ -1660,7 +1670,7 @@ fini (xlator_t *this) { struct gf_filter *filter = this->private; - FREE (filter); + GF_FREE (filter); return; } @@ -1692,9 +1702,6 @@ struct xlator_fops fops = { .fsetattr = filter_fsetattr, }; -struct xlator_mops mops = { -}; - struct xlator_cbks cbks = { }; diff --git a/xlators/features/gfid-access/Makefile.am b/xlators/features/gfid-access/Makefile.am new file mode 100644 index 000000000..af437a64d --- /dev/null +++ b/xlators/features/gfid-access/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src diff --git a/xlators/features/gfid-access/src/Makefile.am b/xlators/features/gfid-access/src/Makefile.am new file mode 100644 index 000000000..db53affaa --- /dev/null +++ b/xlators/features/gfid-access/src/Makefile.am @@ -0,0 +1,15 @@ +xlator_LTLIBRARIES = gfid-access.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +gfid_access_la_LDFLAGS = -module -avoid-version + +gfid_access_la_SOURCES = gfid-access.c +gfid_access_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = gfid-access.h gfid-access-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/gfid-access/src/gfid-access-mem-types.h b/xlators/features/gfid-access/src/gfid-access-mem-types.h new file mode 100644 index 000000000..168d67b43 --- /dev/null +++ b/xlators/features/gfid-access/src/gfid-access-mem-types.h @@ -0,0 +1,23 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GFID_ACCESS_MEM_TYPES_H +#define _GFID_ACCESS_MEM_TYPES_H + +#include "mem-types.h" + +enum gf_changelog_mem_types { + gf_gfid_access_mt_priv_t = gf_common_mt_end + 1, + gf_gfid_access_mt_gfid_t, + gf_gfid_access_mt_end +}; + +#endif + diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c new file mode 100644 index 000000000..da0ba7e50 --- /dev/null +++ b/xlators/features/gfid-access/src/gfid-access.c @@ -0,0 +1,1172 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "gfid-access.h" +#include "inode.h" +#include "byte-order.h" + + + +void +ga_newfile_args_free (ga_newfile_args_t *args) +{ + if (!args) + goto out; + + GF_FREE (args->bname); + + if (S_ISLNK (args->st_mode) && args->args.symlink.linkpath) { + GF_FREE (args->args.symlink.linkpath); + args->args.symlink.linkpath = NULL; + } + + mem_put (args); +out: + return; +} + + +void +ga_heal_args_free (ga_heal_args_t *args) +{ + if (!args) + goto out; + + GF_FREE (args->bname); + + mem_put (args); +out: + return; +} + + +/* + * Decode the payload of a GF_FUSE_AUX_GFID_NEWFILE setxattr into a + * ga_newfile_args_t taken from priv->newfile_args_pool. + * + * The blob is consumed in this order: a 32-bit uid and a 32-bit gid (each + * converted with ntoh32), sizeof (args->gfid) bytes of gfid, a 32-bit + * st_mode, a NUL-terminated basename, and finally a tail whose fields + * depend on the file type encoded in st_mode. + * + * Returns the filled structure, or NULL if the blob is too short, lacks a + * terminating NUL, has trailing bytes, or an allocation fails. The caller + * releases the result with ga_newfile_args_free(). + */ +ga_newfile_args_t * +ga_newfile_parse_args (xlator_t *this, data_t *data) +{ + ga_newfile_args_t *args = NULL; + ga_private_t *priv = NULL; + int len = 0; + int blob_len = 0; + int min_len = 0; + void *blob = NULL; + + priv = this->private; + + blob = data->data; + blob_len = data->len; + + min_len = sizeof (args->uid) + sizeof (args->gid) + sizeof (args->gfid) + + sizeof (args->st_mode) + 2 + 2; + if (blob_len < min_len) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid length: Total length is less " + "than minimum length."); + goto err; + } + + args = mem_get0 (priv->newfile_args_pool); + if (args == NULL) + goto err; + + args->uid = ntoh32 (*(uint32_t *)blob); + blob += sizeof (uint32_t); + blob_len -= sizeof (uint32_t); + + args->gid = ntoh32 (*(uint32_t *)blob); + blob += sizeof (uint32_t); + blob_len -= sizeof (uint32_t); + + memcpy (args->gfid, blob, sizeof (args->gfid)); + blob += sizeof (args->gfid); + blob_len -= sizeof (args->gfid); + + args->st_mode = ntoh32 (*(uint32_t *)blob); + blob += sizeof (uint32_t); + blob_len -= sizeof (uint32_t); + + /* strnlen() returning the full remaining length means no NUL was found */ + len = strnlen (blob, blob_len); + if (len == blob_len) { + gf_log (this->name, GF_LOG_ERROR, + "gfid: %s. 
No null byte present.", + args->gfid); + goto err; + } + + args->bname = GF_CALLOC (1, (len + 1), gf_common_mt_char); + if (args->bname == NULL) + goto err; + + memcpy (args->bname, blob, (len + 1)); + blob += (len + 1); + blob_len -= (len + 1); + + if (S_ISDIR (args->st_mode)) { + if (blob_len < sizeof (uint32_t)) { + gf_log (this->name, GF_LOG_ERROR, + "gfid: %s. Invalid length", + args->gfid); + goto err; + } + args->args.mkdir.mode = ntoh32 (*(uint32_t *)blob); + blob += sizeof (uint32_t); + blob_len -= sizeof (uint32_t); + + if (blob_len < sizeof (uint32_t)) { + gf_log (this->name, GF_LOG_ERROR, + "gfid: %s. Invalid length", + args->gfid); + goto err; + } + args->args.mkdir.umask = ntoh32 (*(uint32_t *)blob); + blob += sizeof (uint32_t); + blob_len -= sizeof (uint32_t); + if (blob_len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "gfid: %s. Invalid length", + args->gfid); + goto err; + } + } else if (S_ISLNK (args->st_mode)) { + len = strnlen (blob, blob_len); + if (len == blob_len) { + gf_log (this->name, GF_LOG_ERROR, + "gfid: %s. Invalid length", + args->gfid); + goto err; + } + args->args.symlink.linkpath = GF_CALLOC (1, len + 1, + gf_common_mt_char); + if (args->args.symlink.linkpath == NULL) + goto err; + + memcpy (args->args.symlink.linkpath, blob, (len + 1)); + blob += (len + 1); + blob_len -= (len + 1); + } else { + if (blob_len < sizeof (uint32_t)) { + gf_log (this->name, GF_LOG_ERROR, + "gfid: %s. Invalid length", + args->gfid); + goto err; + } + args->args.mknod.mode = ntoh32 (*(uint32_t *)blob); + blob += sizeof (uint32_t); + blob_len -= sizeof (uint32_t); + + if (blob_len < sizeof (uint32_t)) { + gf_log (this->name, GF_LOG_ERROR, + "gfid: %s. Invalid length", + args->gfid); + goto err; + } + args->args.mknod.rdev = ntoh32 (*(uint32_t *)blob); + blob += sizeof (uint32_t); + blob_len -= sizeof (uint32_t); + + if (blob_len < sizeof (uint32_t)) { + gf_log (this->name, GF_LOG_ERROR, + "gfid: %s. 
Invalid length", + args->gfid); + goto err; + } + args->args.mknod.umask = ntoh32 (*(uint32_t *)blob); + blob += sizeof (uint32_t); + blob_len -= sizeof (uint32_t); + } + + if (blob_len) { + gf_log (this->name, GF_LOG_ERROR, + "gfid: %s. Invalid length", + args->gfid); + goto err; + } + + return args; + +err: + if (args) + ga_newfile_args_free (args); + + return NULL; +} + +ga_heal_args_t * +ga_heal_parse_args (xlator_t *this, data_t *data) +{ + ga_heal_args_t *args = NULL; + ga_private_t *priv = NULL; + void *blob = NULL; + int len = 0; + int blob_len = 0; + + blob = data->data; + blob_len = data->len; + + priv = this->private; + + /* bname should at least contain a character */ + if (blob_len < (sizeof (args->gfid) + 2)) + goto err; + + args = mem_get0 (priv->heal_args_pool); + if (!args) + goto err; + + memcpy (args->gfid, blob, sizeof (args->gfid)); + blob += sizeof (args->gfid); + blob_len -= sizeof (args->gfid); + + len = strnlen (blob, blob_len); + if (len == blob_len) + goto err; + + args->bname = GF_CALLOC (1, len + 1, gf_common_mt_char); + if (!args->bname) + goto err; + + memcpy (args->bname, blob, len); + blob_len -= (len + 1); + + if (blob_len) + goto err; + + return args; + +err: + if (args) + ga_heal_args_free (args); + + return NULL; +} + +static int32_t +ga_fill_tmp_loc (loc_t *loc, xlator_t *this, char *gfid, + char *bname, dict_t *xdata, loc_t *new_loc) +{ + int ret = -1; + uint64_t value = 0; + inode_t *parent = NULL; + + parent = loc->inode; + ret = inode_ctx_get (loc->inode, this, &value); + if (!ret) { + parent = (void *)value; + } + + /* parent itself should be looked up */ + uuid_copy (new_loc->pargfid, parent->gfid); + new_loc->parent = inode_ref (parent); + + new_loc->inode = inode_grep (parent->table, parent, bname); + if (!new_loc->inode) + new_loc->inode = inode_new (parent->table); + + loc_path (new_loc, bname); + new_loc->name = basename (new_loc->path); + + /* As GFID would not be set on the entry yet, lets not send entry + gfid in 
the request */ + /*uuid_copy (new_loc->gfid, (const unsigned char *)gfid); */ + + ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16); + if (ret < 0) + goto out; + + ret = 0; + +out: + return ret; +} + + + +static gf_boolean_t +__is_gfid_access_dir (uuid_t gfid) +{ + uuid_t aux_gfid; + + memset (aux_gfid, 0, 16); + aux_gfid[15] = GF_AUX_GFID; + + if (uuid_compare (gfid, aux_gfid) == 0) + return _gf_true; + + return _gf_false; +} + +int32_t +ga_forget (xlator_t *this, inode_t *inode) +{ + int ret = -1; + uint64_t value = 0; + inode_t *tmp_inode = NULL; + + ret = inode_ctx_del (inode, this, &value); + if (ret) + goto out; + + tmp_inode = (void *)value; + inode_unref (tmp_inode); + +out: + return 0; +} + + +static int +ga_heal_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stat, dict_t *dict, + struct iatt *postparent) +{ + call_frame_t *orig_frame = NULL; + + orig_frame = frame->local; + frame->local = NULL; + + /* don't worry about inode linking and other stuff. They'll happen on + * the next lookup. + */ + STACK_DESTROY (frame->root); + + STACK_UNWIND_STRICT (setxattr, orig_frame, op_ret, op_errno, dict); + + return 0; +} + +static int +ga_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + call_frame_t *orig_frame = NULL; + + orig_frame = frame->local; + frame->local = NULL; + + /* don't worry about inode linking and other stuff. They'll happen on + * the next lookup. 
+ */ + STACK_DESTROY (frame->root); + + STACK_UNWIND_STRICT (setxattr, orig_frame, op_ret, op_errno, xdata); + + return 0; +} + +/* + * Create a new entry on behalf of a 'newfile' setxattr: parse the blob in + * 'data', build a temporary loc for the basename under the parent, then wind + * mkdir, symlink or mknod (chosen by st_mode) on a copy of the frame that + * carries the uid/gid taken from the blob. The original frame is kept in + * new_frame->local so that ga_newentry_cbk can unwind the setxattr. + * + * Returns 0 once a fop has been wound. A non-zero return means nothing was + * wound and the caller still has to unwind 'frame'. + * + * NOTE(review): the dict created here when xdata is NULL does not appear to + * be unref'd on any path -- confirm ownership. + */ +int32_t +ga_new_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data, + dict_t *xdata) +{ + int ret = -1; + ga_newfile_args_t *args = NULL; + loc_t tmp_loc = {0,}; + call_frame_t *new_frame = NULL; + mode_t mode = 0; + + args = ga_newfile_parse_args (this, data); + if (!args) + goto out; + + if (!xdata) + xdata = dict_new (); + + ret = ga_fill_tmp_loc (loc, this, args->gfid, + args->bname, xdata, &tmp_loc); + if (ret) + goto out; + + new_frame = copy_frame (frame); + if (!new_frame) { + /* ret is 0 here after ga_fill_tmp_loc(); report the failure + so that the caller unwinds the original frame */ + ret = -1; + goto out; + } + new_frame->local = (void *)frame; + + new_frame->root->uid = args->uid; + new_frame->root->gid = args->gid; + + if (S_ISDIR (args->st_mode)) { + STACK_WIND (new_frame, ga_newentry_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, + &tmp_loc, args->args.mkdir.mode, + args->args.mkdir.umask, xdata); + } else if (S_ISLNK (args->st_mode)) { + STACK_WIND (new_frame, ga_newentry_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, + args->args.symlink.linkpath, + &tmp_loc, 0, xdata); + } else { + /* file type comes from st_mode; from the requested mode keep + only the low 12 bits (07777: permission, set-id and sticky + bits) */ + mode = (S_IFMT & args->st_mode) | + (07777 & args->args.mknod.mode); + + STACK_WIND (new_frame, ga_newentry_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, + &tmp_loc, mode, + args->args.mknod.rdev, args->args.mknod.umask, + xdata); + } + + ret = 0; +out: + ga_newfile_args_free (args); + + return ret; +} + +int32_t +ga_heal_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data, + dict_t *xdata) +{ + int ret = -1; + ga_heal_args_t *args = NULL; + loc_t tmp_loc = {0,}; + call_frame_t *new_frame = NULL; + + args = ga_heal_parse_args (this, data); + if (!args) + goto out; + + if (!xdata) + xdata = dict_new (); + + ret = ga_fill_tmp_loc (loc, this, args->gfid, args->bname, + xdata, &tmp_loc); + if (ret) + goto out; + + new_frame = copy_frame 
(frame); + if (!new_frame) { + /* ret is 0 here after ga_fill_tmp_loc(); report the failure + so that ga_setxattr unwinds the original frame */ + ret = -1; + goto out; + } + new_frame->local = (void *)frame; + + STACK_WIND (new_frame, ga_heal_cbk, FIRST_CHILD (this), + FIRST_CHILD(this)->fops->lookup, + &tmp_loc, xdata); + + ret = 0; +out: + if (args) + ga_heal_args_free (args); + + return ret; +} + +int32_t +ga_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + dict_t *xdata) +{ + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int32_t +ga_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + + data_t *data = NULL; + int op_errno = ENOMEM; + int ret = 0; + inode_t *unref = NULL; + + if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) && + ((loc->parent && + __is_root_gfid (loc->parent->gfid)) || + __is_root_gfid (loc->pargfid))) { + op_errno = EPERM; + goto err; + } + + data = dict_get (dict, GF_FUSE_AUX_GFID_NEWFILE); + if (data) { + ret = ga_new_entry (frame, this, loc, data, xdata); + if (ret) + goto err; + return 0; + } + + data = dict_get (dict, GF_FUSE_AUX_GFID_HEAL); + if (data) { + ret = ga_heal_entry (frame, this, loc, data, xdata); + if (ret) + goto err; + return 0; + } + + //If the inode is a virtual inode change the inode otherwise perform + //the operation on same inode + GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind); + +wind: + STACK_WIND (frame, ga_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + if (unref) + inode_unref (unref); + + return 0; +err: + STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata); + return 0; +} + + +int32_t +ga_virtual_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + int j = 0; + int i = 0; + int ret = 0; + uint64_t temp_ino = 0; + inode_t *cbk_inode = NULL; + inode_t *true_inode = NULL; + uuid_t random_gfid = {0,}; + + 
if (frame->local) + cbk_inode = frame->local; + else + cbk_inode = inode; + + frame->local = NULL; + if (op_ret) + goto unwind; + + if (!IA_ISDIR (buf->ia_type)) + goto unwind; + + /* need to send back a different inode for linking in itable */ + if (cbk_inode == inode) { + /* check if the inode is in the 'itable' or + if its just previously discover()'d inode */ + true_inode = inode_find (inode->table, buf->ia_gfid); + if (!true_inode) { + cbk_inode = inode_new (inode->table); + + if (!cbk_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + /* the inode is not present in itable, ie, the actual + path is not yet looked up. Use the current inode + itself for now */ + inode_ref (inode); + } else { + /* 'inode_ref()' has been done in inode_find() */ + inode = true_inode; + } + + /* remember the real inode in the context of the inode handed + back to the caller; ga_forget() drops this reference */ + ret = inode_ctx_put (cbk_inode, this, (uint64_t)inode); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set the inode ctx with " + "the actual inode"); + if (inode) + inode_unref (inode); + } + inode = NULL; + } + + if (!uuid_is_null (cbk_inode->gfid)) { + /* if the previous linked inode is used, use the + same gfid */ + uuid_copy (random_gfid, cbk_inode->gfid); + } else { + /* replace the buf->ia_gfid to a random gfid + for directory, for files, what we received is fine */ + uuid_generate (random_gfid); + } + + uuid_copy (buf->ia_gfid, random_gfid); + + /* derive ia_ino from the last 8 bytes of the gfid, byte 15 being the + least significant */ + for (i = 15; i > (15 - 8); i--) { + temp_ino += (uint64_t)(buf->ia_gfid[i]) << j; + j += 8; + } + buf->ia_ino = temp_ino; + +unwind: + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, cbk_inode, buf, + xdata, postparent); + + return 0; +} + +int32_t +ga_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + ga_private_t *priv = NULL; + + /* if the entry in question is not 'root', + then follow the normal path */ + if (op_ret || !__is_root_gfid(buf->ia_gfid)) + goto unwind; + + priv = 
this->private; + + /* do we need to copy root stbuf everytime? */ + /* mostly yes, as we want to have the 'stat' info show latest + in every _cbk() */ + + /* keep the reference for root stat buf */ + priv->root_stbuf = *buf; + priv->gfiddir_stbuf = priv->root_stbuf; + priv->gfiddir_stbuf.ia_gfid[15] = GF_AUX_GFID; + priv->gfiddir_stbuf.ia_ino = GF_AUX_GFID; + +unwind: + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, + xdata, postparent); + return 0; +} + +int32_t +ga_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + ga_private_t *priv = NULL; + int ret = -1; + uuid_t tmp_gfid = {0,}; + loc_t tmp_loc = {0,}; + uint64_t value = 0; + inode_t *inode = NULL; + inode_t *true_inode = NULL; + int32_t op_errno = ENOENT; + + /* if its discover(), no need for any action here */ + if (!loc->name) + goto wind; + + /* if its revalidate, and inode is not of type directory, + proceed with 'wind' */ + if (loc->inode && loc->inode->ia_type && + !IA_ISDIR (loc->inode->ia_type)) + goto wind; + + priv = this->private; + + /* need to check if the lookup is on virtual dir */ + if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) && + ((loc->parent && __is_root_gfid (loc->parent->gfid)) || + __is_root_gfid (loc->pargfid))) { + /* this means, the query is on '/.gfid', return the fake stat, + and say success */ + + STACK_UNWIND_STRICT (lookup, frame, 0, 0, loc->inode, + &priv->gfiddir_stbuf, xdata, + &priv->root_stbuf); + return 0; + } + + /* now, check if the lookup() is on an existing entry, + but on gfid-path */ + if (!((loc->parent && __is_gfid_access_dir (loc->parent->gfid)) || + __is_gfid_access_dir (loc->pargfid))) + goto wind; + + /* make sure the 'basename' is actually a 'canonical-gfid', + otherwise, return error */ + ret = uuid_parse (loc->name, tmp_gfid); + if (ret) + goto err; + + /* if its fresh lookup, go ahead and send it down, if not, + for directory, we need indirection to actual dir inode */ + if (!(loc->inode && 
loc->inode->ia_type)) + goto discover; + + /* revalidate on directory */ + ret = inode_ctx_get (loc->inode, this, &value); + if (ret) + goto err; + + inode = (void *)value; + + /* valid inode, already looked up, work on that */ + if (inode->ia_type) + goto discover; + + /* check if the inode is in the 'itable' or + if its just previously discover()'d inode */ + true_inode = inode_find (loc->inode->table, tmp_gfid); + if (true_inode) { + /* time do another lookup and update the context + with proper inode */ + op_errno = ESTALE; + goto err; + } + +discover: + /* for the virtual entries, we don't need to send 'gfid-req' key, as + for these entries, we don't want to 'set' a new gfid */ + if (xdata) + dict_del (xdata, "gfid-req"); + + uuid_copy (tmp_loc.gfid, tmp_gfid); + + /* if revalidate, then we need to have the proper reference */ + if (inode) { + tmp_loc.inode = inode_ref (inode); + frame->local = loc->inode; + } else { + tmp_loc.inode = inode_ref (loc->inode); + } + + STACK_WIND (frame, ga_virtual_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata); + + inode_unref (tmp_loc.inode); + + return 0; + +wind: + /* used for all the normal lookup path */ + STACK_WIND (frame, ga_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + + return 0; + +err: + STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, loc->inode, + &priv->gfiddir_stbuf, xdata, + &priv->root_stbuf); + return 0; +} + +int +ga_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + int op_errno = 0; + + GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + + STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, + xdata); + + return 0; + +err: + STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, loc->inode, + NULL, NULL, NULL, xdata); + return 0; +} + + +int +ga_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + mode_t mode, mode_t 
umask, fd_t *fd, dict_t *xdata) +{ + int op_errno = 0; + + GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + + STACK_WIND (frame, default_create_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, + loc, flags, mode, umask, fd, xdata); + return 0; +err: + STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, + NULL, NULL, NULL, NULL, xdata); + + return 0; + +} + +int +ga_symlink (call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + int op_errno = 0; + + GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + + STACK_WIND (frame, default_symlink_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, + linkname, loc, umask, xdata); + return 0; +err: + STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, + NULL, NULL, NULL, xdata); + + return 0; +} + +int +ga_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + int op_errno = 0; + + GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + + STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, + umask, xdata); + + return 0; +err: + STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, + NULL, NULL, NULL, xdata); + + return 0; +} + +int +ga_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, + dict_t *xdata) +{ + int op_errno = 0; + inode_t *unref = NULL; + + GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + + GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind); + +wind: + STACK_WIND (frame, default_rmdir_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, + loc, flag, xdata); + if (unref) + inode_unref (unref); + + return 0; +err: + STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, + NULL, xdata); + + return 0; +} + +int +ga_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag, + dict_t *xdata) +{ + int op_errno = 0; + inode_t *unref = NULL; + + GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, 
err); + + GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind); + +wind: + STACK_WIND (frame, default_unlink_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, + loc, xflag, xdata); + + if (unref) + inode_unref (unref); + + return 0; +err: + STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, + NULL, xdata); + + return 0; +} + +int +ga_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ + int op_errno = 0; + inode_t *oldloc_unref = NULL; + inode_t *newloc_unref = NULL; + + GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err); + + GFID_ACCESS_GET_VALID_DIR_INODE (this, oldloc, oldloc_unref, + handle_newloc); + +handle_newloc: + GFID_ACCESS_GET_VALID_DIR_INODE (this, newloc, newloc_unref, wind); + +wind: + STACK_WIND (frame, default_rename_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, + oldloc, newloc, xdata); + + if (oldloc_unref) + inode_unref (oldloc_unref); + + if (newloc_unref) + inode_unref (newloc_unref); + + return 0; +err: + STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL, + NULL, NULL, NULL, NULL, xdata); + + return 0; +} + + +int +ga_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ + int op_errno = 0; + inode_t *oldloc_unref = NULL; + inode_t *newloc_unref = NULL; + + GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err); + + GFID_ACCESS_GET_VALID_DIR_INODE (this, oldloc, oldloc_unref, + handle_newloc); + +handle_newloc: + GFID_ACCESS_GET_VALID_DIR_INODE (this, newloc, newloc_unref, wind); + +wind: + STACK_WIND (frame, default_link_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + + if (oldloc_unref) + inode_unref (oldloc_unref); + + if (newloc_unref) + inode_unref (newloc_unref); + + return 0; +err: + STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, + NULL, NULL, NULL, xdata); + + return 0; +} 
+ +int32_t +ga_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd, dict_t *xdata) +{ + int op_errno = 0; + + GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + + /* also check if the loc->inode itself is virtual + inode, if yes, return with failure, mainly because we + can't handle all the readdirp and other things on it. */ + if (inode_ctx_get (loc->inode, this, NULL) == 0) { + op_errno = ENOTSUP; + goto err; + } + + STACK_WIND (frame, default_opendir_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir, + loc, fd, xdata); + return 0; +err: + STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata); + + return 0; +} + +int32_t +ga_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + inode_t *unref = NULL; + + GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind); + +wind: + STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + + if (unref) + inode_unref (unref); + + return 0; +} + +int32_t +ga_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + inode_t *unref = NULL; + + GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind); + +wind: + STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + if (unref) + inode_unref (unref); + + return 0; +} + +int32_t +ga_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, + dict_t *xdata) +{ + inode_t *unref = NULL; + + GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind); + +wind: + STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, + xdata); + if (unref) + inode_unref (unref); + + return 0; +} + +int32_t +ga_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + inode_t *unref = NULL; + + GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind); + +wind: + 
STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + if (unref) + inode_unref (unref); + + return 0; +} + + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_gfid_access_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_WARNING, "Memory accounting" + " init failed"); + return ret; + } + + return ret; +} + +int32_t +init (xlator_t *this) +{ + ga_private_t *priv = NULL; + int ret = -1; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "not configured with exactly one child. exiting"); + goto out; + } + + /* This can be the top of graph in certain cases */ + if (!this->parents) { + gf_log (this->name, GF_LOG_DEBUG, + "dangling volume. check volfile "); + } + + /* TODO: define a mem-type structure */ + priv = GF_CALLOC (1, sizeof (*priv), gf_gfid_access_mt_priv_t); + if (!priv) + goto out; + + priv->newfile_args_pool = mem_pool_new (ga_newfile_args_t, 512); + if (!priv->newfile_args_pool) + goto out; + + priv->heal_args_pool = mem_pool_new (ga_heal_args_t, 512); + if (!priv->heal_args_pool) + goto out; + + this->private = priv; + + ret = 0; +out: + if (ret && priv) { + if (priv->newfile_args_pool) + mem_pool_destroy (priv->newfile_args_pool); + GF_FREE (priv); + } + + return ret; +} + +void +fini (xlator_t *this) +{ + ga_private_t *priv = NULL; + priv = this->private; + this->private = NULL; + + if (priv) { + if (priv->newfile_args_pool) + mem_pool_destroy (priv->newfile_args_pool); + if (priv->heal_args_pool) + mem_pool_destroy (priv->heal_args_pool); + GF_FREE (priv); + } + + return; +} + + +struct xlator_fops fops = { + .lookup = ga_lookup, + + /* entry fops */ + .mkdir = ga_mkdir, + .mknod = ga_mknod, + .create = ga_create, + .symlink = ga_symlink, + .link = ga_link, + .unlink = ga_unlink, + .rmdir = ga_rmdir, + .rename = ga_rename, + + /* handle any other 
directory operations here */ + .opendir = ga_opendir, + .stat = ga_stat, + .setattr = ga_setattr, + .getxattr = ga_getxattr, + .removexattr = ga_removexattr, + + /* special fop to handle more entry creations */ + .setxattr = ga_setxattr, +}; + +struct xlator_cbks cbks = { + .forget = ga_forget, +}; + +struct volume_options options[] = { + /* This translator doesn't take any options, or provide any options */ + { .key = {NULL} }, +}; diff --git a/xlators/features/gfid-access/src/gfid-access.h b/xlators/features/gfid-access/src/gfid-access.h new file mode 100644 index 000000000..e13c9b724 --- /dev/null +++ b/xlators/features/gfid-access/src/gfid-access.h @@ -0,0 +1,128 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef __GFID_ACCESS_H__ +#define __GFID_ACCESS_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "defaults.h" +#include "gfid-access-mem-types.h" + +#define UUID_CANONICAL_FORM_LEN 36 + +#define GF_FUSE_AUX_GFID_NEWFILE "glusterfs.gfid.newfile" +#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal" + +#define GF_GFID_KEY "GLUSTERFS_GFID" +#define GF_GFID_DIR ".gfid" +#define GF_AUX_GFID 0xd + +#define GFID_ACCESS_GET_VALID_DIR_INODE(x,l,unref,lbl) do { \ + int ret = 0; \ + uint64_t value = 0; \ + inode_t *tmp_inode = NULL; \ + \ + /* if its an entry operation, on the virtual */ \ + /* directory inode as parent, we need to handle */ \ + /* it properly */ \ + if (l->parent) { \ + ret = inode_ctx_get (l->parent, x, &value); \ + if (ret) \ + goto lbl; \ + tmp_inode = (inode_t *)value; \ + unref = inode_ref (tmp_inode); \ + l->parent = tmp_inode; \ + /* if parent is virtual, no need to handle */ \ + /* loc->inode */ \ + break; \ + } \ + \ + /* if its an inode operation, on the virtual */ \ + /* directory inode itself, we need to handle */ \ + /* it properly */ \ + if (l->inode) { \ + ret = inode_ctx_get (l->inode, x, &value); \ + if (ret) \ + goto lbl; \ + tmp_inode = (inode_t *)value; \ + unref = inode_ref (tmp_inode); \ + l->inode = tmp_inode; \ + } \ + \ + } while (0) + +#define GFID_ACCESS_ENTRY_OP_CHECK(loc,err,lbl) do { \ + /* need to check if the lookup is on virtual dir */ \ + if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) && \ + ((loc->parent && \ + __is_root_gfid (loc->parent->gfid)) || \ + __is_root_gfid (loc->pargfid))) { \ + err = EEXIST; \ + goto lbl; \ + } \ + \ + /* now, check if the lookup() is on an existing */ \ + /* entry, but on gfid-path */ \ + if ((loc->parent && \ + __is_gfid_access_dir (loc->parent->gfid)) || \ + __is_gfid_access_dir (loc->pargfid)) { \ + err = EPERM; \ + goto lbl; \ + } \ + } while (0) + + +typedef 
struct { + unsigned int uid; + unsigned int gid; + char gfid[UUID_CANONICAL_FORM_LEN + 1]; + unsigned int st_mode; + char *bname; + + union { + struct _symlink_in { + char *linkpath; + } __attribute__ ((__packed__)) symlink; + + struct _mknod_in { + unsigned int mode; + unsigned int rdev; + unsigned int umask; + } __attribute__ ((__packed__)) mknod; + + struct _mkdir_in { + unsigned int mode; + unsigned int umask; + } __attribute__ ((__packed__)) mkdir; + } __attribute__ ((__packed__)) args; +} __attribute__((__packed__)) ga_newfile_args_t; + +typedef struct { + char gfid[UUID_CANONICAL_FORM_LEN + 1]; + char *bname; /* a null terminated basename */ +} __attribute__((__packed__)) ga_heal_args_t; + +struct ga_private { + /* root inode's stbuf */ + struct iatt root_stbuf; + struct iatt gfiddir_stbuf; + struct mem_pool *newfile_args_pool; + struct mem_pool *heal_args_pool; +}; +typedef struct ga_private ga_private_t; + +#endif /* __GFID_ACCESS_H__ */ diff --git a/xlators/features/glupy/Makefile.am b/xlators/features/glupy/Makefile.am new file mode 100644 index 000000000..a985f42a8 --- /dev/null +++ b/xlators/features/glupy/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/glupy/doc/README.md b/xlators/features/glupy/doc/README.md new file mode 100644 index 000000000..2d7b30ef6 --- /dev/null +++ b/xlators/features/glupy/doc/README.md @@ -0,0 +1,44 @@ +This is just the very start for a GlusterFS[1] meta-translator that will +allow translator code to be written in Python. It's based on the standard +Python embedding (not extending) techniques, plus a dash of the ctypes module. +The interface is a pretty minimal adaptation of the dispatches and callbacks +from the C API[2] to Python, as follows: + +* Dispatch functions and callbacks must be defined on an "xlator" class + derived from gluster.Translator so that they'll be auto-registered with + the C translator during initialization. 
+ +* For each dispatch or callback function you want to intercept, you define a + Python function using the xxx\_fop\_t or xxx\_cbk\_t decorator. + +* The arguments for each operation are different, so you'll need to refer to + the C API. GlusterFS-specific types are used (though only loc\_t is fully + defined so far) and type correctness is enforced by ctypes. + +* If you do intercept a dispatch function, it is your responsibility to call + xxx\_wind (like STACK\_WIND in the C API but operation-specific) to pass + the request to the next translator. If you do not intercept a function, it + will default the same way as for C (pass through to the same operation with + the same arguments on the first child translator). + +* If you intercept a callback function, it is your responsibility to call + xxx\_unwind (like STACK\_UNWIND\_STRICT in the C API) to pass the request back + to the caller. + +So far only the lookup and create operations are handled this way, to support +the "negative lookup" example. Now that the basic infrastructure is in place, +adding more functions should be very quick, though with that much boilerplate I +might pause to write a code generator. I also plan to add structure +definitions and interfaces for some of the utility functions in libglusterfs +(especially those having to do with inode and fd context) in the fairly near +future. Note that you can also use ctypes to get at anything not explicitly +exposed to Python already. + +_If you're coming here because of the Linux Journal article, please note that +the code has evolved since that was written. 
The version that matches the +article is here:_ + +https://github.com/jdarcy/glupy/tree/4bbae91ba459ea46ef32f2966562492e4ca9187a + +[1] http://www.gluster.org +[2] http://hekafs.org/dist/xlator_api_2.html diff --git a/xlators/features/glupy/doc/TESTING b/xlators/features/glupy/doc/TESTING new file mode 100644 index 000000000..e05f17f49 --- /dev/null +++ b/xlators/features/glupy/doc/TESTING @@ -0,0 +1,9 @@ +Loading a translator written in Python using the glupy meta translator +------------------------------------------------------------------------------- +'test.vol' is a simple volfile with the debug-trace Python translator on top +of a brick. The volfile can be mounted using the following command. + +$ glusterfs --debug -f test.vol /path/to/mntpt + +If file operations are then performed on the newly mounted file system, log +output will be printed by the Python translator on the standard output. diff --git a/xlators/features/glupy/doc/test.vol b/xlators/features/glupy/doc/test.vol new file mode 100644 index 000000000..0751a488c --- /dev/null +++ b/xlators/features/glupy/doc/test.vol @@ -0,0 +1,10 @@ +volume vol-posix + type storage/posix + option directory /path/to/brick +end-volume + +volume vol-glupy + type features/glupy + option module-name debug-trace + subvolumes vol-posix +end-volume diff --git a/xlators/features/glupy/src/Makefile.am b/xlators/features/glupy/src/Makefile.am new file mode 100644 index 000000000..960862839 --- /dev/null +++ b/xlators/features/glupy/src/Makefile.am @@ -0,0 +1,20 @@ +xlator_LTLIBRARIES = glupy.la + +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +glupydir = $(xlatordir)/glupy + +glupy_PYTHON = gluster.py negative.py helloworld.py debug-trace.py + +glupy_la_LDFLAGS = -module -avoid-version -shared -nostartfiles +glupy_la_SOURCES = glupy.c +glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + -lpthread -l$(BUILD_PYTHON_LIB) + +noinst_HEADERS = glupy.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) 
-I$(top_srcdir)/libglusterfs/src -isystem $(BUILD_PYTHON_INC) + +AM_CFLAGS = -Wall -fno-strict-aliasing -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/glupy/src/debug-trace.py b/xlators/features/glupy/src/debug-trace.py new file mode 100644 index 000000000..53e76546b --- /dev/null +++ b/xlators/features/glupy/src/debug-trace.py @@ -0,0 +1,774 @@ +import sys +import stat +from uuid import UUID +from time import strftime, localtime +from gluster import * +# This translator was written primarily to test the fop entry point definitions +# and structure definitions in 'gluster.py'. +# It is similar to the debug-trace translator, one of the already available +# translator types written in C, that logs the arguments passed to the fops and +# their corresponding cbk functions. + +dl.get_id.restype = c_long +dl.get_id.argtypes = [ POINTER(call_frame_t) ] + +dl.get_rootunique.restype = c_uint64 +dl.get_rootunique.argtypes = [ POINTER(call_frame_t) ] + +def uuid2str (gfid): + return str(UUID(''.join(map("{0:02x}".format, gfid)))) + + +def st_mode_from_ia (prot, filetype): + st_mode = 0 + type_bit = 0 + prot_bit = 0 + + if filetype == IA_IFREG: + type_bit = stat.S_IFREG + elif filetype == IA_IFDIR: + type_bit = stat.S_IFDIR + elif filetype == IA_IFLNK: + type_bit = stat.S_IFLNK + elif filetype == IA_IFBLK: + type_bit = stat.S_IFBLK + elif filetype == IA_IFCHR: + type_bit = stat.S_IFCHR + elif filetype == IA_IFIFO: + type_bit = stat.S_IFIFO + elif filetype == IA_IFSOCK: + type_bit = stat.S_IFSOCK + elif filetype == IA_INVAL: + pass + + + if prot.suid: + prot_bit |= stat.S_ISUID + if prot.sgid: + prot_bit |= stat.S_ISGID + if prot.sticky: + prot_bit |= stat.S_ISVTX + + if prot.owner.read: + prot_bit |= stat.S_IRUSR + if prot.owner.write: + prot_bit |= stat.S_IWUSR + if prot.owner.execn: + prot_bit |= stat.S_IXUSR + + if prot.group.read: + prot_bit |= stat.S_IRGRP + if prot.group.write: + prot_bit |= stat.S_IWGRP + if 
prot.group.execn: + prot_bit |= stat.S_IXGRP + + if prot.other.read: + prot_bit |= stat.S_IROTH + if prot.other.write: + prot_bit |= stat.S_IWOTH + if prot.other.execn: + prot_bit |= stat.S_IXOTH + + st_mode = (type_bit | prot_bit) + + return st_mode + + +def trace_stat2str (buf): + gfid = uuid2str(buf.contents.ia_gfid) + mode = st_mode_from_ia(buf.contents.ia_prot, buf.contents.ia_type) + atime_buf = strftime("[%b %d %H:%M:%S]", + localtime(buf.contents.ia_atime)) + mtime_buf = strftime("[%b %d %H:%M:%S]", + localtime(buf.contents.ia_mtime)) + ctime_buf = strftime("[%b %d %H:%M:%S]", + localtime(buf.contents.ia_ctime)) + return ("(gfid={0:s}, ino={1:d}, mode={2:o}, nlink={3:d}, uid ={4:d}, "+ + "gid ={5:d}, size={6:d}, blocks={7:d}, atime={8:s}, mtime={9:s}, "+ + "ctime={10:s})").format(gfid, buf.contents.ia_no, mode, + buf.contents.ia_nlink, + buf.contents.ia_uid, + buf.contents.ia_gid, + buf.contents.ia_size, + buf.contents.ia_blocks, + atime_buf, mtime_buf, + ctime_buf) + +class xlator(Translator): + + def __init__(self, c_this): + Translator.__init__(self, c_this) + self.gfids = {} + + def lookup_fop(self, frame, this, loc, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.gfid) + print("GLUPY TRACE LOOKUP FOP- {0:d}: gfid={1:s}; " + + "path={2:s}").format(unique, gfid, loc.contents.path) + self.gfids[key] = gfid + dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata) + return 0 + + def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, + inode, buf, xdata, postparent): + unique =dl.get_rootunique(frame) + key =dl.get_id(frame) + if op_ret == 0: + gfid = uuid2str(buf.contents.ia_gfid) + statstr = trace_stat2str(buf) + postparentstr = trace_stat2str(postparent) + print("GLUPY TRACE LOOKUP CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; *buf={3:s}; " + + "*postparent={4:s}").format(unique, gfid, + op_ret, statstr, + postparentstr) + else: + gfid = self.gfids[key] + print("GLUPY TRACE LOOKUP CBK - {0:d}: 
gfid={1:s};" + + " op_ret={2:d}; op_errno={3:d}").format(unique, + gfid, + op_ret, + op_errno) + del self.gfids[key] + dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, + inode, buf, xdata, postparent) + return 0 + + def create_fop(self, frame, this, loc, flags, mode, umask, fd, + xdata): + unique = dl.get_rootunique(frame) + gfid = uuid2str(loc.contents.gfid) + print("GLUPY TRACE CREATE FOP- {0:d}: gfid={1:s}; path={2:s}; " + + "fd={3:s}; flags=0{4:o}; mode=0{5:o}; " + + "umask=0{6:o}").format(unique, gfid, loc.contents.path, + fd, flags, mode, umask) + dl.wind_create(frame, POINTER(xlator_t)(), loc, flags,mode, + umask, fd, xdata) + return 0 + + def create_cbk(self, frame, cookie, this, op_ret, op_errno, fd, + inode, buf, preparent, postparent, xdata): + unique = dl.get_rootunique(frame) + if op_ret >= 0: + gfid = uuid2str(inode.contents.gfid) + statstr = trace_stat2str(buf) + preparentstr = trace_stat2str(preparent) + postparentstr = trace_stat2str(postparent) + print("GLUPY TRACE CREATE CBK- {0:d}: gfid={1:s};" + + " op_ret={2:d}; fd={3:s}; *stbuf={4:s}; " + + "*preparent={5:s};" + + " *postparent={6:s}").format(unique, gfid, op_ret, + fd, statstr, + preparentstr, + postparentstr) + else: + print ("GLUPY TRACE CREATE CBK- {0:d}: op_ret={1:d}; " + + "op_errno={2:d}").format(unique, op_ret, op_errno) + dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd, + inode, buf, preparent, postparent, xdata) + return 0 + + def open_fop(self, frame, this, loc, flags, fd, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE OPEN FOP- {0:d}: gfid={1:s}; path={2:s}; "+ + "flags={3:d}; fd={4:s}").format(unique, gfid, + loc.contents.path, flags, + fd) + self.gfids[key] = gfid + dl.wind_open(frame, POINTER(xlator_t)(), loc, flags, fd, xdata) + return 0 + + def open_cbk(self, frame, cookie, this, op_ret, op_errno, fd, xdata): + unique = dl.get_rootunique(frame) + key = 
dl.get_id(frame) + gfid = self.gfids[key] + print("GLUPY TRACE OPEN CBK- {0:d}: gfid={1:s}; op_ret={2:d}; " + "op_errno={3:d}; *fd={4:s}").format(unique, gfid, + op_ret, op_errno, fd) + del self.gfids[key] + dl.unwind_open(frame, cookie, this, op_ret, op_errno, fd, + xdata) + return 0 + + def readv_fop(self, frame, this, fd, size, offset, flags, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(fd.contents.inode.contents.gfid) + print("GLUPY TRACE READV FOP- {0:d}: gfid={1:s}; "+ + "fd={2:s}; size ={3:d}; offset={4:d}; " + + "flags=0{5:x}").format(unique, gfid, fd, size, offset, + flags) + self.gfids[key] = gfid + dl.wind_readv (frame, POINTER(xlator_t)(), fd, size, offset, + flags, xdata) + return 0 + + def readv_cbk(self, frame, cookie, this, op_ret, op_errno, vector, + count, buf, iobref, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + if op_ret >= 0: + statstr = trace_stat2str(buf) + print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+ + "op_ret={2:d}; *buf={3:s};").format(unique, gfid, + op_ret, + statstr) + + else: + print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+ + "op_ret={2:d}; op_errno={3:d}").format(unique, + gfid, + op_ret, + op_errno) + del self.gfids[key] + dl.unwind_readv (frame, cookie, this, op_ret, op_errno, + vector, count, buf, iobref, xdata) + return 0 + + def writev_fop(self, frame, this, fd, vector, count, offset, flags, + iobref, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(fd.contents.inode.contents.gfid) + print("GLUPY TRACE WRITEV FOP- {0:d}: gfid={1:s}; " + + "fd={2:s}; count={3:d}; offset={4:d}; " + + "flags=0{5:x}").format(unique, gfid, fd, count, offset, + flags) + self.gfids[key] = gfid + dl.wind_writev(frame, POINTER(xlator_t)(), fd, vector, count, + offset, flags, iobref, xdata) + return 0 + + def writev_cbk(self, frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, xdata): + unique = 
dl.get_rootunique(frame) + key = dl.get_id(frame) + if op_ret >= 0: + preopstr = trace_stat2str(prebuf) + postopstr = trace_stat2str(postbuf) + print("GLUPY TRACE WRITEV CBK- {0:d}: op_ret={1:d}; " + + "*prebuf={2:s}; " + + "*postbuf={3:s}").format(unique, op_ret, preopstr, + postopstr) + else: + gfid = self.gfids[key] + print("GLUPY TRACE WRITEV CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; op_errno={3:d}").format(unique, + gfid, + op_ret, + op_errno) + del self.gfids[key] + dl.unwind_writev (frame, cookie, this, op_ret, op_errno, + prebuf, postbuf, xdata) + return 0 + + def opendir_fop(self, frame, this, loc, fd, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE OPENDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+ + "fd={3:s}").format(unique, gfid, loc.contents.path, fd) + self.gfids[key] = gfid + dl.wind_opendir(frame, POINTER(xlator_t)(), loc, fd, xdata) + return 0 + + def opendir_cbk(self, frame, cookie, this, op_ret, op_errno, fd, + xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + print("GLUPY TRACE OPENDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+ + " op_errno={3:d}; fd={4:s}").format(unique, gfid, op_ret, + op_errno, fd) + del self.gfids[key] + dl.unwind_opendir(frame, cookie, this, op_ret, op_errno, + fd, xdata) + return 0 + + def readdir_fop(self, frame, this, fd, size, offset, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(fd.contents.inode.contents.gfid) + print("GLUPY TRACE READDIR FOP- {0:d}: gfid={1:s}; fd={2:s}; " + + "size={3:d}; offset={4:d}").format(unique, gfid, fd, size, + offset) + self.gfids[key] = gfid + dl.wind_readdir(frame, POINTER(xlator_t)(), fd, size, offset, + xdata) + return 0 + + def readdir_cbk(self, frame, cookie, this, op_ret, op_errno, buf, + xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + print("GLUPY TRACE READDIR 
CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+ + " op_errno={3:d}").format(unique, gfid, op_ret, op_errno) + del self.gfids[key] + dl.unwind_readdir(frame, cookie, this, op_ret, op_errno, buf, + xdata) + return 0 + + def readdirp_fop(self, frame, this, fd, size, offset, dictionary): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(fd.contents.inode.contents.gfid) + print("GLUPY TRACE READDIRP FOP- {0:d}: gfid={1:s}; fd={2:s}; "+ + " size={3:d}; offset={4:d}").format(unique, gfid, fd, size, + offset) + self.gfids[key] = gfid + dl.wind_readdirp(frame, POINTER(xlator_t)(), fd, size, offset, + dictionary) + return 0 + + def readdirp_cbk(self, frame, cookie, this, op_ret, op_errno, buf, + xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + print("GLUPY TRACE READDIRP CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; op_errno={3:d}").format(unique, gfid, + op_ret, op_errno) + del self.gfids[key] + dl.unwind_readdirp(frame, cookie, this, op_ret, op_errno, buf, + xdata) + return 0 + + def mkdir_fop(self, frame, this, loc, mode, umask, xdata): + unique = dl.get_rootunique(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE MKDIR FOP- {0:d}: gfid={1:s}; path={2:s}; " + + "mode={3:d}; umask=0{4:o}").format(unique, gfid, + loc.contents.path, mode, + umask) + dl.wind_mkdir(frame, POINTER(xlator_t)(), loc, mode, umask, + xdata) + return 0 + + def mkdir_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf, + preparent, postparent, xdata): + unique = dl.get_rootunique(frame) + if op_ret == 0: + gfid = uuid2str(inode.contents.gfid) + statstr = trace_stat2str(buf) + preparentstr = trace_stat2str(preparent) + postparentstr = trace_stat2str(postparent) + print("GLUPY TRACE MKDIR CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; *stbuf={3:s}; *prebuf={4:s}; "+ + "*postbuf={5:s} ").format(unique, gfid, op_ret, + statstr, + preparentstr, + postparentstr) + else: + print("GLUPY TRACE MKDIR CBK- 
{0:d}: op_ret={1:d}; "+ + "op_errno={2:d}").format(unique, op_ret, op_errno) + dl.unwind_mkdir(frame, cookie, this, op_ret, op_errno, inode, + buf, preparent, postparent, xdata) + return 0 + + def rmdir_fop(self, frame, this, loc, flags, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE RMDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+ + "flags={3:d}").format(unique, gfid, loc.contents.path, + flags) + self.gfids[key] = gfid + dl.wind_rmdir(frame, POINTER(xlator_t)(), loc, flags, xdata) + return 0 + + def rmdir_cbk(self, frame, cookie, this, op_ret, op_errno, preparent, + postparent, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + if op_ret == 0: + preparentstr = trace_stat2str(preparent) + postparentstr = trace_stat2str(postparent) + print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; *prebuf={3:s}; "+ + "*postbuf={4:s}").format(unique, gfid, op_ret, + preparentstr, + postparentstr) + else: + print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; op_errno={3:d}").format(unique, + gfid, + op_ret, + op_errno) + del self.gfids[key] + dl.unwind_rmdir(frame, cookie, this, op_ret, op_errno, + preparent, postparent, xdata) + return 0 + + def stat_fop(self, frame, this, loc, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE STAT FOP- {0:d}: gfid={1:s}; " + + " path={2:s}").format(unique, gfid, loc.contents.path) + self.gfids[key] = gfid + dl.wind_stat(frame, POINTER(xlator_t)(), loc, xdata) + return 0 + + def stat_cbk(self, frame, cookie, this, op_ret, op_errno, buf, + xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + if op_ret == 0: + statstr = trace_stat2str(buf) + print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; *buf={3:s};").format(unique, + gfid, + 
op_ret, + statstr) + else: + print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; op_errno={3:d}").format(unique, + gfid, + op_ret, + op_errno) + del self.gfids[key] + dl.unwind_stat(frame, cookie, this, op_ret, op_errno, + buf, xdata) + return 0 + + def fstat_fop(self, frame, this, fd, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(fd.contents.inode.contents.gfid) + print("GLUPY TRACE FSTAT FOP- {0:d}: gfid={1:s}; " + + "fd={2:s}").format(unique, gfid, fd) + self.gfids[key] = gfid + dl.wind_fstat(frame, POINTER(xlator_t)(), fd, xdata) + return 0 + + def fstat_cbk(self, frame, cookie, this, op_ret, op_errno, buf, + xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + if op_ret == 0: + statstr = trace_stat2str(buf) + print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+ + " op_ret={2:d}; *buf={3:s}").format(unique, + gfid, + op_ret, + statstr) + else: + print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+ + "op_ret={2:d}; op_errno={3:d}").format(unique. 
+ gfid, + op_ret, + op_errno) + del self.gfids[key] + dl.unwind_fstat(frame, cookie, this, op_ret, op_errno, + buf, xdata) + return 0 + + def statfs_fop(self, frame, this, loc, xdata): + unique = dl.get_rootunique(frame) + if loc.contents.inode: + gfid = uuid2str(loc.contents.inode.contents.gfid) + else: + gfid = "0" + print("GLUPY TRACE STATFS FOP- {0:d}: gfid={1:s}; "+ + "path={2:s}").format(unique, gfid, loc.contents.path) + dl.wind_statfs(frame, POINTER(xlator_t)(), loc, xdata) + return 0 + + def statfs_cbk(self, frame, cookie, this, op_ret, op_errno, buf, + xdata): + unique = dl.get_rootunique(frame) + if op_ret == 0: + #TBD: print buf (pointer to an iovec type object) + print("GLUPY TRACE STATFS CBK {0:d}: "+ + "op_ret={1:d}").format(unique, op_ret) + else: + print("GLUPY TRACE STATFS CBK- {0:d}"+ + "op_ret={1:d}; op_errno={2:d}").format(unique, + op_ret, + op_errno) + dl.unwind_statfs(frame, cookie, this, op_ret, op_errno, + buf, xdata) + return 0 + + def getxattr_fop(self, frame, this, loc, name, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE GETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+ + " name={3:s}").format(unique, gfid, loc.contents.path, + name) + self.gfids[key]=gfid + dl.wind_getxattr(frame, POINTER(xlator_t)(), loc, name, xdata) + return 0 + + def getxattr_cbk(self, frame, cookie, this, op_ret, op_errno, + dictionary, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + print("GLUPY TRACE GETXATTR CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; op_errno={3:d}; "+ + " dictionary={4:s}").format(unique, gfid, op_ret, op_errno, + dictionary) + del self.gfids[key] + dl.unwind_getxattr(frame, cookie, this, op_ret, op_errno, + dictionary, xdata) + return 0 + + def fgetxattr_fop(self, frame, this, fd, name, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = 
uuid2str(fd.contents.inode.contents.gfid) + print("GLUPY TRACE FGETXATTR FOP- {0:d}: gfid={1:s}; fd={2:s}; "+ + "name={3:s}").format(unique, gfid, fd, name) + self.gfids[key] = gfid + dl.wind_fgetxattr(frame, POINTER(xlator_t)(), fd, name, xdata) + return 0 + + def fgetxattr_cbk(self, frame, cookie, this, op_ret, op_errno, + dictionary, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + print("GLUPY TRACE FGETXATTR CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; op_errno={3:d};"+ + " dictionary={4:s}").format(unique, gfid, op_ret, + op_errno, dictionary) + del self.gfids[key] + dl.unwind_fgetxattr(frame, cookie, this, op_ret, op_errno, + dictionary, xdata) + return 0 + + def setxattr_fop(self, frame, this, loc, dictionary, flags, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE SETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+ + " flags={3:d}").format(unique, gfid, loc.contents.path, + flags) + self.gfids[key] = gfid + dl.wind_setxattr(frame, POINTER(xlator_t)(), loc, dictionary, + flags, xdata) + return 0 + + def setxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + print("GLUPY TRACE SETXATTR CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; op_errno={3:d}").format(unique, gfid, + op_ret, op_errno) + del self.gfids[key] + dl.unwind_setxattr(frame, cookie, this, op_ret, op_errno, + xdata) + return 0 + + def fsetxattr_fop(self, frame, this, fd, dictionary, flags, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(fd.contents.inode.contents.gfid) + print("GLUPY TRACE FSETXATTR FOP- {0:d}: gfid={1:s}; fd={2:p}; "+ + "flags={3:d}").format(unique, gfid, fd, flags) + self.gfids[key] = gfid + dl.wind_fsetxattr(frame, POINTER(xlator_t)(), fd, dictionary, + flags, xdata) + return 0 + + def fsetxattr_cbk(self, frame, 
cookie, this, op_ret, op_errno, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + print("GLUPY TRACE FSETXATTR CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; op_errno={3:d}").format(unique, gfid, + op_ret, op_errno) + del self.gfids[key] + dl.unwind_fsetxattr(frame, cookie, this, op_ret, op_errno, + xdata) + return 0 + + def removexattr_fop(self, frame, this, loc, name, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE REMOVEXATTR FOP- {0:d}: gfid={1:s}; "+ + "path={2:s}; name={3:s}").format(unique, gfid, + loc.contents.path, + name) + self.gfids[key] = gfid + dl.wind_removexattr(frame, POINTER(xlator_t)(), loc, name, + xdata) + return 0 + + def removexattr_cbk(self, frame, cookie, this, op_ret, op_errno, + xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + print("GLUPY TRACE REMOVEXATTR CBK- {0:d}: gfid={1:s} "+ + " op_ret={2:d}; op_errno={3:d}").format(unique, gfid, + op_ret, op_errno) + del self.gfids[key] + dl.unwind_removexattr(frame, cookie, this, op_ret, op_errno, + xdata) + return 0 + + def link_fop(self, frame, this, oldloc, newloc, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + if (newloc.contents.inode): + newgfid = uuid2str(newloc.contents.inode.contents.gfid) + else: + newgfid = "0" + oldgfid = uuid2str(oldloc.contents.inode.contents.gfid) + print("GLUPY TRACE LINK FOP-{0:d}: oldgfid={1:s}; oldpath={2:s};"+ + "newgfid={3:s};"+ + "newpath={4:s}").format(unique, oldgfid, + oldloc.contents.path, + newgfid, + newloc.contents.path) + self.gfids[key] = oldgfid + dl.wind_link(frame, POINTER(xlator_t)(), oldloc, newloc, + xdata) + return 0 + + def link_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf, + preparent, postparent, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + if op_ret == 0: + statstr = 
trace_stat2str(buf) + preparentstr = trace_stat2str(preparent) + postparentstr = trace_stat2str(postparent) + print("GLUPY TRACE LINK CBK- {0:d}: op_ret={1:d} "+ + "*stbuf={2:s}; *prebuf={3:s}; "+ + "*postbuf={4:s} ").format(unique, op_ret, statstr, + preparentstr, + postparentstr) + else: + print("GLUPY TRACE LINK CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; "+ + "op_errno={3:d}").format(unique, gfid, + op_ret, op_errno) + del self.gfids[key] + dl.unwind_link(frame, cookie, this, op_ret, op_errno, inode, + buf, preparent, postparent, xdata) + return 0 + + def unlink_fop(self, frame, this, loc, xflag, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE UNLINK FOP- {0:d}; gfid={1:s}; path={2:s}; "+ + "flag={3:d}").format(unique, gfid, loc.contents.path, + xflag) + self.gfids[key] = gfid + dl.wind_unlink(frame, POINTER(xlator_t)(), loc, xflag, + xdata) + return 0 + + def unlink_cbk(self, frame, cookie, this, op_ret, op_errno, + preparent, postparent, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + if op_ret == 0: + preparentstr = trace_stat2str(preparent) + postparentstr = trace_stat2str(postparent) + print("GLUPY TRACE UNLINK CBK- {0:d}: gfid ={1:s}; "+ + "op_ret={2:d}; *prebuf={3:s}; "+ + "*postbuf={4:s} ").format(unique, gfid, op_ret, + preparentstr, + postparentstr) + else: + print("GLUPY TRACE UNLINK CBK: {0:d}: gfid ={1:s}; "+ + "op_ret={2:d}; "+ + "op_errno={3:d}").format(unique, gfid, op_ret, + op_errno) + del self.gfids[key] + dl.unwind_unlink(frame, cookie, this, op_ret, op_errno, + preparent, postparent, xdata) + return 0 + + def readlink_fop(self, frame, this, loc, size, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE READLINK FOP- {0:d}: gfid={1:s}; path={2:s};"+ + " size={3:d}").format(unique, gfid, loc.contents.path, + size) 
+ self.gfids[key] = gfid + dl.wind_readlink(frame, POINTER(xlator_t)(), loc, size, + xdata) + return 0 + + def readlink_cbk(self, frame, cookie, this, op_ret, op_errno, + buf, stbuf, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + if op_ret == 0: + statstr = trace_stat2str(stbuf) + print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+ + " op_ret={2:d}; op_errno={3:d}; *prebuf={4:s}; "+ + "*postbuf={5:s} ").format(unique, gfid, + op_ret, op_errno, + buf, statstr) + else: + print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+ + " op_ret={2:d}; op_errno={3:d}").format(unique, + gfid, + op_ret, + op_errno) + del self.gfids[key] + dl.unwind_readlink(frame, cookie, this, op_ret, op_errno, buf, + stbuf, xdata) + return 0 + + def symlink_fop(self, frame, this, linkpath, loc, umask, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = uuid2str(loc.contents.inode.contents.gfid) + print("GLUPY TRACE SYMLINK FOP- {0:d}: gfid={1:s}; "+ + "linkpath={2:s}; path={3:s};"+ + "umask=0{4:o}").format(unique, gfid, linkpath, + loc.contents.path, umask) + self.gfids[key] = gfid + dl.wind_symlink(frame, POINTER(xlator_t)(), linkpath, loc, + umask, xdata) + return 0 + + def symlink_cbk(self, frame, cookie, this, op_ret, op_errno, + inode, buf, preparent, postparent, xdata): + unique = dl.get_rootunique(frame) + key = dl.get_id(frame) + gfid = self.gfids[key] + if op_ret == 0: + statstr = trace_stat2str(buf) + preparentstr = trace_stat2str(preparent) + postparentstr = trace_stat2str(postparent) + print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; *stbuf={3:s}; *preparent={4:s}; "+ + "*postparent={5:s}").format(unique, gfid, + op_ret, statstr, + preparentstr, + postparentstr) + else: + print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+ + "op_ret={2:d}; op_errno={3:d}").format(unique, + gfid, + op_ret, + op_errno) + del self.gfids[key] + dl.unwind_symlink(frame, cookie, this, op_ret, op_errno, + 
inode, buf, preparent, postparent, xdata) + return 0 diff --git a/xlators/features/glupy/src/glupy.c b/xlators/features/glupy/src/glupy.c new file mode 100644 index 000000000..dc86c0071 --- /dev/null +++ b/xlators/features/glupy/src/glupy.c @@ -0,0 +1,2470 @@ +/* + Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <ctype.h> +#include <sys/uio.h> +#include <Python.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" +#include "logging.h" +#include "defaults.h" + +#include "glupy.h" + +/* UTILITY FUNCTIONS FOR FOP-SPECIFIC CODE */ + +pthread_key_t gil_init_key; + +PyGILState_STATE +glupy_enter (void) +{ +#if 0 + if (!pthread_getspecific(gil_init_key)) { + PyEval_ReleaseLock(); + (void)pthread_setspecific(gil_init_key,(void *)1); + } +#endif + + return PyGILState_Ensure(); +} + +void +glupy_leave (PyGILState_STATE gstate) +{ + PyGILState_Release(gstate); +} + +/* FOP: LOOKUP */ + +int32_t +glupy_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_LOOKUP]) { + goto unwind; + } + + gstate = 
glupy_enter(); + ret = ((fop_lookup_cbk_t)(priv->cbks[GLUPY_LOOKUP]))( + frame, cookie, this, op_ret, op_errno, + inode, buf, xdata, postparent); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, + xdata, postparent); + return 0; +} + +int32_t +glupy_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_LOOKUP]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_lookup_t)(priv->fops[GLUPY_LOOKUP]))( + frame, this, loc, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + return 0; +} + +void +wind_lookup (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND(frame,glupy_lookup_cbk,xl,xl->fops->lookup,loc,xdata); +} + +void +unwind_lookup (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + frame->local = NULL; + STACK_UNWIND_STRICT(lookup,frame,op_ret,op_errno, + inode,buf,xdata,postparent); +} + +void +set_lookup_fop (long py_this, fop_lookup_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_LOOKUP] = (long)fop; +} + +void +set_lookup_cbk (long py_this, fop_lookup_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_LOOKUP] = (long)cbk; +} + +/* FOP: CREATE */ + +int32_t +glupy_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt 
*postparent, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_CREATE]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_create_cbk_t)(priv->cbks[GLUPY_CREATE]))( + frame, cookie, this, op_ret, op_errno, + fd, inode, buf, preparent, postparent, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +glupy_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_CREATE]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_create_t)(priv->fops[GLUPY_CREATE]))( + frame, this, loc, flags, mode, umask, fd, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, + fd, xdata); + return 0; +} + +void +wind_create (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_create_cbk,xl, xl->fops->create, + loc, flags, mode, umask, fd, xdata); +} + +void +unwind_create (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); +} + +void +set_create_fop (long py_this, fop_create_t fop) +{ + glupy_private_t *priv = ((xlator_t 
*)py_this)->private; + + priv->fops[GLUPY_CREATE] = (long)fop; +} + +void +set_create_cbk (long py_this, fop_create_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_CREATE] = (long)cbk; +} + +/* FOP: OPEN */ + +int32_t +glupy_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_OPEN]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_open_cbk_t)(priv->cbks[GLUPY_OPEN]))( + frame, cookie, this, op_ret, op_errno, + fd, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +glupy_open (call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, fd_t *fd, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_OPEN]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_open_t)(priv->fops[GLUPY_OPEN]))( + frame, this, loc, flags, fd, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +void +wind_open (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_open_cbk, xl, xl->fops->open, loc, flags, + fd, xdata); +} + +void +unwind_open (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); +} + +void +set_open_fop (long py_this, fop_open_t fop) +{ + 
glupy_private_t *priv = ((xlator_t *)py_this)->private; + priv->fops[GLUPY_OPEN] = (long)fop; +} + +void +set_open_cbk (long py_this, fop_open_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + priv->cbks[GLUPY_OPEN] = (long)cbk; +} + +/* FOP: READV */ + +int32_t +glupy_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_READV]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_readv_cbk_t)(priv->cbks[GLUPY_READV]))( + frame, cookie, this, op_ret, op_errno, + vector, count, stbuf, iobref, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, + count, stbuf, iobref, xdata); + return 0; +} + +int32_t +glupy_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, uint32_t flags, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_READV]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_readv_t)(priv->fops[GLUPY_READV]))( + frame, this, fd, size, offset, flags, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, + flags, xdata); + return 0; +} + +void +wind_readv (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_readv_cbk, xl, xl->fops->readv, fd, size, + offset, flags, xdata); +} + +void +unwind_readv (call_frame_t *frame, long cookie, 
xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, + count, stbuf, iobref, xdata); +} + +void +set_readv_fop (long py_this, fop_readv_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + priv->fops[GLUPY_READV] = (long)fop; +} + +void +set_readv_cbk (long py_this, fop_readv_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + priv->cbks[GLUPY_READV] = (long)cbk; +} + +/* FOP: WRITEV */ + +int32_t +glupy_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_WRITEV]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_writev_cbk_t)(priv->cbks[GLUPY_WRITEV]))( + frame, cookie, this, op_ret, op_errno, + prebuf, postbuf, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; +} + +int32_t +glupy_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_WRITEV]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_writev_t)(priv->fops[GLUPY_WRITEV]))( + frame, this, fd, vector, count, offset, flags, + iobref, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, + offset, flags, iobref, xdata); + return 0; +} + 
+void +wind_writev (call_frame_t *frame, xlator_t *xl, fd_t *fd, struct iovec *vector, + int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_writev_cbk, xl, xl->fops->writev, fd, vector, + count, offset, flags, iobref, xdata); +} + +void +unwind_writev (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, + postbuf, xdata); +} + +void +set_writev_fop (long py_this, fop_writev_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + priv->fops[GLUPY_WRITEV] = (long)fop; +} + +void +set_writev_cbk (long py_this, fop_writev_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + priv->cbks[GLUPY_WRITEV] = (long)cbk; +} + + +/* FOP: OPENDIR */ + +int32_t +glupy_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_OPENDIR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_opendir_cbk_t)(priv->cbks[GLUPY_OPENDIR]))( + frame, cookie, this, op_ret, op_errno, + fd, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +glupy_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_OPENDIR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_opendir_t)(priv->fops[GLUPY_OPENDIR]))( + frame, 
this, loc, fd, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; +} + +void +wind_opendir (call_frame_t *frame, xlator_t *xl, loc_t *loc, fd_t *fd, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND(frame,glupy_opendir_cbk,xl,xl->fops->opendir,loc,fd,xdata); +} + +void +unwind_opendir (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT(opendir,frame,op_ret,op_errno, + fd,xdata); +} + +void +set_opendir_fop (long py_this, fop_opendir_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_OPENDIR] = (long)fop; +} + +void +set_opendir_cbk (long py_this, fop_opendir_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_OPENDIR] = (long)cbk; +} + +/* FOP: READDIR */ + +int32_t +glupy_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_READDIR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_readdir_cbk_t)(priv->cbks[GLUPY_READDIR]))( + frame, cookie, this, op_ret, op_errno, + entries, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, + xdata); + return 0; +} + +int32_t +glupy_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_READDIR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void 
*)++next_id; + ret = ((fop_readdir_t)(priv->fops[GLUPY_READDIR]))( + frame, this, fd, size, offset, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir,fd, size, offset, xdata); + return 0; +} + +void +wind_readdir(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND(frame,glupy_readdir_cbk,xl,xl->fops->readdir,fd,size,offset,xdata); +} + +void +unwind_readdir (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT(readdir,frame,op_ret,op_errno, + entries, xdata); +} + +void +set_readdir_fop (long py_this, fop_readdir_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_READDIR] = (long)fop; +} + +void +set_readdir_cbk (long py_this, fop_readdir_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_READDIR] = (long)cbk; +} + + +/* FOP: READDIRP */ + +int32_t +glupy_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_READDIRP]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_readdirp_cbk_t)(priv->cbks[GLUPY_READDIRP]))( + frame, cookie, this, op_ret, op_errno, + entries, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, + xdata); + return 0; +} + +int32_t +glupy_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + 
int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_READDIRP]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_readdirp_t)(priv->fops[GLUPY_READDIRP]))( + frame, this, fd, size, offset, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp,fd, size, offset, xdata); + return 0; +} + +void +wind_readdirp (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND(frame,glupy_readdirp_cbk,xl,xl->fops->readdirp,fd,size,offset,xdata); +} + +void +unwind_readdirp (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT(readdirp,frame,op_ret,op_errno, + entries, xdata); +} + +void +set_readdirp_fop (long py_this, fop_readdirp_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_READDIRP] = (long)fop; +} + +void +set_readdirp_cbk (long py_this, fop_readdirp_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_READDIRP] = (long)cbk; +} + + +/* FOP:STAT */ + +int32_t +glupy_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_STAT]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_stat_cbk_t)(priv->cbks[GLUPY_STAT]))( + frame, cookie, this, op_ret, op_errno, + buf, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +glupy_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, 
+ dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_STAT]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_stat_t)(priv->fops[GLUPY_STAT]))( + frame, this, loc, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; +} + +void +wind_stat (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND(frame,glupy_stat_cbk,xl,xl->fops->stat,loc,xdata); +} + +void +unwind_stat (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT(stat,frame,op_ret,op_errno, + buf,xdata); +} + +void +set_stat_fop (long py_this, fop_stat_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_STAT] = (long)fop; +} + +void +set_stat_cbk (long py_this, fop_stat_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_STAT] = (long)cbk; +} + + +/* FOP: FSTAT */ + +int32_t +glupy_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_FSTAT]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_fstat_cbk_t)(priv->cbks[GLUPY_FSTAT]))( + frame, cookie, this, op_ret, op_errno, + buf, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +glupy_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *xdata) +{ + glupy_private_t 
*priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_FSTAT]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_fstat_t)(priv->fops[GLUPY_FSTAT]))( + frame, this, fd, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; +} + +void +wind_fstat (call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND(frame,glupy_fstat_cbk,xl,xl->fops->fstat,fd,xdata); +} + +void +unwind_fstat (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT(fstat,frame,op_ret,op_errno, + buf,xdata); +} + +void +set_fstat_fop (long py_this, fop_fstat_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_FSTAT] = (long)fop; +} + +void +set_fstat_cbk (long py_this, fop_fstat_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_FSTAT] = (long)cbk; +} + +/* FOP:STATFS */ + +int32_t +glupy_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_STATFS]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_statfs_cbk_t)(priv->cbks[GLUPY_STATFS]))( + frame, cookie, this, op_ret, op_errno, + buf, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +glupy_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + glupy_private_t *priv = 
this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_STATFS]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_statfs_t)(priv->fops[GLUPY_STATFS]))( + frame, this, loc, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_statfs_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->statfs, loc, xdata); + return 0; +} + +void +wind_statfs (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND(frame,glupy_statfs_cbk,xl,xl->fops->statfs,loc,xdata); +} + +void +unwind_statfs (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT(statfs,frame,op_ret,op_errno, + buf,xdata); +} + +void +set_statfs_fop (long py_this, fop_statfs_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_STATFS] = (long)fop; +} + +void +set_statfs_cbk (long py_this, fop_statfs_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_STATFS] = (long)cbk; +} + + +/* FOP: SETXATTR */ + +int32_t +glupy_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_SETXATTR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_setxattr_cbk_t)(priv->cbks[GLUPY_SETXATTR]))( + frame, cookie, this, op_ret, op_errno, + xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int32_t +glupy_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) +{ + 
glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_SETXATTR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_setxattr_t)(priv->fops[GLUPY_SETXATTR]))( + frame, this, loc, dict, flags, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, + flags, xdata); + return 0; +} + +void +wind_setxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_setxattr_cbk, xl, xl->fops->setxattr, + loc, dict, flags, xdata); +} + + +void +unwind_setxattr (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); + +} + +void +set_setxattr_fop (long py_this, fop_setxattr_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_SETXATTR] = (long)fop; +} + +void +set_setxattr_cbk (long py_this, fop_setxattr_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_SETXATTR] = (long)cbk; +} + +/* FOP: GETXATTR */ + +int32_t +glupy_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_GETXATTR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_getxattr_cbk_t)(priv->cbks[GLUPY_GETXATTR]))( + frame, cookie, this, op_ret, op_errno, dict, + xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, + xdata); + 
return 0; +} + +int32_t +glupy_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_GETXATTR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_getxattr_t)(priv->fops[GLUPY_GETXATTR]))( + frame, this, loc, name, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, + xdata); + return 0; +} + +void +wind_getxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc, + const char *name, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_getxattr_cbk, xl, xl->fops->getxattr, + loc, name, xdata); +} + + +void +unwind_getxattr (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, + xdata); + +} + + +void +set_getxattr_fop (long py_this, fop_getxattr_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_GETXATTR] = (long)fop; +} + + +void +set_getxattr_cbk (long py_this, fop_getxattr_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_GETXATTR] = (long)cbk; +} + +/* FOP: FSETXATTR */ + +int32_t +glupy_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_FSETXATTR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_fsetxattr_cbk_t)(priv->cbks[GLUPY_FSETXATTR]))( + frame, cookie, this, op_ret, op_errno, + xdata); + glupy_leave(gstate); + + return ret; + 
+unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int32_t +glupy_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int32_t flags, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_FSETXATTR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_fsetxattr_t)(priv->fops[GLUPY_FSETXATTR]))( + frame, this, fd, dict, flags, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, + flags, xdata); + return 0; +} + +void +wind_fsetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd, + dict_t *dict, int32_t flags, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_fsetxattr_cbk, xl, xl->fops->fsetxattr, + fd, dict, flags, xdata); +} + + +void +unwind_fsetxattr (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); + +} + +void +set_fsetxattr_fop (long py_this, fop_fsetxattr_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_FSETXATTR] = (long)fop; +} + +void +set_fsetxattr_cbk (long py_this, fop_fsetxattr_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_FSETXATTR] = (long)cbk; +} + +/* FOP: FGETXATTR */ + +int32_t +glupy_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_FGETXATTR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = 
((fop_fgetxattr_cbk_t)(priv->cbks[GLUPY_FGETXATTR]))( + frame, cookie, this, op_ret, op_errno, dict, + xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, + xdata); + return 0; +} + +int32_t +glupy_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_FGETXATTR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_fgetxattr_t)(priv->fops[GLUPY_FGETXATTR]))( + frame, this, fd, name, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, + xdata); + return 0; +} + +void +wind_fgetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd, + const char *name, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_fgetxattr_cbk, xl, xl->fops->fgetxattr, + fd, name, xdata); +} + + +void +unwind_fgetxattr (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, + xdata); + +} + + +void +set_fgetxattr_fop (long py_this, fop_fgetxattr_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_FGETXATTR] = (long)fop; +} + + +void +set_fgetxattr_cbk (long py_this, fop_fgetxattr_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_FGETXATTR] = (long)cbk; +} + +/* FOP:REMOVEXATTR */ + +int32_t +glupy_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + 
PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_REMOVEXATTR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_removexattr_cbk_t)(priv->cbks[GLUPY_REMOVEXATTR]))( + frame, cookie, this, op_ret, op_errno, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int32_t +glupy_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_REMOVEXATTR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_removexattr_t)(priv->fops[GLUPY_REMOVEXATTR]))( + frame, this, loc, name, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + return 0; +} + +void +wind_removexattr (call_frame_t *frame, xlator_t *xl, loc_t *loc, + const char *name, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_removexattr_cbk, xl, xl->fops->removexattr, + loc, name, xdata); +} + + +void +unwind_removexattr (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata); + +} + +void +set_removexattr_fop (long py_this, fop_removexattr_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_REMOVEXATTR] = (long)fop; +} + +void +set_removexattr_cbk (long py_this, fop_removexattr_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_REMOVEXATTR] = (long)cbk; +} + + +/* FOP:FREMOVEXATTR */ + +int32_t +glupy_fremovexattr_cbk (call_frame_t 
*frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_FREMOVEXATTR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_fremovexattr_cbk_t)(priv->cbks[GLUPY_FREMOVEXATTR]))( + frame, cookie, this, op_ret, op_errno, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int32_t +glupy_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_FREMOVEXATTR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_fremovexattr_t)(priv->fops[GLUPY_FREMOVEXATTR]))( + frame, this, fd, name, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; +} + +void +wind_fremovexattr (call_frame_t *frame, xlator_t *xl, fd_t *fd, + const char *name, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_fremovexattr_cbk, xl, xl->fops->fremovexattr, + fd, name, xdata); +} + + +void +unwind_fremovexattr (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata); + +} + +void +set_fremovexattr_fop (long py_this, fop_fremovexattr_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_FREMOVEXATTR] = (long)fop; +} + +void +set_fremovexattr_cbk (long py_this, fop_fremovexattr_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t 
*)py_this)->private; + + priv->cbks[GLUPY_FREMOVEXATTR] = (long)cbk; +} + + +/* FOP: LINK*/ +int32_t +glupy_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_LINK]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_link_cbk_t)(priv->cbks[GLUPY_LINK]))( + frame, cookie, this, op_ret, op_errno, + inode, buf, preparent, postparent, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +glupy_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_LINK]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_link_t)(priv->fops[GLUPY_LINK]))( + frame, this, oldloc, newloc, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, + xdata); + return 0; +} + +void +wind_link (call_frame_t *frame, xlator_t *xl, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_link_cbk, xl, xl->fops->link, + oldloc, newloc, xdata); +} + +void +unwind_link (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf, + preparent, 
postparent, xdata); +} + +void +set_link_fop (long py_this, fop_link_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_LINK] = (long)fop; +} + +void +set_link_cbk (long py_this, fop_link_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_LINK] = (long)cbk; +} + +/* FOP: SYMLINK*/ +int32_t +glupy_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_SYMLINK]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_symlink_cbk_t)(priv->cbks[GLUPY_SYMLINK]))( + frame, cookie, this, op_ret, op_errno, + inode, buf, preparent, postparent, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +glupy_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_SYMLINK]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_symlink_t)(priv->fops[GLUPY_SYMLINK]))( + frame, this, linkname, loc, umask, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkname, loc, + umask, xdata); + return 0; +} + +void +wind_symlink (call_frame_t *frame, xlator_t *xl, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_symlink_cbk, xl, 
xl->fops->symlink, + linkname, loc, umask, xdata); +} + +void +unwind_symlink (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); +} + +void +set_symlink_fop (long py_this, fop_symlink_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_SYMLINK] = (long)fop; +} + +void +set_symlink_cbk (long py_this, fop_symlink_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_SYMLINK] = (long)cbk; +} + + +/* FOP: READLINK */ +int32_t +glupy_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *path, + struct iatt *buf, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_READLINK]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_readlink_cbk_t)(priv->cbks[GLUPY_READLINK]))( + frame, cookie, this, op_ret, op_errno, + path, buf, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, + buf, xdata); + return 0; +} + +int32_t +glupy_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, + size_t size, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_READLINK]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_readlink_t)(priv->fops[GLUPY_READLINK]))( + frame, this, loc, size, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_readlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, loc, + size, xdata); + return 0; 
+} + +void +wind_readlink (call_frame_t *frame, xlator_t *xl, loc_t *loc, + size_t size, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_readlink_cbk, xl, xl->fops->readlink, + loc, size, xdata); +} + +void +unwind_readlink (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *path, + struct iatt *buf, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf, + xdata); +} + +void +set_readlink_fop (long py_this, fop_readlink_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_READLINK] = (long)fop; +} + +void +set_readlink_cbk (long py_this, fop_readlink_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_READLINK] = (long)cbk; +} + + +/* FOP: UNLINK */ + +int32_t +glupy_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_UNLINK]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_unlink_cbk_t)(priv->cbks[GLUPY_UNLINK]))( + frame, cookie, this, op_ret, op_errno, + preparent, postparent, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, + postparent, xdata); + return 0; +} + +int32_t +glupy_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflags, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_UNLINK]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_unlink_t)(priv->fops[GLUPY_UNLINK]))( + frame, this, loc, xflags, 
xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, + xflags, xdata); + return 0; +} + +void +wind_unlink (call_frame_t *frame, xlator_t *xl, loc_t *loc, + int xflags, dict_t *xdata) +{ + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_unlink_cbk, xl, xl->fops->unlink, + loc, xflags, xdata); +} + +void +unwind_unlink (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, + preparent, postparent, xdata); +} + +void +set_unlink_fop (long py_this, fop_unlink_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_UNLINK] = (long)fop; +} + +void +set_unlink_cbk (long py_this, fop_unlink_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_UNLINK] = (long)cbk; +} + + +/* FOP: MKDIR */ + +int32_t +glupy_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_MKDIR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_mkdir_cbk_t)(priv->cbks[GLUPY_MKDIR]))( + frame, cookie, this, op_ret, op_errno, + inode, buf, preparent, postparent, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +glupy_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + 
PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_MKDIR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_mkdir_t)(priv->fops[GLUPY_MKDIR]))( + frame, this, loc, mode, umask, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, + xdata); + return 0; +} + +void +wind_mkdir (call_frame_t *frame, xlator_t *xl, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_mkdir_cbk, xl, xl->fops->mkdir, + loc, mode, umask, xdata); +} + +void +unwind_mkdir (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); +} + +void +set_mkdir_fop (long py_this, fop_mkdir_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_MKDIR] = (long)fop; +} + +void +set_mkdir_cbk (long py_this, fop_mkdir_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_MKDIR] = (long)cbk; +} + + +/* FOP: RMDIR */ + +int32_t +glupy_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + + if (!priv->cbks[GLUPY_RMDIR]) { + goto unwind; + } + + gstate = glupy_enter(); + ret = ((fop_rmdir_cbk_t)(priv->cbks[GLUPY_RMDIR]))( + frame, cookie, this, op_ret, op_errno, + preparent, postparent, xdata); + glupy_leave(gstate); + + return ret; + +unwind: + frame->local = NULL; + 
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, + postparent, xdata); + return 0; +} + +int32_t +glupy_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflags, dict_t *xdata) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + int32_t ret; + static long next_id = 0; + + if (!priv->fops[GLUPY_RMDIR]) { + goto wind; + } + + gstate = glupy_enter(); + frame->local = (void *)++next_id; + ret = ((fop_rmdir_t)(priv->fops[GLUPY_RMDIR]))( + frame, this, loc, xflags, xdata); + glupy_leave(gstate); + + return ret; + +wind: + STACK_WIND (frame, glupy_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, + xflags, xdata); + return 0; +} + +void +wind_rmdir (call_frame_t *frame, xlator_t *xl, loc_t *loc, + int xflags, dict_t *xdata) +{ + + xlator_t *this = THIS; + + if (!xl || (xl == this)) { + xl = FIRST_CHILD(this); + } + + STACK_WIND (frame, glupy_rmdir_cbk, xl, xl->fops->rmdir, + loc, xflags, xdata); +} + +void +unwind_rmdir (call_frame_t *frame, long cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + frame->local = NULL; + STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, + preparent, postparent, xdata); +} + +void +set_rmdir_fop (long py_this, fop_rmdir_t fop) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->fops[GLUPY_RMDIR] = (long)fop; +} + +void +set_rmdir_cbk (long py_this, fop_rmdir_cbk_t cbk) +{ + glupy_private_t *priv = ((xlator_t *)py_this)->private; + + priv->cbks[GLUPY_RMDIR] = (long)cbk; +} + + +/* NON-FOP-SPECIFIC CODE */ + + +/* + * Return frame->local as a long. The glupy_* fop entry points store a + * per-call id there before calling into Python. + */ +long +get_id (call_frame_t *frame) +{ + return (long)(frame->local); +} + +/* Return the 'unique' value of the call stack root this frame hangs off. */ +uint64_t +get_rootunique (call_frame_t *frame) +{ + return frame->root->unique; +} + +/* + * Translator entry point. Imports the Python module named by the + * "module-name" option and calls its "xlator" attribute with this + * translator's address (as a Python long); the result is kept in + * priv->py_xlator. + * + * Returns 0 on success. On failure returns -1 after dropping every + * reference taken here, freeing priv and resetting this->private. + */ +int32_t +init (xlator_t *this) +{ + glupy_private_t *priv = NULL; + char *module_name = NULL; + PyObject *py_mod_name = NULL; + PyObject *py_init_func = NULL; + PyObject *py_args = NULL; + PyObject *syspath = 
NULL; + PyObject *path = NULL; + PyGILState_STATE gstate; + static gf_boolean_t py_inited = _gf_false; + /* Label to jump to on failure; advanced as resources are acquired. */ + void * err_cleanup = &&err_return; + + if (dict_get_str(this->options,"module-name",&module_name) != 0) { + gf_log (this->name, GF_LOG_ERROR, "missing module-name"); + return -1; + } + + priv = GF_CALLOC (1, sizeof (glupy_private_t), gf_glupy_mt_priv); + if (!priv) { + goto *err_cleanup; + } + this->private = priv; + err_cleanup = &&err_free_priv; + + if (!py_inited) { + Py_Initialize(); + PyEval_InitThreads(); +#if 0 + (void)pthread_key_create(&gil_init_key,NULL); + (void)pthread_setspecific(gil_init_key,(void *)1); +#endif + /* PyEval_InitThreads takes this "for" us. No thanks. */ + PyEval_ReleaseLock(); + py_inited = _gf_true; + } + + /* + * The GIL is not held at this point (it was released above, on this + * call or an earlier one), so take it for the rest of init the same + * way every fop entry point does. + */ + gstate = glupy_enter(); + err_cleanup = &&err_release_gil; + + /* + * Adjust python's path. Best effort: if this fails the import below + * reports the real problem. + */ + syspath = PySys_GetObject("path"); + path = PyString_FromString(GLUSTER_PYTHON_PATH); + if (!syspath || !path || (PyList_Append(syspath, path) != 0)) { + gf_log (this->name, GF_LOG_WARNING, + "could not add GLUSTER_PYTHON_PATH to sys.path"); + if (PyErr_Occurred()) { + PyErr_Print(); + } + } + Py_XDECREF(path); + + py_mod_name = PyString_FromString(module_name); + if (!py_mod_name) { + gf_log (this->name, GF_LOG_ERROR, "could not create name"); + if (PyErr_Occurred()) { + PyErr_Print(); + } + goto *err_cleanup; + } + + gf_log (this->name, GF_LOG_DEBUG, "py_mod_name = %s", module_name); + priv->py_module = PyImport_Import(py_mod_name); + Py_DECREF(py_mod_name); + if (!priv->py_module) { + gf_log (this->name, GF_LOG_ERROR, "Python import failed"); + if (PyErr_Occurred()) { + PyErr_Print(); + } + goto *err_cleanup; + } + err_cleanup = &&err_deref_module; + + py_init_func = PyObject_GetAttrString(priv->py_module, "xlator"); + if (!py_init_func || !PyCallable_Check(py_init_func)) { + gf_log (this->name, GF_LOG_ERROR, "missing init func"); + if (PyErr_Occurred()) { + PyErr_Print(); + } + /* Attribute exists but is not callable: drop our reference. */ + Py_XDECREF(py_init_func); + goto *err_cleanup; + } + err_cleanup = &&err_deref_init; + + py_args = PyTuple_New(1); + if (!py_args) { + gf_log (this->name, GF_LOG_ERROR, "could not create args"); + if (PyErr_Occurred()) { + PyErr_Print(); + } + goto *err_cleanup; + } + PyTuple_SetItem(py_args,0,PyLong_FromLong((long)this)); + + /* TBD: pass in list 
of children */ + priv->py_xlator = PyObject_CallObject(py_init_func, py_args); + Py_DECREF(py_args); + if (!priv->py_xlator) { + gf_log (this->name, GF_LOG_ERROR, "Python init failed"); + if (PyErr_Occurred()) { + PyErr_Print(); + } + goto *err_cleanup; + } + gf_log (this->name, GF_LOG_INFO, "init returned %p", priv->py_xlator); + + /* Only the returned object is kept; the callable's reference is ours to drop. */ + Py_DECREF(py_init_func); + glupy_leave(gstate); + + return 0; + +err_deref_init: + Py_DECREF(py_init_func); +err_deref_module: + Py_DECREF(priv->py_module); +err_release_gil: + glupy_leave(gstate); +err_free_priv: + /* Do not leave a dangling pointer behind for fini() to dereference. */ + this->private = NULL; + GF_FREE(priv); +err_return: + return -1; +} + +/* + * Drop the Python references held in the private data and free it. + * Does nothing when this->private is NULL. + */ +void +fini (xlator_t *this) +{ + glupy_private_t *priv = this->private; + PyGILState_STATE gstate; + + if (!priv) + return; + + /* Dropping the last reference can run Python code, so hold the GIL. */ + gstate = glupy_enter(); + Py_XDECREF(priv->py_xlator); + Py_XDECREF(priv->py_module); + glupy_leave(gstate); + + this->private = NULL; + GF_FREE (priv); + + return; +} + +struct xlator_fops fops = { + .lookup = glupy_lookup, + .create = glupy_create, + .open = glupy_open, + .readv = glupy_readv, + .writev = glupy_writev, + .opendir = glupy_opendir, + .readdir = glupy_readdir, + .stat = glupy_stat, + .fstat = glupy_fstat, + .setxattr = glupy_setxattr, + .getxattr = glupy_getxattr, + .fsetxattr = glupy_fsetxattr, + .fgetxattr = glupy_fgetxattr, + .removexattr = glupy_removexattr, + .fremovexattr = glupy_fremovexattr, + .link = glupy_link, + .unlink = glupy_unlink, + .readlink = glupy_readlink, + .symlink = glupy_symlink, + .mkdir = glupy_mkdir, + .rmdir = glupy_rmdir, + .statfs = glupy_statfs, + .readdirp = glupy_readdirp +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = {NULL} }, +}; diff --git a/xlators/features/glupy/src/glupy.h b/xlators/features/glupy/src/glupy.h new file mode 100644 index 000000000..8661fce88 --- /dev/null +++ b/xlators/features/glupy/src/glupy.h @@ -0,0 +1,69 @@ +/* + Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __GLUPY_H__ +#define __GLUPY_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include "mem-types.h" + +enum { + GLUPY_LOOKUP = 0, + GLUPY_CREATE, + GLUPY_OPEN, + GLUPY_READV, + GLUPY_WRITEV, + GLUPY_OPENDIR, + GLUPY_READDIR, + GLUPY_READDIRP, + GLUPY_STAT, + GLUPY_FSTAT, + GLUPY_STATFS, + GLUPY_SETXATTR, + GLUPY_GETXATTR, + GLUPY_FSETXATTR, + GLUPY_FGETXATTR, + GLUPY_REMOVEXATTR, + GLUPY_FREMOVEXATTR, + GLUPY_LINK, + GLUPY_UNLINK, + GLUPY_READLINK, + GLUPY_SYMLINK, + GLUPY_MKNOD, + GLUPY_MKDIR, + GLUPY_RMDIR, + GLUPY_N_FUNCS +}; + +typedef struct { + PyObject *py_module; + PyObject *py_xlator; + long fops[GLUPY_N_FUNCS]; + long cbks[GLUPY_N_FUNCS]; +} glupy_private_t; + +enum gf_glupy_mem_types_ { + gf_glupy_mt_priv = gf_common_mt_end + 1, + gf_glupy_mt_end +}; + +#endif /* __GLUPY_H__ */ diff --git a/xlators/features/glupy/src/gluster.py b/xlators/features/glupy/src/gluster.py new file mode 100644 index 000000000..a5daa77d3 --- /dev/null +++ b/xlators/features/glupy/src/gluster.py @@ -0,0 +1,841 @@ +import sys +from ctypes import * + +dl = CDLL("",RTLD_GLOBAL) + + +class call_frame_t (Structure): + pass + +class dev_t (Structure): + pass + + +class dict_t (Structure): + pass + + +class gf_dirent_t (Structure): + pass + + +class iobref_t (Structure): + pass + + +class iovec_t (Structure): + pass + + +class 
list_head (Structure): + pass + +list_head._fields_ = [ + ("next", POINTER(list_head)), + ("prev", POINTER(list_head)) + ] + + +class rwxperm_t (Structure): + _fields_ = [ + ("read", c_uint8, 1), + ("write", c_uint8, 1), + ("execn", c_uint8, 1) + ] + + +class statvfs_t (Structure): + pass + + +class xlator_t (Structure): + pass + + +class ia_prot_t (Structure): + _fields_ = [ + ("suid", c_uint8, 1), + ("sgid", c_uint8, 1), + ("sticky", c_uint8, 1), + ("owner", rwxperm_t), + ("group", rwxperm_t), + ("other", rwxperm_t) + ] + +# For checking file type. +(IA_INVAL, IA_IFREG, IA_IFDIR, IA_IFLNK, IA_IFBLK, IA_IFCHR, IA_IFIFO, + IA_IFSOCK) = xrange(8) + + +class iatt_t (Structure): + _fields_ = [ + ("ia_no", c_uint64), + ("ia_gfid", c_ubyte * 16), + ("ia_dev", c_uint64), + ("ia_type", c_uint), + ("ia_prot", ia_prot_t), + ("ia_nlink", c_uint32), + ("ia_uid", c_uint32), + ("ia_gid", c_uint32), + ("ia_rdev", c_uint64), + ("ia_size", c_uint64), + ("ia_blksize", c_uint32), + ("ia_blocks", c_uint64), + ("ia_atime", c_uint32 ), + ("ia_atime_nsec", c_uint32), + ("ia_mtime", c_uint32), + ("ia_mtime_nsec", c_uint32), + ("ia_ctime", c_uint32), + ("ia_ctime_nsec", c_uint32) + ] + + +class mem_pool (Structure): + _fields_ = [ + ("list", list_head), + ("hot_count", c_int), + ("cold_count", c_int), + ("lock", c_void_p), + ("padded_sizeof_type", c_ulong), + ("pool", c_void_p), + ("pool_end", c_void_p), + ("real_sizeof_type", c_int), + ("alloc_count", c_uint64), + ("pool_misses", c_uint64), + ("max_alloc", c_int), + ("curr_stdalloc", c_int), + ("max_stdalloc", c_int), + ("name", c_char_p), + ("global_list", list_head) + ] + + +class U_ctx_key_inode (Union): + _fields_ = [ + ("key", c_uint64), + ("xl_key", POINTER(xlator_t)) + ] + + +class U_ctx_value1 (Union): + _fields_ = [ + ("value1", c_uint64), + ("ptr1", c_void_p) + ] + + +class U_ctx_value2 (Union): + _fields_ = [ + ("value2", c_uint64), + ("ptr2", c_void_p) + ] + +class inode_ctx (Structure): + _anonymous_ = 
("u_key","u_value1","u_value2",) + _fields_ = [ + ("u_key", U_ctx_key_inode), + ("u_value1", U_ctx_value1), + ("u_value2", U_ctx_value2) + ] + +class inode_t (Structure): + pass + +class inode_table_t (Structure): + _fields_ = [ + ("lock", c_void_p), + ("hashsize", c_size_t), + ("name", c_char_p), + ("root", POINTER(inode_t)), + ("xl", POINTER(xlator_t)), + ("lru_limit", c_uint32), + ("inode_hash", POINTER(list_head)), + ("name_hash", POINTER(list_head)), + ("active", list_head), + ("active_size", c_uint32), + ("lru", list_head), + ("lru_size", c_uint32), + ("purge", list_head), + ("purge_size", c_uint32), + ("inode_pool", POINTER(mem_pool)), + ("dentry_pool", POINTER(mem_pool)), + ("fd_mem_pool", POINTER(mem_pool)) + ] + +inode_t._fields_ = [ + ("table", POINTER(inode_table_t)), + ("gfid", c_ubyte * 16), + ("lock", c_void_p), + ("nlookup", c_uint64), + ("fd_count", c_uint32), + ("ref", c_uint32), + ("ia_type", c_uint), + ("fd_list", list_head), + ("dentry_list", list_head), + ("hashv", list_head), + ("listv", list_head), + ("ctx", POINTER(inode_ctx)) + ] + + + +class U_ctx_key_fd (Union): + _fields_ = [ + ("key", c_uint64), + ("xl_key", c_void_p) + ] + +class fd_lk_ctx (Structure): + _fields_ = [ + ("lk_list", list_head), + ("ref", c_int), + ("lock", c_void_p) + ] + +class fd_ctx (Structure): + _anonymous_ = ("u_key","u_value1") + _fields_ = [ + ("u_key", U_ctx_key_fd), + ("u_value1", U_ctx_value1) + ] + +class fd_t (Structure): + _fields_ = [ + ("pid", c_uint64), + ("flags", c_int32), + ("refcount", c_int32), + ("inode_list", list_head), + ("inode", POINTER(inode_t)), + ("lock", c_void_p), + ("ctx", POINTER(fd_ctx)), + ("xl_count", c_int), + ("lk_ctx", POINTER(fd_lk_ctx)), + ("anonymous", c_uint) + ] + +class loc_t (Structure): + _fields_ = [ + ("path", c_char_p), + ("name", c_char_p), + ("inode", POINTER(inode_t)), + ("parent", POINTER(inode_t)), + ("gfid", c_ubyte * 16), + ("pargfid", c_ubyte * 16), + ] + + + +def _init_op (a_class, fop, cbk, wind, unwind): + # 
Decorators, used by translators. We could pass the signatures as + # parameters, but it's actually kind of nice to keep them around for + # inspection. + a_class.fop_type = apply(CFUNCTYPE,a_class.fop_sig) + a_class.cbk_type = apply(CFUNCTYPE,a_class.cbk_sig) + # Dispatch-function registration. + fop.restype = None + fop.argtypes = [ c_long, a_class.fop_type ] + # Callback-function registration. + cbk.restype = None + cbk.argtypes = [ c_long, a_class.cbk_type ] + # STACK_WIND function. + wind.restype = None + wind.argtypes = list(a_class.fop_sig[1:]) + # STACK_UNWIND function. + unwind.restype = None + unwind.argtypes = list(a_class.cbk_sig[1:]) + +class OpLookup: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(inode_t), POINTER(iatt_t), + POINTER(dict_t), POINTER(iatt_t)) +_init_op (OpLookup, dl.set_lookup_fop, dl.set_lookup_cbk, + dl.wind_lookup, dl.unwind_lookup) + +class OpCreate: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), c_int, c_uint, c_uint, POINTER(fd_t), + POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(fd_t), POINTER(inode_t), + POINTER(iatt_t), POINTER(iatt_t), POINTER(iatt_t), + POINTER(dict_t)) +_init_op (OpCreate, dl.set_create_fop, dl.set_create_cbk, + dl.wind_create, dl.unwind_create) + +class OpOpen: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), c_int, POINTER(fd_t), POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(fd_t), POINTER(dict_t)) +_init_op (OpOpen, dl.set_open_fop, dl.set_open_cbk, + dl.wind_open, dl.unwind_open) + +class OpReadv: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(fd_t), c_size_t, c_long, c_uint32, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, 
POINTER(xlator_t), + c_int, c_int, POINTER(iovec_t), c_int, POINTER(iatt_t), + POINTER(iobref_t), POINTER(dict_t)) +_init_op (OpReadv, dl.set_readv_fop, dl.set_readv_cbk, + dl.wind_readv, dl.unwind_readv) +class OpWritev: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(fd_t), POINTER(iovec_t), c_int, c_long, c_uint32, + POINTER(iobref_t), POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(iatt_t), POINTER(iatt_t), + POINTER(dict_t)) +_init_op (OpWritev, dl.set_writev_fop, dl.set_writev_cbk, + dl.wind_writev, dl.unwind_writev) + +class OpOpendir: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), POINTER(fd_t) ,POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(fd_t), POINTER(dict_t)) +_init_op (OpOpendir, dl.set_opendir_fop, dl.set_opendir_cbk, + dl.wind_opendir, dl.unwind_opendir) + +class OpReaddir: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(fd_t), c_size_t, c_long, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t)) +_init_op (OpReaddir, dl.set_readdir_fop, dl.set_readdir_cbk, + dl.wind_readdir, dl.unwind_readdir) + +class OpReaddirp: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(fd_t), c_size_t, c_long, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t)) +_init_op (OpReaddirp, dl.set_readdirp_fop, dl.set_readdirp_cbk, + dl.wind_readdirp, dl.unwind_readdirp) + +class OpStat: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(iatt_t), POINTER(dict_t)) +_init_op (OpStat, dl.set_stat_fop, dl.set_stat_cbk, + dl.wind_stat, 
dl.unwind_stat) + +class OpFstat: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(fd_t), POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(iatt_t), POINTER(dict_t)) +_init_op (OpFstat, dl.set_fstat_fop, dl.set_fstat_cbk, + dl.wind_fstat, dl.unwind_fstat) + +class OpStatfs: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(statvfs_t), POINTER(dict_t)) +_init_op (OpStatfs, dl.set_statfs_fop, dl.set_statfs_cbk, + dl.wind_statfs, dl.unwind_statfs) + + +class OpSetxattr: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), POINTER(dict_t), c_int32, + POINTER (dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(dict_t)) +_init_op (OpSetxattr, dl.set_setxattr_fop, dl.set_setxattr_cbk, + dl.wind_setxattr, dl.unwind_setxattr) + +class OpGetxattr: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), c_char_p, POINTER (dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(dict_t), POINTER(dict_t)) +_init_op (OpGetxattr, dl.set_getxattr_fop, dl.set_getxattr_cbk, + dl.wind_getxattr, dl.unwind_getxattr) + +class OpFsetxattr: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(fd_t), POINTER(dict_t), c_int32, + POINTER (dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(dict_t)) +_init_op (OpFsetxattr, dl.set_fsetxattr_fop, dl.set_fsetxattr_cbk, + dl.wind_fsetxattr, dl.unwind_fsetxattr) + +class OpFgetxattr: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(fd_t), c_char_p, POINTER (dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(dict_t), POINTER(dict_t)) 
+_init_op (OpFgetxattr, dl.set_fgetxattr_fop, dl.set_fgetxattr_cbk, + dl.wind_fgetxattr, dl.unwind_fgetxattr) + +class OpRemovexattr: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), c_char_p, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(dict_t)) +_init_op (OpRemovexattr, dl.set_removexattr_fop, dl.set_removexattr_cbk, + dl.wind_removexattr, dl.unwind_removexattr) + + +class OpFremovexattr: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(fd_t), c_char_p, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(dict_t)) +_init_op (OpFremovexattr, dl.set_fremovexattr_fop, dl.set_fremovexattr_cbk, + dl.wind_fremovexattr, dl.unwind_fremovexattr) + +class OpLink: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), POINTER(loc_t), POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(inode_t), POINTER(iatt_t), + POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t)) +_init_op (OpLink, dl.set_link_fop, dl.set_link_cbk, + dl.wind_link, dl.unwind_link) + +class OpSymlink: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + c_char_p, POINTER(loc_t), c_uint, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(inode_t), POINTER(iatt_t), + POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t)) +_init_op (OpSymlink, dl.set_symlink_fop, dl.set_symlink_cbk, + dl.wind_symlink, dl.unwind_symlink) + +class OpUnlink: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), c_int, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(iatt_t), POINTER(iatt_t), + POINTER(dict_t)) +_init_op (OpUnlink, dl.set_unlink_fop, dl.set_unlink_cbk, + dl.wind_unlink, dl.unwind_unlink) + +class 
OpReadlink: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), c_size_t, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, c_char_p, POINTER(iatt_t), POINTER(dict_t)) +_init_op (OpReadlink, dl.set_readlink_fop, dl.set_readlink_cbk, + dl.wind_readlink, dl.unwind_readlink) + +class OpMkdir: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), c_uint, c_uint, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(inode_t), POINTER(iatt_t), + POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t)) +_init_op (OpMkdir, dl.set_mkdir_fop, dl.set_mkdir_cbk, + dl.wind_mkdir, dl.unwind_mkdir) + +class OpRmdir: + fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(loc_t), c_int, POINTER(dict_t)) + cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), + c_int, c_int, POINTER(iatt_t), POINTER(iatt_t), + POINTER(dict_t)) +_init_op (OpRmdir, dl.set_rmdir_fop, dl.set_rmdir_cbk, + dl.wind_rmdir, dl.unwind_rmdir) + + +class Translator: + def __init__ (self, c_this): + # This is only here to keep references to the stubs we create, + # because ctypes doesn't and glupy.so can't because it doesn't + # get a pointer to the actual Python object. It's a dictionary + # instead of a list in case we ever allow changing fops/cbks + # after initialization and need to look them up. 
+ self.stub_refs = {} + funcs = dir(self.__class__) + if "lookup_fop" in funcs: + @OpLookup.fop_type + def stub (frame, this, loc, xdata, s=self): + return s.lookup_fop (frame, this, loc, xdata) + self.stub_refs["lookup_fop"] = stub + dl.set_lookup_fop(c_this,stub) + if "lookup_cbk" in funcs: + @OpLookup.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, inode, + buf, xdata, postparent, s=self): + return s.lookup_cbk(frame, cookie, this, op_ret, + op_errno, inode, buf, xdata, + postparent) + self.stub_refs["lookup_cbk"] = stub + dl.set_lookup_cbk(c_this,stub) + if "create_fop" in funcs: + @OpCreate.fop_type + def stub (frame, this, loc, flags, mode, umask, fd, + xdata, s=self): + return s.create_fop (frame, this, loc, flags, + mode, umask, fd, xdata) + self.stub_refs["create_fop"] = stub + dl.set_create_fop(c_this,stub) + if "create_cbk" in funcs: + @OpCreate.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, fd, + inode, buf, preparent, postparent, xdata, + s=self): + return s.create_cbk (frame, cookie, this, + op_ret, op_errno, fd, + inode, buf, preparent, + postparent, xdata) + self.stub_refs["create_cbk"] = stub + dl.set_create_cbk(c_this,stub) + if "open_fop" in funcs: + @OpOpen.fop_type + def stub (frame, this, loc, flags, fd, + xdata, s=self): + return s.open_fop (frame, this, loc, flags, + fd, xdata) + self.stub_refs["open_fop"] = stub + dl.set_open_fop(c_this,stub) + if "open_cbk" in funcs: + @OpOpen.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, fd, + xdata, s=self): + return s.open_cbk (frame, cookie, this, + op_ret, op_errno, fd, + xdata) + self.stub_refs["open_cbk"] = stub + dl.set_open_cbk(c_this,stub) + if "readv_fop" in funcs: + @OpReadv.fop_type + def stub (frame, this, fd, size, offset, flags, + xdata, s=self): + return s.readv_fop (frame, this, fd, size, + offset, flags, xdata) + self.stub_refs["readv_fop"] = stub + dl.set_readv_fop(c_this,stub) + if "readv_cbk" in funcs: + @OpReadv.cbk_type + def stub (frame, 
cookie, this, op_ret, op_errno, + vector, count, stbuf, iobref, xdata, + s=self): + return s.readv_cbk (frame, cookie, this, + op_ret, op_errno, vector, + count, stbuf, iobref, + xdata) + self.stub_refs["readv_cbk"] = stub + dl.set_readv_cbk(c_this,stub) + if "writev_fop" in funcs: + @OpWritev.fop_type + def stub (frame, this, fd, vector, count, + offset, flags, iobref, xdata, s=self): + return s.writev_fop (frame, this, fd, vector, + count, offset, flags, + iobref, xdata) + self.stub_refs["writev_fop"] = stub + dl.set_writev_fop(c_this,stub) + if "writev_cbk" in funcs: + @OpWritev.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + prebuf, postbuf, xdata, s=self): + return s.writev_cbk (frame, cookie, this, + op_ret, op_errno, prebuf, + postbuf, xdata) + self.stub_refs["writev_cbk"] = stub + dl.set_writev_cbk(c_this,stub) + if "opendir_fop" in funcs: + @OpOpendir.fop_type + def stub (frame, this, loc, fd, xdata, s=self): + return s.opendir_fop (frame, this, loc, fd, + xdata) + self.stub_refs["opendir_fop"] = stub + dl.set_opendir_fop(c_this,stub) + if "opendir_cbk" in funcs: + @OpOpendir.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, fd, + xdata, s=self): + return s.opendir_cbk(frame, cookie, this, + op_ret, op_errno, fd, + xdata) + self.stub_refs["opendir_cbk"] = stub + dl.set_opendir_cbk(c_this,stub) + if "readdir_fop" in funcs: + @OpReaddir.fop_type + def stub (frame, this, fd, size, offset, xdata, s=self): + return s.readdir_fop (frame, this, fd, size, + offset, xdata) + self.stub_refs["readdir_fop"] = stub + dl.set_readdir_fop(c_this,stub) + if "readdir_cbk" in funcs: + @OpReaddir.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + entries, xdata, s=self): + return s.readdir_cbk(frame, cookie, this, + op_ret, op_errno, entries, + xdata) + self.stub_refs["readdir_cbk"] = stub + dl.set_readdir_cbk(c_this,stub) + if "readdirp_fop" in funcs: + @OpReaddirp.fop_type + def stub (frame, this, fd, size, offset, xdata, s=self): + return 
s.readdirp_fop (frame, this, fd, size, + offset, xdata) + self.stub_refs["readdirp_fop"] = stub + dl.set_readdirp_fop(c_this,stub) + if "readdirp_cbk" in funcs: + @OpReaddirp.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + entries, xdata, s=self): + return s.readdirp_cbk (frame, cookie, this, + op_ret, op_errno, + entries, xdata) + self.stub_refs["readdirp_cbk"] = stub + dl.set_readdirp_cbk(c_this,stub) + if "stat_fop" in funcs: + @OpStat.fop_type + def stub (frame, this, loc, xdata, s=self): + return s.stat_fop (frame, this, loc, xdata) + self.stub_refs["stat_fop"] = stub + dl.set_stat_fop(c_this,stub) + if "stat_cbk" in funcs: + @OpStat.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, buf, + xdata, s=self): + return s.stat_cbk(frame, cookie, this, op_ret, + op_errno, buf, xdata) + self.stub_refs["stat_cbk"] = stub + dl.set_stat_cbk(c_this,stub) + if "fstat_fop" in funcs: + @OpFstat.fop_type + def stub (frame, this, fd, xdata, s=self): + return s.fstat_fop (frame, this, fd, xdata) + self.stub_refs["fstat_fop"] = stub + dl.set_fstat_fop(c_this,stub) + if "fstat_cbk" in funcs: + @OpFstat.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, buf, + xdata, s=self): + return s.fstat_cbk(frame, cookie, this, op_ret, + op_errno, buf, xdata) + self.stub_refs["fstat_cbk"] = stub + dl.set_fstat_cbk(c_this,stub) + if "statfs_fop" in funcs: + @OpStatfs.fop_type + def stub (frame, this, loc, xdata, s=self): + return s.statfs_fop (frame, this, loc, xdata) + self.stub_refs["statfs_fop"] = stub + dl.set_statfs_fop(c_this,stub) + if "statfs_cbk" in funcs: + @OpStatfs.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, buf, + xdata, s=self): + return s.statfs_cbk (frame, cookie, this, + op_ret, op_errno, buf, + xdata) + self.stub_refs["statfs_cbk"] = stub + dl.set_statfs_cbk(c_this,stub) + if "setxattr_fop" in funcs: + @OpSetxattr.fop_type + def stub (frame, this, loc, dictionary, flags, xdata, + s=self): + return s.setxattr_fop (frame, this, 
loc, + dictionary, flags, + xdata) + self.stub_refs["setxattr_fop"] = stub + dl.set_setxattr_fop(c_this,stub) + if "setxattr_cbk" in funcs: + @OpSetxattr.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, xdata, + s=self): + return s.setxattr_cbk(frame, cookie, this, + op_ret, op_errno, xdata) + self.stub_refs["setxattr_cbk"] = stub + dl.set_setxattr_cbk(c_this,stub) + if "getxattr_fop" in funcs: + @OpGetxattr.fop_type + def stub (frame, this, loc, name, xdata, s=self): + return s.getxattr_fop (frame, this, loc, name, + xdata) + self.stub_refs["getxattr_fop"] = stub + dl.set_getxattr_fop(c_this,stub) + if "getxattr_cbk" in funcs: + @OpGetxattr.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + dictionary, xdata, s=self): + return s.getxattr_cbk(frame, cookie, this, + op_ret, op_errno, + dictionary, xdata) + self.stub_refs["getxattr_cbk"] = stub + dl.set_getxattr_cbk(c_this,stub) + if "fsetxattr_fop" in funcs: + @OpFsetxattr.fop_type + def stub (frame, this, fd, dictionary, flags, xdata, + s=self): + return s.fsetxattr_fop (frame, this, fd, + dictionary, flags, + xdata) + self.stub_refs["fsetxattr_fop"] = stub + dl.set_fsetxattr_fop(c_this,stub) + if "fsetxattr_cbk" in funcs: + @OpFsetxattr.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, xdata, + s=self): + return s.fsetxattr_cbk(frame, cookie, this, + op_ret, op_errno, xdata) + self.stub_refs["fsetxattr_cbk"] = stub + dl.set_fsetxattr_cbk(c_this,stub) + if "fgetxattr_fop" in funcs: + @OpFgetxattr.fop_type + def stub (frame, this, fd, name, xdata, s=self): + return s.fgetxattr_fop (frame, this, fd, name, + xdata) + self.stub_refs["fgetxattr_fop"] = stub + dl.set_fgetxattr_fop(c_this,stub) + if "fgetxattr_cbk" in funcs: + @OpFgetxattr.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + dictionary, xdata, s=self): + return s.fgetxattr_cbk(frame, cookie, this, + op_ret, op_errno, + dictionary, xdata) + self.stub_refs["fgetxattr_cbk"] = stub + dl.set_fgetxattr_cbk(c_this,stub) + 
if "removexattr_fop" in funcs: + @OpRemovexattr.fop_type + def stub (frame, this, loc, name, xdata, s=self): + return s.removexattr_fop (frame, this, loc, + name, xdata) + self.stub_refs["removexattr_fop"] = stub + dl.set_removexattr_fop(c_this,stub) + if "removexattr_cbk" in funcs: + @OpRemovexattr.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + xdata, s=self): + return s.removexattr_cbk(frame, cookie, this, + op_ret, op_errno, + xdata) + self.stub_refs["removexattr_cbk"] = stub + dl.set_removexattr_cbk(c_this,stub) + if "fremovexattr_fop" in funcs: + @OpFremovexattr.fop_type + def stub (frame, this, fd, name, xdata, s=self): + return s.fremovexattr_fop (frame, this, fd, + name, xdata) + self.stub_refs["fremovexattr_fop"] = stub + dl.set_fremovexattr_fop(c_this,stub) + if "fremovexattr_cbk" in funcs: + @OpFremovexattr.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + xdata, s=self): + return s.fremovexattr_cbk(frame, cookie, this, + op_ret, op_errno, + xdata) + self.stub_refs["fremovexattr_cbk"] = stub + dl.set_fremovexattr_cbk(c_this,stub) + if "link_fop" in funcs: + @OpLink.fop_type + def stub (frame, this, oldloc, newloc, + xdata, s=self): + return s.link_fop (frame, this, oldloc, + newloc, xdata) + self.stub_refs["link_fop"] = stub + dl.set_link_fop(c_this,stub) + if "link_cbk" in funcs: + @OpLink.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + inode, buf, preparent, postparent, xdata, + s=self): + return s.link_cbk (frame, cookie, this, + op_ret, op_errno, inode, + buf, preparent, + postparent, xdata) + self.stub_refs["link_cbk"] = stub + dl.set_link_cbk(c_this,stub) + if "symlink_fop" in funcs: + @OpSymlink.fop_type + def stub (frame, this, linkname, loc, + umask, xdata, s=self): + return s.symlink_fop (frame, this, linkname, + loc, umask, xdata) + self.stub_refs["symlink_fop"] = stub + dl.set_symlink_fop(c_this,stub) + if "symlink_cbk" in funcs: + @OpSymlink.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, 
+ inode, buf, preparent, postparent, xdata, + s=self): + return s.symlink_cbk (frame, cookie, this, + op_ret, op_errno, inode, + buf, preparent, + postparent, xdata) + self.stub_refs["symlink_cbk"] = stub + dl.set_symlink_cbk(c_this,stub) + if "unlink_fop" in funcs: + @OpUnlink.fop_type + def stub (frame, this, loc, xflags, + xdata, s=self): + return s.unlink_fop (frame, this, loc, + xflags, xdata) + self.stub_refs["unlink_fop"] = stub + dl.set_unlink_fop(c_this,stub) + if "unlink_cbk" in funcs: + @OpUnlink.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + preparent, postparent, xdata, s=self): + return s.unlink_cbk (frame, cookie, this, + op_ret, op_errno, + preparent, postparent, + xdata) + self.stub_refs["unlink_cbk"] = stub + dl.set_unlink_cbk(c_this,stub) + if "readlink_fop" in funcs: + @OpReadlink.fop_type + def stub (frame, this, loc, size, + xdata, s=self): + return s.readlink_fop (frame, this, loc, + size, xdata) + self.stub_refs["readlink_fop"] = stub + dl.set_readlink_fop(c_this,stub) + if "readlink_cbk" in funcs: + @OpReadlink.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + path, buf, xdata, s=self): + return s.readlink_cbk (frame, cookie, this, + op_ret, op_errno, + path, buf, xdata) + self.stub_refs["readlink_cbk"] = stub + dl.set_readlink_cbk(c_this,stub) + if "mkdir_fop" in funcs: + @OpMkdir.fop_type + def stub (frame, this, loc, mode, umask, xdata, + s=self): + return s.mkdir_fop (frame, this, loc, mode, + umask, xdata) + self.stub_refs["mkdir_fop"] = stub + dl.set_mkdir_fop(c_this,stub) + if "mkdir_cbk" in funcs: + @OpMkdir.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, inode, + buf, preparent, postparent, xdata, s=self): + return s.mkdir_cbk (frame, cookie, this, + op_ret, op_errno, inode, + buf, preparent, + postparent, xdata) + self.stub_refs["mkdir_cbk"] = stub + dl.set_mkdir_cbk(c_this,stub) + if "rmdir_fop" in funcs: + @OpRmdir.fop_type + def stub (frame, this, loc, xflags, + xdata, s=self): + return 
s.rmdir_fop (frame, this, loc, + xflags, xdata) + self.stub_refs["rmdir_fop"] = stub + dl.set_rmdir_fop(c_this,stub) + if "rmdir_cbk" in funcs: + @OpRmdir.cbk_type + def stub (frame, cookie, this, op_ret, op_errno, + preparent, postparent, xdata, s=self): + return s.rmdir_cbk (frame, cookie, this, + op_ret, op_errno, + preparent, postparent, + xdata) + self.stub_refs["rmdir_cbk"] = stub + dl.set_rmdir_cbk(c_this,stub) diff --git a/xlators/features/glupy/src/helloworld.py b/xlators/features/glupy/src/helloworld.py new file mode 100644 index 000000000..8fe403711 --- /dev/null +++ b/xlators/features/glupy/src/helloworld.py @@ -0,0 +1,19 @@ +import sys +from gluster import * + +class xlator (Translator): + + def __init__(self, c_this): + Translator.__init__(self, c_this) + + def lookup_fop(self, frame, this, loc, xdata): + print "Python xlator: Hello!" + dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata) + return 0 + + def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf, + xdata, postparent): + print "Python xlator: Hello again!" + dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, inode, buf, + xdata, postparent) + return 0 diff --git a/xlators/features/glupy/src/negative.py b/xlators/features/glupy/src/negative.py new file mode 100644 index 000000000..1023602b9 --- /dev/null +++ b/xlators/features/glupy/src/negative.py @@ -0,0 +1,92 @@ +import sys +from uuid import UUID +from gluster import * + +# Negative-lookup-caching example. If a file wasn't there the last time we +# looked, it's probably still not there. This translator keeps track of +# those failed lookups for us, and returns ENOENT without needing to pass the +# call any further for repeated requests. + +# If we were doing this for real, we'd need separate caches for each xlator +# instance. 
The easiest way to do this would be to have xlator.__init__ +# "register" each instance in a module-global dict, with the key as the C +# translator address and the value as the xlator object itself. For testing +# and teaching, it's sufficient just to have one cache. The keys are parent +# GFIDs, and the entries are lists of names within that parent that we know +# don't exist. +cache = {} + +# TBD: we need a better way of handling per-request data (frame->local in C). +dl.get_id.restype = c_long +dl.get_id.argtypes = [ POINTER(call_frame_t) ] + +def uuid2str (gfid): + return str(UUID(''.join(map("{0:02x}".format, gfid)))) + +class xlator (Translator): + + def __init__ (self, c_this): + self.requests = {} + Translator.__init__(self,c_this) + + def lookup_fop (self, frame, this, loc, xdata): + pargfid = uuid2str(loc.contents.pargfid) + print "lookup FOP: %s:%s" % (pargfid, loc.contents.name) + # Check the cache. + if cache.has_key(pargfid): + if loc.contents.name in cache[pargfid]: + print "short-circuiting for %s:%s" % (pargfid, + loc.contents.name) + dl.unwind_lookup(frame,0,this,-1,2,None,None,None,None) + return 0 + key = dl.get_id(frame) + self.requests[key] = (pargfid, loc.contents.name[:]) + # TBD: get real child xl from init, pass it here + dl.wind_lookup(frame,POINTER(xlator_t)(),loc,xdata) + return 0 + + def lookup_cbk (self, frame, cookie, this, op_ret, op_errno, inode, buf, + xdata, postparent): + print "lookup CBK: %d (%d)" % (op_ret, op_errno) + key = dl.get_id(frame) + pargfid, name = self.requests[key] + # Update the cache. 
+ if op_ret == 0: + print "found %s, removing from cache" % name + if cache.has_key(pargfid): + cache[pargfid].discard(name) + elif op_errno == 2: # ENOENT + print "failed to find %s, adding to cache" % name + if cache.has_key(pargfid): + cache[pargfid].add(name) + else: + cache[pargfid] = set([name]) + del self.requests[key] + dl.unwind_lookup(frame,cookie,this,op_ret,op_errno, + inode,buf,xdata,postparent) + return 0 + + def create_fop (self, frame, this, loc, flags, mode, umask, fd, xdata): + pargfid = uuid2str(loc.contents.pargfid) + print "create FOP: %s:%s" % (pargfid, loc.contents.name) + key = dl.get_id(frame) + self.requests[key] = (pargfid, loc.contents.name[:]) + # TBD: get real child xl from init, pass it here + dl.wind_create(frame,POINTER(xlator_t)(),loc,flags,mode,umask,fd,xdata) + return 0 + + def create_cbk (self, frame, cookie, this, op_ret, op_errno, fd, inode, + buf, preparent, postparent, xdata): + print "create CBK: %d (%d)" % (op_ret, op_errno) + key = dl.get_id(frame) + pargfid, name = self.requests[key] + # Update the cache. 
+ if op_ret == 0: + print "created %s, removing from cache" % name + if cache.has_key(pargfid): + cache[pargfid].discard(name) + del self.requests[key] + dl.unwind_create(frame,cookie,this,op_ret,op_errno,fd,inode,buf, + preparent,postparent,xdata) + return 0 + diff --git a/xlators/features/index/Makefile.am b/xlators/features/index/Makefile.am new file mode 100644 index 000000000..a985f42a8 --- /dev/null +++ b/xlators/features/index/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/index/src/Makefile.am b/xlators/features/index/src/Makefile.am new file mode 100644 index 000000000..73bb8972e --- /dev/null +++ b/xlators/features/index/src/Makefile.am @@ -0,0 +1,17 @@ +xlator_LTLIBRARIES = index.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +index_la_LDFLAGS = -module -avoid-version + +index_la_SOURCES = index.c +index_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = index.h index-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) \ + -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h new file mode 100644 index 000000000..553d492df --- /dev/null +++ b/xlators/features/index/src/index-mem-types.h @@ -0,0 +1,22 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __QUIESCE_MEM_TYPES_H__ +#define __QUIESCE_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_index_mem_types_ { + gf_index_mt_priv_t = gf_common_mt_end + 1, + gf_index_inode_ctx_t = gf_common_mt_end + 2, + gf_index_fd_ctx_t = gf_common_mt_end + 3, + gf_index_mt_end +}; +#endif diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c new file mode 100644 index 000000000..9253120f3 --- /dev/null +++ b/xlators/features/index/src/index.c @@ -0,0 +1,1489 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "index.h" +#include "options.h" +#include "glusterfs3-xdr.h" +#include "syncop.h" + +#define XATTROP_SUBDIR "xattrop" +#define BASE_INDICES_HOLDER_SUBDIR "base_indices_holder" + +call_stub_t * +__index_dequeue (struct list_head *callstubs) +{ + call_stub_t *stub = NULL; + + if (!list_empty (callstubs)) { + stub = list_entry (callstubs->next, call_stub_t, list); + list_del_init (&stub->list); + } + + return stub; +} + +inline static void +__index_enqueue (struct list_head *callstubs, call_stub_t *stub) +{ + list_add_tail (&stub->list, callstubs); +} + +static void +worker_enqueue (xlator_t *this, call_stub_t *stub) +{ + index_priv_t *priv = NULL; + + priv = this->private; + pthread_mutex_lock (&priv->mutex); + { + __index_enqueue (&priv->callstubs, stub); + pthread_cond_signal (&priv->cond); + } + pthread_mutex_unlock (&priv->mutex); +} + +void * +index_worker (void *data) +{ + index_priv_t *priv = NULL; + xlator_t *this = NULL; + call_stub_t *stub = NULL; + int ret = 0; + + THIS = data; + this = data; + priv = this->private; + + for 
(;;) { + pthread_mutex_lock (&priv->mutex); + { + while (list_empty (&priv->callstubs)) { + ret = pthread_cond_wait (&priv->cond, + &priv->mutex); + } + + stub = __index_dequeue (&priv->callstubs); + } + pthread_mutex_unlock (&priv->mutex); + + if (stub) /* guard against spurious wakeups */ + call_resume (stub); + } + + return NULL; +} +int +__index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx) +{ + int ret = 0; + index_inode_ctx_t *ictx = NULL; + uint64_t tmpctx = 0; + + ret = __inode_ctx_get (inode, this, &tmpctx); + if (!ret) { + ictx = (index_inode_ctx_t*) (long) tmpctx; + goto out; + } + ictx = GF_CALLOC (1, sizeof (*ictx), gf_index_inode_ctx_t); + if (!ictx) { + ret = -1; + goto out; + } + + INIT_LIST_HEAD (&ictx->callstubs); + ret = __inode_ctx_put (inode, this, (uint64_t)ictx); + if (ret) { + GF_FREE (ictx); + ictx = NULL; + goto out; + } +out: + if (ictx) + *ctx = ictx; + return ret; +} + +int +index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx) +{ + int ret = 0; + + LOCK (&inode->lock); + { + ret = __index_inode_ctx_get (inode, this, ctx); + } + UNLOCK (&inode->lock); + + return ret; +} + +static void +make_index_dir_path (char *base, const char *subdir, + char *index_dir, size_t len) +{ + snprintf (index_dir, len, "%s/%s", base, subdir); +} + +int +index_dir_create (xlator_t *this, const char *subdir) +{ + int ret = 0; + struct stat st = {0}; + char fullpath[PATH_MAX] = {0}; + char path[PATH_MAX] = {0}; + char *dir = NULL; + index_priv_t *priv = NULL; + size_t len = 0; + size_t pathlen = 0; + + priv = this->private; + make_index_dir_path (priv->index_basepath, subdir, fullpath, + sizeof (fullpath)); + ret = stat (fullpath, &st); + if (!ret) { + if (!S_ISDIR (st.st_mode)) + ret = -2; + goto out; + } + + pathlen = strlen (fullpath); + if ((pathlen > 1) && fullpath[pathlen - 1] == '/') + fullpath[pathlen - 1] = '\0'; + dir = strchr (fullpath, '/'); + while (dir) { + dir = strchr (dir + 1, '/'); + if (dir) 
+ len = pathlen - strlen (dir); + else + len = pathlen; + strncpy (path, fullpath, len); + path[len] = '\0'; + ret = mkdir (path, 0600); + if (ret && (errno != EEXIST)) + goto out; + } + ret = 0; +out: + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to " + "create (%s)", priv->index_basepath, subdir, + strerror (errno)); + } else if (ret == -2) { + gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to create, " + "path exists, not a directory ", priv->index_basepath, + subdir); + } + return ret; +} + +void +index_get_index (index_priv_t *priv, uuid_t index) +{ + LOCK (&priv->lock); + { + uuid_copy (index, priv->index); + } + UNLOCK (&priv->lock); +} + +void +index_generate_index (index_priv_t *priv, uuid_t index) +{ + LOCK (&priv->lock); + { + //To prevent duplicate generates. + //This method fails if number of contending threads is greater + //than MAX_LINK count of the fs + if (!uuid_compare (priv->index, index)) + uuid_generate (priv->index); + uuid_copy (index, priv->index); + } + UNLOCK (&priv->lock); +} + +static void +make_index_path (char *base, const char *subdir, uuid_t index, + char *index_path, size_t len) +{ + make_index_dir_path (base, subdir, index_path, len); + snprintf (index_path + strlen (index_path), len - strlen (index_path), + "/%s-%s", subdir, uuid_utoa (index)); +} + +static void +make_gfid_path (char *base, const char *subdir, uuid_t gfid, + char *gfid_path, size_t len) +{ + make_index_dir_path (base, subdir, gfid_path, len); + snprintf (gfid_path + strlen (gfid_path), len - strlen (gfid_path), + "/%s", uuid_utoa (gfid)); +} + +static void +make_file_path (char *base, const char *subdir, const char *filename, + char *file_path, size_t len) +{ + make_index_dir_path (base, subdir, file_path, len); + snprintf (file_path + strlen (file_path), len - strlen (file_path), + "/%s", filename); +} + +static void +check_delete_stale_index_file (xlator_t *this, char *filename) +{ + int ret = 0; + struct stat st = {0}; + struct stat 
base_index_st = {0}; + char filepath[PATH_MAX] = {0}; + char filepath_under_base_indices_holder[PATH_MAX] = {0}; + index_priv_t *priv = NULL; + + priv = this->private; + if (priv->to_be_healed_states != synced_state) + return; + + make_file_path (priv->index_basepath, XATTROP_SUBDIR, + filename, filepath, sizeof (filepath)); + + make_file_path (priv->index_basepath, BASE_INDICES_HOLDER_SUBDIR, + filename, filepath_under_base_indices_holder, + sizeof (filepath_under_base_indices_holder)); + + + ret = stat (filepath_under_base_indices_holder, &base_index_st); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Base index is not created" + "under index/base_indices_holder"); + return; + } + + ret = stat (filepath, &st); + if (!ret && st.st_nlink == 2) { + unlink (filepath); + unlink (filepath_under_base_indices_holder); + } +} + +static int +index_fill_readdir (fd_t *fd, DIR *dir, off_t off, + size_t size, gf_dirent_t *entries, readdir_directory type) +{ + off_t in_case = -1; + size_t filled = 0; + int count = 0; + char entrybuf[sizeof(struct dirent) + 256 + 8]; + struct dirent *entry = NULL; + int32_t this_size = -1; + gf_dirent_t *this_entry = NULL; + xlator_t *this = NULL; + + this = THIS; + if (!off) { + rewinddir (dir); + } else { + seekdir (dir, off); + } + + while (filled <= size) { + in_case = telldir (dir); + + if (in_case == -1) { + gf_log (THIS->name, GF_LOG_ERROR, + "telldir failed on dir=%p: %s", + dir, strerror (errno)); + goto out; + } + + errno = 0; + entry = NULL; + readdir_r (dir, (struct dirent *)entrybuf, &entry); + + if (!entry) { + if (errno == EBADF) { + gf_log (THIS->name, GF_LOG_WARNING, + "readdir failed on dir=%p: %s", + dir, strerror (errno)); + goto out; + } + break; + } + + if (!strncmp (entry->d_name, XATTROP_SUBDIR"-", + strlen (XATTROP_SUBDIR"-")) && + (type == INDEX_XATTROP)) { + check_delete_stale_index_file (this, entry->d_name); + continue; + } + + this_size = max (sizeof (gf_dirent_t), + sizeof (gfs3_dirplist)) + + strlen 
(entry->d_name) + 1; + + if (this_size + filled > size) { + seekdir (dir, in_case); + break; + } + + this_entry = gf_dirent_for_name (entry->d_name); + + if (!this_entry) { + gf_log (THIS->name, GF_LOG_ERROR, + "could not create gf_dirent for entry %s: (%s)", + entry->d_name, strerror (errno)); + goto out; + } + this_entry->d_off = telldir (dir); + this_entry->d_ino = entry->d_ino; + + list_add_tail (&this_entry->list, &entries->list); + + filled += this_size; + count ++; + } + + if ((!readdir (dir) && (errno == 0))) + /* Indicate EOF */ + errno = ENOENT; +out: + return count; +} + +int +sync_base_indices (void *index_priv) +{ + index_priv_t *priv = NULL; + DIR *dir_base_holder = NULL; + DIR *xattrop_dir = NULL; + struct dirent *entry = NULL; + char base_indices_holder[PATH_MAX] = {0}; + char xattrop_directory[PATH_MAX] = {0}; + char base_index_path[PATH_MAX] = {0}; + char xattrop_index_path[PATH_MAX] = {0}; + int ret = 0; + + priv = index_priv; + + snprintf (base_indices_holder, PATH_MAX, "%s/%s", priv->index_basepath, + BASE_INDICES_HOLDER_SUBDIR); + snprintf (xattrop_directory, PATH_MAX, "%s/%s", priv->index_basepath, + XATTROP_SUBDIR); + + if ((dir_base_holder = opendir(base_indices_holder)) == NULL) { + ret = -1; + goto out; + } + if ((xattrop_dir = opendir (xattrop_directory)) == NULL) { + ret = -1; + goto out; + } + + priv->to_be_healed_states = sync_started; + while ((entry = readdir(xattrop_dir)) != NULL) { + if (!strcmp (entry->d_name, ".") || + !strcmp (entry->d_name, "..")) { + continue; + } + if (strncmp (entry->d_name, XATTROP_SUBDIR"-", + strlen (XATTROP_SUBDIR"-"))) { + continue; + } + if (!strncmp (entry->d_name, XATTROP_SUBDIR"-", + strlen (XATTROP_SUBDIR"-"))) { + + snprintf (xattrop_index_path, PATH_MAX, "%s/%s", + xattrop_directory, entry->d_name); + + snprintf (base_index_path, PATH_MAX, "%s/%s", + base_indices_holder, entry->d_name); + + ret = link (xattrop_index_path, base_index_path); + if (ret && errno != EEXIST) + goto out; + + } + } + 
ret = closedir (xattrop_dir); + if (ret) + goto out; + ret = closedir (dir_base_holder); + if (ret) + goto out; + + ret = 0; +out: + return ret; + +} + +int +base_indices_syncing_done (int ret, call_frame_t *frame, void *data) +{ + index_priv_t *priv = NULL; + priv = data; + + if (!priv) + goto out; + + if (ret) { + priv->to_be_healed_states = sync_not_started; + } else { + priv->to_be_healed_states = synced_state; + } + + STACK_DESTROY (frame->root); + +out: + return 0; +} + +int +sync_base_indices_from_xattrop (xlator_t *this) +{ + + index_priv_t *priv = NULL; + char base_indices_holder[PATH_MAX] = {0}; + int ret = 0; + struct stat st = {0}; + DIR *dir = NULL; + struct dirent *entry = NULL; + call_frame_t *frame = NULL; + + priv = this->private; + + if (priv->to_be_healed_states != sync_not_started) { + ret = -1; + goto out; + } + + snprintf (base_indices_holder, PATH_MAX, "%s/%s", priv->index_basepath, + BASE_INDICES_HOLDER_SUBDIR); + + ret = stat (base_indices_holder, &st); + + if (ret && (errno != ENOENT)) { + goto out; + } else if (errno == ENOENT) { + ret = index_dir_create (this, BASE_INDICES_HOLDER_SUBDIR); + if (ret) + goto out; + } else { + if ((dir = opendir (base_indices_holder)) == NULL) { + ret = -1; + goto out; + } + while ((entry = readdir (dir)) != NULL) { + if (!strcmp (entry->d_name, ".") || + !strcmp (entry->d_name,"..")) { + continue; + } + ret = unlink (entry->d_name); + if (ret) + goto out; + } + closedir (dir); + } + + /*At this point of time we have index/base_indicies_holder directory + *is with no entries*/ + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + ret = -1; + goto out; + } + set_lk_owner_from_ptr (&frame->root->lk_owner, frame->root); + + frame->root->pid = LOW_PRIO_PROC_PID; + + ret = synctask_new (this->ctx->env, sync_base_indices, + base_indices_syncing_done,frame, priv); + + + +out: + return ret; + +} + +int +index_add (xlator_t *this, uuid_t gfid, const char *subdir) +{ + int32_t op_errno = 0; + char 
gfid_path[PATH_MAX] = {0}; + char index_path[PATH_MAX] = {0}; + char base_path[PATH_MAX] = {0}; + int ret = 0; + uuid_t index = {0}; + index_priv_t *priv = NULL; + struct stat st = {0}; + int fd = 0; + int index_created = 0; + + priv = this->private; + GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid), + out, op_errno, EINVAL); + + make_gfid_path (priv->index_basepath, subdir, gfid, + gfid_path, sizeof (gfid_path)); + + ret = stat (gfid_path, &st); + if (!ret) + goto out; + index_get_index (priv, index); + make_index_path (priv->index_basepath, subdir, + index, index_path, sizeof (index_path)); + ret = link (index_path, gfid_path); + if (!ret || (errno == EEXIST)) { + ret = 0; + index_created = 1; + goto out; + } + + + op_errno = errno; + if (op_errno == ENOENT) { + ret = index_dir_create (this, subdir); + if (ret) + goto out; + } else if (op_errno == EMLINK) { + index_generate_index (priv, index); + make_index_path (priv->index_basepath, subdir, + index, index_path, sizeof (index_path)); + } else { + goto out; + } + + fd = creat (index_path, 0); + if ((fd < 0) && (errno != EEXIST)) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "%s: Not able to " + "create index (%s)", uuid_utoa (gfid), + strerror (errno)); + goto out; + } + + if (fd >= 0) + close (fd); + + ret = link (index_path, gfid_path); + if (ret && (errno != EEXIST)) { + gf_log (this->name, GF_LOG_ERROR, "%s: Not able to " + "add to index (%s)", uuid_utoa (gfid), + strerror (errno)); + goto out; + } else { + index_created = 1; + } + + if (priv->to_be_healed_states != sync_not_started) { + make_index_path (priv->index_basepath, + GF_BASE_INDICES_HOLDER_GFID, + index, base_path, sizeof (base_path)); + ret = link (index_path, base_path); + if (ret) + goto out; + } + + ret = 0; +out: + /*If base_indices_holder is not created: create and sync + *If directory is present: delete contents and start syncing + *If syncing is in progress :No need to do any thing + *If syncing is done: No need to do 
anything*/ + if (!ret) { + switch (priv->to_be_healed_states) { + case sync_not_started: + ret = sync_base_indices_from_xattrop (this); + break; + case sync_started: + case synced_state: + /*No need to do anything*/ + break; + } + } + return ret; +} + +int +index_del (xlator_t *this, uuid_t gfid, const char *subdir) +{ + int32_t op_errno __attribute__((unused)) = 0; + index_priv_t *priv = NULL; + int ret = 0; + char gfid_path[PATH_MAX] = {0}; + + priv = this->private; + GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid), + out, op_errno, EINVAL); + make_gfid_path (priv->index_basepath, subdir, gfid, + gfid_path, sizeof (gfid_path)); + ret = unlink (gfid_path); + if (ret && (errno != ENOENT)) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to delete from index (%s)", + gfid_path, strerror (errno)); + ret = -errno; + goto out; + } + ret = 0; +out: + return ret; +} + +static int +_check_key_is_zero_filled (dict_t *d, char *k, data_t *v, + void *tmp) +{ + if (mem_0filled ((const char*)v->data, v->len)) { + /* -1 means, no more iterations, treat as 'break' */ + return -1; + } + return 0; +} + + +void +_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) +{ + gf_boolean_t zero_xattr = _gf_true; + index_inode_ctx_t *ctx = NULL; + int ret = 0; + + ret = dict_foreach (xattr, _check_key_is_zero_filled, NULL); + if (ret == -1) + zero_xattr = _gf_false; + + ret = index_inode_ctx_get (inode, this, &ctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Not able to %s %s -> index", + zero_xattr?"add":"del", uuid_utoa (inode->gfid)); + goto out; + } + if (zero_xattr) { + if (ctx->state == NOTIN) + goto out; + ret = index_del (this, inode->gfid, XATTROP_SUBDIR); + if (!ret) + ctx->state = NOTIN; + } else { + if (ctx->state == IN) + goto out; + ret = index_add (this, inode->gfid, XATTROP_SUBDIR); + if (!ret) + ctx->state = IN; + } +out: + return; +} + +void +fop_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) +{ + 
_xattrop_index_action (this, inode, xattr); +} + +void +fop_fxattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) +{ + _xattrop_index_action (this, inode, xattr); +} + +inline gf_boolean_t +index_xattrop_track (loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict) +{ + return (flags == GF_XATTROP_ADD_ARRAY); +} + +inline gf_boolean_t +index_fxattrop_track (fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict) +{ + return (flags == GF_XATTROP_ADD_ARRAY); +} + +int +__index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx) +{ + int ret = 0; + index_fd_ctx_t *fctx = NULL; + uint64_t tmpctx = 0; + char index_dir[PATH_MAX] = {0}; + index_priv_t *priv = NULL; + + priv = this->private; + if (uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) { + ret = -EINVAL; + goto out; + } + + ret = __fd_ctx_get (fd, this, &tmpctx); + if (!ret) { + fctx = (index_fd_ctx_t*) (long) tmpctx; + goto out; + } + + fctx = GF_CALLOC (1, sizeof (*fctx), gf_index_fd_ctx_t); + if (!fctx) { + ret = -ENOMEM; + goto out; + } + + make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR, + index_dir, sizeof (index_dir)); + fctx->dir = opendir (index_dir); + if (!fctx->dir) { + ret = -errno; + GF_FREE (fctx); + fctx = NULL; + goto out; + } + + ret = __fd_ctx_set (fd, this, (uint64_t)(long)fctx); + if (ret) { + GF_FREE (fctx); + fctx = NULL; + ret = -EINVAL; + goto out; + } +out: + if (fctx) + *ctx = fctx; + return ret; +} + +int +index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx) +{ + int ret = 0; + LOCK (&fd->lock); + { + ret = __index_fd_ctx_get (fd, this, ctx); + } + UNLOCK (&fd->lock); + return ret; +} + +//new - Not NULL means start a fop +//new - NULL means done processing the fop +void +index_queue_process (xlator_t *this, inode_t *inode, call_stub_t *new) +{ + call_stub_t *stub = NULL; + index_inode_ctx_t *ctx = NULL; + int ret = 0; + call_frame_t *frame = NULL; + + LOCK (&inode->lock); + { + ret = __index_inode_ctx_get (inode, this, &ctx); + if (ret) + goto 
unlock; + + if (new) { + __index_enqueue (&ctx->callstubs, new); + new = NULL; + } else { + ctx->processing = _gf_false; + } + + if (!ctx->processing) { + stub = __index_dequeue (&ctx->callstubs); + if (stub) + ctx->processing = _gf_true; + else + ctx->processing = _gf_false; + } + } +unlock: + UNLOCK (&inode->lock); + + if (ret && new) { + frame = new->frame; + if (new->fop == GF_FOP_XATTROP) { + INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM, + NULL, NULL); + } else if (new->fop == GF_FOP_FXATTROP) { + INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM, + NULL, NULL); + } + call_stub_destroy (new); + } else if (stub) { + call_resume (stub); + } + return; +} + +int32_t +index_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = inode_ref (frame->local); + if (op_ret < 0) + goto out; + fop_xattrop_index_action (this, frame->local, xattr); +out: + INDEX_STACK_UNWIND (xattrop, frame, op_ret, op_errno, xattr, xdata); + index_queue_process (this, inode, NULL); + inode_unref (inode); + + return 0; +} + +int32_t +index_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = inode_ref (frame->local); + if (op_ret < 0) + goto out; + + fop_fxattrop_index_action (this, frame->local, xattr); +out: + INDEX_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, xattr, xdata); + index_queue_process (this, inode, NULL); + inode_unref (inode); + + return 0; +} + +int +index_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + STACK_WIND (frame, index_xattrop_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr, + xdata); + return 0; +} + +int +index_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, 
dict_t *xdata) +{ + STACK_WIND (frame, index_fxattrop_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr, + xdata); + return 0; +} + +int32_t +index_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (!index_xattrop_track (loc, flags, dict)) + goto out; + + frame->local = inode_ref (loc->inode); + stub = fop_xattrop_stub (frame, index_xattrop_wrapper, + loc, flags, dict, xdata); + if (!stub) { + INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + index_queue_process (this, loc->inode, stub); + return 0; +out: + STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata); + return 0; +} + +int32_t +index_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (!index_fxattrop_track (fd, flags, dict)) + goto out; + + frame->local = inode_ref (fd->inode); + stub = fop_fxattrop_stub (frame, index_fxattrop_wrapper, + fd, flags, dict, xdata); + if (!stub) { + INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM, NULL, xdata); + return 0; + } + + index_queue_process (this, fd->inode, stub); + return 0; +out: + STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata); + return 0; +} + +int32_t +index_getxattr_wrapper (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + index_priv_t *priv = NULL; + dict_t *xattr = NULL; + int ret = 0; + + priv = this->private; + + xattr = dict_new (); + if (!xattr) { + ret = -ENOMEM; + goto done; + } + + if (!strcmp (name, GF_XATTROP_INDEX_GFID)) { + + ret = dict_set_static_bin (xattr, (char*)name, + priv->xattrop_vgfid, + sizeof (priv->xattrop_vgfid)); + + } else if (!strcmp (name, GF_BASE_INDICES_HOLDER_GFID)) { + + ret = 
dict_set_static_bin (xattr, (char*)name, + priv->base_indices_holder_vgfid, + sizeof (priv->base_indices_holder_vgfid)); + } + if (ret) { + ret = -ENOMEM; + gf_log (THIS->name, GF_LOG_ERROR, "xattrop index " + "gfid set failed"); + goto done; + } +done: + if (ret) + STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, xattr, xdata); + else + STACK_UNWIND_STRICT (getxattr, frame, 0, 0, xattr, xdata); + + if (xattr) + dict_unref (xattr); + + return 0; +} + +int32_t +index_lookup_wrapper (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + index_priv_t *priv = NULL; + struct stat lstatbuf = {0}; + int ret = 0; + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + char path[PATH_MAX] = {0}; + struct iatt stbuf = {0, }; + struct iatt postparent = {0,}; + dict_t *xattr = NULL; + gf_boolean_t is_dir = _gf_false; + + priv = this->private; + + VALIDATE_OR_GOTO (loc, done); + if (!uuid_compare (loc->gfid, priv->xattrop_vgfid)) { + make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR, + path, sizeof (path)); + is_dir = _gf_true; + } else if (!uuid_compare (loc->pargfid, priv->xattrop_vgfid)) { + make_file_path (priv->index_basepath, XATTROP_SUBDIR, + loc->name, path, sizeof (path)); + } else if (!uuid_compare (loc->gfid,priv->base_indices_holder_vgfid)){ + make_index_dir_path (priv->index_basepath, + BASE_INDICES_HOLDER_SUBDIR, path, + sizeof (path)); + is_dir = _gf_true; + } else if (!uuid_compare (loc->pargfid, priv->base_indices_holder_vgfid)) { + make_file_path (priv->index_basepath, + BASE_INDICES_HOLDER_SUBDIR,loc->name, path, + sizeof (path)); + } + + ret = lstat (path, &lstatbuf); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir " + "(%s)", strerror (errno)); + op_errno = errno; + goto done; + } else if (!S_ISDIR (lstatbuf.st_mode) && is_dir) { + gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir, " + "not a directory"); + op_errno = ENOENT; + goto done; + } + xattr = dict_new (); + if (!xattr) { + op_errno = 
ENOMEM; + goto done; + } + + iatt_from_stat (&stbuf, &lstatbuf); + if (is_dir && !uuid_compare (loc->gfid, priv->xattrop_vgfid)) { + uuid_copy (stbuf.ia_gfid, priv->xattrop_vgfid); + } else if (is_dir && + !uuid_compare (loc->gfid, priv->base_indices_holder_vgfid)) { + uuid_copy (stbuf.ia_gfid, priv->base_indices_holder_vgfid); + } else { + uuid_generate (stbuf.ia_gfid); + } + stbuf.ia_ino = -1; + op_ret = 0; +done: + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, + loc->inode, &stbuf, xattr, &postparent); + if (xattr) + dict_unref (xattr); + return 0; +} + +int32_t +base_indices_readdir_wrapper (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *xdata) +{ + index_priv_t *priv = NULL; + char base_indices_holder[PATH_MAX] = {0}; + DIR *dir = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + int count = 0; + gf_dirent_t entries; + + priv = this->private; + + make_index_dir_path (priv->index_basepath, BASE_INDICES_HOLDER_SUBDIR, + base_indices_holder, sizeof (base_indices_holder)); + + dir = opendir (base_indices_holder); + if (!dir) { + op_errno = EINVAL; + goto done; + } + + + INIT_LIST_HEAD (&entries.list); + + count = index_fill_readdir (fd, dir, off, size, &entries, + BASE_INDICES_HOLDER); + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; + closedir (dir); +done: + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata); + gf_dirent_free (&entries); + return 0; +} + +int32_t +index_readdir_wrapper (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *xdata) +{ + index_fd_ctx_t *fctx = NULL; + DIR *dir = NULL; + int ret = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + int count = 0; + gf_dirent_t entries; + + INIT_LIST_HEAD (&entries.list); + + ret = index_fd_ctx_get (fd, this, &fctx); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL, fd=%p", fd); + op_errno = -ret; + goto done; + } + + dir = fctx->dir; + + if (!dir) { + gf_log 
(this->name, GF_LOG_WARNING, + "dir is NULL for fd=%p", fd); + op_errno = EINVAL; + goto done; + } + + count = index_fill_readdir (fd, dir, off, size, &entries, + INDEX_XATTROP); + + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; +done: + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata); + gf_dirent_free (&entries); + return 0; +} + +int +index_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, + dict_t *xdata) +{ + index_priv_t *priv = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + int ret = 0; + struct iatt preparent = {0}; + struct iatt postparent = {0}; + char index_dir[PATH_MAX] = {0}; + struct stat lstatbuf = {0}; + uuid_t gfid = {0}; + + priv = this->private; + make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR, + index_dir, sizeof (index_dir)); + ret = lstat (index_dir, &lstatbuf); + if (ret < 0) { + op_ret = -1; + op_errno = errno; + goto done; + } + + iatt_from_stat (&preparent, &lstatbuf); + uuid_copy (preparent.ia_gfid, priv->xattrop_vgfid); + preparent.ia_ino = -1; + uuid_parse (loc->name, gfid); + ret = index_del (this, gfid, XATTROP_SUBDIR); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + goto done; + } + memset (&lstatbuf, 0, sizeof (lstatbuf)); + ret = lstat (index_dir, &lstatbuf); + if (ret < 0) { + op_ret = -1; + op_errno = errno; + goto done; + } + iatt_from_stat (&postparent, &lstatbuf); + uuid_copy (postparent.ia_gfid, priv->xattrop_vgfid); + postparent.ia_ino = -1; +done: + INDEX_STACK_UNWIND (unlink, frame, op_ret, op_errno, &preparent, + &postparent, xdata); + return 0; +} + +int32_t +index_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (!name) + goto out; + if (strcmp (GF_XATTROP_INDEX_GFID, name) && + strcmp (GF_BASE_INDICES_HOLDER_GFID, name)) + goto out; + + stub = fop_getxattr_stub (frame, index_getxattr_wrapper, loc, name, + xdata); + if (!stub) { + 
STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + worker_enqueue (this, stub); + return 0; +out: + STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; +} + +int32_t +index_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + call_stub_t *stub = NULL; + index_priv_t *priv = NULL; + + priv = this->private; + + if (uuid_compare (loc->gfid, priv->xattrop_vgfid) && + uuid_compare (loc->pargfid, priv->xattrop_vgfid) && + uuid_compare (loc->gfid, priv->base_indices_holder_vgfid) && + uuid_compare (loc->pargfid, priv->base_indices_holder_vgfid)) + goto normal; + + stub = fop_lookup_stub (frame, index_lookup_wrapper, loc, xattr_req); + if (!stub) { + STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, loc->inode, + NULL, NULL, NULL); + return 0; + } + worker_enqueue (this, stub); + return 0; +normal: + STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + + return 0; +} + +int32_t +index_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *xdata) +{ + call_stub_t *stub = NULL; + index_priv_t *priv = NULL; + + priv = this->private; + if (uuid_compare (fd->inode->gfid, priv->xattrop_vgfid) && + uuid_compare (fd->inode->gfid, priv->base_indices_holder_vgfid)) + goto out; + + if (!uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) { + stub = fop_readdir_stub (frame, index_readdir_wrapper, fd, size, + off, xdata); + } else if (!uuid_compare (fd->inode->gfid, + priv->base_indices_holder_vgfid)) { + stub = fop_readdir_stub (frame, base_indices_readdir_wrapper, + fd, size, off, xdata); + } + + if (!stub) { + STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + worker_enqueue (this, stub); + return 0; +out: + STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata); + 
return 0; +} + +int +index_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + index_priv_t *priv = NULL; + + priv = this->private; + if (uuid_compare (loc->pargfid, priv->xattrop_vgfid)) + goto out; + + stub = fop_unlink_stub (frame, index_unlink_wrapper, loc, xflag, xdata); + if (!stub) { + STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, + NULL); + return 0; + } + worker_enqueue (this, stub); + return 0; +out: + STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + ret = xlator_mem_acct_init (this, gf_index_mt_end + 1); + + return ret; +} + +int +init (xlator_t *this) +{ + int ret = -1; + index_priv_t *priv = NULL; + pthread_t thread; + pthread_attr_t w_attr; + gf_boolean_t mutex_inited = _gf_false; + gf_boolean_t cond_inited = _gf_false; + gf_boolean_t attr_inited = _gf_false; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "'index' not configured with exactly one child"); + goto out; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + priv = GF_CALLOC (1, sizeof (*priv), gf_index_mt_priv_t); + if (!priv) + goto out; + + LOCK_INIT (&priv->lock); + if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "pthread_cond_init failed (%d)", ret); + goto out; + } + cond_inited = _gf_true; + + if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "pthread_mutex_init failed (%d)", ret); + goto out; + } + mutex_inited = _gf_true; + + if ((ret = pthread_attr_init (&w_attr)) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "pthread_attr_init failed (%d)", ret); + goto out; + } + attr_inited = _gf_true; + + ret = pthread_attr_setstacksize (&w_attr, INDEX_THREAD_STACK_SIZE); + if (ret == EINVAL) { + gf_log (this->name, GF_LOG_WARNING, + "Using default thread stack size"); + } + GF_OPTION_INIT ("index-base", priv->index_basepath, path, out); + uuid_generate (priv->index); + uuid_generate (priv->xattrop_vgfid); + /*base_indices_holder is a directory which contains hard links to + * all base indices inside indices/xattrop directory*/ + uuid_generate (priv->base_indices_holder_vgfid); + INIT_LIST_HEAD (&priv->callstubs); + + this->private = priv; + ret = gf_thread_create (&thread, &w_attr, index_worker, this); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to create " + "worker thread, aborting"); + goto out; + } + ret = 0; +out: + if (ret) { + if (cond_inited) + pthread_cond_destroy (&priv->cond); + if (mutex_inited) + pthread_mutex_destroy (&priv->mutex); + if (priv) + GF_FREE (priv); + this->private = NULL; + } + if (attr_inited) + pthread_attr_destroy (&w_attr); + return ret; +} + +void +fini (xlator_t *this) +{ + index_priv_t *priv = NULL; + + priv = this->private; + if (!priv) + goto out; + this->private = NULL; + LOCK_DESTROY (&priv->lock); + pthread_cond_destroy (&priv->cond); + pthread_mutex_destroy (&priv->mutex); + GF_FREE (priv); +out: + return; +} + +int +index_forget (xlator_t *this, 
inode_t *inode) +{ + uint64_t tmp_cache = 0; + if (!inode_ctx_del (inode, this, &tmp_cache)) + GF_FREE ((index_inode_ctx_t*) (long)tmp_cache); + + return 0; +} + +int32_t +index_releasedir (xlator_t *this, fd_t *fd) +{ + index_fd_ctx_t *fctx = NULL; + uint64_t ctx = 0; + int ret = 0; + + ret = fd_ctx_del (fd, this, &ctx); + if (ret < 0) + goto out; + + fctx = (index_fd_ctx_t*) (long) ctx; + if (fctx->dir) + closedir (fctx->dir); + + GF_FREE (fctx); +out: + return 0; +} + +int32_t +index_release (xlator_t *this, fd_t *fd) +{ + index_fd_ctx_t *fctx = NULL; + uint64_t ctx = 0; + int ret = 0; + + ret = fd_ctx_del (fd, this, &ctx); + if (ret < 0) + goto out; + + fctx = (index_fd_ctx_t*) (long) ctx; + GF_FREE (fctx); +out: + return 0; +} + +int +notify (xlator_t *this, int event, void *data, ...) +{ + int ret = 0; + ret = default_notify (this, event, data); + return ret; +} + +struct xlator_fops fops = { + .xattrop = index_xattrop, + .fxattrop = index_fxattrop, + + //interface functions follow + .getxattr = index_getxattr, + .lookup = index_lookup, + .readdir = index_readdir, + .unlink = index_unlink +}; + +struct xlator_dumpops dumpops; + +struct xlator_cbks cbks = { + .forget = index_forget, + .release = index_release, + .releasedir = index_releasedir +}; + +struct volume_options options[] = { + { .key = {"index-base" }, + .type = GF_OPTION_TYPE_PATH, + .description = "path where the index files need to be stored", + }, + { .key = {NULL} }, +}; diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h new file mode 100644 index 000000000..d6dcb1c23 --- /dev/null +++ b/xlators/features/index/src/index.h @@ -0,0 +1,73 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __INDEX_H__ +#define __INDEX_H__ + +#include "xlator.h" +#include "call-stub.h" +#include "defaults.h" +#include "byte-order.h" +#include "common-utils.h" +#include "index-mem-types.h" + +#define INDEX_THREAD_STACK_SIZE ((size_t)(1024*1024)) + +typedef enum { + UNKNOWN, + IN, + NOTIN +} index_state_t; + +typedef struct index_inode_ctx { + gf_boolean_t processing; + struct list_head callstubs; + index_state_t state; +} index_inode_ctx_t; + +typedef struct index_fd_ctx { + DIR *dir; +} index_fd_ctx_t; + +typedef enum { + sync_not_started, + sync_started, + synced_state, +} to_be_healed_states_t; + +typedef enum { + INDEX_XATTROP, + BASE_INDICES_HOLDER, +} readdir_directory; + +typedef struct index_priv { + char *index_basepath; + uuid_t index; + gf_lock_t lock; + uuid_t xattrop_vgfid;//virtual gfid of the xattrop index dir + uuid_t base_indices_holder_vgfid; //virtual gfid of the + //to_be_healed_xattrop directory + struct list_head callstubs; + pthread_mutex_t mutex; + pthread_cond_t cond; + to_be_healed_states_t to_be_healed_states; +} index_priv_t; + +#define INDEX_STACK_UNWIND(fop, frame, params ...) 
\ +do { \ + if (frame) { \ + inode_t *_inode = frame->local; \ + frame->local = NULL; \ + inode_unref (_inode); \ + } \ + STACK_UNWIND_STRICT (fop, frame, params); \ +} while (0) + +#endif diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am index ab545cb1c..0f79731b4 100644 --- a/xlators/features/locks/src/Makefile.am +++ b/xlators/features/locks/src/Makefile.am @@ -1,15 +1,18 @@ xlator_LTLIBRARIES = locks.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -locks_la_LDFLAGS = -module -avoidversion +locks_la_LDFLAGS = -module -avoid-version -locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c -locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c \ + clear.c +locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = locks.h common.h +noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + + +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) CLEANFILES = @@ -17,4 +20,4 @@ uninstall-local: rm -f $(DESTDIR)$(xlatordir)/posix-locks.so install-data-hook: - ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
\ No newline at end of file + ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c new file mode 100644 index 000000000..124b9ad0f --- /dev/null +++ b/xlators/features/locks/src/clear.c @@ -0,0 +1,424 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" + +#include "locks.h" +#include "common.h" +#include "statedump.h" +#include "clear.h" + +int +clrlk_get_kind (char *kind) +{ + char *clrlk_kinds[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted", + "all"}; + int ret_kind = CLRLK_KIND_MAX; + int i = 0; + + for (i = CLRLK_BLOCKED; i < CLRLK_KIND_MAX; i++) { + if (!strcmp (clrlk_kinds[i], kind)) { + ret_kind = i; + break; + } + } + + return ret_kind; +} + +int +clrlk_get_type (char *type) +{ + char *clrlk_types[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"}; + int ret_type = CLRLK_TYPE_MAX; + int i = 0; + + for (i = CLRLK_INODE; i < CLRLK_TYPE_MAX; i++) { + if (!strcmp (clrlk_types[i], type)) { + ret_type = i; + break; + } + } + + return ret_type; +} + +int +clrlk_get_lock_range (char *range_str, struct gf_flock *ulock, + gf_boolean_t *chk_range) +{ + int ret = -1; + + if (!chk_range) + goto out; + + if (!range_str) { + ret = 0; + *chk_range = _gf_false; + goto out; + } + + if (sscanf (range_str, "%hd,%"PRId64"-""%"PRId64, &ulock->l_whence, + &ulock->l_start, &ulock->l_len) != 3) { + goto 
out; + } + + ret = 0; + *chk_range = _gf_true; +out: + return ret; +} + +int +clrlk_parse_args (const char* cmd, clrlk_args *args) +{ + char *opts = NULL; + char *cur = NULL; + char *tok = NULL; + char *sptr = NULL; + char *free_ptr = NULL; + char kw[KW_MAX] = {[KW_TYPE] = 't', + [KW_KIND] = 'k', + }; + int ret = -1; + int i = 0; + + GF_ASSERT (cmd); + free_ptr = opts = GF_CALLOC (1, strlen (cmd), gf_common_mt_char); + if (!opts) + goto out; + + if (sscanf (cmd, GF_XATTR_CLRLK_CMD".%s", opts) < 1) { + ret = -1; + goto out; + } + + /*clr_lk_prefix.ttype.kkind.args, args - type specific*/ + cur = opts; + for (i = 0; i < KW_MAX && (tok = strtok_r (cur, ".", &sptr)); + cur = NULL, i++) { + if (tok[0] != kw[i]) { + ret = -1; + goto out; + } + if (i == KW_TYPE) + args->type = clrlk_get_type (tok+1); + if (i == KW_KIND) + args->kind = clrlk_get_kind (tok+1); + } + + if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX)) + goto out; + + /*optional args, neither range nor basename can 'legally' contain + * "/" in them*/ + tok = strtok_r (NULL, "/", &sptr); + if (tok) + args->opts = gf_strdup (tok); + + ret = 0; +out: + GF_FREE (free_ptr); + return ret; +} + +int +clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, + int *blkd, int *granted, int *op_errno) +{ + posix_lock_t *plock = NULL; + posix_lock_t *tmp = NULL; + struct gf_flock ulock = {0, }; + int ret = -1; + int bcount = 0; + int gcount = 0; + gf_boolean_t chk_range = _gf_false; + + if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) { + *op_errno = EINVAL; + goto out; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (plock, tmp, &pl_inode->ext_list, + list) { + if ((plock->blocked && + !(args->kind & CLRLK_BLOCKED)) || + (!plock->blocked && + !(args->kind & CLRLK_GRANTED))) + continue; + + if (chk_range && + (plock->user_flock.l_whence != ulock.l_whence + || plock->user_flock.l_start != ulock.l_start + || plock->user_flock.l_len != 
ulock.l_len)) + continue; + + list_del_init (&plock->list); + if (plock->blocked) { + bcount++; + pl_trace_out (this, plock->frame, NULL, NULL, + F_SETLKW, &plock->user_flock, + -1, EAGAIN, NULL); + + STACK_UNWIND_STRICT (lk, plock->frame, -1, EAGAIN, + &plock->user_flock, NULL); + + } else { + gcount++; + } + GF_FREE (plock); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + grant_blocked_locks (this, pl_inode); + ret = 0; +out: + *blkd = bcount; + *granted = gcount; + return ret; +} + +/* Returns 0 on success and -1 on failure */ +int +clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno) +{ + pl_inode_lock_t *ilock = NULL; + pl_inode_lock_t *tmp = NULL; + struct gf_flock ulock = {0, }; + int ret = -1; + int bcount = 0; + int gcount = 0; + gf_boolean_t chk_range = _gf_false; + struct list_head released; + + INIT_LIST_HEAD (&released); + if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) { + *op_errno = EINVAL; + goto out; + } + + if (args->kind & CLRLK_BLOCKED) + goto blkd; + + if (args->kind & CLRLK_GRANTED) + goto granted; + +blkd: + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (ilock, tmp, &dom->blocked_inodelks, + blocked_locks) { + if (chk_range && + (ilock->user_flock.l_whence != ulock.l_whence + || ilock->user_flock.l_start != ulock.l_start + || ilock->user_flock.l_len != ulock.l_len)) + continue; + + bcount++; + list_del_init (&ilock->blocked_locks); + list_add (&ilock->blocked_locks, &released); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (ilock, tmp, &released, blocked_locks) { + list_del_init (&ilock->blocked_locks); + pl_trace_out (this, ilock->frame, NULL, NULL, F_SETLKW, + &ilock->user_flock, -1, EAGAIN, + ilock->volume); + STACK_UNWIND_STRICT (inodelk, ilock->frame, -1, + EAGAIN, NULL); + //No need to take lock as the locks are only in one list + __pl_inodelk_unref (ilock); + } + + if 
(!(args->kind & CLRLK_GRANTED)) { + ret = 0; + goto out; + } + +granted: + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (ilock, tmp, &dom->inodelk_list, + list) { + if (chk_range && + (ilock->user_flock.l_whence != ulock.l_whence + || ilock->user_flock.l_start != ulock.l_start + || ilock->user_flock.l_len != ulock.l_len)) + continue; + + gcount++; + list_del_init (&ilock->list); + list_add (&ilock->list, &released); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (ilock, tmp, &released, list) { + list_del_init (&ilock->list); + //No need to take lock as the locks are only in one list + __pl_inodelk_unref (ilock); + } + + ret = 0; +out: + grant_blocked_inode_locks (this, pl_inode, dom); + *blkd = bcount; + *granted = gcount; + return ret; +} + +/* Returns 0 on success and -1 on failure */ +int +clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno) +{ + pl_entry_lock_t *elock = NULL; + pl_entry_lock_t *tmp = NULL; + int bcount = 0; + int gcount = 0; + int ret = -1; + struct list_head removed; + struct list_head released; + + INIT_LIST_HEAD (&released); + if (args->kind & CLRLK_BLOCKED) + goto blkd; + + if (args->kind & CLRLK_GRANTED) + goto granted; + +blkd: + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (elock, tmp, &dom->blocked_entrylks, + blocked_locks) { + if (args->opts) { + if (!elock->basename || + strcmp (elock->basename, args->opts)) + continue; + } + + bcount++; + + list_del_init (&elock->blocked_locks); + list_add_tail (&elock->blocked_locks, &released); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (elock, tmp, &released, blocked_locks) { + list_del_init (&elock->blocked_locks); + entrylk_trace_out (this, elock->frame, elock->volume, NULL, NULL, + elock->basename, ENTRYLK_LOCK, elock->type, + -1, EAGAIN); + STACK_UNWIND_STRICT (entrylk, elock->frame, -1, 
EAGAIN, NULL); + GF_FREE ((char *) elock->basename); + GF_FREE (elock->connection_id); + GF_FREE (elock); + } + + if (!(args->kind & CLRLK_GRANTED)) { + ret = 0; + goto out; + } + +granted: + INIT_LIST_HEAD (&removed); + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (elock, tmp, &dom->entrylk_list, + domain_list) { + if (args->opts) { + if (!elock->basename || + strcmp (elock->basename, args->opts)) + continue; + } + + gcount++; + list_del_init (&elock->domain_list); + list_add_tail (&elock->domain_list, &removed); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (elock, tmp, &removed, domain_list) { + grant_blocked_entry_locks (this, pl_inode, elock, dom); + } + + ret = 0; +out: + *blkd = bcount; + *granted = gcount; + return ret; +} + +int +clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode, + clrlk_args *args, int *blkd, int *granted, + int *op_errno) +{ + pl_dom_list_t *dom = NULL; + int ret = -1; + int tmp_bcount = 0; + int tmp_gcount = 0; + + if (list_empty (&pl_inode->dom_list)) { + ret = 0; + goto out; + } + + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + tmp_bcount = tmp_gcount = 0; + + switch (args->type) + { + case CLRLK_INODE: + ret = clrlk_clear_inodelk (this, pl_inode, dom, args, + &tmp_bcount, &tmp_gcount, + op_errno); + if (ret) + goto out; + break; + case CLRLK_ENTRY: + ret = clrlk_clear_entrylk (this, pl_inode, dom, args, + &tmp_bcount, &tmp_gcount, + op_errno); + if (ret) + goto out; + break; + } + + *blkd += tmp_bcount; + *granted += tmp_gcount; + } + + ret = 0; +out: + return ret; +} diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h new file mode 100644 index 000000000..511f3f74a --- /dev/null +++ b/xlators/features/locks/src/clear.h @@ -0,0 +1,76 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef __CLEAR_H__ +#define __CLEAR_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "compat-errno.h" +#include "stack.h" +#include "call-stub.h" +#include "locks.h" + +typedef enum { + CLRLK_INODE, + CLRLK_ENTRY, + CLRLK_POSIX, + CLRLK_TYPE_MAX +} clrlk_type; + +typedef enum { + CLRLK_BLOCKED = 1, + CLRLK_GRANTED, + CLRLK_ALL, + CLRLK_KIND_MAX +} clrlk_kind; + +typedef enum { + KW_TYPE, + KW_KIND, + /*add new keywords here*/ + KW_MAX +} clrlk_opts; + +struct _clrlk_args; +typedef struct _clrlk_args clrlk_args; + +struct _clrlk_args { + int type; + int kind; + char *opts; +}; + +int +clrlk_get__kind (char *kind); +int +clrlk_get_type (char *type); +int +clrlk_get_lock_range (char *range_str, struct gf_flock *ulock, + gf_boolean_t *chk_range); +int +clrlk_parse_args (const char* cmd, clrlk_args *args); + +int +clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, + int *blkd, int *granted, int *op_errno); +int +clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno); +int +clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno); +int +clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode, + clrlk_args *args, int *blkd, int *granted, + int *op_errno); +#endif /* __CLEAR_H__ */ diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 83800ff5a..b3309580d 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006, 2007, 2008 Gluster, Inc. 
<http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #include <unistd.h> #include <fcntl.h> #include <limits.h> @@ -42,23 +32,23 @@ static int __is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock); static void __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock); +static int +pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock); static pl_dom_list_t * -allocate_domain (const char *volume) +__allocate_domain (const char *volume) { pl_dom_list_t *dom = NULL; - dom = CALLOC (1, sizeof (*dom)); + dom = GF_CALLOC (1, sizeof (*dom), + gf_locks_mt_pl_dom_list_t); if (!dom) - return NULL; - + goto out; - dom->domain = strdup(volume); - if (!dom->domain) { - gf_log ("posix-locks", GF_LOG_TRACE, - "Out of Memory"); - return NULL; - } + dom->domain = gf_strdup(volume); + if (!dom->domain) + goto out; gf_log ("posix-locks", GF_LOG_TRACE, "New domain allocated: %s", dom->domain); @@ -69,6 +59,12 @@ allocate_domain (const char *volume) INIT_LIST_HEAD (&dom->inodelk_list); INIT_LIST_HEAD (&dom->blocked_inodelks); +out: + if (dom && (NULL == dom->domain)) { + GF_FREE (dom); + dom = NULL; + } + return dom; } @@ -80,19 +76,28 @@ get_domain (pl_inode_t *pl_inode, const char *volume) { pl_dom_list_t *dom = NULL; - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - if (strcmp (dom->domain, volume) == 0) - goto found; + GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out); + GF_VALIDATE_OR_GOTO ("posix-locks", volume, out); + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + if (strcmp (dom->domain, volume) == 0) + goto unlock; + } + dom = __allocate_domain (volume); + if (dom) + list_add (&dom->inode_list, &pl_inode->dom_list); } - - dom = allocate_domain(volume); - - if (dom) - list_add (&dom->inode_list, &pl_inode->dom_list); -found: - +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + if (dom) { + gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume); + } else { + gf_log ("posix-locks", 
GF_LOG_TRACE, "Domain %s not found", volume); + } +out: return dom; } @@ -102,6 +107,12 @@ fd_to_fdnum (fd_t *fd) return ((unsigned long) fd); } +fd_t * +fd_from_fdnum (posix_lock_t *lock) +{ + return ((fd_t *) lock->fd_num); +} + int __pl_inode_is_empty (pl_inode_t *pl_inode) { @@ -125,10 +136,10 @@ __pl_inode_is_empty (pl_inode_t *pl_inode) void pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame) { - snprintf (str, size, "Pid=%llu, lk-owner=%llu, Transport=%p, Frame=%llu", + snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu", (unsigned long long) frame->root->pid, - (unsigned long long) frame->root->lk_owner, - (void *)frame->root->trans, + lkowner_utoa (&frame->root->lk_owner), + frame->root->client, (unsigned long long) frame->root->unique); } @@ -151,25 +162,24 @@ pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc) } if (loc && loc->path) { - ipath = strdup (loc->path); + ipath = gf_strdup (loc->path); } else { ret = inode_path (inode, NULL, &ipath); if (ret <= 0) ipath = NULL; } - snprintf (str, size, "ino=%llu, fd=%p, path=%s", - (unsigned long long) inode->ino, fd, + snprintf (str, size, "gfid=%s, fd=%p, path=%s", + uuid_utoa (inode->gfid), fd, ipath ? 
ipath : "<nul>"); - if (ipath) - FREE (ipath); + GF_FREE (ipath); } void pl_print_lock (char *str, int size, int cmd, - struct flock *flock, uint64_t owner) + struct gf_flock *flock, gf_lkowner_t *owner) { char *cmd_str = NULL; char *type_str = NULL; @@ -217,17 +227,17 @@ pl_print_lock (char *str, int size, int cmd, } snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, " - "start=%llu, len=%llu, pid=%llu, lk-owner=%llu", + "start=%llu, len=%llu, pid=%llu, lk-owner=%s", cmd_str, type_str, (unsigned long long) flock->l_start, (unsigned long long) flock->l_len, (unsigned long long) flock->l_pid, - (unsigned long long) owner); + lkowner_utoa (owner)); } void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct flock *flock, const char *domain) + int cmd, struct gf_flock *flock, const char *domain) { posix_locks_private_t *priv = NULL; char pl_locker[256]; @@ -244,9 +254,9 @@ pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, if (domain) pl_print_inodelk (pl_lock, 256, cmd, flock, domain); else - pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner); + pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_INFO, "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, pl_lockee, pl_lock); } @@ -275,7 +285,7 @@ pl_print_verdict (char *str, int size, int op_ret, int op_errno) void pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct flock *flock, int op_ret, int op_errno, const char *domain) + int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain) { posix_locks_private_t *priv = NULL; @@ -294,11 +304,11 @@ pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, if (domain) pl_print_inodelk (pl_lock, 256, cmd, flock, domain); else - pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner); + pl_print_lock (pl_lock, 256, cmd, flock, 
&frame->root->lk_owner); pl_print_verdict (verdict, 32, op_ret, op_errno); - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_INFO, "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", verdict, pl_locker, pl_lockee, pl_lock); } @@ -306,7 +316,7 @@ pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, void pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct flock *flock, const char *domain) + int cmd, struct gf_flock *flock, const char *domain) { posix_locks_private_t *priv = NULL; @@ -324,9 +334,9 @@ pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, if (domain) pl_print_inodelk (pl_lock, 256, cmd, flock, domain); else - pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner); + pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_INFO, "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, pl_lockee, pl_lock); } @@ -353,7 +363,7 @@ pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd) pl_print_locker (pl_locker, 256, this, frame); pl_print_lockee (pl_lockee, 256, fd, NULL); - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_INFO, "[FLUSH] Locker = {%s} Lockee = {%s}", pl_locker, pl_lockee); } @@ -371,7 +381,7 @@ pl_trace_release (xlator_t *this, fd_t *fd) pl_print_lockee (pl_lockee, 256, fd, NULL); - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_INFO, "[RELEASE] Lockee = {%s}", pl_lockee); } @@ -382,6 +392,7 @@ pl_update_refkeeper (xlator_t *this, inode_t *inode) pl_inode_t *pl_inode = NULL; int is_empty = 0; int need_unref = 0; + int need_ref = 0; pl_inode = pl_inode_get (this, inode); @@ -395,13 +406,17 @@ pl_update_refkeeper (xlator_t *this, inode_t *inode) } if (!is_empty && !pl_inode->refkeeper) { - pl_inode->refkeeper = inode_ref (inode); + need_ref = 1; + pl_inode->refkeeper = inode; } } pthread_mutex_unlock (&pl_inode->mutex); 
if (need_unref) inode_unref (inode); + + if (need_ref) + inode_ref (inode); } @@ -409,72 +424,78 @@ pl_inode_t * pl_inode_get (xlator_t *this, inode_t *inode) { uint64_t tmp_pl_inode = 0; - pl_inode_t *pl_inode = NULL; -// mode_t st_mode = 0; - int ret = 0; + pl_inode_t *pl_inode = NULL; + int ret = 0; - ret = inode_ctx_get (inode, this,&tmp_pl_inode); - if (ret == 0) { - pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - goto out; - } - pl_inode = CALLOC (1, sizeof (*pl_inode)); - if (!pl_inode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret == 0) { + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + goto unlock; + } + pl_inode = GF_CALLOC (1, sizeof (*pl_inode), + gf_locks_mt_pl_inode_t); + if (!pl_inode) { + goto unlock; + } - gf_log (this->name, GF_LOG_TRACE, - "Allocating new pl inode"); + gf_log (this->name, GF_LOG_TRACE, + "Allocating new pl inode"); -/* - st_mode = inode->st_mode; - if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP)) - pl_inode->mandatory = 1; -*/ - - pthread_mutex_init (&pl_inode->mutex, NULL); + pthread_mutex_init (&pl_inode->mutex, NULL); - INIT_LIST_HEAD (&pl_inode->dom_list); - INIT_LIST_HEAD (&pl_inode->ext_list); - INIT_LIST_HEAD (&pl_inode->rw_list); + INIT_LIST_HEAD (&pl_inode->dom_list); + INIT_LIST_HEAD (&pl_inode->ext_list); + INIT_LIST_HEAD (&pl_inode->rw_list); + INIT_LIST_HEAD (&pl_inode->reservelk_list); + INIT_LIST_HEAD (&pl_inode->blocked_reservelks); + INIT_LIST_HEAD (&pl_inode->blocked_calls); - ret = inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); + __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); + } +unlock: + UNLOCK (&inode->lock); -out: - return pl_inode; + return pl_inode; } /* Create a new posix_lock_t */ posix_lock_t * -new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid, - uint64_t owner, fd_t *fd) +new_posix_lock (struct gf_flock *flock, client_t *client, 
pid_t client_pid, + gf_lkowner_t *owner, fd_t *fd) { - posix_lock_t *lock = NULL; + posix_lock_t *lock = NULL; - lock = CALLOC (1, sizeof (posix_lock_t)); - if (!lock) { - return NULL; - } + GF_VALIDATE_OR_GOTO ("posix-locks", flock, out); + GF_VALIDATE_OR_GOTO ("posix-locks", client, out); + GF_VALIDATE_OR_GOTO ("posix-locks", fd, out); - lock->fl_start = flock->l_start; - lock->fl_type = flock->l_type; + lock = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!lock) { + goto out; + } - if (flock->l_len == 0) - lock->fl_end = LLONG_MAX; - else - lock->fl_end = flock->l_start + flock->l_len - 1; + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; - lock->transport = transport; + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; + + lock->client = client; lock->fd_num = fd_to_fdnum (fd); - lock->client_pid = client_pid; - lock->owner = owner; + lock->fd = fd; + lock->client_pid = client_pid; + lock->owner = *owner; - INIT_LIST_HEAD (&lock->list); + INIT_LIST_HEAD (&lock->list); - return lock; +out: + return lock; } @@ -482,7 +503,7 @@ new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid, void __delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) { - list_del_init (&lock->list); + list_del_init (&lock->list); } @@ -490,32 +511,37 @@ __delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) void __destroy_lock (posix_lock_t *lock) { - free (lock); + GF_FREE (lock); } -/* Convert a posix_lock to a struct flock */ +/* Convert a posix_lock to a struct gf_flock */ void -posix_lock_to_flock (posix_lock_t *lock, struct flock *flock) +posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock) { - flock->l_pid = lock->client_pid; - flock->l_type = lock->fl_type; - flock->l_start = lock->fl_start; + flock->l_pid = lock->client_pid; + flock->l_type = lock->fl_type; + flock->l_start = lock->fl_start; + flock->l_owner = lock->owner; - if 
(lock->fl_end == LLONG_MAX) - flock->l_len = 0; - else - flock->l_len = lock->fl_end - lock->fl_start + 1; + if (lock->fl_end == LLONG_MAX) + flock->l_len = 0; + else + flock->l_len = lock->fl_end - lock->fl_start + 1; } - /* Insert the lock into the inode's lock list */ static void __insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock) { - list_add_tail (&lock->list, &pl_inode->ext_list); + if (lock->blocked) + gettimeofday (&lock->blkd_time, NULL); + else + gettimeofday (&lock->granted_time, NULL); + + list_add_tail (&lock->list, &pl_inode->ext_list); - return; + return; } @@ -523,14 +549,14 @@ __insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock) int locks_overlap (posix_lock_t *l1, posix_lock_t *l2) { - /* - Note: - FUSE always gives us absolute offsets, so no need to worry - about SEEK_CUR or SEEK_END - */ - - return ((l1->fl_end >= l2->fl_start) && - (l2->fl_end >= l1->fl_start)); + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ + + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); } @@ -539,8 +565,8 @@ int same_owner (posix_lock_t *l1, posix_lock_t *l2) { - return ((l1->owner == l2->owner) && - (l1->transport == l2->transport)); + return (is_same_lkowner (&l1->owner, &l2->owner) && + (l1->client == l2->client)); } @@ -549,15 +575,15 @@ same_owner (posix_lock_t *l1, posix_lock_t *l2) void __delete_unlck_locks (pl_inode_t *pl_inode) { - posix_lock_t *l = NULL; - posix_lock_t *tmp = NULL; + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if (l->fl_type == F_UNLCK) { - __delete_lock (pl_inode, l); - __destroy_lock (l); - } - } + list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { + if (l->fl_type == F_UNLCK) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } } @@ -565,86 +591,160 @@ __delete_unlck_locks (pl_inode_t *pl_inode) static posix_lock_t * add_locks (posix_lock_t *l1, 
posix_lock_t *l2) { - posix_lock_t *sum = NULL; + posix_lock_t *sum = NULL; - sum = CALLOC (1, sizeof (posix_lock_t)); - if (!sum) - return NULL; + sum = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!sum) + return NULL; - sum->fl_start = min (l1->fl_start, l2->fl_start); - sum->fl_end = max (l1->fl_end, l2->fl_end); + sum->fl_start = min (l1->fl_start, l2->fl_start); + sum->fl_end = max (l1->fl_end, l2->fl_end); - return sum; + return sum; } /* Subtract two locks */ struct _values { - posix_lock_t *locks[3]; + posix_lock_t *locks[3]; }; /* {big} must always be contained inside {small} */ static struct _values subtract_locks (posix_lock_t *big, posix_lock_t *small) { - struct _values v = { .locks = {0, 0, 0} }; - - if ((big->fl_start == small->fl_start) && - (big->fl_end == small->fl_end)) { - /* both edges coincide with big */ - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_type = small->fl_type; - } - else if ((small->fl_start > big->fl_start) && - (small->fl_end < big->fl_end)) { - /* both edges lie inside big */ - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[1]); - v.locks[2] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[2]); - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_end = small->fl_start - 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - memcpy (v.locks[2], big, sizeof (posix_lock_t)); - v.locks[2]->fl_start = small->fl_end + 1; - } - /* one edge coincides with big */ - else if (small->fl_start == big->fl_start) { - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[1]); - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_start = small->fl_end + 1; - - memcpy (v.locks[1], small, 
sizeof (posix_lock_t));
-        }
-        else if (small->fl_end == big->fl_end) {
-                v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
-                ERR_ABORT (v.locks[0]);
-                v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
-                ERR_ABORT (v.locks[1]);
-
-                memcpy (v.locks[0], big, sizeof (posix_lock_t));
-                v.locks[0]->fl_end = small->fl_start - 1;
-
-                memcpy (v.locks[1], small, sizeof (posix_lock_t));
-        }
-        else {
-                gf_log ("posix-locks", GF_LOG_ERROR,
-                        "Unexpected case in subtract_locks. Please send "
-                        "a bug report to gluster-devel@nongnu.org");
+
+        struct _values v = { .locks = {0, 0, 0} };
+
+        if ((big->fl_start == small->fl_start) &&
+            (big->fl_end == small->fl_end)) {
+                /* both edges coincide with big */
+                v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+                                        gf_locks_mt_posix_lock_t);
+                if (!v.locks[0])
+                        goto out;
+                memcpy (v.locks[0], big, sizeof (posix_lock_t));
+                v.locks[0]->fl_type = small->fl_type;
+                goto done;
+        }
+
+        if ((small->fl_start > big->fl_start) &&
+            (small->fl_end < big->fl_end)) {
+                /* both edges lie inside big */
+                v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+                                        gf_locks_mt_posix_lock_t);
+                if (!v.locks[0])
+                        goto out;
+
+                v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+                                        gf_locks_mt_posix_lock_t);
+                if (!v.locks[1])
+                        goto out;
+
+                v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t),
+                                        gf_locks_mt_posix_lock_t);
+                if (!v.locks[2]) /* guard the allocation made just above; it is memcpy'd into below */
+                        goto out;
+
+                memcpy (v.locks[0], big, sizeof (posix_lock_t));
+                v.locks[0]->fl_end = small->fl_start - 1;
+
+                memcpy (v.locks[1], small, sizeof (posix_lock_t));
+
+                memcpy (v.locks[2], big, sizeof (posix_lock_t));
+                v.locks[2]->fl_start = small->fl_end + 1;
+                goto done;
+        }
+        /* one edge coincides with big */
+        if (small->fl_start == big->fl_start) {
+                v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+                                        gf_locks_mt_posix_lock_t);
+                if (!v.locks[0])
+                        goto out;
+
+                v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+                                        gf_locks_mt_posix_lock_t);
+                if (!v.locks[1])
+                        goto out;
+
+                memcpy (v.locks[0], big, sizeof (posix_lock_t));
+
v.locks[0]->fl_start = small->fl_end + 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + goto done; + } + + if (small->fl_end == big->fl_end) { + v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[0]) + goto out; + + v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[1]) + goto out; + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + goto done; + } + + GF_ASSERT (0); + gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks"); + +out: + if (v.locks[0]) { + GF_FREE (v.locks[0]); + v.locks[0] = NULL; + } + if (v.locks[1]) { + GF_FREE (v.locks[1]); + v.locks[1] = NULL; + } + if (v.locks[2]) { + GF_FREE (v.locks[2]); + v.locks[2] = NULL; + } + +done: return v; } +static posix_lock_t * +first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + posix_lock_t *l = NULL; + posix_lock_t *conf = NULL; + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (l->blocked) + continue; + + if (locks_overlap (l, lock)) { + if (same_owner (l, lock)) + continue; + + if ((l->fl_type == F_WRLCK) || + (lock->fl_type == F_WRLCK)) { + conf = l; + goto unlock; + } + } + } + } +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + return conf; +} + /* Start searching from {begin}, and return the first lock that conflicts, NULL if no conflict @@ -703,6 +803,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) struct _values v = { .locks = {0, 0, 0} }; list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) { + if (conf->blocked) + continue; if (!locks_overlap (conf, lock)) continue; @@ -711,7 +813,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) sum = add_locks (lock, conf); sum->fl_type = lock->fl_type; - sum->transport = lock->transport; + sum->client = lock->client; 
sum->fd_num = lock->fd_num; sum->client_pid = lock->client_pid; sum->owner = lock->owner; @@ -720,6 +822,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) __destroy_lock (conf); __destroy_lock (lock); + INIT_LIST_HEAD (&sum->list); + posix_lock_to_flock (sum, &sum->user_flock); __insert_and_merge (pl_inode, sum); return; @@ -727,7 +831,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) sum = add_locks (lock, conf); sum->fl_type = conf->fl_type; - sum->transport = conf->transport; + sum->client = conf->client; sum->fd_num = conf->fd_num; sum->client_pid = conf->client_pid; sum->owner = conf->owner; @@ -747,6 +851,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) continue; INIT_LIST_HEAD (&v.locks[i]->list); + posix_lock_to_flock (v.locks[i], + &v.locks[i]->user_flock); __insert_and_merge (pl_inode, v.locks[i]); } @@ -800,7 +906,8 @@ __grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *g list_del_init (&l->list); if (__is_lock_grantable (pl_inode, l)) { - conf = CALLOC (1, sizeof (*conf)); + conf = GF_CALLOC (1, sizeof (*conf), + gf_locks_mt_posix_lock_t); if (!conf) { l->blocked = 1; @@ -814,10 +921,9 @@ __grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *g posix_lock_to_flock (l, &conf->user_flock); gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Granted", + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted", l->fl_type == F_UNLCK ? 
"Unlock" : "Lock", - l->client_pid, - l->owner, + l->client_pid, lkowner_utoa (&l->owner), l->user_flock.l_start, l->user_flock.l_len); @@ -853,14 +959,61 @@ grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode) pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, 0, 0, NULL); - STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock); + STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, + &lock->user_flock, NULL); - FREE (lock); + GF_FREE (lock); } return; } +static int +pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock) +{ + struct gf_flock flock = {0,}; + posix_lock_t *unlock_lock = NULL; + + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; + + int ret = -1; + + INIT_LIST_HEAD (&granted_list); + + flock.l_type = F_UNLCK; + flock.l_whence = old_lock->user_flock.l_whence; + flock.l_start = old_lock->user_flock.l_start; + flock.l_len = old_lock->user_flock.l_len; + + + unlock_lock = new_posix_lock (&flock, old_lock->client, + old_lock->client_pid, &old_lock->owner, + old_lock->fd); + GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out); + ret = 0; + + __insert_and_merge (pl_inode, unlock_lock); + + __grant_blocked_locks (this, pl_inode, &granted_list); + + list_for_each_entry_safe (lock, tmp, &granted_list, list) { + list_del_init (&lock->list); + + pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, 0, 0, NULL); + + STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, + &lock->user_flock, NULL); + + GF_FREE (lock); + } + +out: + return ret; +} int pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, @@ -872,21 +1025,38 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, pthread_mutex_lock (&pl_inode->mutex); { + /* Send unlock before the actual lock to + prevent lock upgrade / downgrade + problems only if: + - it is a blocking call + - it has other conflicting locks + */ + + if (can_block && + !(__is_lock_grantable (pl_inode, 
lock))) { + ret = pl_send_prelock_unlock (this, pl_inode, + lock); + if (ret) + gf_log (this->name, GF_LOG_DEBUG, + "Could not send pre-lock " + "unlock"); + } + if (__is_lock_grantable (pl_inode, lock)) { gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => OK", + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK", lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, - lock->owner, + lkowner_utoa (&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); __insert_and_merge (pl_inode, lock); } else if (can_block) { gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked", + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, - lock->owner, + lkowner_utoa (&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); lock->blocked = 1; @@ -894,10 +1064,10 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, ret = -1; } else { gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => NOK", + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK", lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock",
                                 lock->client_pid,
-                                lock->owner,
+                                lkowner_utoa (&lock->owner),
                                 lock->user_flock.l_start,
                                 lock->user_flock.l_len);
                         errno = EAGAIN;
@@ -919,7 +1089,7 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
 {
         posix_lock_t *conf = NULL;
 
-        conf = first_overlap (pl_inode, lock);
+        conf = first_conflicting_overlap (pl_inode, lock);
 
         if (conf == NULL) {
                 lock->fl_type = F_UNLCK;
@@ -928,3 +1098,142 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
 
         return conf;
 }
+
+
+struct _lock_table *
+pl_lock_table_new (void)
+{
+        struct _lock_table *new = NULL;
+
+        new = GF_CALLOC (1, sizeof (struct _lock_table), gf_common_mt_lock_table);
+        if (new == NULL) {
+                goto out;
+        }
+        INIT_LIST_HEAD (&new->entrylk_lockers);
+        INIT_LIST_HEAD (&new->inodelk_lockers);
+        LOCK_INIT (&new->lock);
+out:
+        return new;
+}
+
+
+/*
+ * Record a locker in @table for lock domain @volume.
+ *
+ * The entry keeps a reference on @fd when one is given, otherwise a copy
+ * of @loc. It is queued on the entrylk list when @type is GF_FOP_ENTRYLK
+ * and on the inodelk list for every other fop type.
+ *
+ * Returns 0 on success, -1 if an argument is missing or memory runs out.
+ */
+int
+pl_add_locker (struct _lock_table *table, const char *volume,
+               loc_t *loc, fd_t *fd, pid_t pid, gf_lkowner_t *owner,
+               glusterfs_fop_t type)
+{
+        int32_t         ret = -1;
+        struct _locker *new = NULL;
+
+        GF_VALIDATE_OR_GOTO ("lock-table", table, out);
+        GF_VALIDATE_OR_GOTO ("lock-table", volume, out);
+        /* owner is copied by value below, so it must not be NULL */
+        GF_VALIDATE_OR_GOTO ("lock-table", owner, out);
+
+        new = GF_CALLOC (1, sizeof (struct _locker), gf_common_mt_locker);
+        if (new == NULL) {
+                goto out;
+        }
+        INIT_LIST_HEAD (&new->lockers);
+
+        new->volume = gf_strdup (volume);
+        if (new->volume == NULL) {
+                /* pl_del_locker() hands ->volume to strcmp(); never queue an entry without one */
+                GF_FREE (new);
+                goto out;
+        }
+
+        if (fd == NULL) {
+                loc_copy (&new->loc, loc);
+        } else {
+                new->fd = fd_ref (fd);
+        }
+
+        new->pid   = pid;
+        new->owner = *owner;
+
+        LOCK (&table->lock);
+        {
+                if (type == GF_FOP_ENTRYLK)
+                        list_add_tail (&new->lockers, &table->entrylk_lockers);
+                else
+                        list_add_tail (&new->lockers, &table->inodelk_lockers);
+        }
+        UNLOCK (&table->lock);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+int
+pl_del_locker (struct _lock_table *table, const char *volume,
+               loc_t *loc, fd_t *fd, gf_lkowner_t *owner, glusterfs_fop_t type)
+{
+        struct _locker *locker = NULL;
+        struct _locker *tmp = NULL;
+        int32_t ret = -1;
+        struct list_head *head = NULL;
+        struct list_head del;
+
+        GF_VALIDATE_OR_GOTO ("lock-table", 
table, out); + GF_VALIDATE_OR_GOTO ("lock-table", volume, out); + + INIT_LIST_HEAD (&del); + + LOCK (&table->lock); + { + if (type == GF_FOP_ENTRYLK) { + head = &table->entrylk_lockers; + } else { + head = &table->inodelk_lockers; + } + + list_for_each_entry_safe (locker, tmp, head, lockers) { + if (!is_same_lkowner (&locker->owner, owner) || + strcmp (locker->volume, volume)) + continue; + + /* + * It is possible for inodelk lock to come on anon-fd + * and inodelk unlock to come on normal fd in case of + * client re-opens. So don't check for fds to be equal. + */ + if (locker->fd && fd) + list_move_tail (&locker->lockers, &del); + else if (locker->loc.inode && loc && + (locker->loc.inode == loc->inode)) + list_move_tail (&locker->lockers, &del); + } + } + UNLOCK (&table->lock); + + tmp = NULL; + locker = NULL; + + list_for_each_entry_safe (locker, tmp, &del, lockers) { + list_del_init (&locker->lockers); + if (locker->fd) + fd_unref (locker->fd); + else + loc_wipe (&locker->loc); + + GF_FREE (locker->volume); + GF_FREE (locker); + } + + ret = 0; +out: + return ret; + +} + diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index 133a4f722..db19ec978 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -1,28 +1,55 @@ /* - Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef __COMMON_H__ #define __COMMON_H__ +#include "lkowner.h" +/*dump locks format strings */ +#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu" +#define ENTRY_FMT "type=%s on basename=%s" +#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p" +#define GRNTD_AT "granted at %s" +#define BLKD_AT "blocked at %s" +#define CONN_ID "connection-id=%s" +#define DUMP_BLKD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT +#define DUMP_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "GRNTD_AT +#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT", "GRNTD_AT + +#define ENTRY_BLKD_FMT ENTRY_FMT", "DUMP_BLKD_FMT +#define ENTRY_GRNTD_FMT ENTRY_FMT", "DUMP_GRNTD_FMT +#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT", "DUMP_BLKD_GRNTD_FMT + +#define RANGE_BLKD_FMT RANGE_FMT", "DUMP_BLKD_FMT +#define RANGE_GRNTD_FMT RANGE_FMT", "DUMP_GRNTD_FMT +#define RANGE_BLKD_GRNTD_FMT RANGE_FMT", "DUMP_BLKD_GRNTD_FMT + +#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid) + +struct _locker { + struct list_head lockers; + char *volume; + loc_t loc; + fd_t *fd; + gf_lkowner_t owner; + pid_t pid; +}; + +struct _lock_table { + struct list_head inodelk_lockers; + struct list_head entrylk_lockers; + gf_lock_t lock; +}; + posix_lock_t * -new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid, - uint64_t owner, fd_t *fd); +new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, + gf_lkowner_t *owner, fd_t *fd); pl_inode_t * pl_inode_get (xlator_t 
*this, inode_t *inode); @@ -38,7 +65,7 @@ void grant_blocked_locks (xlator_t *this, pl_inode_t *inode); void -posix_lock_to_flock (posix_lock_t *lock, struct flock *flock); +posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock); int locks_overlap (posix_lock_t *l1, posix_lock_t *l2); @@ -54,13 +81,14 @@ pl_dom_list_t * get_domain (pl_inode_t *pl_inode, const char *volume); void -grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom); +grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom); void __delete_inode_lock (pl_inode_lock_t *lock); void -__destroy_inode_lock (pl_inode_lock_t *lock); +__pl_inodelk_unref (pl_inode_lock_t *lock); void grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, @@ -69,19 +97,23 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, void pl_update_refkeeper (xlator_t *this, inode_t *inode); int32_t -get_inodelk_count (xlator_t *this, inode_t *inode); +__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname); +int32_t +get_inodelk_count (xlator_t *this, inode_t *inode, char *domname); int32_t +__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode); +int32_t get_entrylk_count (xlator_t *this, inode_t *inode); void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct flock *flock, const char *domain); + int cmd, struct gf_flock *flock, const char *domain); void pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct flock *flock, int op_ret, int op_errno, const char *domain); + int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain); void pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct flock *flock, const char *domain); + int cmd, struct gf_flock *flock, const char *domain); void pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd); @@ -108,7 +140,7 @@ void pl_print_locker 
(char *str, int size, xlator_t *this, call_frame_t *frame); void -pl_print_inodelk (char *str, int size, int cmd, struct flock *flock, const char *domain); +pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain); void pl_trace_release (xlator_t *this, fd_t *fd); @@ -116,4 +148,40 @@ pl_trace_release (xlator_t *this, fd_t *fd); unsigned long fd_to_fdnum (fd_t *fd); +fd_t * +fd_from_fdnum (posix_lock_t *lock); + +int +pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block); +int +reservelks_equal (posix_lock_t *l1, posix_lock_t *l2); + +int +pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *lock, int can_block); +int +pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock); + +uint32_t +check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename); + +int32_t +pl_add_locker (struct _lock_table *table, const char *volume, + loc_t *loc, + fd_t *fd, + pid_t pid, + gf_lkowner_t *owner, + glusterfs_fop_t type); + +int32_t +pl_del_locker (struct _lock_table *table, const char *volume, + loc_t *loc, + fd_t *fd, + gf_lkowner_t *owner, + glusterfs_fop_t type); + +struct _lock_table * +pl_lock_table_new (void); + #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index b83044b7f..0785dc547 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. 
- - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -35,28 +25,30 @@ static pl_entry_lock_t * new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, - transport_t *trans, pid_t client_pid, uint64_t owner, const char *volume) + client_t *client, pid_t client_pid, gf_lkowner_t *owner, + const char *volume) { - pl_entry_lock_t *newlock = NULL; + pl_entry_lock_t *newlock = NULL; - newlock = CALLOC (1, sizeof (pl_entry_lock_t)); - if (!newlock) { - goto out; - } + newlock = GF_CALLOC (1, sizeof (pl_entry_lock_t), + gf_locks_mt_pl_entry_lock_t); + if (!newlock) { + goto out; + } - newlock->basename = basename ? strdup (basename) : NULL; - newlock->type = type; - newlock->trans = trans; - newlock->volume = volume; - newlock->client_pid = client_pid; - newlock->owner = owner; + newlock->basename = basename ? 
gf_strdup (basename) : NULL; + newlock->type = type; + newlock->trans = client; + newlock->volume = volume; + newlock->client_pid = client_pid; + newlock->owner = *owner; - INIT_LIST_HEAD (&newlock->domain_list); - INIT_LIST_HEAD (&newlock->blocked_locks); + INIT_LIST_HEAD (&newlock->domain_list); + INIT_LIST_HEAD (&newlock->blocked_locks); out: - return newlock; + return newlock; } @@ -76,16 +68,16 @@ out: static int names_conflict (const char *n1, const char *n2) { - return all_names (n1) || all_names (n2) || !strcmp (n1, n2); + return all_names (n1) || all_names (n2) || !strcmp (n1, n2); } -static int +static inline int __same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2) { - return ((l1->owner == l2->owner) && - (l1->trans == l2->trans)); + return (is_same_lkowner (&l1->owner, &l2->owner) && + (l1->trans == l2->trans)); } @@ -98,33 +90,33 @@ __same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2) static pl_entry_lock_t * __lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type) { - pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *lock = NULL; - if (list_empty (&dom->entrylk_list)) - return NULL; + if (list_empty (&dom->entrylk_list)) + return NULL; - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - if (names_conflict (lock->basename, basename)) - return lock; - } + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + if (names_conflict (lock->basename, basename)) + return lock; + } - return NULL; + return NULL; } static pl_entry_lock_t * __blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type) { - pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *lock = NULL; - if (list_empty (&dom->blocked_entrylks)) - return NULL; + if (list_empty (&dom->blocked_entrylks)) + return NULL; - list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { - if (names_conflict (lock->basename, basename)) - return lock; - } + list_for_each_entry (lock, &dom->blocked_entrylks, 
blocked_locks) { + if (names_conflict (lock->basename, basename)) + return lock; + } - return NULL; + return NULL; } static int @@ -132,23 +124,23 @@ __owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock) { pl_entry_lock_t *lock = NULL; - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - if (__same_entrylk_owner (lock, newlock)) - return 1; - } + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + if (__same_entrylk_owner (lock, newlock)) + return 1; + } - list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { - if (__same_entrylk_owner (lock, newlock)) - return 1; - } + list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { + if (__same_entrylk_owner (lock, newlock)) + return 1; + } - return 0; + return 0; } static int names_equal (const char *n1, const char *n2) { - return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); + return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); } void @@ -212,7 +204,7 @@ entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *domain, pl_print_lockee (pl_lockee, 256, fd, loc); pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain); - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_INFO, "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, pl_lockee, pl_entrylk); } @@ -239,7 +231,7 @@ entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *domain, pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain); pl_print_verdict (verdict, 32, op_ret, op_errno); - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_INFO, "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", verdict, pl_locker, pl_lockee, pl_entrylk); } @@ -265,7 +257,7 @@ entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume, pl_print_lockee (pl_lockee, 256, fd, loc); pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, volume); - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, 
GF_LOG_INFO, "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, pl_lockee, pl_entrylk); } @@ -283,21 +275,21 @@ entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume, static pl_entry_lock_t * __find_most_matching_lock (pl_dom_list_t *dom, const char *basename) { - pl_entry_lock_t *lock; - pl_entry_lock_t *all = NULL; - pl_entry_lock_t *exact = NULL; - - if (list_empty (&dom->entrylk_list)) - return NULL; - - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - if (all_names (lock->basename)) - all = lock; - else if (names_equal (lock->basename, basename)) - exact = lock; - } + pl_entry_lock_t *lock; + pl_entry_lock_t *all = NULL; + pl_entry_lock_t *exact = NULL; + + if (list_empty (&dom->entrylk_list)) + return NULL; + + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + if (all_names (lock->basename)) + all = lock; + else if (names_equal (lock->basename, basename)) + exact = lock; + } - return (exact ? exact : all); + return (exact ? 
exact : all); } /** @@ -313,66 +305,80 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename) int __lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, - call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, int nonblock) + call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, + int nonblock, char *conn_id) { - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *conf = NULL; - transport_t *trans = NULL; - pid_t client_pid = 0; - uint64_t owner = 0; - - int ret = -EINVAL; + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *conf = NULL; + int ret = -EINVAL; + + lock = new_entrylk_lock (pinode, basename, type, + frame->root->client, frame->root->pid, + &frame->root->lk_owner, dom->domain); + if (!lock) { + ret = -ENOMEM; + goto out; + } - trans = frame->root->trans; - client_pid = frame->root->pid; - owner = (uint64_t)(long)frame->root; + lock->frame = frame; + lock->this = this; + lock->trans = frame->root->client; - lock = new_entrylk_lock (pinode, basename, type, trans, client_pid, owner, dom->domain); - if (!lock) { - ret = -ENOMEM; - goto out; - } + if (conn_id) { + lock->connection_id = gf_strdup (conn_id); + } - conf = __lock_grantable (dom, basename, type); - if (conf) { - ret = -EAGAIN; - if (nonblock) - goto out; + conf = __lock_grantable (dom, basename, type); + if (conf) { + ret = -EAGAIN; + if (nonblock){ + GF_FREE (lock->connection_id); + GF_FREE ((char *)lock->basename); + GF_FREE (lock); + goto out; - lock->frame = frame; - lock->this = this; + } - list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); + gettimeofday (&lock->blkd_time, NULL); + list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); - gf_log (this->name, GF_LOG_TRACE, - "Blocking lock: {pinode=%p, basename=%s}", - pinode, basename); + gf_log (this->name, GF_LOG_TRACE, + "Blocking lock: {pinode=%p, basename=%s}", + pinode, basename); - goto out; - } + goto out; + } if ( __blocked_lock_conflict (dom, basename, type) && 
!(__owner_has_lock (dom, lock))) { ret = -EAGAIN; - if (nonblock) + if (nonblock) { + GF_FREE (lock->connection_id); + GF_FREE ((char *) lock->basename); + GF_FREE (lock); goto out; + + } lock->frame = frame; lock->this = this; + gettimeofday (&lock->blkd_time, NULL); list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); gf_log (this->name, GF_LOG_TRACE, "Lock is grantable, but blocking to prevent starvation"); - gf_log (this->name, GF_LOG_TRACE, - "Blocking lock: {pinode=%p, basename=%s}", - pinode, basename); + gf_log (this->name, GF_LOG_TRACE, + "Blocking lock: {pinode=%p, basename=%s}", + pinode, basename); - goto out; + ret = -EAGAIN; + goto out; } switch (type) { - case ENTRYLK_WRLCK: - list_add (&lock->domain_list, &dom->entrylk_list); - break; + case ENTRYLK_WRLCK: + gettimeofday (&lock->granted_time, NULL); + list_add_tail (&lock->domain_list, &dom->entrylk_list); + break; default: @@ -380,11 +386,11 @@ __lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, "Invalid type for entrylk specified: %d", type); ret = -EINVAL; goto out; - } + } - ret = 0; + ret = 0; out: - return ret; + return ret; } /** @@ -397,312 +403,366 @@ out: pl_entry_lock_t * __unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type) { - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *ret_lock = NULL; - - lock = __find_most_matching_lock (dom, basename); - - if (!lock) { - gf_log ("locks", GF_LOG_DEBUG, - "unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found", - basename); - goto out; - } - - if (names_equal (lock->basename, basename) - && lock->type == type) { - - if (type == ENTRYLK_WRLCK) { - list_del (&lock->domain_list); - ret_lock = lock; - } - } else { - gf_log ("locks", GF_LOG_DEBUG, - "Unlock for a non-existing lock!"); - goto out; - } + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *ret_lock = NULL; + + lock = __find_most_matching_lock (dom, basename); + + if (!lock) { + gf_log ("locks", GF_LOG_DEBUG, + "unlock on 
%s (type=ENTRYLK_WRLCK) attempted but no matching lock found", + basename); + goto out; + } + + if (names_equal (lock->basename, basename) + && lock->type == type) { + + if (type == ENTRYLK_WRLCK) { + list_del_init (&lock->domain_list); + ret_lock = lock; + } + } else { + gf_log ("locks", GF_LOG_DEBUG, + "Unlock for a non-existing lock!"); + goto out; + } out: - return ret_lock; + return ret_lock; } +uint32_t +check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename) +{ + uint32_t entrylk = 0; + pl_inode_t *pinode = 0; + pl_dom_list_t *dom = NULL; + pl_entry_lock_t *conf = NULL; + + pinode = pl_inode_get (this, parent); + if (!pinode) + goto out; + pthread_mutex_lock (&pinode->mutex); + { + list_for_each_entry (dom, &pinode->dom_list, inode_list) { + conf = __lock_grantable (dom, basename, ENTRYLK_WRLCK); + if (conf && conf->basename) { + entrylk = 1; + break; + } + } + } + pthread_mutex_unlock (&pinode->mutex); + +out: + return entrylk; +} void __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom, struct list_head *granted) + pl_dom_list_t *dom, struct list_head *granted) { - int bl_ret = 0; - pl_entry_lock_t *bl = NULL; - pl_entry_lock_t *tmp = NULL; + int bl_ret = 0; + pl_entry_lock_t *bl = NULL; + pl_entry_lock_t *tmp = NULL; struct list_head blocked_list; INIT_LIST_HEAD (&blocked_list); list_splice_init (&dom->blocked_entrylks, &blocked_list); - - list_for_each_entry_safe (bl, tmp, &blocked_list, - blocked_locks) { - list_del_init (&bl->blocked_locks); + list_for_each_entry_safe (bl, tmp, &blocked_list, + blocked_locks) { + + list_del_init (&bl->blocked_locks); - gf_log ("locks", GF_LOG_TRACE, - "Trying to unblock: {pinode=%p, basename=%s}", - pl_inode, bl->basename); + gf_log ("locks", GF_LOG_TRACE, + "Trying to unblock: {pinode=%p, basename=%s}", + pl_inode, bl->basename); - bl_ret = __lock_name (pl_inode, bl->basename, bl->type, - bl->frame, dom, bl->this, 0); + bl_ret = __lock_name (pl_inode, 
bl->basename, bl->type, + bl->frame, dom, bl->this, 0, + bl->connection_id); - if (bl_ret == 0) { - list_add (&bl->blocked_locks, granted); - } else { - if (bl->basename) - FREE (bl->basename); - FREE (bl); - } - } - return; + if (bl_ret == 0) { + list_add (&bl->blocked_locks, granted); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "should never happen"); + GF_FREE (bl->connection_id); + GF_FREE ((char *)bl->basename); + GF_FREE (bl); + } + } + return; } /* Grants locks if possible which are blocked on a lock */ void grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_entry_lock_t *unlocked, pl_dom_list_t *dom) + pl_entry_lock_t *unlocked, pl_dom_list_t *dom) { - struct list_head granted_list; - pl_entry_lock_t *tmp = NULL; - pl_entry_lock_t *lock = NULL; + struct list_head granted_list; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lock = NULL; - INIT_LIST_HEAD (&granted_list); + INIT_LIST_HEAD (&granted_list); - pthread_mutex_lock (&pl_inode->mutex); - { - __grant_blocked_entry_locks (this, pl_inode, dom, &granted_list); - } - pthread_mutex_unlock (&pl_inode->mutex); + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_entry_locks (this, pl_inode, dom, + &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); - list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { - list_del_init (&lock->blocked_locks); + list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { + list_del_init (&lock->blocked_locks); entrylk_trace_out (this, lock->frame, NULL, NULL, NULL, lock->basename, ENTRYLK_LOCK, lock->type, 0, 0); - STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0); + STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL); - FREE (lock->basename); - FREE (lock); - } + GF_FREE (lock->connection_id); + GF_FREE ((char *)lock->basename); + GF_FREE (lock); + } - FREE (unlocked->basename); - FREE (unlocked); + GF_FREE ((char *)unlocked->basename); + GF_FREE (unlocked->connection_id); + GF_FREE (unlocked); - 
return; + return; } /** - * release_entry_locks_for_transport: release all entry locks from this - * transport for this loc_t + * release_entry_locks_for_client: release all entry locks from this + * client for this loc_t */ static int -release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode, - pl_dom_list_t *dom, transport_t *trans) +release_entry_locks_for_client (xlator_t *this, pl_inode_t *pinode, + pl_dom_list_t *dom, client_t *client) { - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *tmp = NULL; - struct list_head granted; + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *tmp = NULL; + struct list_head granted; struct list_head released; - INIT_LIST_HEAD (&granted); + INIT_LIST_HEAD (&granted); INIT_LIST_HEAD (&released); - pthread_mutex_lock (&pinode->mutex); - { + pthread_mutex_lock (&pinode->mutex); + { list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks, blocked_locks) { - if (lock->trans != trans) + if (lock->trans != client) continue; list_del_init (&lock->blocked_locks); - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_TRACE, "releasing lock on held by " - "{transport=%p}",trans); + "{client=%p}", client); list_add (&lock->blocked_locks, &released); } - list_for_each_entry_safe (lock, tmp, &dom->entrylk_list, - domain_list) { - if (lock->trans != trans) - continue; + list_for_each_entry_safe (lock, tmp, &dom->entrylk_list, + domain_list) { + if (lock->trans != client) + continue; - list_del_init (&lock->domain_list); + list_del_init (&lock->domain_list); - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_TRACE, "releasing lock on held by " - "{transport=%p}",trans); + "{client=%p}", client); - FREE (lock->basename); - FREE (lock); - } + GF_FREE ((char *)lock->basename); + GF_FREE (lock->connection_id); + GF_FREE (lock); + } - __grant_blocked_entry_locks (this, pinode, dom, &granted); + __grant_blocked_entry_locks (this, pinode, dom, &granted); - } + } - pthread_mutex_unlock (&pinode->mutex); + 
pthread_mutex_unlock (&pinode->mutex); list_for_each_entry_safe (lock, tmp, &released, blocked_locks) { list_del_init (&lock->blocked_locks); - STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN); + STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN, NULL); - if (lock->basename) - FREE (lock->basename); - FREE (lock); + GF_FREE ((char *)lock->basename); + GF_FREE (lock->connection_id); + GF_FREE (lock); } - list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { - list_del_init (&lock->blocked_locks); + list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { + list_del_init (&lock->blocked_locks); - STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0); + STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL); - if (lock->basename) - FREE (lock->basename); - FREE (lock); - } + GF_FREE ((char *)lock->basename); + GF_FREE (lock->connection_id); + GF_FREE (lock); + } - return 0; + return 0; } /* Common entrylk code called by pl_entrylk and pl_fentrylk */ int pl_common_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, inode_t *inode, const char *basename, - entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd) + entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd, + dict_t *xdata) + { - int32_t op_ret = -1; - int32_t op_errno = 0; - - transport_t * transport = NULL; - pid_t pid = -1; - uint64_t owner = -1; - - pl_inode_t * pinode = NULL; - int ret = -1; - pl_entry_lock_t *unlocked = NULL; - char unwind = 1; - - pl_dom_list_t *dom = NULL; - - pinode = pl_inode_get (this, inode); - if (!pinode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - goto out; - } - - dom = get_domain (pinode, volume); - if (!dom){ - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto out; - } + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + char unwind = 1; + GF_UNUSED int dict_ret = -1; + pl_inode_t *pinode = NULL; + pl_entry_lock_t *unlocked = NULL; + pl_dom_list_t *dom = 
NULL; + char *conn_id = NULL; + pl_ctx_t *ctx = NULL; + + if (xdata) + dict_ret = dict_get_str (xdata, "connection-id", &conn_id); + + pinode = pl_inode_get (this, inode); + if (!pinode) { + op_errno = ENOMEM; + goto out; + } - entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type); + dom = get_domain (pinode, volume); + if (!dom){ + op_errno = ENOMEM; + goto out; + } - pid = frame->root->pid; - owner = (uint64_t)(long) frame->root; - transport = frame->root->trans; + entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type); - if (pid == 0) { - /* + if (frame->root->lk_owner.len == 0) { + /* this is a special case that means release - all locks from this transport - */ - - gf_log (this->name, GF_LOG_TRACE, - "Releasing locks for transport %p", transport); - - release_entry_locks_for_transport (this, pinode, dom, transport); - op_ret = 0; - - goto out; - } - - switch (cmd) { - case ENTRYLK_LOCK: - pthread_mutex_lock (&pinode->mutex); - { - ret = __lock_name (pinode, basename, type, - frame, dom, this, 0); - } - pthread_mutex_unlock (&pinode->mutex); - - if (ret < 0) { - if (ret == -EAGAIN) - unwind = 0; - op_errno = -ret; - goto out; - } - - break; - - case ENTRYLK_LOCK_NB: - pthread_mutex_lock (&pinode->mutex); - { - ret = __lock_name (pinode, basename, type, - frame, dom, this, 1); - } - pthread_mutex_unlock (&pinode->mutex); - - if (ret < 0) { - op_errno = -ret; - goto out; - } - - break; - - case ENTRYLK_UNLOCK: - pthread_mutex_lock (&pinode->mutex); - { + all locks from this client + */ + + gf_log (this->name, GF_LOG_TRACE, + "Releasing locks for client %p", frame->root->client); + + release_entry_locks_for_client (this, pinode, dom, + frame->root->client); + op_ret = 0; + + goto out; + } + + switch (cmd) { + case ENTRYLK_LOCK: + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, dom, this, 0, conn_id); + } + pthread_mutex_unlock (&pinode->mutex); + + op_errno = -ret; + if (ret < 0) { + if 
(ret == -EAGAIN) + unwind = 0; + else + unwind = 1; + goto out; + } else { + op_ret = 0; + op_errno = 0; + unwind = 1; + goto out; + } + + break; + + case ENTRYLK_LOCK_NB: + unwind = 1; + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, dom, this, 1, conn_id); + } + pthread_mutex_unlock (&pinode->mutex); + + if (ret < 0) { + op_errno = -ret; + goto out; + } + + break; + + case ENTRYLK_UNLOCK: + pthread_mutex_lock (&pinode->mutex); + { unlocked = __unlock_name (dom, basename, type); - } - pthread_mutex_unlock (&pinode->mutex); + } + pthread_mutex_unlock (&pinode->mutex); - if (unlocked) - grant_blocked_entry_locks (this, pinode, unlocked, dom); + if (unlocked) + grant_blocked_entry_locks (this, pinode, unlocked, dom); - break; + break; - default: - gf_log (this->name, GF_LOG_ERROR, - "Unexpected case in entrylk (cmd=%d). Please file" + default: + gf_log (this->name, GF_LOG_ERROR, + "Unexpected case in entrylk (cmd=%d). Please file" "a bug report at http://bugs.gluster.com", cmd); - goto out; - } + goto out; + } - op_ret = 0; + op_ret = 0; out: pl_update_refkeeper (this, inode); - if (unwind) { + if (unwind) { entrylk_trace_out (this, frame, volume, fd, loc, basename, cmd, type, op_ret, op_errno); - STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno); - } else { + ctx = pl_ctx_get (frame->root->client, this); + + if (ctx == NULL) { + gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; + } + + if (cmd == ENTRYLK_UNLOCK) + pl_del_locker (ctx->ltable, volume, loc, fd, + &frame->root->lk_owner, + GF_FOP_ENTRYLK); + else + pl_add_locker (ctx->ltable, volume, loc, fd, + frame->root->pid, + &frame->root->lk_owner, + GF_FOP_ENTRYLK); + +unwind: + STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL); + } else { entrylk_trace_block (this, frame, volume, fd, loc, basename, cmd, type); } - return 0; + return 0; } /** @@ -713,11 +773,11 @@ out: int pl_entrylk (call_frame_t *frame, xlator_t *this, - const 
char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - - pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, type, loc, NULL); + pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, + type, loc, NULL, xdata); return 0; } @@ -732,16 +792,16 @@ pl_entrylk (call_frame_t *frame, xlator_t *this, int pl_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - - pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, type, NULL, fd); + pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, + type, NULL, fd, xdata); return 0; } -static int32_t +int32_t __get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode) { int32_t count = 0; @@ -750,24 +810,10 @@ __get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode) list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - - gf_log (this->name, GF_LOG_DEBUG, - " XATTR DEBUG" - " domain: %s %s on %s state = Active", - dom->domain, - lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : - "ENTRYLK_WRLCK", lock->basename); count++; } list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { - - gf_log (this->name, GF_LOG_DEBUG, - " XATTR DEBUG" - " domain: %s %s on %s state = Blocked", - dom->domain, - lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : - "ENTRYLK_WRLCK", lock->basename); count++; } diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index 50a5996d7..508523e11 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. 
- - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -33,30 +23,40 @@ #include "locks.h" #include "common.h" -void +inline void __delete_inode_lock (pl_inode_lock_t *lock) { - list_del (&lock->list); + list_del (&lock->list); } -void -__destroy_inode_lock (pl_inode_lock_t *lock) +static inline void +__pl_inodelk_ref (pl_inode_lock_t *lock) +{ + lock->ref++; +} + +inline void +__pl_inodelk_unref (pl_inode_lock_t *lock) { - FREE (lock); + lock->ref--; + if (!lock->ref) { + GF_FREE (lock->connection_id); + GF_FREE (lock); + } } /* Check if 2 inodelks are conflicting on type. 
Only 2 shared locks don't conflict */ -static int +static inline int inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK) - return 1; + if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK) + return 1; - return 0; + return 0; } void -pl_print_inodelk (char *str, int size, int cmd, struct flock *flock, const char *domain) +pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain) { char *cmd_str = NULL; char *type_str = NULL; @@ -115,46 +115,43 @@ pl_print_inodelk (char *str, int size, int cmd, struct flock *flock, const char static int inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - return ((l1->fl_end >= l2->fl_start) && - (l2->fl_end >= l1->fl_start)); + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); } /* Returns true if the 2 inodelks have the same owner */ -static int same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +static inline int +same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - return ((l1->owner == l2->owner) && - (l1->transport == l2->transport)); + return (is_same_lkowner (&l1->owner, &l2->owner) && + (l1->client == l2->client)); } /* Returns true if the 2 inodelks conflict with each other */ static int inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - if (same_inodelk_owner (l1, l2)) - return 0; - - if (!inodelk_overlap (l1, l2)) - return 0; - - return (inodelk_type_conflict(l1, l2)); + return (inodelk_overlap (l1, l2) && + inodelk_type_conflict (l1, l2)); } /* Determine if lock is grantable or not */ static pl_inode_lock_t * __inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock) { - pl_inode_lock_t *l = NULL; - pl_inode_lock_t *ret = NULL; - if (list_empty (&dom->inodelk_list)) - goto out; - list_for_each_entry (l, &dom->inodelk_list, list){ - if (inodelk_conflict (lock, l)) { - ret = l; - goto out; - } - } + pl_inode_lock_t *l = NULL; + pl_inode_lock_t 
*ret = NULL; + if (list_empty (&dom->inodelk_list)) + goto out; + list_for_each_entry (l, &dom->inodelk_list, list){ + if (inodelk_conflict (lock, l) && + !same_inodelk_owner (lock, l)) { + ret = l; + goto out; + } + } out: - return ret; + return ret; } static pl_inode_lock_t * @@ -163,18 +160,18 @@ __blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock) pl_inode_lock_t *l = NULL; pl_inode_lock_t *ret = NULL; - if (list_empty (&dom->blocked_entrylks)) - return NULL; + if (list_empty (&dom->blocked_inodelks)) + return NULL; - list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) { - if (inodelk_conflict (lock, l)) { - ret = l; - goto out; + list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) { + if (inodelk_conflict (lock, l)) { + ret = l; + goto out; } - } + } out: - return ret; + return ret; } static int @@ -182,17 +179,17 @@ __owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock) { pl_inode_lock_t *lock = NULL; - list_for_each_entry (lock, &dom->entrylk_list, list) { - if (same_inodelk_owner (lock, newlock)) - return 1; - } + list_for_each_entry (lock, &dom->inodelk_list, list) { + if (same_inodelk_owner (lock, newlock)) + return 1; + } - list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { - if (same_inodelk_owner (lock, newlock)) - return 1; - } + list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { + if (same_inodelk_owner (lock, newlock)) + return 1; + } - return 0; + return 0; } @@ -201,80 +198,85 @@ __owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock) */ static int __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, - int can_block, pl_dom_list_t *dom) + int can_block, pl_dom_list_t *dom) { - pl_inode_lock_t *conf = NULL; - int ret = -EINVAL; + pl_inode_lock_t *conf = NULL; + int ret = -EINVAL; - conf = __inodelk_grantable (dom, lock); - if (conf){ - ret = -EAGAIN; - if (can_block == 0) - goto out; + conf = __inodelk_grantable (dom, lock); + if (conf){ 
+ ret = -EAGAIN; + if (can_block == 0) + goto out; - list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); + gettimeofday (&lock->blkd_time, NULL); + list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked", + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, - lock->owner, + lkowner_utoa (&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); - goto out; - } + goto out; + } if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) { ret = -EAGAIN; if (can_block == 0) goto out; + gettimeofday (&lock->blkd_time, NULL); list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); gf_log (this->name, GF_LOG_TRACE, "Lock is grantable, but blocking to prevent starvation"); - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Blocked", + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked", lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", lock->client_pid, - lock->owner, + lkowner_utoa (&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); - goto out; + goto out; } - list_add (&lock->list, &dom->inodelk_list); + __pl_inodelk_ref (lock); + gettimeofday (&lock->granted_time, NULL); + list_add (&lock->list, &dom->inodelk_list); - ret = 0; + ret = 0; out: - return ret; + return ret; } /* Return true if the two inodelks have exactly same lock boundaries */ static int inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - if ((l1->fl_start == l2->fl_start) && - (l1->fl_end == l2->fl_end)) - return 1; + if ((l1->fl_start == l2->fl_start) && + (l1->fl_end == l2->fl_end)) + return 1; - return 0; + return 0; } static pl_inode_lock_t * find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom) { - pl_inode_lock_t *l = NULL; - list_for_each_entry (l, &dom->inodelk_list, list) { - if (inodelks_equal (l, lock)) - return l; - } - return NULL; + pl_inode_lock_t *l = NULL; + list_for_each_entry (l, &dom->inodelk_list, list) { + if (inodelks_equal (l, lock) && + same_inodelk_owner (l, lock)) + return l; + } + return NULL; } /* Set F_UNLCK removes a lock which has the exact same lock boundaries @@ -284,105 +286,110 @@ static pl_inode_lock_t * __inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) { - pl_inode_lock_t *conf = NULL; + pl_inode_lock_t *conf = NULL; - conf = find_matching_inodelk (lock, dom); - if (!conf) { - gf_log (this->name, GF_LOG_DEBUG, - " Matching lock not found for unlock"); - goto out; + conf = find_matching_inodelk (lock, dom); + if (!conf) { + gf_log (this->name, GF_LOG_ERROR, + " Matching lock not found for unlock %llu-%llu, by %s " + "on %p", (unsigned long long)lock->fl_start, + (unsigned long long)lock->fl_end, + lkowner_utoa (&lock->owner), lock->client); + goto out; } - __delete_inode_lock (conf); + __delete_inode_lock (conf); gf_log (this->name, GF_LOG_DEBUG, - " Matching lock found for unlock"); - __destroy_inode_lock 
(lock); - + " Matching lock found for unlock %llu-%llu, by %s on %p", + (unsigned long long)lock->fl_start, + (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner), + lock->client); out: - return conf; - - + return conf; } static void -__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, - struct list_head *granted, pl_dom_list_t *dom) +__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom) { - int bl_ret = 0; - pl_inode_lock_t *bl = NULL; - pl_inode_lock_t *tmp = NULL; + int bl_ret = 0; + pl_inode_lock_t *bl = NULL; + pl_inode_lock_t *tmp = NULL; struct list_head blocked_list; INIT_LIST_HEAD (&blocked_list); list_splice_init (&dom->blocked_inodelks, &blocked_list); - list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) { + list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) { - list_del_init (&bl->blocked_locks); + list_del_init (&bl->blocked_locks); - bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom); + bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom); - if (bl_ret == 0) { - list_add (&bl->blocked_locks, granted); + if (bl_ret == 0) { + list_add (&bl->blocked_locks, granted); } } - return; + return; } /* Grant all inodelks blocked on a lock */ void -grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom) +grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom) { - struct list_head granted; - pl_inode_lock_t *lock; - pl_inode_lock_t *tmp; - - INIT_LIST_HEAD (&granted); + struct list_head granted; + pl_inode_lock_t *lock; + pl_inode_lock_t *tmp; - if (list_empty (&dom->blocked_inodelks)) { - gf_log (this->name, GF_LOG_TRACE, - "No blocked locks to be granted for domain: %s", dom->domain); - } + INIT_LIST_HEAD (&granted); pthread_mutex_lock (&pl_inode->mutex); - { - __grant_blocked_inode_locks (this, pl_inode, &granted, dom); - } + { + __grant_blocked_inode_locks (this, pl_inode, &granted, 
dom); + } pthread_mutex_unlock (&pl_inode->mutex); - list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { + list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lock->owner, - lock->user_flock.l_start, - lock->user_flock.l_len); + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); - pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, - &lock->user_flock, 0, 0, lock->volume); + pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, 0, 0, lock->volume); - STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0); - } + STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL); + } + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { + list_del_init (&lock->blocked_locks); + __pl_inodelk_unref (lock); + } + } + pthread_mutex_unlock (&pl_inode->mutex); } -/* Release all inodelks from this transport */ +/* Release all inodelks from this client */ static int -release_inode_locks_of_transport (xlator_t *this, pl_dom_list_t *dom, - inode_t *inode, transport_t *trans) +release_inode_locks_of_client (xlator_t *this, pl_dom_list_t *dom, + inode_t *inode, client_t *client) { - pl_inode_lock_t *tmp = NULL; - pl_inode_lock_t *l = NULL; + pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *l = NULL; pl_inode_t * pinode = NULL; - struct list_head granted; struct list_head released; char *path = NULL; + char *file = NULL; - INIT_LIST_HEAD (&granted); INIT_LIST_HEAD (&released); pinode = pl_inode_get (this, inode); @@ -391,247 +398,304 @@ release_inode_locks_of_transport (xlator_t *this, pl_dom_list_t *dom, { list_for_each_entry_safe (l, tmp, 
&dom->blocked_inodelks, blocked_locks) { - if (l->transport != trans) + if (l->client != client) continue; list_del_init (&l->blocked_locks); - if (inode_path (inode, NULL, &path) < 0) { - gf_log (this->name, GF_LOG_TRACE, - "inode_path failed"); - goto unlock; - } + inode_path (inode, NULL, &path); + if (path) + file = path; + else + file = uuid_utoa (inode->gfid); - gf_log (this->name, GF_LOG_TRACE, - "releasing lock on %s held by " - "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}", - path, trans, - (uint64_t) l->client_pid, - l->owner); + gf_log (this->name, GF_LOG_DEBUG, + "releasing blocking lock on %s held by " + "{client=%p, pid=%"PRId64" lk-owner=%s}", + file, client, (uint64_t) l->client_pid, + lkowner_utoa (&l->owner)); list_add (&l->blocked_locks, &released); - + if (path) { + GF_FREE (path); + path = NULL; + } } list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) { - if (l->transport != trans) + if (l->client != client) continue; - __delete_inode_lock (l); - __destroy_inode_lock (l); - - - if (inode_path (inode, NULL, &path) < 0) { - gf_log (this->name, GF_LOG_TRACE, - "inode_path failed"); - goto unlock; + inode_path (inode, NULL, &path); + if (path) + file = path; + else + file = uuid_utoa (inode->gfid); + + gf_log (this->name, GF_LOG_DEBUG, + "releasing granted lock on %s held by " + "{client=%p, pid=%"PRId64" lk-owner=%s}", + file, client, (uint64_t) l->client_pid, + lkowner_utoa (&l->owner)); + + if (path) { + GF_FREE (path); + path = NULL; } - gf_log (this->name, GF_LOG_TRACE, - "releasing lock on %s held by " - "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}", - path, trans, - (uint64_t) l->client_pid, - l->owner); - - + __delete_inode_lock (l); + __pl_inodelk_unref (l); } } -unlock: - if (path) - FREE (path); + GF_FREE (path); pthread_mutex_unlock (&pinode->mutex); list_for_each_entry_safe (l, tmp, &released, blocked_locks) { list_del_init (&l->blocked_locks); - STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN); - FREE (l); + 
STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, NULL); + //No need to take lock as the locks are only in one list + __pl_inodelk_unref (l); } - grant_blocked_inode_locks (this, pinode, dom); - return 0; + grant_blocked_inode_locks (this, pinode, dom); + return 0; } static int pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, - int can_block, pl_dom_list_t *dom) + int can_block, pl_dom_list_t *dom) { - int ret = -EINVAL; + int ret = -EINVAL; pl_inode_lock_t *retlock = NULL; + gf_boolean_t unref = _gf_true; - pthread_mutex_lock (&pl_inode->mutex); - { - if (lock->fl_type != F_UNLCK) { - ret = __lock_inodelk (this, pl_inode, lock, can_block, dom); - if (ret == 0) - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK", + pthread_mutex_lock (&pl_inode->mutex); + { + if (lock->fl_type != F_UNLCK) { + ret = __lock_inodelk (this, pl_inode, lock, can_block, dom); + if (ret == 0) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK", lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, - lock->owner, + lkowner_utoa (&lock->owner), lock->fl_start, lock->fl_end); + } else if (ret == -EAGAIN) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + if (can_block) + unref = _gf_false; + } + } else { + retlock = __inode_unlock_lock (this, lock, dom); + if (!retlock) { + gf_log (this->name, GF_LOG_DEBUG, + "Bad Unlock issued on Inode lock"); + ret = -EINVAL; + goto out; + } + __pl_inodelk_unref (retlock); - if (ret == -EAGAIN) - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK", - lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", - lock->client_pid, - lock->owner, - lock->user_flock.l_start, - lock->user_flock.l_len); - - goto out; - } - - - retlock = __inode_unlock_lock (this, lock, dom); - if (!retlock) { - gf_log (this->name, GF_LOG_DEBUG, - "Bad Unlock issued on Inode lock"); - ret = -EINVAL; - goto out; + ret = 0; } - __destroy_inode_lock (retlock); - - ret = 0; - - - } + } out: - pthread_mutex_unlock (&pl_inode->mutex); - grant_blocked_inode_locks (this, pl_inode, dom); + if (unref) + __pl_inodelk_unref (lock); + pthread_mutex_unlock (&pl_inode->mutex); + grant_blocked_inode_locks (this, pl_inode, dom); return ret; } /* Create a new inode_lock_t */ pl_inode_lock_t * -new_inode_lock (struct flock *flock, transport_t *transport, pid_t client_pid, - uint64_t owner, const char *volume) +new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, + call_frame_t *frame, xlator_t *this, const char *volume, + char *conn_id) { - pl_inode_lock_t *lock = NULL; + pl_inode_lock_t *lock = NULL; + + lock = GF_CALLOC (1, sizeof (*lock), + gf_locks_mt_pl_inode_lock_t); + if (!lock) { + return NULL; + } + + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; + + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; + + lock->client = client; + lock->client_pid = client_pid; + lock->volume = volume; + lock->owner = frame->root->lk_owner; + lock->frame = frame; + lock->this = this; + + if (conn_id) { + lock->connection_id = gf_strdup (conn_id); + } + + INIT_LIST_HEAD (&lock->list); + INIT_LIST_HEAD (&lock->blocked_locks); + __pl_inodelk_ref (lock); + + return lock; +} - lock = CALLOC (1, sizeof (*lock)); - if (!lock) { - return NULL; - } +int32_t +_pl_convert_volume (const char *volume, char **res) +{ + char *mdata_vol = NULL; + int ret = 0; - lock->fl_start = flock->l_start; - lock->fl_type = flock->l_type; + mdata_vol = strrchr (volume, ':'); + //if the volume already ends with :metadata don't 
bother + if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0)) + return 0; - if (flock->l_len == 0) - lock->fl_end = LLONG_MAX; - else - lock->fl_end = flock->l_start + flock->l_len - 1; + ret = gf_asprintf (res, "%s:metadata", volume); + if (ret <= 0) + return ENOMEM; + return 0; +} - lock->transport = transport; - lock->client_pid = client_pid; - lock->owner = owner; - lock->volume = volume; +int32_t +_pl_convert_volume_for_special_range (struct gf_flock *flock, + const char *volume, char **res) +{ + int32_t ret = 0; - INIT_LIST_HEAD (&lock->list); - INIT_LIST_HEAD (&lock->blocked_locks); + if ((flock->l_start == LLONG_MAX -1) && + (flock->l_len == 0)) { + ret = _pl_convert_volume (volume, res); + } - return lock; + return ret; } /* Common inodelk code called from pl_inodelk and pl_finodelk */ int pl_common_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, inode_t *inode, int32_t cmd, - struct flock *flock, loc_t *loc, fd_t *fd) + struct gf_flock *flock, loc_t *loc, fd_t *fd, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - int can_block = 0; - transport_t * transport = NULL; - pid_t client_pid = -1; - uint64_t owner = -1; - pl_inode_t * pinode = NULL; - pl_inode_lock_t * reqlock = NULL; - pl_dom_list_t * dom = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (inode, unwind); - VALIDATE_OR_GOTO (flock, unwind); - - if ((flock->l_start < 0) || (flock->l_len < 0)) { - op_errno = EINVAL; - goto unwind; - } + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + GF_UNUSED int dict_ret = -1; + int can_block = 0; + pl_inode_t * pinode = NULL; + pl_inode_lock_t * reqlock = NULL; + pl_dom_list_t * dom = NULL; + char *res = NULL; + char *res1 = NULL; + char *conn_id = NULL; + pl_ctx_t *ctx = NULL; + + if (xdata) + dict_ret = dict_get_str (xdata, "connection-id", &conn_id); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (inode, unwind); + VALIDATE_OR_GOTO (flock, unwind); + + if ((flock->l_start 
< 0) || (flock->l_len < 0)) { + op_errno = EINVAL; + goto unwind; + } - pl_trace_in (this, frame, fd, loc, cmd, flock, volume); + op_errno = _pl_convert_volume_for_special_range (flock, volume, &res); + if (op_errno) + goto unwind; + if (res) + volume = res; - transport = frame->root->trans; - client_pid = frame->root->pid; - owner = (uint64_t) (long)frame->root; + pl_trace_in (this, frame, fd, loc, cmd, flock, volume); - pinode = pl_inode_get (this, inode); - if (!pinode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - goto unwind; - } + pinode = pl_inode_get (this, inode); + if (!pinode) { + op_errno = ENOMEM; + goto unwind; + } - dom = get_domain (pinode, volume); + dom = get_domain (pinode, volume); + if (!dom) { + op_errno = ENOMEM; + goto unwind; + } - if (client_pid == 0) { - /* + if (frame->root->lk_owner.len == 0) { + /* special case: this means release all locks - from this transport - */ - gf_log (this->name, GF_LOG_TRACE, - "Releasing all locks from transport %p", transport); - - release_inode_locks_of_transport (this, dom, inode, transport); - goto unwind; - } - - reqlock = new_inode_lock (flock, transport, client_pid, owner, volume); - - if (!reqlock) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - switch (cmd) { - case F_SETLKW: - can_block = 1; - reqlock->frame = frame; - reqlock->this = this; - - /* fall through */ - - case F_SETLK: - memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); - ret = pl_inode_setlk (this, pinode, reqlock, + from this client + */ + gf_log (this->name, GF_LOG_TRACE, + "Releasing all locks from client %p", frame->root->client); + + release_inode_locks_of_client (this, dom, inode, frame->root->client); + _pl_convert_volume (volume, &res1); + if (res1) { + dom = get_domain (pinode, res1); + if (dom) + release_inode_locks_of_client (this, dom, + inode, frame->root->client); + } + + op_ret = 0; + goto unwind; + } + + 
reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid, + frame, this, volume, conn_id); + + if (!reqlock) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + + switch (cmd) { + case F_SETLKW: + can_block = 1; + + /* fall through */ + + case F_SETLK: + memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); + ret = pl_inode_setlk (this, pinode, reqlock, can_block, dom); - if (ret < 0) { - if (can_block) { + if (ret < 0) { + if ((can_block) && (F_UNLCK != flock->l_type)) { pl_trace_block (this, frame, fd, loc, cmd, flock, volume); - goto out; + goto out; } - gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN"); - op_errno = -ret; - __destroy_inode_lock (reqlock); - goto unwind; - } - break; - - default: - op_errno = ENOTSUP; - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN"); + op_errno = -ret; + goto unwind; + } + break; + + default: + op_errno = ENOTSUP; + gf_log (this->name, GF_LOG_DEBUG, "Lock command F_GETLK not supported for [f]inodelk " "(cmd=%d)", cmd); @@ -640,86 +704,103 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, op_ret = 0; + ctx = pl_ctx_get (frame->root->client, this); + + if (ctx == NULL) { + gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; + } + + if (flock->l_type == F_UNLCK) + pl_del_locker (ctx->ltable, volume, loc, fd, + &frame->root->lk_owner, + GF_FOP_INODELK); + else + pl_add_locker (ctx->ltable, volume, loc, fd, + frame->root->pid, + &frame->root->lk_owner, + GF_FOP_INODELK); + unwind: - if ((inode != NULL) && (flock !=NULL)) { - pl_update_refkeeper (this, inode); - pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume); - } + if ((inode != NULL) && (flock !=NULL)) { + pl_update_refkeeper (this, inode); + pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume); + } - STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL); out: + 
GF_FREE (res); + GF_FREE (res1); return 0; } int pl_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct flock *flock) + const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, + dict_t *xdata) { - - pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, loc, NULL); + pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, + loc, NULL, xdata); return 0; } int pl_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct flock *flock) + const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, + dict_t *xdata) { - - pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, NULL, fd); + pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, + NULL, fd, xdata); return 0; } +static inline int32_t +__get_inodelk_dom_count (pl_dom_list_t *dom) +{ + pl_inode_lock_t *lock = NULL; + int32_t count = 0; -static int32_t -__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode) + list_for_each_entry (lock, &dom->inodelk_list, list) { + count++; + } + list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { + count++; + } + return count; +} + +/* Returns the no. of locks (blocked/granted) held on a given domain name + * If @domname is NULL, returns the no. of locks in all the domains present. + * If @domname is non-NULL and non-existent, returns 0 */ +int32_t +__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname) { int32_t count = 0; - pl_inode_lock_t *lock = NULL; pl_dom_list_t *dom = NULL; list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - list_for_each_entry (lock, &dom->inodelk_list, list) { - - gf_log (this->name, GF_LOG_DEBUG, - " XATTR DEBUG" - " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" " - "state = Active", - dom->domain, - lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", - lock->client_pid, - lock->owner, - lock->user_flock.l_start, - lock->user_flock.l_len); - - count++; - } - - list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { + if (domname) { + if (strcmp (domname, dom->domain) == 0) { + count = __get_inodelk_dom_count (dom); + goto out; + } - gf_log (this->name, GF_LOG_DEBUG, - " XATTR DEBUG" - " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" " - "state = Blocked", - dom->domain, - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lock->owner, - lock->user_flock.l_start, - lock->user_flock.l_len); + } else { + /* Counting locks from all domains */ + count += __get_inodelk_dom_count (dom); - count++; } - } +out: return count; } int32_t -get_inodelk_count (xlator_t *this, inode_t *inode) +get_inodelk_count (xlator_t *this, inode_t *inode, char *domname) { pl_inode_t *pl_inode = NULL; uint64_t tmp_pl_inode = 0; @@ -735,7 +816,7 @@ get_inodelk_count (xlator_t *this, inode_t *inode) pthread_mutex_lock (&pl_inode->mutex); { - count = __get_inodelk_count (this, pl_inode); + count = __get_inodelk_count (this, pl_inode, domname); } pthread_mutex_unlock (&pl_inode->mutex); diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h new file mode 100644 index 000000000..08aeb0a79 --- /dev/null +++ b/xlators/features/locks/src/locks-mem-types.h @@ -0,0 +1,29 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __LOCKS_MEM_TYPES_H__ +#define __LOCKS_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_locks_mem_types_ { + gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1, + gf_locks_mt_pl_inode_t, + gf_locks_mt_posix_lock_t, + gf_locks_mt_pl_entry_lock_t, + gf_locks_mt_pl_inode_lock_t, + gf_locks_mt_truncate_ops, + gf_locks_mt_pl_rw_req_t, + gf_locks_mt_posix_locks_private_t, + gf_locks_mt_pl_fdctx_t, + gf_locks_mt_end +}; +#endif + diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index e89092811..76fc941d7 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef __POSIX_LOCKS_H__ #define __POSIX_LOCKS_H__ @@ -26,9 +16,12 @@ #endif #include "compat-errno.h" -#include "transport.h" #include "stack.h" #include "call-stub.h" +#include "locks-mem-types.h" +#include "client_t.h" + +#include "lkowner.h" struct __pl_fd; @@ -40,24 +33,29 @@ struct __posix_lock { off_t fl_end; short blocked; /* waiting to acquire */ - struct flock user_flock; /* the flock supplied by the user */ + struct gf_flock user_flock; /* the flock supplied by the user */ xlator_t *this; /* required for blocked locks */ unsigned long fd_num; + fd_t *fd; call_frame_t *frame; + struct timeval blkd_time; /*time at which lock was queued into blkd list*/ + struct timeval granted_time; /*time at which lock was queued into active list*/ + /* These two together serve to uniquely identify each process across nodes */ - transport_t *transport; /* to identify client node */ + void *client; /* to identify client node */ + gf_lkowner_t owner; pid_t client_pid; /* pid of client process */ - uint64_t owner; /* lock owner from fuse */ }; typedef struct __posix_lock posix_lock_t; struct __pl_inode_lock { struct list_head list; struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */ + int ref; short fl_type; off_t fl_start; @@ -65,18 +63,23 @@ struct __pl_inode_lock { const char *volume; - struct flock user_flock; /* the flock supplied by the user */ + struct gf_flock user_flock; /* the flock supplied by the user */ xlator_t *this; /* required for blocked locks */ fd_t *fd; call_frame_t *frame; + struct timeval blkd_time; /*time at which lock was queued into blkd list*/ + struct timeval granted_time; /*time at which lock was queued into active list*/ + /* These two together serve to uniquely identify each process across nodes */ - transport_t *transport; /* to identify client node */ + void *client; /* to identify client node */ + gf_lkowner_t owner; pid_t client_pid; /* pid of client process */ - uint64_t owner; + + char *connection_id; /* 
stores the client connection id */ }; typedef struct __pl_inode_lock pl_inode_lock_t; @@ -109,9 +112,14 @@ struct __entry_lock { const char *basename; entrylk_type type; - transport_t *trans; - pid_t client_pid; /* pid of client process */ - uint64_t owner; + struct timeval blkd_time; /*time at which lock was queued into blkd list*/ + struct timeval granted_time; /*time at which lock was queued into active list*/ + + void *trans; + gf_lkowner_t owner; + pid_t client_pid; /* pid of client process */ + + char *connection_id; /* stores the client connection id */ }; typedef struct __entry_lock pl_entry_lock_t; @@ -125,6 +133,9 @@ struct __pl_inode { struct list_head dom_list; /* list of domains */ struct list_head ext_list; /* list of fcntl locks */ struct list_head rw_list; /* list of waiting r/w requests */ + struct list_head reservelk_list; /* list of reservelks */ + struct list_head blocked_reservelks; /* list of blocked reservelks */ + struct list_head blocked_calls; /* List of blocked lock calls while a reserve is held*/ int mandatory; /* if mandatory locking is enabled */ inode_t *refkeeper; /* hold refs on an inode while locks are @@ -142,12 +153,40 @@ typedef struct __pl_fd pl_fd_t; typedef struct { gf_boolean_t mandatory; /* if mandatory locking is enabled */ gf_boolean_t trace; /* trace lock requests in and out */ + char *brickname; } posix_locks_private_t; + typedef struct { gf_boolean_t entrylk_count_req; gf_boolean_t inodelk_count_req; + gf_boolean_t inodelk_dom_count_req; gf_boolean_t posixlk_count_req; + gf_boolean_t parent_entrylk_req; + + /* used by {f,}truncate */ + loc_t loc; + fd_t *fd; + off_t offset; + dict_t *xdata; + enum {TRUNCATE, FTRUNCATE} op; } pl_local_t; + +typedef struct { + struct list_head locks_list; +} pl_fdctx_t; + + +typedef struct _locks_ctx { + gf_lock_t ltable_lock; /* only for replace, + ltable has its own internal + lock for operations */ + struct _lock_table *ltable; +} pl_ctx_t; + + +pl_ctx_t * +pl_ctx_get (client_t 
*client, xlator_t *xlator); + #endif /* __POSIX_LOCKS_H__ */ diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 2e6afd9fc..7bfb38a51 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #include <unistd.h> #include <fcntl.h> #include <limits.h> @@ -37,6 +27,9 @@ #include "locks.h" #include "common.h" #include "statedump.h" +#include "clear.h" +#include "defaults.h" +#include "syncop.h" #ifndef LLONG_MAX #define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */ @@ -47,37 +40,87 @@ void do_blocked_rw (pl_inode_t *); static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t); +static int format_brickname(char *); +int pl_lockinfo_get_brickname (xlator_t *, inode_t *, int32_t *); +static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); -struct _truncate_ops { - loc_t loc; - fd_t *fd; - off_t offset; - enum {TRUNCATE, FTRUNCATE} op; -}; +static pl_fdctx_t * +pl_new_fdctx () +{ + pl_fdctx_t *fdctx = NULL; + + fdctx = GF_CALLOC (1, sizeof (*fdctx), + gf_locks_mt_pl_fdctx_t); + GF_VALIDATE_OR_GOTO ("posix-locks", fdctx, out); + + INIT_LIST_HEAD (&fdctx->locks_list); + +out: + return fdctx; +} + +static pl_fdctx_t * +pl_check_n_create_fdctx (xlator_t *this, fd_t *fd) +{ + int ret = 0; + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + GF_VALIDATE_OR_GOTO ("posix-locks", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + LOCK (&fd->lock); + { + ret = __fd_ctx_get (fd, this, &tmp); + if ((ret != 0) || (tmp == 0)) { + fdctx = pl_new_fdctx (); + if (fdctx == NULL) { + goto unlock; + } + } + + ret = __fd_ctx_set (fd, this, (uint64_t)(long)fdctx); + if (ret != 0) { + GF_FREE (fdctx); + fdctx = NULL; + gf_log (this->name, GF_LOG_DEBUG, + "failed to set fd ctx"); + } + } +unlock: + UNLOCK (&fd->lock); + +out: + return fdctx; +} int pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { - struct _truncate_ops *local = NULL; + pl_local_t *local = NULL; local = frame->local; if (local->op == TRUNCATE) loc_wipe (&local->loc); + if (local->xdata) + dict_unref (local->xdata); + if (local->fd) + 
fd_unref (local->fd); + STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, - prebuf, postbuf); + prebuf, postbuf, xdata); return 0; } static int truncate_allowed (pl_inode_t *pl_inode, - transport_t *transport, pid_t client_pid, - uint64_t owner, off_t offset) + client_t *client, pid_t client_pid, + gf_lkowner_t *owner, off_t offset) { posix_lock_t *l = NULL; posix_lock_t region = {.list = {0, }, }; @@ -85,9 +128,9 @@ truncate_allowed (pl_inode_t *pl_inode, region.fl_start = offset; region.fl_end = LLONG_MAX; - region.transport = transport; + region.client = client; region.client_pid = client_pid; - region.owner = owner; + region.owner = *owner; pthread_mutex_lock (&pl_inode->mutex); { @@ -96,6 +139,8 @@ truncate_allowed (pl_inode_t *pl_inode, && locks_overlap (&region, l) && !same_owner (&region, l)) { ret = 0; + gf_log ("posix-locks", GF_LOG_TRACE, "Truncate " + "allowed"); break; } } @@ -108,10 +153,11 @@ truncate_allowed (pl_inode_t *pl_inode, static int truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { posix_locks_private_t *priv = NULL; - struct _truncate_ops *local = NULL; + pl_local_t *local = NULL; inode_t *inode = NULL; pl_inode_t *pl_inode = NULL; @@ -133,8 +179,6 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, pl_inode = pl_inode_get (this, inode); if (!pl_inode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); op_ret = -1; op_errno = ENOMEM; goto unwind; @@ -142,8 +186,8 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (priv->mandatory && pl_inode->mandatory - && !truncate_allowed (pl_inode, frame->root->trans, - frame->root->pid, frame->root->lk_owner, + && !truncate_allowed (pl_inode, frame->root->client, + frame->root->pid, &frame->root->lk_owner, local->offset)) { op_ret = -1; op_errno = EAGAIN; @@ -154,52 +198,58 @@ truncate_stat_cbk
(call_frame_t *frame, void *cookie, xlator_t *this, case TRUNCATE: STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate, - &local->loc, local->offset); + &local->loc, local->offset, local->xdata); break; case FTRUNCATE: STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate, - local->fd, local->offset); + local->fd, local->offset, local->xdata); break; } return 0; unwind: + gf_log (this->name, GF_LOG_ERROR, "truncate failed with ret: %d, " + "error: %s", op_ret, strerror (op_errno)); if (local->op == TRUNCATE) loc_wipe (&local->loc); + if (local->xdata) + dict_unref (local->xdata); + if (local->fd) + fd_unref (local->fd); - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL); + STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL, xdata); return 0; } int pl_truncate (call_frame_t *frame, xlator_t *this, - loc_t *loc, off_t offset) + loc_t *loc, off_t offset, dict_t *xdata) { - struct _truncate_ops *local = NULL; + pl_local_t *local = NULL; - local = CALLOC (1, sizeof (struct _truncate_ops)); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto unwind; - } + local = mem_get0 (this->local_pool); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); local->op = TRUNCATE; local->offset = offset; loc_copy (&local->loc, loc); + if (xdata) + local->xdata = dict_ref (xdata); frame->local = local; STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc); + FIRST_CHILD (this)->fops->stat, loc, NULL); return 0; unwind: - STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL); + gf_log (this->name, GF_LOG_ERROR, "truncate for %s failed with ret: %d, " + "error: %s", loc->path, -1, strerror (ENOMEM)); + STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } @@ -207,33 +257,54 @@ unwind: int pl_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) + fd_t *fd, 
off_t offset, dict_t *xdata) { - struct _truncate_ops *local = NULL; + pl_local_t *local = NULL; - local = CALLOC (1, sizeof (struct _truncate_ops)); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto unwind; - } + local = mem_get0 (this->local_pool); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); local->op = FTRUNCATE; local->offset = offset; - local->fd = fd; + local->fd = fd_ref (fd); + if (xdata) + local->xdata = dict_ref (xdata); frame->local = local; STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd); + FIRST_CHILD(this)->fops->fstat, fd, xdata); return 0; unwind: - STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL); + gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with ret: %d, " + "error: %s", -1, strerror (ENOMEM)); + STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } +int +pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd) +{ + posix_lock_t *l = NULL; + int found = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if ((l->fd_num == fd_to_fdnum(fd))) { + found = 1; + break; + } + } + + } + pthread_mutex_unlock (&pl_inode->mutex); + return found; +} + static void delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd) { @@ -263,7 +334,8 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd) list_for_each_entry_safe (l, tmp, &blocked_list, list) { list_del_init(&l->list); - STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock); + STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock, + NULL); __destroy_lock (l); } @@ -275,7 +347,7 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd) static void __delete_locks_of_owner (pl_inode_t *pl_inode, - transport_t *transport, uint64_t owner) + client_t *client, gf_lkowner_t *owner) { posix_lock_t *tmp = NULL; posix_lock_t *l = NULL; @@ -283,16 +355,18 @@ __delete_locks_of_owner 
(pl_inode_t *pl_inode, /* TODO: what if it is a blocked lock with pending l->frame */ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if ((l->transport == transport) - && (l->owner == owner)) { - gf_log ("posix-locks", GF_LOG_TRACE, + if (l->blocked) + continue; + if ((l->client == client) && + is_same_lkowner (&l->owner, owner)) { + gf_log ("posix-locks", GF_LOG_TRACE, " Flushing lock" - "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" state: %s", - l->fl_type == F_UNLCK ? "Unlock" : "Lock", - l->client_pid, - l->owner, - l->user_flock.l_start, - l->user_flock.l_len, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", + l->client_pid, + lkowner_utoa (&l->owner), + l->user_flock.l_start, + l->user_flock.l_len, l->blocked == 1 ? "Blocked" : "Active"); __delete_lock (pl_inode, l); @@ -303,52 +377,578 @@ __delete_locks_of_owner (pl_inode_t *pl_inode, return; } + +int32_t +pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; + +} + +int32_t +pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int32_t op_errno = EINVAL; + int op_ret = -1; + int32_t bcount = 0; + int32_t gcount = 0; + char key[PATH_MAX] = {0, }; + char *lk_summary = NULL; + pl_inode_t *pl_inode = NULL; + dict_t *dict = NULL; + clrlk_args args = {0,}; + char *brickname = NULL; + + if (!name) + goto usual; + + if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD))) + goto usual; + + if (clrlk_parse_args (name, &args)) { + op_errno = EINVAL; + goto out; + } + + dict = dict_new (); + if (!dict) { + op_errno = ENOMEM; + goto out; + } + + pl_inode = pl_inode_get (this, loc->inode); + if (!pl_inode) { + op_errno = ENOMEM; + goto out; + } + + switch (args.type) { + case CLRLK_INODE: + case CLRLK_ENTRY: + 
op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode, + &args, &bcount, + &gcount, + &op_errno); + if (op_ret) + goto out; + break; + case CLRLK_POSIX: + op_ret = clrlk_clear_posixlk (this, pl_inode, &args, + &bcount, &gcount, + &op_errno); + if (op_ret) + goto out; + break; + case CLRLK_TYPE_MAX: + op_errno = EINVAL; + goto out; + } + + op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname); + if (op_ret) { + gf_log (this->name, GF_LOG_WARNING, + "Couldn't get brickname"); + } else { + op_ret = format_brickname(brickname); + if (op_ret) { + gf_log (this->name, GF_LOG_WARNING, + "Couldn't format brickname"); + GF_FREE(brickname); + brickname = NULL; + } + } + + if (!gcount && !bcount) { + if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d " + "granted locks=%d", + (brickname == NULL)? this->name : brickname, + (args.type == CLRLK_INODE)? "inode": + (args.type == CLRLK_ENTRY)? "entry": + (args.type == CLRLK_POSIX)? 
"posix": " ", + bcount, gcount) == -1) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + strncpy (key, name, strlen (name)); + if (dict_set_dynstr (dict, key, lk_summary)) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + op_ret = 0; +out: + GF_FREE(brickname); + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + + GF_FREE (args.opts); + if (op_ret && lk_summary) + GF_FREE (lk_summary); + if (dict) + dict_unref (dict); + return 0; + +usual: + STACK_WIND (frame, pl_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; +} + +static int +format_brickname(char *brickname) +{ + int ret = -1; + char *hostname = NULL; + char *volume = NULL; + char *saveptr = NULL; + + if (!brickname) + goto out; + + strtok_r(brickname, ":", &saveptr); + hostname = gf_strdup(strtok_r(NULL, ":", &saveptr)); + if (hostname == NULL) + goto out; + volume = gf_strdup(strtok_r(NULL, ".", &saveptr)); + if (volume == NULL) + goto out; + + sprintf(brickname, "%s:%s", hostname, volume); + + ret = 0; +out: + GF_FREE(hostname); + GF_FREE(volume); + return ret; +} + +static int +fetch_pathinfo (xlator_t *this, inode_t *inode, int32_t *op_errno, + char **brickname) +{ + int ret = -1; + loc_t loc = {0, }; + dict_t *dict = NULL; + + if (!brickname) + goto out; + + if (!op_errno) + goto out; + + uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref (inode); + + ret = syncop_getxattr (FIRST_CHILD(this), &loc, &dict, + GF_XATTR_PATHINFO_KEY); + if (ret < 0) { + *op_errno = errno; + goto out; + } + + ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, brickname); + if (ret) + goto out; + + *brickname = gf_strdup(*brickname); + if (*brickname == NULL) { + ret = -1; + goto out; + } + + ret = 0; +out: + if (dict != NULL) { + dict_unref (dict); + } + loc_wipe(&loc); + + return ret; +} + + +int +pl_lockinfo_get_brickname (xlator_t *this, inode_t *inode, int32_t *op_errno) +{ + int ret = -1; + posix_locks_private_t *priv = NULL; + 
char *brickname = NULL; + char *end = NULL; + char *tmp = NULL; + + priv = this->private; + + ret = fetch_pathinfo (this, inode, op_errno, &brickname); + if (ret) + goto out; + + end = strrchr (brickname, ':'); + if (!end) { + GF_FREE(brickname); + ret = -1; + goto out; + } + + tmp = brickname; + brickname = gf_strndup (brickname, (end - brickname)); + if (brickname == NULL) { + ret = -1; + goto out; + } + + priv->brickname = brickname; + ret = 0; +out: + GF_FREE(tmp); + return ret; +} + +char * +pl_lockinfo_key (xlator_t *this, inode_t *inode, int32_t *op_errno) +{ + posix_locks_private_t *priv = NULL; + char *key = NULL; + int ret = 0; + + priv = this->private; + + if (priv->brickname == NULL) { + ret = pl_lockinfo_get_brickname (this, inode, op_errno); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "cannot get brickname"); + goto out; + } + } + + key = priv->brickname; +out: + return key; +} + +int32_t +pl_fgetxattr_handle_lockinfo (xlator_t *this, fd_t *fd, + dict_t *dict, int32_t *op_errno) +{ + pl_inode_t *pl_inode = NULL; + char *key = NULL, *buf = NULL; + int32_t op_ret = 0; + unsigned long fdnum = 0, len = 0; + dict_t *tmp = NULL; + + pl_inode = pl_inode_get (this, fd->inode); + + if (!pl_inode) { + gf_log (this->name, GF_LOG_DEBUG, "Could not get inode."); + *op_errno = EBADFD; + op_ret = -1; + goto out; + } + + if (!pl_locks_by_fd (pl_inode, fd)) { + op_ret = 0; + goto out; + } + + fdnum = fd_to_fdnum (fd); + + key = pl_lockinfo_key (this, fd->inode, op_errno); + if (key == NULL) { + op_ret = -1; + goto out; + } + + tmp = dict_new (); + if (tmp == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + op_ret = dict_set_uint64 (tmp, key, fdnum); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value " + "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)", + fdnum, fd, uuid_utoa (fd->inode->gfid), + strerror (*op_errno)); + goto out; + } + + len = dict_serialized_length 
(tmp); + if (len < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_WARNING, + "dict_serialized_length failed (%s) while handling " + "lockinfo for fd (ptr:%p inode-gfid:%s)", + strerror (*op_errno), fd, uuid_utoa (fd->inode->gfid)); + goto out; + } + + buf = GF_CALLOC (1, len, gf_common_mt_char); + if (buf == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + op_ret = dict_serialize (tmp, buf); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_WARNING, + "dict_serialize failed (%s) while handling lockinfo " + "for fd (ptr: %p inode-gfid:%s)", strerror (*op_errno), + fd, uuid_utoa (fd->inode->gfid)); + goto out; + } + + op_ret = dict_set_dynptr (dict, GF_XATTR_LOCKINFO_KEY, buf, len); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value " + "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)", + fdnum, fd, uuid_utoa (fd->inode->gfid), + strerror (*op_errno)); + goto out; + } + + buf = NULL; +out: + if (tmp != NULL) { + dict_unref (tmp); + } + + if (buf != NULL) { + GF_FREE (buf); + } + + return op_ret; +} + + +int32_t +pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + int32_t op_ret = 0, op_errno = 0; + dict_t *dict = NULL; + + if (!name) { + goto usual; + } + + if (strcmp (name, GF_XATTR_LOCKINFO_KEY) == 0) { + dict = dict_new (); + if (dict == NULL) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + op_ret = pl_fgetxattr_handle_lockinfo (this, fd, dict, + &op_errno); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "getting lockinfo on fd (ptr:%p inode-gfid:%s) " + "failed (%s)", fd, uuid_utoa (fd->inode->gfid), + strerror (op_errno)); + } + + goto unwind; + } else { + goto usual; + } + +unwind: + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); + if (dict != NULL) { + dict_unref (dict); + } + + return 0; + +usual: + STACK_WIND (frame, 
default_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; +} + +int32_t +pl_migrate_locks (call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num, + int32_t *op_errno) +{ + pl_inode_t *pl_inode = NULL; + uint64_t newfd_num = 0; + posix_lock_t *l = NULL; + int32_t op_ret = 0; + + newfd_num = fd_to_fdnum (newfd); + + pl_inode = pl_inode_get (frame->this, newfd->inode); + if (pl_inode == NULL) { + op_ret = -1; + *op_errno = EBADFD; + goto out; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (l->fd_num == oldfd_num) { + l->fd_num = newfd_num; + l->client = frame->root->client; + } + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + op_ret = 0; +out: + return op_ret; +} + +int32_t +pl_fsetxattr_handle_lockinfo (call_frame_t *frame, fd_t *fd, char *lockinfo_buf, + int len, int32_t *op_errno) +{ + int32_t op_ret = -1; + dict_t *lockinfo = NULL; + uint64_t oldfd_num = 0; + char *key = NULL; + + lockinfo = dict_new (); + if (lockinfo == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + op_ret = dict_unserialize (lockinfo_buf, len, &lockinfo); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + goto out; + } + + key = pl_lockinfo_key (frame->this, fd->inode, op_errno); + if (key == NULL) { + op_ret = -1; + goto out; + } + + op_ret = dict_get_uint64 (lockinfo, key, &oldfd_num); + + if (oldfd_num == 0) { + op_ret = 0; + goto out; + } + + op_ret = pl_migrate_locks (frame, fd, oldfd_num, op_errno); + if (op_ret < 0) { + gf_log (frame->this->name, GF_LOG_WARNING, + "migration of locks from oldfd (ptr:%p) to newfd " + "(ptr:%p) (inode-gfid:%s)", (void *)oldfd_num, fd, + uuid_utoa (fd->inode->gfid)); + goto out; + } + +out: + dict_unref (lockinfo); + + return op_ret; +} + +int32_t +pl_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + int32_t op_ret = 0, op_errno = 0; + void *lockinfo_buf 
= NULL; + int len = 0; + + op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY, + &lockinfo_buf, &len); + if (lockinfo_buf == NULL) { + goto usual; + } + + op_ret = pl_fsetxattr_handle_lockinfo (frame, fd, lockinfo_buf, len, + &op_errno); + if (op_ret < 0) { + goto unwind; + } + +usual: + STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); + return 0; +} + int32_t pl_opendir_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd) + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, dict_t *xdata) { - int dummy = 1; - int ret = -1; + pl_fdctx_t *fdctx = NULL; if (op_ret < 0) goto unwind; - ret = fd_ctx_set (fd, this, dummy); - if (ret != 0) - gf_log (this->name, GF_LOG_ERROR, - "setting context for fd=%p in locks failed.", fd); + fdctx = pl_check_n_create_fdctx (this, fd); + if (!fdctx) { + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } unwind: - STACK_UNWIND_STRICT (opendir, + STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, - fd); - return 0; + fd, xdata); + return 0; } -int32_t +int32_t pl_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd) + loc_t *loc, fd_t *fd, dict_t *xdata) { - STACK_WIND (frame, - pl_opendir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, - loc, fd); - return 0; + STACK_WIND (frame, + pl_opendir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, + loc, fd, xdata); + return 0; } int pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata); return 0; } @@ -356,40 +956,35 @@ pl_flush_cbk (call_frame_t *frame, void *cookie, 
xlator_t *this, int pl_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { - posix_locks_private_t *priv = NULL; - pl_inode_t *pl_inode = NULL; - uint64_t owner = -1; - - priv = this->private; - owner = frame->root->lk_owner; + pl_inode_t *pl_inode = NULL; pl_inode = pl_inode_get (this, fd->inode); if (!pl_inode) { gf_log (this->name, GF_LOG_DEBUG, "Could not get inode."); - STACK_UNWIND_STRICT (flush, frame, -1, EBADFD); + STACK_UNWIND_STRICT (flush, frame, -1, EBADFD, NULL); return 0; } pl_trace_flush (this, frame, fd); - if (owner == 0) { + if (frame->root->lk_owner.len == 0) { /* Handle special case when protocol/server sets lk-owner to zero. * This usually happens due to a client disconnection. Hence, free * all locks opened with this fd. */ gf_log (this->name, GF_LOG_TRACE, - "Releasing all locks with fd %p", fd); + "Releasing all locks with fd %p", fd); delete_locks_of_fd (this, pl_inode, fd); goto wind; } pthread_mutex_lock (&pl_inode->mutex); { - __delete_locks_of_owner (pl_inode, frame->root->trans, - owner); + __delete_locks_of_owner (pl_inode, frame->root->client, + &frame->root->lk_owner); } pthread_mutex_unlock (&pl_inode->mutex); @@ -399,28 +994,29 @@ pl_flush (call_frame_t *frame, xlator_t *this, wind: STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd); + FIRST_CHILD(this)->fops->flush, fd, xdata); return 0; } int pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - int dummy = 1; - int ret = -1; + pl_fdctx_t *fdctx = NULL; if (op_ret < 0) goto unwind; - ret = fd_ctx_set (fd, this, dummy); - if (ret != 0) - gf_log (this->name, GF_LOG_ERROR, - "setting context for fd=%p in locks failed.", fd); + fdctx = pl_check_n_create_fdctx (this, fd); + if (!fdctx) { + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } unwind: - STACK_UNWIND_STRICT (open, frame, op_ret, 
op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); return 0; } @@ -428,12 +1024,11 @@ unwind: int pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) + fd_t *fd, dict_t *xdata) { - /* why isn't O_TRUNC being handled ? */ STACK_WIND (frame, pl_open_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, - loc, flags & ~O_TRUNC, fd, wbflags); + loc, flags, fd, xdata); return 0; } @@ -443,22 +1038,23 @@ int pl_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent) + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - int dummy = 1; - int ret = -1; + pl_fdctx_t *fdctx = NULL; if (op_ret < 0) goto unwind; - ret = fd_ctx_set (fd, this, dummy); - if (ret != 0) - gf_log (this->name, GF_LOG_ERROR, - "setting context for fd=%p in locks failed.", fd); + fdctx = pl_check_n_create_fdctx (this, fd); + if (!fdctx) { + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } unwind: STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } @@ -466,11 +1062,12 @@ unwind: int pl_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) + loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) { STACK_WIND (frame, pl_create_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, - loc, flags, mode, fd); + loc, flags, mode, umask, fd, xdata); return 0; } @@ -479,10 +1076,10 @@ int pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *stbuf, - struct iobref *iobref) + struct iobref *iobref, dict_t *xdata) { STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, - vector, count, stbuf, iobref); + vector, count, stbuf, 
iobref, xdata); return 0; } @@ -490,9 +1087,10 @@ pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } @@ -522,7 +1120,7 @@ do_blocked_rw (pl_inode_t *pl_inode) list_for_each_entry_safe (rw, tmp, &wind_list, list) { list_del_init (&rw->list); call_resume (rw->stub); - free (rw); + GF_FREE (rw); } return; @@ -550,12 +1148,12 @@ __rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region, int -pl_readv_cont (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset) +pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { STACK_WIND (frame, pl_readv_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, - fd, size, offset); + fd, size, offset, flags, xdata); return 0; } @@ -563,7 +1161,7 @@ pl_readv_cont (call_frame_t *frame, xlator_t *this, int pl_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset) + fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { posix_locks_private_t *priv = NULL; pl_inode_t *pl_inode = NULL; @@ -580,7 +1178,7 @@ pl_readv (call_frame_t *frame, xlator_t *this, if (priv->mandatory && pl_inode->mandatory) { region.fl_start = offset; region.fl_end = offset + size - 1; - region.transport = frame->root->trans; + region.client = frame->root->client; region.fd_num = fd_to_fdnum(fd); region.client_pid = frame->root->pid; region.owner = frame->root->lk_owner; @@ -601,23 +1199,21 @@ pl_readv (call_frame_t *frame, xlator_t *this, goto unlock; } - rw = CALLOC (1, sizeof (*rw)); + rw = GF_CALLOC (1, sizeof (*rw), + gf_locks_mt_pl_rw_req_t); if (!rw) { - gf_log 
(this->name, GF_LOG_ERROR, - "Out of memory."); op_errno = ENOMEM; op_ret = -1; goto unlock; } rw->stub = fop_readv_stub (frame, pl_readv_cont, - fd, size, offset); + fd, size, offset, flags, + xdata); if (!rw->stub) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); op_errno = ENOMEM; op_ret = -1; - free (rw); + GF_FREE (rw); goto unlock; } @@ -633,12 +1229,12 @@ pl_readv (call_frame_t *frame, xlator_t *this, if (wind_needed) { STACK_WIND (frame, pl_readv_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, - fd, size, offset); + fd, size, offset, flags, xdata); } if (op_ret == -1) STACK_UNWIND_STRICT (readv, frame, -1, op_errno, - NULL, 0, NULL, NULL); + NULL, 0, NULL, NULL, NULL); return 0; } @@ -647,11 +1243,11 @@ pl_readv (call_frame_t *frame, xlator_t *this, int pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int count, off_t offset, - struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { STACK_WIND (frame, pl_writev_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, - fd, vector, count, offset, iobref); + fd, vector, count, offset, flags, iobref, xdata); return 0; } @@ -660,7 +1256,7 @@ pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, int pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { posix_locks_private_t *priv = NULL; pl_inode_t *pl_inode = NULL; @@ -670,14 +1266,13 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, int op_errno = 0; char wind_needed = 1; - priv = this->private; pl_inode = pl_inode_get (this, fd->inode); if (priv->mandatory && pl_inode->mandatory) { region.fl_start = offset; region.fl_end = offset + iov_length (vector, count) - 1; - region.transport = frame->root->trans; + region.client = frame->root->client; region.fd_num = fd_to_fdnum(fd); region.client_pid = frame->root->pid; 
region.owner = frame->root->lk_owner; @@ -698,10 +1293,9 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, goto unlock; } - rw = CALLOC (1, sizeof (*rw)); + rw = GF_CALLOC (1, sizeof (*rw), + gf_locks_mt_pl_rw_req_t); if (!rw) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); op_errno = ENOMEM; op_ret = -1; goto unlock; @@ -709,13 +1303,11 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, rw->stub = fop_writev_stub (frame, pl_writev_cont, fd, vector, count, offset, - iobref); + flags, iobref, xdata); if (!rw->stub) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); op_errno = ENOMEM; op_ret = -1; - free (rw); + GF_FREE (rw); goto unlock; } @@ -731,51 +1323,221 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, if (wind_needed) STACK_WIND (frame, pl_writev_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, - fd, vector, count, offset, iobref); + fd, vector, count, offset, flags, iobref, xdata); if (op_ret == -1) - STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL); + STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, + NULL); return 0; } +static int +__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd) +{ + int found = 0; + posix_lock_t *l = NULL; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if ((l->fd_num == fd_to_fdnum(fd))) { + found = 1; + break; + } + } + + return found; +} + +static posix_lock_t * +lock_dup (posix_lock_t *lock) +{ + posix_lock_t *new_lock = NULL; + + new_lock = new_posix_lock (&lock->user_flock, lock->client, + lock->client_pid, &lock->owner, + (fd_t *)lock->fd_num); + return new_lock; +} + +static int +__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, + pl_fdctx_t *fdctx) +{ + posix_lock_t *l = NULL; + posix_lock_t *duplock = NULL; + int ret = 0; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if ((l->fd_num == fd_to_fdnum(fd))) { + duplock = lock_dup (l); + if (!duplock) { + ret = -1; + break; + } + + list_add_tail (&duplock->list, 
&fdctx->locks_list); + } + } + + return ret; +} + +static int +__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, + pl_fdctx_t *fdctx) +{ + int ret = 0; + + ret = __dup_locks_to_fdctx (pl_inode, fd, fdctx); + if (ret) + goto out; + +out: + return ret; + +} + +static void +pl_mark_eol_lock (posix_lock_t *lock) +{ + lock->user_flock.l_type = GF_LK_EOL; + return; +} + +static posix_lock_t * +__get_next_fdctx_lock (pl_fdctx_t *fdctx) +{ + posix_lock_t *lock = NULL; + + GF_ASSERT (fdctx); + + if (list_empty (&fdctx->locks_list)) { + gf_log (THIS->name, GF_LOG_DEBUG, + "fdctx lock list empty"); + goto out; + } + + lock = list_entry (fdctx->locks_list.next, typeof (*lock), + list); + + GF_ASSERT (lock); + + list_del_init (&lock->list); + +out: + return lock; +} + +static int +__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock) +{ + posix_lock_t *lock = NULL; + int ret = 0; + + GF_ASSERT (fdctx); + + lock = __get_next_fdctx_lock (fdctx); + if (!lock) { + gf_log (THIS->name, GF_LOG_DEBUG, + "marking EOL in reqlock"); + pl_mark_eol_lock (reqlock); + goto out; + } + + reqlock->user_flock = lock->user_flock; + reqlock->fl_start = lock->fl_start; + reqlock->fl_type = lock->fl_type; + reqlock->fl_end = lock->fl_end; + reqlock->owner = lock->owner; + +out: + if (lock) + __destroy_lock (lock); + + return ret; +} + +static int +pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode, + fd_t *fd, posix_lock_t *reqlock) +{ + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + int ret = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (!__fd_has_locks (pl_inode, fd)) { + gf_log (this->name, GF_LOG_DEBUG, + "fd=%p has no active locks", fd); + ret = 0; + goto unlock; + } + + gf_log (this->name, GF_LOG_DEBUG, + "There are active locks on fd"); + + ret = fd_ctx_get (fd, this, &tmp); + fdctx = (pl_fdctx_t *)(long) tmp; + + if (list_empty (&fdctx->locks_list)) { + gf_log (this->name, GF_LOG_TRACE, + "no fdctx -> copying all locks on fd"); + + ret = __copy_locks_to_fdctx (pl_inode, 
fd, fdctx); + if (ret) { + goto unlock; + } + + ret = __set_next_lock_fd (fdctx, reqlock); + + } else { + gf_log (this->name, GF_LOG_TRACE, + "fdctx present -> returning the next lock"); + ret = __set_next_lock_fd (fdctx, reqlock); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "could not get next lock of fd"); + goto unlock; + } + } + } + +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + return ret; + +} int pl_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct flock *flock) + fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - transport_t *transport = NULL; - pid_t client_pid = 0; - uint64_t owner = 0; - posix_locks_private_t *priv = NULL; - pl_inode_t *pl_inode = NULL; - int op_ret = 0; - int op_errno = 0; - int can_block = 0; - posix_lock_t *reqlock = NULL; - posix_lock_t *conf = NULL; - int ret = 0; - - transport = frame->root->trans; - client_pid = frame->root->pid; - owner = frame->root->lk_owner; - priv = this->private; + pl_inode_t *pl_inode = NULL; + int op_ret = 0; + int op_errno = 0; + int can_block = 0; + posix_lock_t *reqlock = NULL; + posix_lock_t *conf = NULL; + int ret = 0; + + if ((flock->l_start < 0) || (flock->l_len < 0)) { + op_ret = -1; + op_errno = EINVAL; + goto unwind; + } pl_inode = pl_inode_get (this, fd->inode); if (!pl_inode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); op_ret = -1; op_errno = ENOMEM; goto unwind; } - reqlock = new_posix_lock (flock, transport, client_pid, - owner, fd); + reqlock = new_posix_lock (flock, frame->root->client, frame->root->pid, + &frame->root->lk_owner, fd); if (!reqlock) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); op_ret = -1; op_errno = ENOMEM; goto unwind; @@ -785,6 +1547,68 @@ pl_lk (call_frame_t *frame, xlator_t *this, switch (cmd) { + case F_RESLK_LCKW: + can_block = 1; + + /* fall through */ + case F_RESLK_LCK: + memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); + reqlock->frame = frame; + reqlock->this = this; + 
+ ret = pl_reserve_setlk (this, pl_inode, reqlock, + can_block); + if (ret < 0) { + if (can_block) + goto out; + + op_ret = -1; + op_errno = -ret; + __destroy_lock (reqlock); + goto unwind; + } + /* Finally a getlk and return the call */ + conf = pl_getlk (pl_inode, reqlock); + if (conf) + posix_lock_to_flock (conf, flock); + break; + + case F_RESLK_UNLCK: + reqlock->frame = frame; + reqlock->this = this; + ret = pl_reserve_unlock (this, pl_inode, reqlock); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + } + __destroy_lock (reqlock); + goto unwind; + + break; + + case F_GETLK_FD: + reqlock->frame = frame; + reqlock->this = this; + ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); + GF_ASSERT (ret >= 0); + + ret = pl_getlk_fd (this, pl_inode, fd, reqlock); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "getting locks on fd failed"); + op_ret = -1; + op_errno = ENOLCK; + goto unwind; + } + + gf_log (this->name, GF_LOG_TRACE, + "Replying with a lock on fd for healing"); + + posix_lock_to_flock (reqlock, flock); + __destroy_lock (reqlock); + + break; + #if F_GETLK != F_GETLK64 case F_GETLK64: #endif @@ -809,12 +1633,18 @@ pl_lk (call_frame_t *frame, xlator_t *this, case F_SETLK64: #endif case F_SETLK: - memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); + memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); + ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); + if (ret < 0) { + gf_log (this->name, GF_LOG_TRACE, + "Lock blocked due to conflicting reserve lock"); + goto out; + } ret = pl_setlk (this, pl_inode, reqlock, can_block); if (ret == -1) { - if (can_block) { + if ((can_block) && (F_UNLCK != flock->l_type)) { pl_trace_block (this, frame, fd, NULL, cmd, flock, NULL); goto out; } @@ -822,13 +1652,22 @@ pl_lk (call_frame_t *frame, xlator_t *this, op_ret = -1; op_errno = EAGAIN; __destroy_lock (reqlock); + + } else if ((0 == ret) && (F_UNLCK == flock->l_type)) { + /* For NLM's last "unlock on fd" detection 
*/ + if (pl_locks_by_fd (pl_inode, fd)) + flock->l_type = F_RDLCK; + else + flock->l_type = F_UNLCK; } } unwind: pl_trace_out (this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL); pl_update_refkeeper (this, fd->inode); - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock); + + + STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata); out: return 0; } @@ -843,122 +1682,123 @@ pl_forget (xlator_t *this, posix_lock_t *ext_tmp = NULL; posix_lock_t *ext_l = NULL; - struct list_head posixlks_released; + struct list_head posixlks_released; pl_inode_lock_t *ino_tmp = NULL; pl_inode_lock_t *ino_l = NULL; - struct list_head inodelks_released; + struct list_head inodelks_released; pl_rw_req_t *rw_tmp = NULL; pl_rw_req_t *rw_req = NULL; pl_entry_lock_t *entry_tmp = NULL; pl_entry_lock_t *entry_l = NULL; - struct list_head entrylks_released; + struct list_head entrylks_released; pl_dom_list_t *dom = NULL; pl_dom_list_t *dom_tmp = NULL; - INIT_LIST_HEAD (&posixlks_released); - INIT_LIST_HEAD (&inodelks_released); - INIT_LIST_HEAD (&entrylks_released); + INIT_LIST_HEAD (&posixlks_released); + INIT_LIST_HEAD (&inodelks_released); + INIT_LIST_HEAD (&entrylks_released); pl_inode = pl_inode_get (this, inode); - pthread_mutex_lock (&pl_inode->mutex); - { + pthread_mutex_lock (&pl_inode->mutex); + { - if (!list_empty (&pl_inode->rw_list)) { - gf_log (this->name, GF_LOG_WARNING, - "Pending R/W requests found, releasing."); + if (!list_empty (&pl_inode->rw_list)) { + gf_log (this->name, GF_LOG_WARNING, + "Pending R/W requests found, releasing."); - list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list, - list) { + list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list, + list) { - list_del (&rw_req->list); - FREE (rw_req); - } - } + list_del (&rw_req->list); + GF_FREE (rw_req); + } + } - if (!list_empty (&pl_inode->ext_list)) { - gf_log (this->name, GF_LOG_WARNING, - "Pending fcntl locks found, releasing."); + if (!list_empty (&pl_inode->ext_list)) { + 
gf_log (this->name, GF_LOG_WARNING, + "Pending fcntl locks found, releasing."); - list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list, - list) { + list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list, + list) { - __delete_lock (pl_inode, ext_l); - if (ext_l->blocked) { - list_add_tail (&ext_l->list, &posixlks_released); - continue; - } - __destroy_lock (ext_l); - } - } + __delete_lock (pl_inode, ext_l); + if (ext_l->blocked) { + list_add_tail (&ext_l->list, &posixlks_released); + continue; + } + __destroy_lock (ext_l); + } + } - list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) { + list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) { - if (!list_empty (&dom->inodelk_list)) { - gf_log (this->name, GF_LOG_WARNING, - "Pending inode locks found, releasing."); + if (!list_empty (&dom->inodelk_list)) { + gf_log (this->name, GF_LOG_WARNING, + "Pending inode locks found, releasing."); - list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) { - __delete_inode_lock (ino_l); - __destroy_inode_lock (ino_l); - } + list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) { + __delete_inode_lock (ino_l); + __pl_inodelk_unref (ino_l); + } - list_splice_init (&dom->blocked_inodelks, &inodelks_released); - + list_splice_init (&dom->blocked_inodelks, &inodelks_released); - } - if (!list_empty (&dom->entrylk_list)) { - gf_log (this->name, GF_LOG_WARNING, - "Pending entry locks found, releasing."); - list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) { - list_del_init (&entry_l->domain_list); + } + if (!list_empty (&dom->entrylk_list)) { + gf_log (this->name, GF_LOG_WARNING, + "Pending entry locks found, releasing."); - if (entry_l->basename) - FREE (entry_l->basename); - FREE (entry_l); - } + list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) { + list_del_init (&entry_l->domain_list); - list_splice_init (&dom->blocked_entrylks, 
&entrylks_released); - } + GF_FREE ((char *)entry_l->basename); + GF_FREE (entry_l->connection_id); + GF_FREE (entry_l); + } - list_del (&dom->inode_list); - gf_log ("posix-locks", GF_LOG_TRACE, - " Cleaning up domain: %s", dom->domain); - FREE (dom->domain); - FREE (dom); - } + list_splice_init (&dom->blocked_entrylks, &entrylks_released); + } - } - pthread_mutex_unlock (&pl_inode->mutex); + list_del (&dom->inode_list); + gf_log ("posix-locks", GF_LOG_TRACE, + " Cleaning up domain: %s", dom->domain); + GF_FREE ((char *)(dom->domain)); + GF_FREE (dom); + } - list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) { + } + pthread_mutex_unlock (&pl_inode->mutex); - STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0, &ext_l->user_flock); - __destroy_lock (ext_l); - } + list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) { - list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) { + STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0, + &ext_l->user_flock, NULL); + __destroy_lock (ext_l); + } + + list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) { - STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0); - __destroy_inode_lock (ino_l); - } + STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0, NULL); + __pl_inodelk_unref (ino_l); + } - list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) { + list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) { - STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0); - if (entry_l->basename) - FREE (entry_l->basename); - FREE (entry_l); + STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0, NULL); + GF_FREE ((char *)entry_l->basename); + GF_FREE (entry_l->connection_id); + GF_FREE (entry_l); - } + } - FREE (pl_inode); + GF_FREE (pl_inode); return 0; } @@ -969,8 +1809,14 @@ pl_release (xlator_t *this, fd_t *fd) pl_inode_t *pl_inode = NULL; uint64_t tmp_pl_inode = 0; int ret = -1; + uint64_t tmp = 0; + pl_fdctx_t *fdctx = 
NULL; - ret = inode_ctx_get (fd->inode, this, &tmp_pl_inode); + if (fd == NULL) { + goto out; + } + + ret = inode_ctx_get (fd->inode, this, &tmp_pl_inode); if (ret != 0) goto out; @@ -982,11 +1828,48 @@ pl_release (xlator_t *this, fd_t *fd) "Releasing all locks with fd %p", fd); delete_locks_of_fd (this, pl_inode, fd); + pl_update_refkeeper (this, fd->inode); + ret = fd_ctx_del (fd, this, &tmp); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Could not get fdctx"); + goto out; + } + + fdctx = (pl_fdctx_t *)(long)tmp; + + GF_FREE (fdctx); out: return ret; } -static int32_t + +int +pl_releasedir (xlator_t *this, fd_t *fd) +{ + int ret = -1; + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + + if (fd == NULL) { + goto out; + } + + ret = fd_ctx_del (fd, this, &tmp); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Could not get fdctx"); + goto out; + } + + fdctx = (pl_fdctx_t *)(long)tmp; + + GF_FREE (fdctx); +out: + return ret; +} + +int32_t __get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode) { posix_lock_t *lock = NULL; @@ -994,16 +1877,6 @@ __get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode) list_for_each_entry (lock, &pl_inode->ext_list, list) { - gf_log (this->name, GF_LOG_DEBUG, - " XATTR DEBUG" - "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" state: %s", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lock->owner, - lock->user_flock.l_start, - lock->user_flock.l_len, - lock->blocked == 1 ? 
"Blocked" : "Active"); - count++; } @@ -1036,6 +1909,24 @@ out: } void +pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent, + char *basename, dict_t *dict) +{ + uint32_t entrylk = 0; + int ret = -1; + + if (!parent || !basename || !strlen (basename)) + goto out; + entrylk = check_entrylk_on_basename (this, parent, basename); +out: + ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK); + } +} + +void pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict) { @@ -1052,19 +1943,34 @@ pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode, } void -pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, - dict_t *dict) +pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict, + gf_boolean_t per_dom) { int32_t count = 0; int ret = -1; + char *domname = NULL; + + + if (per_dom){ + ret = dict_get_str (dict, GLUSTERFS_INODELK_DOM_COUNT, + &domname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "value for key %s",GLUSTERFS_INODELK_DOM_COUNT); + goto out; + } + } + + count = get_inodelk_count (this, inode, domname); - count = get_inodelk_count (this, inode); ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - " dict_set failed on key %s", GLUSTERFS_INODELK_COUNT); + gf_log (this->name, GF_LOG_DEBUG, "Failed to set count for " + "key %s", GLUSTERFS_INODELK_COUNT); } +out: + return; } void @@ -1085,56 +1991,63 @@ pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode, int32_t pl_lookup_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - dict_t *dict, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct iatt *buf, + dict_t *xdata, struct iatt *postparent) { pl_local_t *local = NULL; - if (!frame->local) { - goto 
out; - } + GF_VALIDATE_OR_GOTO (this->name, frame->local, out); - if (op_ret) { + if (op_ret) goto out; - } local = frame->local; + if (local->parent_entrylk_req) + pl_parent_entrylk_xattr_fill (this, local->loc.parent, + (char*)local->loc.name, xdata); if (local->entrylk_count_req) - pl_entrylk_xattr_fill (this, inode, dict); + pl_entrylk_xattr_fill (this, inode, xdata); if (local->inodelk_count_req) - pl_inodelk_xattr_fill (this, inode, dict); + pl_inodelk_xattr_fill (this, inode, xdata, _gf_false); + if (local->inodelk_dom_count_req) + pl_inodelk_xattr_fill (this, inode, xdata, _gf_true); if (local->posixlk_count_req) - pl_posixlk_xattr_fill (this, inode, dict); + pl_posixlk_xattr_fill (this, inode, xdata); +out: + local = frame->local; frame->local = NULL; - if (local != NULL) - FREE (local); + if (local != NULL) { + loc_wipe (&local->loc); + mem_put (local); + } -out: - STACK_UNWIND (frame, - op_ret, - op_errno, - inode, - buf, - dict, - postparent); - return 0; + STACK_UNWIND_STRICT ( + lookup, + frame, + op_ret, + op_errno, + inode, + buf, + xdata, + postparent); + return 0; } int32_t pl_lookup (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *xattr_req) + xlator_t *this, + loc_t *loc, + dict_t *xdata) { pl_local_t *local = NULL; int ret = -1; @@ -1143,41 +2056,114 @@ pl_lookup (call_frame_t *frame, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - local = CALLOC (1, sizeof (*local)); - if (!local) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - " Out of memory"); - goto out; + local = mem_get0 (this->local_pool); + GF_VALIDATE_OR_GOTO (this->name, local, out); + + if (xdata) { + if (dict_get (xdata, GLUSTERFS_ENTRYLK_COUNT)) + local->entrylk_count_req = 1; + if (dict_get (xdata, GLUSTERFS_INODELK_COUNT)) + local->inodelk_count_req = 1; + if (dict_get (xdata, GLUSTERFS_INODELK_DOM_COUNT)) + local->inodelk_dom_count_req = 1; + if (dict_get (xdata, GLUSTERFS_POSIXLK_COUNT)) + local->posixlk_count_req = 1; + if (dict_get (xdata, 
GLUSTERFS_PARENT_ENTRYLK)) + local->parent_entrylk_req = 1; } - if (dict_get (xattr_req, GLUSTERFS_ENTRYLK_COUNT)) - local->entrylk_count_req = 1; - if (dict_get (xattr_req, GLUSTERFS_INODELK_COUNT)) - local->inodelk_count_req = 1; - if (dict_get (xattr_req, GLUSTERFS_POSIXLK_COUNT)) - local->posixlk_count_req = 1; - frame->local = local; + loc_copy (&local->loc, loc); - STACK_WIND (frame, - pl_lookup_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - loc, - xattr_req); + STACK_WIND (frame, + pl_lookup_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + loc, xdata); ret = 0; out: if (ret == -1) - STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, NULL, NULL, NULL); + STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, + NULL, NULL, NULL); - return 0; + return 0; } +int +pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + pl_local_t *local = NULL; + gf_dirent_t *entry = NULL; + + local = frame->local; + + if (op_ret <= 0) + goto unwind; + + list_for_each_entry (entry, &entries->list, list) { + if (local->entrylk_count_req) + pl_entrylk_xattr_fill (this, entry->inode, entry->dict); + if (local->inodelk_count_req) + pl_inodelk_xattr_fill (this, entry->inode, entry->dict, + _gf_false); + if (local->inodelk_dom_count_req) + pl_inodelk_xattr_fill (this, entry->inode, entry->dict, + _gf_true); + if (local->posixlk_count_req) + pl_posixlk_xattr_fill (this, entry->inode, entry->dict); + } + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + + if (local) + mem_put (local); + + return 0; +} + +int +pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + pl_local_t *local = NULL; + + local = mem_get0 (this->local_pool); + GF_VALIDATE_OR_GOTO (this->name, local, out); + + if (dict) { + if (dict_get (dict, GLUSTERFS_ENTRYLK_COUNT)) + local->entrylk_count_req = 1; + if 
(dict_get (dict, GLUSTERFS_INODELK_COUNT)) + local->inodelk_count_req = 1; + if (dict_get (dict, GLUSTERFS_INODELK_DOM_COUNT)) + local->inodelk_dom_count_req = 1; + if (dict_get (dict, GLUSTERFS_POSIXLK_COUNT)) + local->posixlk_count_req = 1; + } + + frame->local = local; + + STACK_WIND (frame, pl_readdirp_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + fd, size, offset, dict); + + return 0; +out: + STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL); + return 0; +} + void -pl_dump_lock (char *str, int size, struct flock *flock, uint64_t owner) +pl_dump_lock (char *str, int size, struct gf_flock *flock, + gf_lkowner_t *owner, void *trans, char *conn_id, + time_t *granted_time, time_t *blkd_time, gf_boolean_t active) { - char *type_str = NULL; + char *type_str = NULL; + char granted[32] = {0,}; + char blocked[32] = {0,}; switch (flock->l_type) { case F_RDLCK: @@ -1194,12 +2180,35 @@ pl_dump_lock (char *str, int size, struct flock *flock, uint64_t owner) break; } - snprintf (str, size, "type=%s, start=%llu, len=%llu, pid=%llu, lk-owner=%llu", - type_str, (unsigned long long) flock->l_start, - (unsigned long long) flock->l_len, - (unsigned long long) flock->l_pid, - (unsigned long long) owner); - + if (active) { + if (blkd_time && *blkd_time == 0) { + snprintf (str, size, RANGE_GRNTD_FMT, + type_str, flock->l_whence, + (unsigned long long) flock->l_start, + (unsigned long long) flock->l_len, + (unsigned long long) flock->l_pid, + lkowner_utoa (owner), trans, conn_id, + ctime_r (granted_time, granted)); + } else { + snprintf (str, size, RANGE_BLKD_GRNTD_FMT, + type_str, flock->l_whence, + (unsigned long long) flock->l_start, + (unsigned long long) flock->l_len, + (unsigned long long) flock->l_pid, + lkowner_utoa (owner), trans, conn_id, + ctime_r (blkd_time, blocked), + ctime_r (granted_time, granted)); + } + } + else { + snprintf (str, size, RANGE_BLKD_FMT, + type_str, flock->l_whence, + (unsigned long long) flock->l_start, + (unsigned long 
long) flock->l_len, + (unsigned long long) flock->l_pid, + lkowner_utoa (owner), trans, conn_id, + ctime_r (blkd_time, blocked)); + } } @@ -1208,8 +2217,10 @@ __dump_entrylks (pl_inode_t *pl_inode) { pl_dom_list_t *dom = NULL; pl_entry_lock_t *lock = NULL; - int count = 0; - char key[GF_DUMP_MAX_BUF_LEN]; + char blocked[32] = {0,}; + char granted[32] = {0,}; + int count = 0; + char key[GF_DUMP_MAX_BUF_LEN] = {0,}; char tmp[256]; @@ -1218,7 +2229,7 @@ __dump_entrylks (pl_inode_t *pl_inode) count = 0; gf_proc_dump_build_key(key, - "xlator.feature.locks.lock-dump.domain", + "lock-dump.domain", "domain"); gf_proc_dump_write(key, "%s", dom->domain); @@ -1226,10 +2237,25 @@ __dump_entrylks (pl_inode_t *pl_inode) gf_proc_dump_build_key(key, "xlator.feature.locks.lock-dump.domain.entrylk", - "entrylk[%d](ACTIVE)",count ); - snprintf (tmp, 256," %s on %s", - lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : - "ENTRYLK_WRLCK", lock->basename); + "entrylk[%d](ACTIVE)", count ); + if (lock->blkd_time.tv_sec == 0 && lock->blkd_time.tv_usec == 0) { + snprintf (tmp, 256, ENTRY_GRNTD_FMT, + lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : + "ENTRYLK_WRLCK", lock->basename, + (unsigned long long) lock->client_pid, + lkowner_utoa (&lock->owner), lock->trans, + lock->connection_id, + ctime_r (&lock->granted_time.tv_sec, granted)); + } else { + snprintf (tmp, 256, ENTRY_BLKD_GRNTD_FMT, + lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : + "ENTRYLK_WRLCK", lock->basename, + (unsigned long long) lock->client_pid, + lkowner_utoa (&lock->owner), lock->trans, + lock->connection_id, + ctime_r (&lock->blkd_time.tv_sec, blocked), + ctime_r (&lock->granted_time.tv_sec, granted)); + } gf_proc_dump_write(key, tmp); @@ -1240,10 +2266,14 @@ __dump_entrylks (pl_inode_t *pl_inode) gf_proc_dump_build_key(key, "xlator.feature.locks.lock-dump.domain.entrylk", - "entrylk[%d](BLOCKED)",count ); - snprintf (tmp, 256," %s on %s state = Blocked", - lock->type == ENTRYLK_RDLCK ? 
"ENTRYLK_RDLCK" : - "ENTRYLK_WRLCK", lock->basename); + "entrylk[%d](BLOCKED)", count ); + snprintf (tmp, 256, ENTRY_BLKD_FMT, + lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : + "ENTRYLK_WRLCK", lock->basename, + (unsigned long long) lock->client_pid, + lkowner_utoa (&lock->owner), lock->trans, + lock->connection_id, + ctime_r (&lock->blkd_time.tv_sec, blocked)); gf_proc_dump_write(key, tmp); @@ -1280,17 +2310,23 @@ __dump_inodelks (pl_inode_t *pl_inode) count = 0; gf_proc_dump_build_key(key, - "xlator.feature.locks.lock-dump.domain", + "lock-dump.domain", "domain"); gf_proc_dump_write(key, "%s", dom->domain); list_for_each_entry (lock, &dom->inodelk_list, list) { gf_proc_dump_build_key(key, - "xlator.feature.locks.lock-dump.domain.inodelk", + "inodelk", "inodelk[%d](ACTIVE)",count ); - pl_dump_lock (tmp, 256, &lock->user_flock, lock->owner); + SET_FLOCK_PID (&lock->user_flock, lock); + pl_dump_lock (tmp, 256, &lock->user_flock, + &lock->owner, + lock->client, lock->connection_id, + &lock->granted_time.tv_sec, + &lock->blkd_time.tv_sec, + _gf_true); gf_proc_dump_write(key, tmp); count++; @@ -1299,9 +2335,14 @@ __dump_inodelks (pl_inode_t *pl_inode) list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { gf_proc_dump_build_key(key, - "xlator.feature.locks.lock-dump.domain.inodelk", + "inodelk", "inodelk[%d](BLOCKED)",count ); - pl_dump_lock (tmp, 256, &lock->user_flock, lock->owner); + SET_FLOCK_PID (&lock->user_flock, lock); + pl_dump_lock (tmp, 256, &lock->user_flock, + &lock->owner, + lock->client, lock->connection_id, + 0, &lock->blkd_time.tv_sec, + _gf_false); gf_proc_dump_write(key, tmp); count++; @@ -1333,19 +2374,20 @@ __dump_posixlks (pl_inode_t *pl_inode) list_for_each_entry (lock, &pl_inode->ext_list, list) { + SET_FLOCK_PID (&lock->user_flock, lock); gf_proc_dump_build_key(key, - "xlator.feature.locks.lock-dump.domain.posixlk", + "posixlk", "posixlk[%d](%s)", count, lock->blocked ? 
"BLOCKED" : "ACTIVE"); - pl_dump_lock (tmp, 256, &lock->user_flock, lock->owner); + pl_dump_lock (tmp, 256, &lock->user_flock, + &lock->owner, lock->client, NULL, + &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec, + (lock->blocked)? _gf_false: _gf_true); gf_proc_dump_write(key, tmp); count++; } - - - } void @@ -1366,78 +2408,220 @@ pl_dump_inode_priv (xlator_t *this, inode_t *inode) int ret = -1; uint64_t tmp_pl_inode = 0; pl_inode_t *pl_inode = NULL; - char key[GF_DUMP_MAX_BUF_LEN]; + char *pathname = NULL; + gf_boolean_t section_added = _gf_false; int count = 0; - if (!inode) - return -1; + if (!inode) { + errno = EINVAL; + goto out; + } - ret = inode_ctx_get (inode, this, &tmp_pl_inode); + ret = TRY_LOCK (&inode->lock); + if (ret) + goto out; + { + ret = __inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret) + goto unlock; + } +unlock: + UNLOCK (&inode->lock); + if (ret) + goto out; - if (ret != 0) + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + if (!pl_inode) { + ret = -1; + goto out; + } + + gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name); + section_added = _gf_true; + + /*We are safe to call __inode_path since we have the + * inode->table->lock */ + __inode_path (inode, NULL, &pathname); + if (pathname) + gf_proc_dump_write ("path", "%s", pathname); + + gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory); + + ret = pthread_mutex_trylock (&pl_inode->mutex); + if (ret) + goto out; + { + count = __get_entrylk_count (this, pl_inode); + if (count) { + gf_proc_dump_write("entrylk-count", "%d", count); + __dump_entrylks (pl_inode); + } + + count = __get_inodelk_count (this, pl_inode, NULL); + if (count) { + gf_proc_dump_write("inodelk-count", "%d", count); + __dump_inodelks (pl_inode); + } + + count = __get_posixlk_count (this, pl_inode); + if (count) { + gf_proc_dump_write("posixlk-count", "%d", count); + __dump_posixlks (pl_inode); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + +out: + GF_FREE (pathname); + + if (ret && 
inode) { + if (!section_added) + gf_proc_dump_add_section ("xlator.features.locks.%s." + "inode", this->name); + gf_proc_dump_write ("Unable to print lock state", "(Lock " + "acquisition failure) %s", + uuid_utoa (inode->gfid)); + } + return ret; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) return ret; - pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + ret = xlator_mem_acct_init (this, gf_locks_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } - if (!pl_inode) - return -1; + return ret; +} - gf_proc_dump_build_key(key, - "xlator.feature.locks.inode", - "%ld.mandatory",inode->ino); - gf_proc_dump_write(key, "%d", pl_inode->mandatory); +pl_ctx_t* +pl_ctx_get (client_t *client, xlator_t *xlator) +{ + void *tmp = NULL; + pl_ctx_t *ctx = NULL; - count = get_entrylk_count (this, inode); - gf_proc_dump_build_key(key, - "xlator.feature.locks.entrylk-count", - "%ld.entrylk-count", inode->ino); - gf_proc_dump_write(key, "%d", count); + client_ctx_get (client, xlator, &tmp); - dump_entrylks(pl_inode); + ctx = tmp; - count = get_inodelk_count (this, inode); - gf_proc_dump_build_key(key, - "xlator.feature.locks.inodelk-count", - "%ld.inodelk-count", inode->ino); - gf_proc_dump_write(key, "%d", count); + if (ctx != NULL) + goto out; - dump_inodelks(pl_inode); + ctx = GF_CALLOC (1, sizeof (pl_ctx_t), gf_locks_mt_posix_lock_t); - count = get_posixlk_count (this, inode); - gf_proc_dump_build_key(key, - "xlator.feature.locks.posixlk-count", - "%ld.posixlk-count", inode->ino); - gf_proc_dump_write(key, "%d", count); + if (ctx == NULL) + goto out; - dump_posixlks(pl_inode); + ctx->ltable = pl_lock_table_new(); + if (ctx->ltable == NULL) { + GF_FREE (ctx); + ctx = NULL; + goto out; + } - return 0; + LOCK_INIT (&ctx->ltable_lock); + + if (client_ctx_set (client, xlator, ctx) != 0) { + LOCK_DESTROY (&ctx->ltable_lock); + GF_FREE (ctx->ltable); + GF_FREE (ctx); + ctx = NULL; + } 
+out: + return ctx; } +static void +ltable_delete_locks (struct _lock_table *ltable) +{ + struct _locker *locker = NULL; + struct _locker *tmp = NULL; + + list_for_each_entry_safe (locker, tmp, &ltable->inodelk_lockers, lockers) { + if (locker->fd) + pl_del_locker (ltable, locker->volume, &locker->loc, + locker->fd, &locker->owner, + GF_FOP_INODELK); + GF_FREE (locker->volume); + GF_FREE (locker); + } + + list_for_each_entry_safe (locker, tmp, &ltable->entrylk_lockers, lockers) { + if (locker->fd) + pl_del_locker (ltable, locker->volume, &locker->loc, + locker->fd, &locker->owner, + GF_FOP_ENTRYLK); + GF_FREE (locker->volume); + GF_FREE (locker); + } + GF_FREE (ltable); +} -/* - * pl_dump_inode - inode dump function for posix locks - * - */ -int -pl_dump_inode (xlator_t *this) +static int32_t +destroy_cbk (xlator_t *this, client_t *client) { + void *tmp = NULL; + pl_ctx_t *locks_ctx = NULL; - assert(this); + client_ctx_del (client, this, &tmp); - if (this->itable) { - inode_table_dump(this->itable, - "xlator.features.locks.inode_table"); - } + if (tmp == NULL) + return 0 +; + locks_ctx = tmp; + if (locks_ctx->ltable) + ltable_delete_locks (locks_ctx->ltable); + + LOCK_DESTROY (&locks_ctx->ltable_lock); + GF_FREE (locks_ctx); return 0; } +static int32_t +disconnect_cbk (xlator_t *this, client_t *client) +{ + int32_t ret = 0; + pl_ctx_t *locks_ctx = NULL; + struct _lock_table *ltable = NULL; + + locks_ctx = pl_ctx_get (client, this); + if (locks_ctx == NULL) { + gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto out; + } + + LOCK (&locks_ctx->ltable_lock); + { + if (locks_ctx->ltable) { + ltable = locks_ctx->ltable; + locks_ctx->ltable = pl_lock_table_new (); + } + } + UNLOCK (&locks_ctx->ltable_lock); + + if (ltable) + ltable_delete_locks (ltable); + +out: + return ret; +} + int init (xlator_t *this) @@ -1445,12 +2629,13 @@ init (xlator_t *this) posix_locks_private_t *priv = NULL; xlator_list_t *trav = NULL; data_t *mandatory = NULL; - data_t *trace = NULL; 
+ data_t *trace = NULL; + int ret = -1; if (!this->children || this->children->next) { gf_log (this->name, GF_LOG_CRITICAL, "FATAL: posix-locks should have exactly one child"); - return -1; + goto out; } if (!this->parents) { @@ -1466,28 +2651,43 @@ init (xlator_t *this) gf_log (this->name, GF_LOG_CRITICAL, "'locks' translator is not loaded over a storage " "translator"); - return -1; + goto out; } - priv = CALLOC (1, sizeof (*priv)); + priv = GF_CALLOC (1, sizeof (*priv), + gf_locks_mt_posix_locks_private_t); mandatory = dict_get (this->options, "mandatory-locks"); if (mandatory) gf_log (this->name, GF_LOG_WARNING, "mandatory locks not supported in this minor release."); - trace = dict_get (this->options, "trace"); - if (trace) { - if (gf_string2boolean (trace->data, - &priv->trace) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "'trace' takes on only boolean values."); - return -1; - } - } + trace = dict_get (this->options, "trace"); + if (trace) { + if (gf_string2boolean (trace->data, + &priv->trace) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "'trace' takes on only boolean values."); + goto out; + } + } + + this->local_pool = mem_pool_new (pl_local_t, 32); + if (!this->local_pool) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } this->private = priv; - return 0; + ret = 0; + +out: + if (ret) { + GF_FREE (priv); + } + return ret; } @@ -1497,7 +2697,11 @@ fini (xlator_t *this) posix_locks_private_t *priv = NULL; priv = this->private; - free (priv); + if (!priv) + return 0; + this->private = NULL; + GF_FREE (priv->brickname); + GF_FREE (priv); return 0; } @@ -1505,21 +2709,23 @@ fini (xlator_t *this) int pl_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct flock *flock); + const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, + dict_t *xdata); int pl_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t 
cmd, struct flock *flock); + const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, + dict_t *xdata); int pl_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type); + entrylk_cmd cmd, entrylk_type type, dict_t *xdata); int pl_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type); + entrylk_cmd cmd, entrylk_type type, dict_t *xdata); struct xlator_fops fops = { .lookup = pl_lookup, @@ -1536,10 +2742,10 @@ struct xlator_fops fops = { .fentrylk = pl_fentrylk, .flush = pl_flush, .opendir = pl_opendir, -}; - - -struct xlator_mops mops = { + .readdirp = pl_readdirp, + .getxattr = pl_getxattr, + .fgetxattr = pl_fgetxattr, + .fsetxattr = pl_fsetxattr, }; struct xlator_dumpops dumpops = { @@ -1547,8 +2753,11 @@ struct xlator_dumpops dumpops = { }; struct xlator_cbks cbks = { - .forget = pl_forget, - .release = pl_release, + .forget = pl_forget, + .release = pl_release, + .releasedir = pl_releasedir, + .client_destroy = destroy_cbk, + .client_disconnect = disconnect_cbk, }; @@ -1556,8 +2765,8 @@ struct volume_options options[] = { { .key = { "mandatory-locks", "mandatory" }, .type = GF_OPTION_TYPE_BOOL }, - { .key = { "trace" }, - .type = GF_OPTION_TYPE_BOOL - }, + { .key = { "trace" }, + .type = GF_OPTION_TYPE_BOOL + }, { .key = {NULL} }, }; diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c new file mode 100644 index 000000000..11abd26d8 --- /dev/null +++ b/xlators/features/locks/src/reservelk.c @@ -0,0 +1,443 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +void +__delete_reserve_lock (posix_lock_t *lock) +{ + list_del (&lock->list); +} + +void +__destroy_reserve_lock (posix_lock_t *lock) +{ + GF_FREE (lock); +} + +/* Return true if the two reservelks have exactly same lock boundaries */ +int +reservelks_equal (posix_lock_t *l1, posix_lock_t *l2) +{ + if ((l1->fl_start == l2->fl_start) && + (l1->fl_end == l2->fl_end)) + return 1; + + return 0; +} + +/* Determine if lock is grantable or not */ +static posix_lock_t * +__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + xlator_t *this = NULL; + posix_lock_t *l = NULL; + posix_lock_t *ret_lock = NULL; + + this = THIS; + + if (list_empty (&pl_inode->reservelk_list)) { + gf_log (this->name, GF_LOG_TRACE, + "No reservelks in list"); + goto out; + } + list_for_each_entry (l, &pl_inode->reservelk_list, list){ + if (reservelks_equal (lock, l)) { + ret_lock = l; + break; + } + } +out: + return ret_lock; +} + +static inline int +__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2) +{ + return (is_same_lkowner (&l1->owner, &l2->owner)); + +} + +static posix_lock_t * +__matching_reservelk (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + posix_lock_t *l = NULL; + + if (list_empty (&pl_inode->reservelk_list)) { + gf_log ("posix-locks", GF_LOG_TRACE, + "reservelk list empty"); + return NULL; + } + + list_for_each_entry (l, &pl_inode->reservelk_list, list) { + if (reservelks_equal (l, lock)) { + gf_log ("posix-locks", GF_LOG_TRACE, + 
"equal reservelk found"); + break; + } + } + + return l; +} + +static int +__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *lock) +{ + posix_lock_t *conf = NULL; + int ret = 0; + + conf = __matching_reservelk (pl_inode, lock); + if (conf) { + gf_log (this->name, GF_LOG_TRACE, + "Matching reservelk found"); + if (__same_owner_reservelk (lock, conf)) { + list_del_init (&conf->list); + gf_log (this->name, GF_LOG_TRACE, + "Removing the matching reservelk for setlk to progress"); + GF_FREE (conf); + ret = 0; + } else { + gf_log (this->name, GF_LOG_TRACE, + "Conflicting reservelk found"); + ret = 1; + } + + } + return ret; + +} + +int +pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *lock, int can_block) +{ + int ret = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (__reservelk_conflict (this, pl_inode, lock)) { + gf_log (this->name, GF_LOG_TRACE, + "Found conflicting reservelk. Blocking until reservelk is unlocked."); + lock->blocked = can_block; + list_add_tail (&lock->list, &pl_inode->blocked_calls); + ret = -1; + goto unlock; + } + + gf_log (this->name, GF_LOG_TRACE, + "no conflicting reservelk found. Call continuing"); + ret = 0; + + } +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + return ret; + +} + + +/* Determines if lock can be granted and adds the lock. If the lock + * is blocking, adds it to the blocked_reservelks. + */ +static int +__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + posix_lock_t *conf = NULL; + int ret = -EINVAL; + + conf = __reservelk_grantable (pl_inode, lock); + if (conf){ + ret = -EAGAIN; + if (can_block == 0) + goto out; + + list_add_tail (&lock->list, &pl_inode->blocked_reservelks); + + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + + + goto out; + } + + list_add (&lock->list, &pl_inode->reservelk_list); + + ret = 0; + +out: + return ret; +} + +static posix_lock_t * +find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode) +{ + posix_lock_t *l = NULL; + list_for_each_entry (l, &pl_inode->reservelk_list, list) { + if (reservelks_equal (l, lock)) + return l; + } + return NULL; +} + +/* Set F_UNLCK removes a lock which has the exact same lock boundaries + * as the UNLCK lock specifies. If such a lock is not found, returns invalid + */ +static posix_lock_t * +__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode) +{ + + posix_lock_t *conf = NULL; + + conf = find_matching_reservelk (lock, pl_inode); + if (!conf) { + gf_log (this->name, GF_LOG_DEBUG, + " Matching lock not found for unlock"); + goto out; + } + __delete_reserve_lock (conf); + gf_log (this->name, GF_LOG_DEBUG, + " Matching lock found for unlock"); + +out: + return conf; + + +} + +static void +__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) +{ + int bl_ret = 0; + posix_lock_t *bl = NULL; + posix_lock_t *tmp = NULL; + + struct list_head blocked_list; + + INIT_LIST_HEAD (&blocked_list); + list_splice_init (&pl_inode->blocked_reservelks, &blocked_list); + + list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + + list_del_init (&bl->list); + + bl_ret = __lock_reservelk (this, pl_inode, bl, 1); + + if (bl_ret == 0) { + list_add (&bl->list, granted); + } + } + return; +} + +/* Grant all reservelks blocked on lock(s) */ +void +grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode) +{ + struct list_head granted; + posix_lock_t *lock = NULL; + posix_lock_t *tmp = NULL; + + INIT_LIST_HEAD (&granted); + + if (list_empty (&pl_inode->blocked_reservelks)) { + gf_log (this->name, GF_LOG_TRACE, + "No blocked locks to be 
granted"); + return; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_reserve_locks (this, pl_inode, &granted); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, list) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + + STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock, + NULL); + } + +} + +static void +__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) +{ + int bl_ret = 0; + posix_lock_t *bl = NULL; + posix_lock_t *tmp = NULL; + + struct list_head blocked_list; + + INIT_LIST_HEAD (&blocked_list); + list_splice_init (&pl_inode->blocked_reservelks, &blocked_list); + + list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + + list_del_init (&bl->list); + + bl_ret = pl_verify_reservelk (this, pl_inode, bl, bl->blocked); + + if (bl_ret == 0) { + list_add_tail (&bl->list, granted); + } + } + return; +} + +void +grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode) +{ + struct list_head granted; + posix_lock_t *lock = NULL; + posix_lock_t *tmp = NULL; + fd_t *fd = NULL; + + int can_block = 0; + int32_t cmd = 0; + int ret = 0; + + if (list_empty (&pl_inode->blocked_calls)) { + gf_log (this->name, GF_LOG_TRACE, + "No blocked lock calls to be granted"); + return; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_lock_calls (this, pl_inode, &granted); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, list) { + fd = fd_from_fdnum (lock); + + if (lock->blocked) { + can_block = 1; + cmd = F_SETLKW; + } + else + cmd = F_SETLK; + + lock->blocked = 0; + ret = pl_setlk (this, pl_inode, lock, can_block); + if (ret == -1) { + if (can_block) { + pl_trace_block (this, lock->frame, 
fd, NULL, + cmd, &lock->user_flock, NULL); + continue; + } else { + gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); + pl_trace_out (this, lock->frame, fd, NULL, cmd, + &lock->user_flock, -1, EAGAIN, NULL); + pl_update_refkeeper (this, fd->inode); + STACK_UNWIND_STRICT (lk, lock->frame, -1, + EAGAIN, &lock->user_flock, + NULL); + __destroy_lock (lock); + } + } + + } + +} + + +int +pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) +{ + posix_lock_t *retlock = NULL; + int ret = -1; + + pthread_mutex_lock (&pl_inode->mutex); + { + retlock = __reserve_unlock_lock (this, lock, pl_inode); + if (!retlock) { + gf_log (this->name, GF_LOG_DEBUG, + "Bad Unlock issued on Inode lock"); + ret = -EINVAL; + goto out; + } + + gf_log (this->name, GF_LOG_TRACE, + "Reservelk Unlock successful"); + __destroy_reserve_lock (retlock); + ret = 0; + } +out: + pthread_mutex_unlock (&pl_inode->mutex); + + grant_blocked_reserve_locks (this, pl_inode); + grant_blocked_lock_calls (this, pl_inode); + + return ret; + +} + +int +pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + int ret = -EINVAL; + + pthread_mutex_lock (&pl_inode->mutex); + { + + ret = __lock_reservelk (this, pl_inode, lock, can_block); + if (ret < 0) + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + else + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->fl_start, + lock->fl_end); + + } + pthread_mutex_unlock (&pl_inode->mutex); + return ret; +} diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c index fc69ce8a9..d2cca32de 100644 --- a/xlators/features/locks/tests/unit-test.c +++ b/xlators/features/locks/tests/unit-test.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" diff --git a/xlators/features/mac-compat/Makefile.am b/xlators/features/mac-compat/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/features/mac-compat/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/mac-compat/src/Makefile.am b/xlators/features/mac-compat/src/Makefile.am new file mode 100644 index 000000000..f8567edce --- /dev/null +++ b/xlators/features/mac-compat/src/Makefile.am @@ -0,0 +1,14 @@ +xlator_LTLIBRARIES = mac-compat.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +mac_compat_la_LDFLAGS = -module -avoid-version + +mac_compat_la_SOURCES = mac-compat.c +mac_compat_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/features/mac-compat/src/mac-compat.c b/xlators/features/mac-compat/src/mac-compat.c new file mode 100644 index 000000000..7cb550ad5 --- /dev/null +++ b/xlators/features/mac-compat/src/mac-compat.c @@ -0,0 +1,237 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" +#include "compat-errno.h" + + +enum apple_xattr { + GF_FINDER_INFO_XATTR, + GF_RESOURCE_FORK_XATTR, + GF_XATTR_ALL, + GF_XATTR_NONE +}; + +static char *apple_xattr_name[] = { + [GF_FINDER_INFO_XATTR] = "com.apple.FinderInfo", + [GF_RESOURCE_FORK_XATTR] = "com.apple.ResourceFork" +}; + +static const char *apple_xattr_value[] = { + [GF_FINDER_INFO_XATTR] = + /* 1 2 3 4 5 6 7 8 */ + "\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0", + [GF_RESOURCE_FORK_XATTR] = "" +}; + +static int32_t apple_xattr_len[] = { + [GF_FINDER_INFO_XATTR] = 32, + [GF_RESOURCE_FORK_XATTR] = 1 +}; + + +int32_t +maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + intptr_t ax = (intptr_t)this->private; + int i = 0; + + if ((ax == GF_XATTR_ALL && op_ret >= 0) || ax != GF_XATTR_NONE) { + op_ret = op_errno = 0; + + for (i = 0; i < GF_XATTR_ALL; i++) { + if (dict_get (dict, apple_xattr_name[i])) + continue; + + if (dict_set (dict, apple_xattr_name[i], + bin_to_data ((void *)apple_xattr_value[i], + apple_xattr_len[i])) == -1) { + op_ret = -1; + op_errno = ENOMEM; + + break; + } + } + } + + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + + return 0; +} + + +int32_t +maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + intptr_t ax = GF_XATTR_NONE; + int i = 0; + + if (name) { + for (i = 0; i < GF_XATTR_ALL; i++) { + if (strcmp (apple_xattr_name[i], name) == 0) { + ax = i; + + break; + } + } + } else + ax = GF_XATTR_ALL; + + this->private = (void *)ax; + + STACK_WIND (frame, maccomp_getxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + loc, name, xdata); + return 0; +} + + +int32_t +maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, 
dict_t *xdata) +{ + intptr_t ax = GF_XATTR_NONE; + int i = 0; + + if (name) { + for (i = 0; i < GF_XATTR_ALL; i++) { + if (strcmp (apple_xattr_name[i], name) == 0) { + ax = i; + + break; + } + } + } else + ax = GF_XATTR_ALL; + + this->private = (void *)ax; + + STACK_WIND (frame, maccomp_getxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, + fd, name, xdata); + return 0; +} + + +int32_t +maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + intptr_t ax = (intptr_t)this->private; + + if (op_ret == -1 && ax != GF_XATTR_NONE) + op_ret = op_errno = 0; + + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + + +int32_t +maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + intptr_t ax = GF_XATTR_NONE; + int i = 0; + + for (i = 0; i < GF_XATTR_ALL; i++) { + if (dict_get (dict, apple_xattr_name[i])) { + ax = i; + + break; + } + } + + this->private = (void *)ax; + + STACK_WIND (frame, maccomp_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, dict, flags, xdata); + return 0; +} + + +int32_t +maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + intptr_t ax = GF_XATTR_NONE; + int i = 0; + + for (i = 0; i < GF_XATTR_ALL; i++) { + if (dict_get (dict, apple_xattr_name[i])) { + ax = i; + + break; + } + } + + this->private = (void *)ax; + + STACK_WIND (frame, maccomp_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, + fd, dict, flags, xdata); + return 0; +} + + +int32_t +init (xlator_t *this) +{ + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "translator not configured with exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + return 0; +} + + +void +fini (xlator_t *this) +{ + return; +} + + +struct xlator_fops fops = { + .getxattr = maccomp_getxattr, + .fgetxattr = maccomp_fgetxattr, + .setxattr = maccomp_setxattr, + .fsetxattr = maccomp_fsetxattr, +}; + +struct xlator_cbks cbks; + +struct volume_options options[] = { + { .key = {NULL} }, +}; diff --git a/xlators/features/marker/Makefile.am b/xlators/features/marker/Makefile.am new file mode 100644 index 000000000..a985f42a8 --- /dev/null +++ b/xlators/features/marker/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/marker/src/Makefile.am b/xlators/features/marker/src/Makefile.am new file mode 100644 index 000000000..a7c676472 --- /dev/null +++ b/xlators/features/marker/src/Makefile.am @@ -0,0 +1,17 @@ +xlator_LTLIBRARIES = marker.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +marker_la_LDFLAGS = -module -avoid-version + +marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c marker-common.c +marker_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h marker-quota-helper.h marker-common.h $(top_builddir)/xlators/lib/src/libxlator.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/xlators/lib/src + +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c new file mode 100644 index 000000000..84a718add --- /dev/null +++ b/xlators/features/marker/src/marker-common.c @@ -0,0 +1,69 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include <fnmatch.h> +#include "marker-common.h" + +marker_inode_ctx_t * +marker_inode_ctx_new () +{ + marker_inode_ctx_t *ctx = NULL; + + ctx = GF_CALLOC (1, sizeof (marker_inode_ctx_t), + gf_marker_mt_marker_inode_ctx_t); + if (ctx == NULL) + goto out; + + ctx->quota_ctx = NULL; +out: + return ctx; +} + +int32_t +marker_force_inode_ctx_get (inode_t *inode, xlator_t *this, + marker_inode_ctx_t **ctx) +{ + int32_t ret = -1; + uint64_t ctx_int = 0; + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx_int); + if (ret == 0) + *ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int; + else { + *ctx = marker_inode_ctx_new (); + if (*ctx == NULL) + goto unlock; + + ret = __inode_ctx_put (inode, this, + (uint64_t )(unsigned long) *ctx); + if (ret == -1) { + GF_FREE (*ctx); + goto unlock; + } + ret = 0; + } + } +unlock: UNLOCK (&inode->lock); + + return ret; +} + +int +marker_filter_quota_xattr (dict_t *dict, char *key, + data_t *value, void *data) +{ + dict_del (dict, key); + return 0; +} diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h new file mode 100644 index 000000000..23dd846cb --- /dev/null +++ b/xlators/features/marker/src/marker-common.h @@ -0,0 +1,27 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _MARKER_COMMON_H +#define _MARKER_COMMON_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "inode.h" +#include "xlator.h" +#include "marker.h" + +int32_t +marker_force_inode_ctx_get (inode_t *, xlator_t *, marker_inode_ctx_t **); + +int +marker_filter_quota_xattr (dict_t *, char *, data_t *, void *); +#endif diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h new file mode 100644 index 000000000..1f74d5048 --- /dev/null +++ b/xlators/features/marker/src/marker-mem-types.h @@ -0,0 +1,25 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef __MARKER_MEM_TYPES_H__ +#define __MARKER_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_marker_mem_types_ { + gf_marker_mt_marker_conf_t = gf_common_mt_end + 1, + gf_marker_mt_loc_t, + gf_marker_mt_volume_mark, + gf_marker_mt_int64_t, + gf_marker_mt_quota_inode_ctx_t, + gf_marker_mt_marker_inode_ctx_t, + gf_marker_mt_inode_contribution_t, + gf_marker_mt_end +}; +#endif diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c new file mode 100644 index 000000000..af5fed132 --- /dev/null +++ b/xlators/features/marker/src/marker-quota-helper.c @@ -0,0 +1,414 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "locking.h" +#include "marker-quota.h" +#include "marker-common.h" +#include "marker-quota-helper.h" +#include "marker-mem-types.h" + +int +mq_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO ("marker", loc, out); + GF_VALIDATE_OR_GOTO ("marker", inode, out); + GF_VALIDATE_OR_GOTO ("marker", path, out); + /* Not checking for parent because while filling + * loc of root, parent will be NULL + */ + + if (inode) { + loc->inode = inode_ref (inode); + } + + if (parent) + loc->parent = inode_ref (parent); + + loc->path = gf_strdup (path); + if (!loc->path) { + gf_log ("loc fill", GF_LOG_ERROR, "strdup failed"); + goto loc_wipe; + } + + loc->name = strrchr (loc->path, '/'); + if (loc->name) + loc->name++; + else + goto loc_wipe; + + ret = 0; +loc_wipe: + if (ret < 0) + loc_wipe (loc); +out: + return ret; +} + + +int32_t +mq_inode_loc_fill (const char *parent_gfid, inode_t *inode, loc_t *loc) +{ + char *resolvedpath = NULL; + inode_t *parent = NULL; + int ret = -1; + + if ((!inode) || (!loc)) + return ret; + + if ((inode) && __is_root_gfid (inode->gfid)) { + loc->parent = NULL; + goto ignore_parent; + } + + if (parent_gfid == NULL) + parent = inode_parent (inode, 0, NULL); + else + parent = inode_find (inode->table, + (unsigned char *) parent_gfid); + + if (parent == NULL) + goto err; + +ignore_parent: + ret = inode_path (inode, NULL, &resolvedpath); + if (ret < 0) + goto err; + + ret = mq_loc_fill (loc, inode, parent, resolvedpath); + if (ret < 0) + goto err; + +err: + if (parent) + inode_unref (parent); + + GF_FREE (resolvedpath); + + return ret; +} + + +quota_inode_ctx_t * +mq_alloc_inode_ctx () +{ + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + + QUOTA_ALLOC (ctx, quota_inode_ctx_t, ret); + if (ret == -1) + goto out; + + ctx->size = 0; + ctx->dirty = 0; + ctx->updation_status = _gf_false; + LOCK_INIT (&ctx->lock); + 
INIT_LIST_HEAD (&ctx->contribution_head); +out: + return ctx; +} + +inode_contribution_t * +mq_get_contribution_node (inode_t *inode, quota_inode_ctx_t *ctx) +{ + inode_contribution_t *contri = NULL; + inode_contribution_t *temp = NULL; + + if (!inode || !ctx) + goto out; + + list_for_each_entry (temp, &ctx->contribution_head, contri_list) { + if (uuid_compare (temp->gfid, inode->gfid) == 0) { + contri = temp; + goto out; + } + } +out: + return contri; +} + + +int32_t +mq_delete_contribution_node (dict_t *dict, char *key, + inode_contribution_t *contribution) +{ + if (dict_get (dict, key) != NULL) + goto out; + + QUOTA_FREE_CONTRIBUTION_NODE (contribution); +out: + return 0; +} + + +inode_contribution_t * +__mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) +{ + int32_t ret = 0; + inode_contribution_t *contribution = NULL; + + if (!loc->parent) { + if (!uuid_is_null (loc->pargfid)) + loc->parent = inode_find (loc->inode->table, + loc->pargfid); + if (!loc->parent) + loc->parent = inode_parent (loc->inode, loc->pargfid, + loc->name); + if (!loc->parent) + goto out; + } + + list_for_each_entry (contribution, &ctx->contribution_head, contri_list) { + if (loc->parent && + uuid_compare (contribution->gfid, loc->parent->gfid) == 0) { + goto out; + } + } + + QUOTA_ALLOC (contribution, inode_contribution_t, ret); + if (ret == -1) + goto out; + + contribution->contribution = 0; + + uuid_copy (contribution->gfid, loc->parent->gfid); + + LOCK_INIT (&contribution->lock); + INIT_LIST_HEAD (&contribution->contri_list); + + list_add_tail (&contribution->contri_list, &ctx->contribution_head); + +out: + return contribution; +} + + +inode_contribution_t * +mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) +{ + inode_contribution_t *contribution = NULL; + + if ((ctx == NULL) || (loc == NULL)) + return NULL; + + if (strcmp (loc->path, "/") == 0) + return NULL; + + LOCK (&ctx->lock); + { + contribution = 
__mq_add_new_contribution_node (this, ctx, loc); + } + UNLOCK (&ctx->lock); + + return contribution; +} + + +int32_t +mq_dict_set_contribution (xlator_t *this, dict_t *dict, + loc_t *loc) +{ + int32_t ret = -1; + char contri_key [512] = {0, }; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO ("marker", dict, out); + GF_VALIDATE_OR_GOTO ("marker", loc, out); + GF_VALIDATE_OR_GOTO ("marker", loc->parent, out); + + GET_CONTRI_KEY (contri_key, loc->parent->gfid, ret); + if (ret < 0) { + ret = -1; + goto out; + } + + ret = dict_set_int64 (dict, contri_key, 0); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "unable to set dict value on %s.", + loc->path); + goto out; + } + + ret = 0; +out: + return ret; +} + + +int32_t +mq_inode_ctx_get (inode_t *inode, xlator_t *this, + quota_inode_ctx_t **ctx) +{ + int32_t ret = -1; + uint64_t ctx_int = 0; + marker_inode_ctx_t *mark_ctx = NULL; + + GF_VALIDATE_OR_GOTO ("marker", inode, out); + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO ("marker", ctx, out); + + ret = inode_ctx_get (inode, this, &ctx_int); + if (ret < 0) { + ret = -1; + *ctx = NULL; + goto out; + } + + mark_ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int; + if (mark_ctx->quota_ctx == NULL) { + ret = -1; + goto out; + } + + *ctx = mark_ctx->quota_ctx; + + ret = 0; + +out: + return ret; +} + + +quota_inode_ctx_t * +__mq_inode_ctx_new (inode_t *inode, xlator_t *this) +{ + int32_t ret = -1; + quota_inode_ctx_t *quota_ctx = NULL; + marker_inode_ctx_t *mark_ctx = NULL; + + ret = marker_force_inode_ctx_get (inode, this, &mark_ctx); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "marker_force_inode_ctx_get() failed"); + goto out; + } + + LOCK (&inode->lock); + { + if (mark_ctx->quota_ctx == NULL) { + quota_ctx = mq_alloc_inode_ctx (); + if (quota_ctx == NULL) { + ret = -1; + goto unlock; + } + mark_ctx->quota_ctx = quota_ctx; + } else { + quota_ctx = mark_ctx->quota_ctx; + } + + ret = 0; + } +unlock: + UNLOCK 
(&inode->lock); +out: + return quota_ctx; +} + + +quota_inode_ctx_t * +mq_inode_ctx_new (inode_t * inode, xlator_t *this) +{ + return __mq_inode_ctx_new (inode, this); +} + +quota_local_t * +mq_local_new () +{ + quota_local_t *local = NULL; + + local = mem_get0 (THIS->local_pool); + if (!local) + goto out; + + local->ref = 1; + LOCK_INIT (&local->lock); + + local->ctx = NULL; + local->contri = NULL; + +out: + return local; +} + +quota_local_t * +mq_local_ref (quota_local_t *local) +{ + LOCK (&local->lock); + { + local->ref ++; + } + UNLOCK (&local->lock); + + return local; +} + + +int32_t +mq_local_unref (xlator_t *this, quota_local_t *local) +{ + int32_t ref = 0; + if (local == NULL) + goto out; + + QUOTA_SAFE_DECREMENT (&local->lock, local->ref, ref); + + if (ref != 0) + goto out; + + if (local->fd != NULL) + fd_unref (local->fd); + + loc_wipe (&local->loc); + + loc_wipe (&local->parent_loc); + + LOCK_DESTROY (&local->lock); + + mem_put (local); +out: + return 0; +} + + +inode_contribution_t * +mq_get_contribution_from_loc (xlator_t *this, loc_t *loc) +{ + int32_t ret = 0; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contribution = NULL; + + ret = mq_inode_ctx_get (loc->inode, this, &ctx); + if (ret < 0) { + gf_log_callingfn (this->name, GF_LOG_WARNING, + "cannot get marker-quota context from inode " + "(gfid:%s, path:%s)", + uuid_utoa (loc->inode->gfid), loc->path); + goto err; + } + + contribution = mq_get_contribution_node (loc->parent, ctx); + if (contribution == NULL) { + gf_log_callingfn (this->name, GF_LOG_WARNING, + "inode (gfid:%s, path:%s) has " + "no contribution towards parent (gfid:%s)", + uuid_utoa (loc->inode->gfid), + loc->path, uuid_utoa (loc->parent->gfid)); + goto err; + } + +err: + return contribution; +} diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h new file mode 100644 index 000000000..6cdd14881 --- /dev/null +++ 
b/xlators/features/marker/src/marker-quota-helper.h @@ -0,0 +1,76 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _MARKER_QUOTA_HELPER_H +#define _MARKER_QUOTA_HELPER + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "marker.h" + +#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \ + do { \ + list_del (&_contribution->contri_list); \ + GF_FREE (_contribution); \ + } while (0) + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK (lock); \ + var ++; \ + UNLOCK (lock); \ + } while (0) + +#define QUOTA_SAFE_DECREMENT(lock, var, value) \ + do { \ + LOCK (lock); \ + { \ + value = --var; \ + } \ + UNLOCK (lock); \ + } while (0) + +inode_contribution_t * +mq_add_new_contribution_node (xlator_t *, quota_inode_ctx_t *, loc_t *); + +int32_t +mq_dict_set_contribution (xlator_t *, dict_t *, loc_t *); + +quota_inode_ctx_t * +mq_inode_ctx_new (inode_t *, xlator_t *); + +int32_t +mq_inode_ctx_get (inode_t *, xlator_t *, quota_inode_ctx_t **); + +int32_t +mq_delete_contribution_node (dict_t *, char *, inode_contribution_t *); + +int32_t +mq_inode_loc_fill (const char *, inode_t *, loc_t *); + +quota_local_t * +mq_local_new (); + +quota_local_t * +mq_local_ref (quota_local_t *); + +int32_t +mq_local_unref (xlator_t *, quota_local_t *); + +inode_contribution_t * +mq_get_contribution_node (inode_t *, quota_inode_ctx_t *); + +inode_contribution_t * +mq_get_contribution_from_loc (xlator_t *this, loc_t *loc); + +#endif diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c new file mode 100644 index 000000000..6f9af6e13 --- /dev/null +++ 
b/xlators/features/marker/src/marker-quota.c @@ -0,0 +1,2520 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "dict.h" +#include "xlator.h" +#include "defaults.h" +#include "libxlator.h" +#include "common-utils.h" +#include "byte-order.h" +#include "marker-quota.h" +#include "marker-quota-helper.h" + +int +mq_loc_copy (loc_t *dst, loc_t *src) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO ("marker", dst, out); + GF_VALIDATE_OR_GOTO ("marker", src, out); + + if (src->inode == NULL || + src->path == NULL) { + gf_log ("marker", GF_LOG_WARNING, + "src loc is not valid"); + goto out; + } + + ret = loc_copy (dst, src); +out: + return ret; +} + +int32_t +mq_get_local_err (quota_local_t *local, + int32_t *val) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("marker", local, out); + GF_VALIDATE_OR_GOTO ("marker", val, out); + + LOCK (&local->lock); + { + *val = local->err; + } + UNLOCK (&local->lock); + + ret = 0; +out: + return ret; +} + +int32_t +mq_get_ctx_updation_status (quota_inode_ctx_t *ctx, + gf_boolean_t *status) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("marker", ctx, out); + GF_VALIDATE_OR_GOTO ("marker", status, out); + + LOCK (&ctx->lock); + { + *status = ctx->updation_status; + } + UNLOCK (&ctx->lock); + + ret = 0; +out: + return ret; +} + + +int32_t +mq_set_ctx_updation_status (quota_inode_ctx_t *ctx, + gf_boolean_t status) +{ + int32_t ret = -1; + + if (ctx == NULL) + goto out; + + LOCK (&ctx->lock); + { + ctx->updation_status = status; + } + UNLOCK (&ctx->lock); + + ret = 0; +out: + return ret; +} + +int32_t +mq_test_and_set_ctx_updation_status 
(quota_inode_ctx_t *ctx, + gf_boolean_t *status) +{ + int32_t ret = -1; + gf_boolean_t temp = _gf_false; + + GF_VALIDATE_OR_GOTO ("marker", ctx, out); + GF_VALIDATE_OR_GOTO ("marker", status, out); + + LOCK (&ctx->lock); + { + temp = *status; + *status = ctx->updation_status; + ctx->updation_status = temp; + } + UNLOCK (&ctx->lock); + + ret = 0; +out: + return ret; +} + +void +mq_assign_lk_owner (xlator_t *this, call_frame_t *frame) +{ + marker_conf_t *conf = NULL; + uint64_t lk_owner = 0; + + conf = this->private; + + LOCK (&conf->lock); + { + if (++conf->quota_lk_owner == 0) { + ++conf->quota_lk_owner; + } + + lk_owner = conf->quota_lk_owner; + } + UNLOCK (&conf->lock); + + set_lk_owner_from_uint64 (&frame->root->lk_owner, lk_owner); + + return; +} + + +int32_t +mq_loc_fill_from_name (xlator_t *this, loc_t *newloc, loc_t *oldloc, + uint64_t ino, char *name) +{ + int32_t ret = -1; + int32_t len = 0; + char *path = NULL; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO ("marker", newloc, out); + GF_VALIDATE_OR_GOTO ("marker", oldloc, out); + GF_VALIDATE_OR_GOTO ("marker", name, out); + + newloc->inode = inode_new (oldloc->inode->table); + + if (!newloc->inode) { + ret = -1; + goto out; + } + + newloc->parent = inode_ref (oldloc->inode); + uuid_copy (newloc->pargfid, oldloc->inode->gfid); + + len = strlen (oldloc->path); + + if (oldloc->path [len - 1] == '/') + ret = gf_asprintf ((char **) &path, "%s%s", + oldloc->path, name); + else + ret = gf_asprintf ((char **) &path, "%s/%s", + oldloc->path, name); + + if (ret < 0) + goto out; + + newloc->path = path; + + newloc->name = strrchr (newloc->path, '/'); + + if (newloc->name) + newloc->name++; + + gf_log (this->name, GF_LOG_DEBUG, "path = %s name =%s", + newloc->path, newloc->name); +out: + return ret; +} + +int32_t +mq_dirty_inode_updation_done (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + QUOTA_STACK_DESTROY (frame, this); + + return 
0; +} + +int32_t +mq_release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + struct gf_flock lock = {0, }; + quota_local_t *local = NULL; + loc_t loc = {0, }; + int ret = -1; + + local = frame->local; + + if (op_ret == -1) { + local->err = -1; + + mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL); + + return 0; + } + + if (op_ret == 0) + local->ctx->dirty = 0; + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = 0; + + ret = loc_copy (&loc, &local->loc); + if (ret == -1) { + local->err = -1; + frame->local = NULL; + mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL); + return 0; + } + + if (local->loc.inode == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "Inode is NULL, so can't stackwind."); + goto out; + } + + STACK_WIND (frame, + mq_dirty_inode_updation_done, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &loc, F_SETLKW, &lock, NULL); + + loc_wipe (&loc); + + return 0; +out: + mq_dirty_inode_updation_done (frame, NULL, this, -1, 0, NULL); + + return 0; +} + +int32_t +mq_mark_inode_undirty (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + int32_t ret = -1; + int64_t *size = NULL; + dict_t *newdict = NULL; + quota_local_t *local = NULL; + + local = (quota_local_t *) frame->local; + + if (op_ret == -1) + goto err; + + if (!dict) + goto wind; + + ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); + if (ret) + goto wind; + + LOCK (&local->ctx->lock); + { + local->ctx->size = ntoh64 (*size); + } + UNLOCK (&local->ctx->lock); + +wind: + newdict = dict_new (); + if (!newdict) + goto err; + + ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0); + if (ret) + goto err; + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, local->loc.inode->gfid); + + GF_UUID_ASSERT (local->loc.gfid); + STACK_WIND 
(frame, mq_release_lock_on_dirty_inode, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + &local->loc, newdict, 0, NULL); + ret = 0; + +err: + if (op_ret == -1 || ret == -1) { + local->err = -1; + + mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL); + } + + if (newdict) + dict_unref (newdict); + + return 0; +} + +int32_t +mq_update_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, struct iatt *postparent) +{ + int32_t ret = -1; + dict_t *new_dict = NULL; + int64_t *size = NULL; + int64_t *delta = NULL; + quota_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1) + goto err; + + if (dict == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "Dict is null while updating the size xattr %s", + local->loc.path?local->loc.path:""); + goto err; + } + + ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); + if (!size) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get the size, %s", + local->loc.path?local->loc.path:""); + goto err; + } + + QUOTA_ALLOC_OR_GOTO (delta, int64_t, ret, err); + + *delta = hton64 (local->sum - ntoh64 (*size)); + + gf_log (this->name, GF_LOG_DEBUG, "calculated size = %"PRId64", " + "original size = %"PRIu64 + " path = %s diff = %"PRIu64, local->sum, ntoh64 (*size), + local->loc.path, ntoh64 (*delta)); + + new_dict = dict_new (); + if (!new_dict); + + ret = dict_set_bin (new_dict, QUOTA_SIZE_KEY, delta, 8); + if (ret) + goto err; + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, buf->ia_gfid); + + GF_UUID_ASSERT (local->loc.gfid); + + STACK_WIND (frame, mq_mark_inode_undirty, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, &local->loc, + GF_XATTROP_ADD_ARRAY64, new_dict, NULL); + + ret = 0; + +err: + if (op_ret == -1 || ret == -1) { + local->err = -1; + + mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL); + } + + if (new_dict) + dict_unref (new_dict); + + 
return 0; +} + +int32_t +mq_test_and_set_local_err(quota_local_t *local, + int32_t *val) +{ + int tmp = 0; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("marker", local, out); + GF_VALIDATE_OR_GOTO ("marker", val, out); + + LOCK (&local->lock); + { + tmp = local->err; + local->err = *val; + *val = tmp; + } + UNLOCK (&local->lock); + + ret = 0; +out: + return ret; +} + +int32_t +mq_get_dirty_inode_size (call_frame_t *frame, xlator_t *this) +{ + int32_t ret = -1; + dict_t *dict = NULL; + quota_local_t *local = NULL; + + local = (quota_local_t *) frame->local; + + dict = dict_new (); + if (!dict) { + ret = -1; + goto err; + } + + ret = dict_set_int64 (dict, QUOTA_SIZE_KEY, 0); + if (ret) + goto err; + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, local->loc.inode->gfid); + + GF_UUID_ASSERT (local->loc.gfid); + + STACK_WIND (frame, mq_update_size_xattr, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &local->loc, dict); + ret =0; + +err: + if (ret) { + local->err = -1; + + mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL); + } + + if (dict) + dict_unref (dict); + + return 0; +} + +int32_t +mq_get_child_contribution (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct iatt *buf, + dict_t *dict, + struct iatt *postparent) +{ + int32_t ret = -1; + int32_t val = 0; + char contri_key [512] = {0, }; + int64_t *contri = NULL; + quota_local_t *local = NULL; + + local = frame->local; + + frame->local = NULL; + + QUOTA_STACK_DESTROY (frame, this); + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "%s", + strerror (op_errno)); + val = -2; + if (!mq_test_and_set_local_err (local, &val) && + val != -2) + mq_release_lock_on_dirty_inode (local->frame, NULL, + this, 0, 0, NULL); + + goto exit; + } + + ret = mq_get_local_err (local, &val); + if (!ret && val == -2) + goto exit; + + GET_CONTRI_KEY (contri_key, local->loc.inode->gfid, ret); + if (ret < 0) + goto out; + + 
if (!dict) + goto out; + + if (dict_get_bin (dict, contri_key, (void **) &contri) == 0) + local->sum += ntoh64 (*contri); + +out: + LOCK (&local->lock); + { + val = --local->dentry_child_count; + } + UNLOCK (&local->lock); + + if (val == 0) { + mq_dirty_inode_readdir (local->frame, NULL, this, + 0, 0, NULL, NULL); + } + mq_local_unref (this, local); + + return 0; +exit: + mq_local_unref (this, local); + return 0; +} + +int32_t +mq_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *entries, dict_t *xdata) +{ + char contri_key [512] = {0, }; + int32_t ret = 0; + int32_t val = 0; + off_t offset = 0; + int32_t count = 0; + dict_t *dict = NULL; + quota_local_t *local = NULL; + gf_dirent_t *entry = NULL; + call_frame_t *newframe = NULL; + loc_t loc = {0, }; + + local = mq_local_ref (frame->local); + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "readdir failed %s", strerror (op_errno)); + local->err = -1; + + mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL); + + goto end; + } else if (op_ret == 0) { + mq_get_dirty_inode_size (frame, this); + + goto end; + } + + local->dentry_child_count = 0; + + list_for_each_entry (entry, (&entries->list), list) { + gf_log (this->name, GF_LOG_DEBUG, "entry = %s", entry->d_name); + + if ((!strcmp (entry->d_name, ".")) || (!strcmp (entry->d_name, + ".."))) { + gf_log (this->name, GF_LOG_DEBUG, "entry = %s", + entry->d_name); + continue; + } + + offset = entry->d_off; + count++; + } + + if (count == 0) { + mq_get_dirty_inode_size (frame, this); + goto end; + + } + + local->frame = frame; + + LOCK (&local->lock); + { + local->dentry_child_count = count; + local->d_off = offset; + } + UNLOCK (&local->lock); + + + list_for_each_entry (entry, (&entries->list), list) { + gf_log (this->name, GF_LOG_DEBUG, "entry = %s", entry->d_name); + + if ((!strcmp (entry->d_name, ".")) || (!strcmp (entry->d_name, + ".."))) { + gf_log (this->name, GF_LOG_DEBUG, 
"entry = %s", + entry->d_name); + continue; + } + + ret = mq_loc_fill_from_name (this, &loc, &local->loc, + entry->d_ino, entry->d_name); + if (ret < 0) + goto out; + + ret = 0; + + LOCK (&local->lock); + { + if (local->err != -2) { + newframe = copy_frame (frame); + if (!newframe) { + ret = -1; + } + } else + ret = -1; + } + UNLOCK (&local->lock); + + if (ret == -1) + goto out; + + newframe->local = mq_local_ref (local); + + dict = dict_new (); + if (!dict) { + ret = -1; + goto out; + } + + GET_CONTRI_KEY (contri_key, local->loc.inode->gfid, ret); + if (ret < 0) + goto out; + + ret = dict_set_int64 (dict, contri_key, 0); + if (ret) + goto out; + + STACK_WIND (newframe, + mq_get_child_contribution, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + &loc, dict); + + offset = entry->d_off; + + loc_wipe (&loc); + + newframe = NULL; + + out: + if (dict) { + dict_unref (dict); + dict = NULL; + } + + if (ret) { + val = -2; + mq_test_and_set_local_err (local, &val); + + if (newframe) { + newframe->local = NULL; + mq_local_unref(this, local); + QUOTA_STACK_DESTROY (newframe, this); + } + + break; + } + } + + if (ret && val != -2) { + mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL); + } +end: + mq_local_unref (this, local); + + return 0; +} + +int32_t +mq_dirty_inode_readdir (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, dict_t *xdata) +{ + quota_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1) { + local->err = -1; + mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL); + return 0; + } + + if (local->fd == NULL) + local->fd = fd_ref (fd); + + STACK_WIND (frame, + mq_readdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, + local->fd, READDIR_BUF, local->d_off, xdata); + + return 0; +} + +int32_t +mq_check_if_still_dirty (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct iatt *buf, 
+ dict_t *dict, + struct iatt *postparent) +{ + int8_t dirty = -1; + int32_t ret = -1; + fd_t *fd = NULL; + quota_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "failed to get " + "the dirty xattr for %s", local->loc.path); + goto err; + } + + if (!dict) { + ret = -1; + goto err; + } + + ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty); + if (ret) + goto err; + + //the inode is not dirty anymore + if (dirty == 0) { + mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL); + + return 0; + } + + fd = fd_create (local->loc.inode, frame->root->pid); + + local->d_off = 0; + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, buf->ia_gfid); + + GF_UUID_ASSERT (local->loc.gfid); + STACK_WIND(frame, + mq_dirty_inode_readdir, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, + &local->loc, fd, NULL); + + ret = 0; + +err: + if (op_ret == -1 || ret == -1) { + local->err = -1; + mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL); + } + + if (fd != NULL) { + fd_unref (fd); + } + + return 0; +} + +int32_t +mq_get_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t ret = -1; + dict_t *xattr_req = NULL; + quota_local_t *local = NULL; + + if (op_ret == -1) { + mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL); + return 0; + } + + local = frame->local; + + xattr_req = dict_new (); + if (xattr_req == NULL) { + ret = -1; + goto err; + } + + ret = dict_set_int8 (xattr_req, QUOTA_DIRTY_KEY, 0); + if (ret) + goto err; + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, local->loc.inode->gfid); + + GF_UUID_ASSERT (local->loc.gfid); + + STACK_WIND (frame, + mq_check_if_still_dirty, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + &local->loc, + xattr_req); + ret = 0; + +err: + if (ret) { + local->err = -1; + mq_release_lock_on_dirty_inode(frame, NULL, this, 0, 0, NULL); + 
} + + if (xattr_req) + dict_unref (xattr_req); + + return 0; +} + +/* return 1 when dirty updation started + * 0 other wise + */ +int32_t +mq_update_dirty_inode (xlator_t *this, + loc_t *loc, + quota_inode_ctx_t *ctx, + inode_contribution_t *contribution) +{ + int32_t ret = -1; + quota_local_t *local = NULL; + gf_boolean_t status = _gf_false; + struct gf_flock lock = {0, }; + call_frame_t *frame = NULL; + + ret = mq_get_ctx_updation_status (ctx, &status); + if (ret == -1 || status == _gf_true) { + ret = 0; + goto out; + } + + frame = create_frame (this, this->ctx->pool); + if (frame == NULL) { + ret = -1; + goto out; + } + + mq_assign_lk_owner (this, frame); + + local = mq_local_new (); + if (local == NULL) + goto fr_destroy; + + frame->local = local; + ret = mq_loc_copy (&local->loc, loc); + if (ret < 0) + goto fr_destroy; + + local->ctx = ctx; + + local->contri = contribution; + + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + + if (local->loc.inode == NULL) { + ret = -1; + gf_log (this->name, GF_LOG_WARNING, + "Inode is NULL, so can't stackwind."); + goto fr_destroy; + } + + STACK_WIND (frame, + mq_get_dirty_xattr, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &local->loc, F_SETLKW, &lock, NULL); + return 1; + +fr_destroy: + QUOTA_STACK_DESTROY (frame, this); +out: + + return 0; +} + + +int32_t +mq_inode_creation_done (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + quota_local_t *local = NULL; + + if (frame == NULL) + return 0; + + local = frame->local; + + if (local != NULL) { + mq_initiate_quota_txn (this, &local->loc); + } + + QUOTA_STACK_DESTROY (frame, this); + + return 0; +} + + +int32_t +mq_xattr_creation_release_lock (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + struct gf_flock lock = {0, }; + quota_local_t *local = NULL; + + local = frame->local; + + lock.l_type = 
F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = 0; + + STACK_WIND (frame, + mq_inode_creation_done, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &local->loc, + F_SETLKW, &lock, NULL); + + return 0; +} + + +int32_t +mq_create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + int32_t ret = -1; + dict_t *newdict = NULL; + quota_local_t *local = NULL; + + if (op_ret < 0) { + goto err; + } + + local = frame->local; + + if (local->loc.inode->ia_type == IA_IFDIR) { + newdict = dict_new (); + if (!newdict) { + goto err; + } + + ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0); + if (ret == -1) { + goto err; + } + + uuid_copy (local->loc.gfid, local->loc.inode->gfid); + GF_UUID_ASSERT (local->loc.gfid); + + STACK_WIND (frame, mq_xattr_creation_release_lock, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + &local->loc, newdict, 0, NULL); + } else { + mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL); + } + + ret = 0; + +err: + if (ret < 0) { + mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL); + } + + if (newdict != NULL) + dict_unref (newdict); + + return 0; +} + + +int32_t +mq_create_xattr (xlator_t *this, call_frame_t *frame) +{ + int32_t ret = 0; + int64_t *value = NULL; + int64_t *size = NULL; + dict_t *dict = NULL; + char key[512] = {0, }; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contri = NULL; + + if (frame == NULL || this == NULL) + return 0; + + local = frame->local; + + ret = mq_inode_ctx_get (local->loc.inode, this, &ctx); + if (ret < 0) { + ctx = mq_inode_ctx_new (local->loc.inode, this); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "mq_inode_ctx_new failed"); + ret = -1; + goto out; + } + } + + dict = dict_new (); + if (!dict) + goto out; + + if (local->loc.inode->ia_type == IA_IFDIR) { + QUOTA_ALLOC_OR_GOTO 
(size, int64_t, ret, err); + ret = dict_set_bin (dict, QUOTA_SIZE_KEY, size, 8); + if (ret < 0) + goto free_size; + } + + if (strcmp (local->loc.path, "/") != 0) { + contri = mq_add_new_contribution_node (this, ctx, &local->loc); + if (contri == NULL) + goto err; + + QUOTA_ALLOC_OR_GOTO (value, int64_t, ret, err); + GET_CONTRI_KEY (key, local->loc.parent->gfid, ret); + + ret = dict_set_bin (dict, key, value, 8); + if (ret < 0) + goto free_value; + } + + GF_UUID_ASSERT (local->loc.gfid); + + STACK_WIND (frame, mq_create_dirty_xattr, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, &local->loc, + GF_XATTROP_ADD_ARRAY64, dict, NULL); + ret = 0; + +free_size: + if (ret < 0) { + GF_FREE (size); + } + +free_value: + if (ret < 0) { + GF_FREE (value); + } + +err: + dict_unref (dict); + +out: + if (ret < 0) { + mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL); + } + + return 0; +} + + +int32_t +mq_check_n_set_inode_xattr (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *dict, + struct iatt *postparent) +{ + quota_local_t *local = NULL; + int64_t *size = NULL, *contri = NULL; + int8_t dirty = 0; + int32_t ret = 0; + char contri_key[512] = {0, }; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + + ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); + if (ret < 0) + goto create_xattr; + + ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty); + if (ret < 0) + goto create_xattr; + + //check contribution xattr if not root + if (strcmp (local->loc.path, "/") != 0) { + GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); + if (ret < 0) + goto out; + + ret = dict_get_bin (dict, contri_key, (void **) &contri); + if (ret < 0) + goto create_xattr; + } + +out: + mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL); + return 0; + +create_xattr: + if (uuid_is_null (local->loc.gfid)) { + uuid_copy (local->loc.gfid, buf->ia_gfid); + } + + 
mq_create_xattr (this, frame); + return 0; +} + + +int32_t +mq_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + dict_t *xattr_req = NULL; + quota_local_t *local = NULL; + int32_t ret = 0; + + if (op_ret < 0) { + goto lock_err; + } + + local = frame->local; + + xattr_req = dict_new (); + if (xattr_req == NULL) { + goto err; + } + + ret = mq_req_xattr (this, &local->loc, xattr_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "cannot request xattr"); + goto err; + } + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, local->loc.inode->gfid); + + GF_UUID_ASSERT (local->loc.gfid); + + STACK_WIND (frame, mq_check_n_set_inode_xattr, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &local->loc, xattr_req); + + dict_unref (xattr_req); + + return 0; + +err: + mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL); + + if (xattr_req) + dict_unref (xattr_req); + return 0; + +lock_err: + mq_inode_creation_done (frame, NULL, this, 0, 0, NULL); + return 0; +} + + +int32_t +mq_set_inode_xattr (xlator_t *this, loc_t *loc) +{ + struct gf_flock lock = {0, }; + quota_local_t *local = NULL; + int32_t ret = 0; + call_frame_t *frame = NULL; + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + ret = -1; + goto err; + } + + local = mq_local_new (); + if (local == NULL) { + goto err; + } + + frame->local = local; + + ret = loc_copy (&local->loc, loc); + if (ret < 0) { + goto err; + } + + frame->local = local; + + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + + STACK_WIND (frame, + mq_get_xattr, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &local->loc, F_SETLKW, &lock, NULL); + + return 0; + +err: + QUOTA_STACK_DESTROY (frame, this); + + return 0; +} + + +int32_t +mq_get_parent_inode_local (xlator_t *this, quota_local_t *local) +{ + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + + 
GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO ("marker", local, out); + + local->contri = NULL; + + loc_wipe (&local->loc); + + ret = mq_loc_copy (&local->loc, &local->parent_loc); + if (ret < 0) { + gf_log_callingfn (this->name, GF_LOG_WARNING, + "loc copy failed"); + goto out; + } + + loc_wipe (&local->parent_loc); + + ret = mq_inode_loc_fill (NULL, local->loc.parent, + &local->parent_loc); + if (ret < 0) { + gf_log_callingfn (this->name, GF_LOG_WARNING, + "failed to build parent loc of %s", + local->loc.path); + goto out; + } + + ret = mq_inode_ctx_get (local->loc.inode, this, &ctx); + if (ret < 0) { + gf_log_callingfn (this->name, GF_LOG_WARNING, + "inode ctx get failed"); + goto out; + } + + local->ctx = ctx; + + if (list_empty (&ctx->contribution_head)) { + gf_log_callingfn (this->name, GF_LOG_WARNING, + "contribution node list is empty which " + "is an error"); + ret = -1; + goto out; + } + + local->contri = (inode_contribution_t *) ctx->contribution_head.next; + + ret = 0; +out: + return ret; +} + + +int32_t +mq_xattr_updation_done (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict, dict_t *xdata) +{ + QUOTA_STACK_DESTROY (frame, this); + return 0; +} + + +int32_t +mq_inodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t ret = 0; + gf_boolean_t status = _gf_false; + quota_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1 || local->err) { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "unlocking failed on path (%s)(%s)", + local->parent_loc.path, strerror (op_errno)); + } + mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, NULL); + + return 0; + } + + gf_log (this->name, GF_LOG_DEBUG, + "inodelk released on %s", local->parent_loc.path); + + if ((strcmp (local->parent_loc.path, "/") == 0) + || (local->delta == 0)) { + mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, 
NULL); + } else { + ret = mq_get_parent_inode_local (this, local); + if (ret < 0) { + mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, + NULL); + goto out; + } + status = _gf_true; + + ret = mq_test_and_set_ctx_updation_status (local->ctx, &status); + if (ret == 0 && status == _gf_false) { + mq_get_lock_on_parent (frame, this); + } else { + mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, + NULL); + } + } +out: + return 0; +} + + +//now release lock on the parent inode +int32_t +mq_release_parent_lock (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + int32_t ret = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + struct gf_flock lock = {0, }; + + local = frame->local; + + if (local->err != 0) { + gf_log_callingfn (this->name, + (local->err == ENOENT) ? GF_LOG_DEBUG + : GF_LOG_WARNING, + "An operation during quota updation " + "of path (%s) failed (%s)", local->loc.path, + strerror (local->err)); + } + + ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); + if (ret < 0) + goto wind; + + LOCK (&ctx->lock); + { + ctx->dirty = 0; + } + UNLOCK (&ctx->lock); + + if (local->parent_loc.inode == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "Invalid parent inode."); + goto err; + } + +wind: + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = 0; + + STACK_WIND (frame, + mq_inodelk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &local->parent_loc, + F_SETLKW, &lock, NULL); + + return 0; +err: + mq_xattr_updation_done (frame, NULL, this, + 0, 0 , NULL, NULL); + return 0; +} + + +int32_t +mq_mark_undirty (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict, dict_t *xdata) +{ + int32_t ret = -1; + int64_t *size = NULL; + dict_t *newdict = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + local = frame->local; + + if 
(op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, "%s occurred while" + " updating the size of %s", strerror (op_errno), + local->parent_loc.path); + + goto err; + } + + //update the size of the parent inode + if (dict != NULL) { + ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + LOCK (&ctx->lock); + { + if (size) + ctx->size = ntoh64 (*size); + gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64, + local->parent_loc.path, ctx->size); + } + UNLOCK (&ctx->lock); + } + + newdict = dict_new (); + if (!newdict) { + op_errno = ENOMEM; + goto err; + } + + ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0); + + if (ret == -1) { + op_errno = -ret; + goto err; + } + + uuid_copy (local->parent_loc.gfid, local->parent_loc.inode->gfid); + GF_UUID_ASSERT (local->parent_loc.gfid); + + STACK_WIND (frame, mq_release_parent_lock, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + &local->parent_loc, newdict, 0, NULL); + + ret = 0; +err: + if (op_ret == -1 || ret == -1) { + local->err = op_errno; + + mq_release_parent_lock (frame, NULL, this, 0, 0, NULL); + } + + if (newdict) + dict_unref (newdict); + + return 0; +} + + +int32_t +mq_update_parent_size (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict, dict_t *xdata) +{ + int64_t *size = NULL; + int32_t ret = -1; + dict_t *newdict = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + local = frame->local; + + if (op_ret == -1) { + gf_log (this->name, ((op_errno == ENOENT) ? 
GF_LOG_DEBUG : + GF_LOG_WARNING), + "xattrop call failed: %s", strerror (op_errno)); + + goto err; + } + + LOCK (&local->contri->lock); + { + local->contri->contribution += local->delta; + } + UNLOCK (&local->contri->lock); + + gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64, + local->loc.path, local->ctx->size, + local->contri->contribution); + + if (dict == NULL) { + op_errno = EINVAL; + goto err; + } + + ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + newdict = dict_new (); + if (!newdict) { + op_errno = ENOMEM; + ret = -1; + goto err; + } + + QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err); + + *size = hton64 (local->delta); + + ret = dict_set_bin (newdict, QUOTA_SIZE_KEY, size, 8); + if (ret < 0) { + op_errno = -ret; + goto err; + } + + if (uuid_is_null (local->parent_loc.gfid)) + uuid_copy (local->parent_loc.gfid, + local->parent_loc.inode->gfid); + GF_UUID_ASSERT (local->parent_loc.gfid); + + STACK_WIND (frame, + mq_mark_undirty, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, + &local->parent_loc, + GF_XATTROP_ADD_ARRAY64, + newdict, NULL); + ret = 0; +err: + if (op_ret == -1 || ret < 0) { + local->err = op_errno; + mq_release_parent_lock (frame, NULL, this, 0, 0, NULL); + } + + if (newdict) + dict_unref (newdict); + + return 0; +} + +int32_t +mq_update_inode_contribution (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, + struct iatt *postparent) +{ + int32_t ret = -1; + int64_t *size = NULL, size_int = 0, contri_int = 0; + int64_t *contri = NULL; + int64_t *delta = NULL; + char contri_key [512] = {0, }; + dict_t *newdict = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contribution = NULL; + + local = frame->local; + + if (op_ret == -1) { + gf_log (this->name, ((op_errno == ENOENT) ? 
GF_LOG_DEBUG : + GF_LOG_WARNING), + "failed to get size and contribution of path (%s)(%s)", + local->loc.path, strerror (op_errno)); + goto err; + } + + ctx = local->ctx; + contribution = local->contri; + + //prepare to update size & contribution of the inode + GET_CONTRI_KEY (contri_key, contribution->gfid, ret); + if (ret == -1) { + op_errno = ENOMEM; + goto err; + } + + LOCK (&ctx->lock); + { + if (local->loc.inode->ia_type == IA_IFDIR ) { + ret = dict_get_bin (dict, QUOTA_SIZE_KEY, + (void **) &size); + if (ret < 0) { + op_errno = EINVAL; + goto unlock; + } + + ctx->size = ntoh64 (*size); + } else + ctx->size = buf->ia_blocks * 512; + + size_int = ctx->size; + } +unlock: + UNLOCK (&ctx->lock); + + if (ret < 0) { + goto err; + } + + ret = dict_get_bin (dict, contri_key, (void **) &contri); + + LOCK (&contribution->lock); + { + if (ret < 0) + contribution->contribution = 0; + else + contribution->contribution = ntoh64 (*contri); + + contri_int = contribution->contribution; + } + UNLOCK (&contribution->lock); + + gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64, + local->loc.path, size_int, contri_int); + + local->delta = size_int - contri_int; + + if (local->delta == 0) { + mq_mark_undirty (frame, NULL, this, 0, 0, NULL, NULL); + return 0; + } + + newdict = dict_new (); + if (newdict == NULL) { + op_errno = ENOMEM; + ret = -1; + goto err; + } + + QUOTA_ALLOC_OR_GOTO (delta, int64_t, ret, err); + + *delta = hton64 (local->delta); + + ret = dict_set_bin (newdict, contri_key, delta, 8); + if (ret < 0) { + op_errno = -ret; + ret = -1; + goto err; + } + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, buf->ia_gfid); + + GF_UUID_ASSERT (local->loc.gfid); + + STACK_WIND (frame, + mq_update_parent_size, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, + &local->loc, + GF_XATTROP_ADD_ARRAY64, + newdict, NULL); + ret = 0; + +err: + if (op_ret == -1 || ret < 0) { + local->err = op_errno; + + mq_release_parent_lock (frame, NULL, this, 0, 0, 
NULL); + } + + if (newdict) + dict_unref (newdict); + + return 0; +} + +int32_t +mq_fetch_child_size_and_contri (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + int32_t ret = -1; + char contri_key [512] = {0, }; + dict_t *newdict = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + local = frame->local; + + if (op_ret == -1) { + gf_log (this->name, (op_errno == ENOENT) ? GF_LOG_DEBUG + : GF_LOG_WARNING, + "couldnt mark inode corresponding to path (%s) dirty " + "(%s)", local->parent_loc.path, strerror (op_errno)); + goto err; + } + + VALIDATE_OR_GOTO (local->ctx, err); + VALIDATE_OR_GOTO (local->contri, err); + + gf_log (this->name, GF_LOG_DEBUG, "%s marked dirty", local->parent_loc.path); + + //update parent ctx + ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); + if (ret == -1) { + op_errno = EINVAL; + goto err; + } + + LOCK (&ctx->lock); + { + ctx->dirty = 1; + } + UNLOCK (&ctx->lock); + + newdict = dict_new (); + if (newdict == NULL) { + op_errno = ENOMEM; + goto err; + } + + if (local->loc.inode->ia_type == IA_IFDIR) { + ret = dict_set_int64 (newdict, QUOTA_SIZE_KEY, 0); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "dict_set failed."); + goto err; + } + } + + GET_CONTRI_KEY (contri_key, local->contri->gfid, ret); + if (ret < 0) { + op_errno = ENOMEM; + goto err; + } + + ret = dict_set_int64 (newdict, contri_key, 0); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "dict_set failed."); + goto err; + } + + mq_set_ctx_updation_status (local->ctx, _gf_false); + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, local->loc.inode->gfid); + + GF_UUID_ASSERT (local->loc.gfid); + + STACK_WIND (frame, mq_update_inode_contribution, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &local->loc, newdict); + + ret = 0; + +err: + if ((op_ret == -1) || (ret < 0)) { + local->err = op_errno; + + mq_set_ctx_updation_status (local->ctx, 
_gf_false); + + mq_release_parent_lock (frame, NULL, this, 0, 0, NULL); + } + + if (newdict) + dict_unref (newdict); + + return 0; +} + +int32_t +mq_markdirty (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t ret = -1; + dict_t *dict = NULL; + quota_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1){ + gf_log (this->name, (op_errno == ENOENT) ? GF_LOG_DEBUG + : GF_LOG_WARNING, "acquiring locks failed on %s (%s)", + local->parent_loc.path, strerror (op_errno)); + + local->err = op_errno; + + mq_set_ctx_updation_status (local->ctx, _gf_false); + + mq_inodelk_cbk (frame, NULL, this, 0, 0, NULL); + + return 0; + } + + gf_log (this->name, GF_LOG_TRACE, + "inodelk succeeded on %s", local->parent_loc.path); + + dict = dict_new (); + if (!dict) { + ret = -1; + goto err; + } + + ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 1); + if (ret == -1) + goto err; + + uuid_copy (local->parent_loc.gfid, + local->parent_loc.inode->gfid); + GF_UUID_ASSERT (local->parent_loc.gfid); + + STACK_WIND (frame, mq_fetch_child_size_and_contri, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + &local->parent_loc, dict, 0, NULL); + + ret = 0; +err: + if (ret == -1) { + local->err = 1; + + mq_set_ctx_updation_status (local->ctx, _gf_false); + + mq_release_parent_lock (frame, NULL, this, 0, 0, NULL); + } + + if (dict) + dict_unref (dict); + + return 0; +} + + +int32_t +mq_get_lock_on_parent (call_frame_t *frame, xlator_t *this) +{ + struct gf_flock lock = {0, }; + quota_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO ("marker", frame, fr_destroy); + + local = frame->local; + gf_log (this->name, GF_LOG_DEBUG, "taking lock on %s", + local->parent_loc.path); + + if (local->parent_loc.inode == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "parent inode is not valid, aborting " + "transaction."); + goto fr_destroy; + } + + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; 
+ + STACK_WIND (frame, + mq_markdirty, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &local->parent_loc, F_SETLKW, &lock, NULL); + + return 0; + +fr_destroy: + QUOTA_STACK_DESTROY (frame, this); + + return -1; +} + + +int +mq_start_quota_txn (xlator_t *this, loc_t *loc, + quota_inode_ctx_t *ctx, + inode_contribution_t *contri) +{ + int32_t ret = -1; + call_frame_t *frame = NULL; + quota_local_t *local = NULL; + + frame = create_frame (this, this->ctx->pool); + if (frame == NULL) + goto err; + + mq_assign_lk_owner (this, frame); + + local = mq_local_new (); + if (local == NULL) + goto fr_destroy; + + frame->local = local; + + ret = mq_loc_copy (&local->loc, loc); + if (ret < 0) + goto fr_destroy; + + ret = mq_inode_loc_fill (NULL, local->loc.parent, + &local->parent_loc); + if (ret < 0) + goto fr_destroy; + + local->ctx = ctx; + local->contri = contri; + + ret = mq_get_lock_on_parent (frame, this); + if (ret == -1) + goto err; + + return 0; + +fr_destroy: + QUOTA_STACK_DESTROY (frame, this); +err: + mq_set_ctx_updation_status (ctx, _gf_false); + + return -1; +} + + +int +mq_initiate_quota_txn (xlator_t *this, loc_t *loc) +{ + int32_t ret = -1; + gf_boolean_t status = _gf_false; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contribution = NULL; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO ("marker", loc, out); + GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); + + ret = mq_inode_ctx_get (loc->inode, this, &ctx); + if (ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "inode ctx get failed, aborting quota txn"); + ret = -1; + goto out; + } + + contribution = mq_get_contribution_node (loc->parent, ctx); + if (contribution == NULL) + goto out; + + /* To improve performance, donot start another transaction + * if one is already in progress for same inode + */ + status = _gf_true; + + ret = mq_test_and_set_ctx_updation_status (ctx, &status); + if (ret < 0) + goto out; + + if (status == _gf_false) { + 
mq_start_quota_txn (this, loc, ctx, contribution); + } + + ret = 0; +out: + return ret; +} + + +/* int32_t */ +/* validate_inode_size_contribution (xlator_t *this, loc_t *loc, int64_t size, */ +/* int64_t contribution) */ +/* { */ +/* if (size != contribution) { */ +/* mq_initiate_quota_txn (this, loc); */ +/* } */ + +/* return 0; */ +/* } */ + + +int32_t +mq_inspect_directory_xattr (xlator_t *this, + loc_t *loc, + dict_t *dict, + struct iatt buf) +{ + int32_t ret = 0; + int8_t dirty = -1; + int64_t *size = NULL, size_int = 0; + int64_t *contri = NULL, contri_int = 0; + char contri_key [512] = {0, }; + gf_boolean_t not_root = _gf_false; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contribution = NULL; + + ret = mq_inode_ctx_get (loc->inode, this, &ctx); + if (ret < 0) { + ctx = mq_inode_ctx_new (loc->inode, this); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "mq_inode_ctx_new failed"); + ret = -1; + goto err; + } + } + + if (strcmp (loc->path, "/") != 0) { + contribution = mq_add_new_contribution_node (this, ctx, loc); + if (contribution == NULL) { + if (!uuid_is_null (loc->inode->gfid)) + gf_log (this->name, GF_LOG_WARNING, + "cannot add a new contribution node"); + ret = -1; + goto err; + } + } + + ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); + if (ret < 0) + goto out; + + ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty); + if (ret < 0) + goto out; + + if (strcmp (loc->path, "/") != 0) { + not_root = _gf_true; + + GET_CONTRI_KEY (contri_key, contribution->gfid, ret); + if (ret < 0) + goto out; + + ret = dict_get_bin (dict, contri_key, (void **) &contri); + if (ret < 0) + goto out; + + LOCK (&contribution->lock); + { + contribution->contribution = ntoh64 (*contri); + contri_int = contribution->contribution; + } + UNLOCK (&contribution->lock); + } + + LOCK (&ctx->lock); + { + ctx->size = ntoh64 (*size); + ctx->dirty = dirty; + size_int = ctx->size; + } + UNLOCK (&ctx->lock); + + gf_log (this->name, GF_LOG_DEBUG, 
"size=%"PRId64 + " contri=%"PRId64, size_int, contri_int); + + if (dirty) { + ret = mq_update_dirty_inode (this, loc, ctx, contribution); + } + + if ((!dirty || ret == 0) && (not_root == _gf_true) && + (size_int != contri_int)) { + mq_initiate_quota_txn (this, loc); + } + + ret = 0; +out: + if (ret) + mq_set_inode_xattr (this, loc); +err: + return ret; +} + +int32_t +mq_inspect_file_xattr (xlator_t *this, + loc_t *loc, + dict_t *dict, + struct iatt buf) +{ + int32_t ret = -1; + uint64_t contri_int = 0, size = 0; + int64_t *contri_ptr = NULL; + char contri_key [512] = {0, }; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contribution = NULL; + + ret = mq_inode_ctx_get (loc->inode, this, &ctx); + if (ret < 0) { + ctx = mq_inode_ctx_new (loc->inode, this); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "mq_inode_ctx_new failed"); + ret = -1; + goto out; + } + } + + contribution = mq_add_new_contribution_node (this, ctx, loc); + if (contribution == NULL) + goto out; + + LOCK (&ctx->lock); + { + ctx->size = 512 * buf.ia_blocks; + size = ctx->size; + } + UNLOCK (&ctx->lock); + + list_for_each_entry (contribution, &ctx->contribution_head, + contri_list) { + GET_CONTRI_KEY (contri_key, contribution->gfid, ret); + if (ret < 0) + continue; + + ret = dict_get_bin (dict, contri_key, (void **) &contri_int); + if (ret == 0) { + contri_ptr = (int64_t *)(unsigned long)contri_int; + + LOCK (&contribution->lock); + { + contribution->contribution = ntoh64 (*contri_ptr); + contri_int = contribution->contribution; + } + UNLOCK (&contribution->lock); + + gf_log (this->name, GF_LOG_DEBUG, + "size=%"PRId64 " contri=%"PRId64, size, contri_int); + + if (size != contri_int) { + mq_initiate_quota_txn (this, loc); + } + } else + mq_initiate_quota_txn (this, loc); + } + +out: + return ret; +} + +int32_t +mq_xattr_state (xlator_t *this, + loc_t *loc, + dict_t *dict, + struct iatt buf) +{ + if (buf.ia_type == IA_IFREG || + buf.ia_type == IA_IFLNK) { + mq_inspect_file_xattr 
(this, loc, dict, buf); + } else if (buf.ia_type == IA_IFDIR) + mq_inspect_directory_xattr (this, loc, dict, buf); + + return 0; +} + +int32_t +mq_req_xattr (xlator_t *this, + loc_t *loc, + dict_t *dict) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO ("marker", dict, out); + + if (!loc) + goto set_size; + + //if not "/" then request contribution + if (strcmp (loc->path, "/") == 0) + goto set_size; + + ret = mq_dict_set_contribution (this, dict, loc); + if (ret == -1) + goto out; + +set_size: + ret = dict_set_uint64 (dict, QUOTA_SIZE_KEY, 0); + if (ret < 0) { + ret = -1; + goto out; + } + + ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 0); + if (ret < 0) { + ret = -1; + goto out; + } + + ret = 0; + +out: + return ret; +} + + +int32_t +mq_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + QUOTA_STACK_DESTROY (frame, this); + + return 0; +} + +int32_t +_mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t ret = 0; + char contri_key [512] = {0, }; + quota_local_t *local = NULL; + + local = (quota_local_t *) frame->local; + + if (op_ret == -1 || local->err == -1) { + mq_removexattr_cbk (frame, NULL, this, -1, 0, NULL); + return 0; + } + + frame->local = NULL; + + if (local->hl_count > 1) { + GET_CONTRI_KEY (contri_key, local->contri->gfid, ret); + + STACK_WIND (frame, mq_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + &local->loc, contri_key, NULL); + ret = 0; + } else { + mq_removexattr_cbk (frame, NULL, this, 0, 0, NULL); + } + + if (strcmp (local->parent_loc.path, "/") != 0) { + ret = mq_get_parent_inode_local (this, local); + if (ret < 0) + goto out; + + mq_start_quota_txn (this, &local->loc, local->ctx, local->contri); + } +out: + mq_local_unref (this, local); + + return 0; +} + +int32_t +mq_inode_remove_done (call_frame_t *frame, void 
*cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + int32_t ret = -1; + struct gf_flock lock = {0, }; + quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + int64_t contribution = 0; + + local = frame->local; + if (op_ret == -1) + local->err = -1; + + ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); + + LOCK (&local->contri->lock); + { + contribution = local->contri->contribution; + } + UNLOCK (&local->contri->lock); + + if (contribution == local->size) { + if (ret == 0) { + LOCK (&ctx->lock); + { + ctx->size -= contribution; + } + UNLOCK (&ctx->lock); + + LOCK (&local->contri->lock); + { + local->contri->contribution = 0; + } + UNLOCK (&local->contri->lock); + } + } + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = 0; + + STACK_WIND (frame, + _mq_inode_remove_done, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &local->parent_loc, + F_SETLKW, &lock, NULL); + return 0; +} + +int32_t +mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t ret = -1; + int64_t *size = NULL; + dict_t *dict = NULL; + quota_local_t *local = NULL; + + local = frame->local; + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "inodelk set failed on %s", local->parent_loc.path); + QUOTA_STACK_DESTROY (frame, this); + return 0; + } + + VALIDATE_OR_GOTO (local->contri, err); + + dict = dict_new (); + if (dict == NULL) { + ret = -1; + goto err; + } + + QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err); + + *size = hton64 (-local->size); + + ret = dict_set_bin (dict, QUOTA_SIZE_KEY, size, 8); + if (ret < 0) + goto err; + + uuid_copy (local->parent_loc.gfid, + local->parent_loc.inode->gfid); + GF_UUID_ASSERT (local->parent_loc.gfid); + + STACK_WIND (frame, mq_inode_remove_done, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, &local->parent_loc, + 
GF_XATTROP_ADD_ARRAY64, dict, NULL); + dict_unref (dict); + return 0; + +err: + local->err = 1; + mq_inode_remove_done (frame, NULL, this, -1, 0, NULL, NULL); + if (dict) + dict_unref (dict); + return 0; +} + +int32_t +mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri) +{ + int32_t ret = -1; + struct gf_flock lock = {0,}; + call_frame_t *frame = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contribution = NULL; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO ("marker", loc, out); + + ret = mq_inode_ctx_get (loc->inode, this, &ctx); + if (ret < 0) + goto out; + + contribution = mq_get_contribution_node (loc->parent, ctx); + if (contribution == NULL) + goto out; + + local = mq_local_new (); + if (local == NULL) { + ret = -1; + goto out; + } + + if (contri >= 0) { + local->size = contri; + } else { + LOCK (&contribution->lock); + { + local->size = contribution->contribution; + } + UNLOCK (&contribution->lock); + } + + if (local->size == 0) { + ret = 0; + goto out; + } + + ret = mq_loc_copy (&local->loc, loc); + if (ret < 0) + goto out; + + local->ctx = ctx; + local->contri = contribution; + + ret = mq_inode_loc_fill (NULL, loc->parent, &local->parent_loc); + if (ret < 0) + goto out; + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + ret = -1; + goto out; + } + + mq_assign_lk_owner (this, frame); + + frame->local = local; + + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + + if (local->parent_loc.inode == NULL) { + ret = -1; + gf_log (this->name, GF_LOG_DEBUG, + "Inode is NULL, so can't stackwind."); + goto out; + } + + STACK_WIND (frame, + mq_reduce_parent_size_xattr, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &local->parent_loc, F_SETLKW, &lock, NULL); + local = NULL; + ret = 0; + +out: + if (local != NULL) + mq_local_unref (this, local); + + return ret; +} + + +int32_t +init_quota_priv (xlator_t 
*this) +{ + return 0; +} + + +int32_t +mq_rename_update_newpath (xlator_t *this, loc_t *loc) +{ + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contribution = NULL; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO ("marker", loc, out); + GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); + + ret = mq_inode_ctx_get (loc->inode, this, &ctx); + if (ret < 0) + goto out; + + contribution = mq_add_new_contribution_node (this, ctx, loc); + if (contribution == NULL) { + ret = -1; + goto out; + } + + mq_initiate_quota_txn (this, loc); +out: + return ret; +} + +int32_t +mq_forget (xlator_t *this, quota_inode_ctx_t *ctx) +{ + inode_contribution_t *contri = NULL; + inode_contribution_t *next = NULL; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO ("marker", ctx, out); + + list_for_each_entry_safe (contri, next, &ctx->contribution_head, + contri_list) { + list_del (&contri->contri_list); + GF_FREE (contri); + } + + LOCK_DESTROY (&ctx->lock); + GF_FREE (ctx); +out: + return 0; +} diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h new file mode 100644 index 000000000..385760ac4 --- /dev/null +++ b/xlators/features/marker/src/marker-quota.h @@ -0,0 +1,130 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _MARKER_QUOTA_H +#define _MARKER_QUOTA_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "marker-mem-types.h" + +#define QUOTA_XATTR_PREFIX "trusted.glusterfs" +#define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty" + +#define CONTRIBUTION "contri" +#define CONTRI_KEY_MAX 512 +#define READDIR_BUF 4096 + + +#define QUOTA_STACK_DESTROY(_frame, _this) \ + do { \ + quota_local_t *_local = NULL; \ + _local = _frame->local; \ + _frame->local = NULL; \ + STACK_DESTROY (_frame->root); \ + mq_local_unref (_this, _local); \ + } while (0) + + +#define QUOTA_ALLOC(var, type, ret) \ + do { \ + ret = 0; \ + var = GF_CALLOC (sizeof (type), 1, \ + gf_marker_mt_##type); \ + if (!var) { \ + gf_log ("", GF_LOG_ERROR, \ + "out of memory"); \ + ret = -1; \ + } \ + } while (0); + +#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label) \ + do { \ + var = GF_CALLOC (sizeof (type), 1, \ + gf_marker_mt_##type); \ + if (!var) { \ + gf_log ("", GF_LOG_ERROR, \ + "out of memory"); \ + ret = -1; \ + goto label; \ + } \ + ret = 0; \ + } while (0); + +#define GET_CONTRI_KEY(var, _gfid, _ret) \ + do { \ + char _gfid_unparsed[40]; \ + uuid_unparse (_gfid, _gfid_unparsed); \ + _ret = snprintf (var, CONTRI_KEY_MAX, QUOTA_XATTR_PREFIX \ + ".%s.%s." 
CONTRIBUTION, "quota", \ + _gfid_unparsed); \ + } while (0); + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK (lock); \ + var ++; \ + UNLOCK (lock); \ + } while (0) + +struct quota_inode_ctx { + int64_t size; + int8_t dirty; + gf_boolean_t updation_status; + gf_lock_t lock; + struct list_head contribution_head; +}; +typedef struct quota_inode_ctx quota_inode_ctx_t; + +struct inode_contribution { + struct list_head contri_list; + int64_t contribution; + uuid_t gfid; + gf_lock_t lock; +}; +typedef struct inode_contribution inode_contribution_t; + +int32_t +mq_get_lock_on_parent (call_frame_t *, xlator_t *); + +int32_t +mq_req_xattr (xlator_t *, loc_t *, dict_t *); + +int32_t +init_quota_priv (xlator_t *); + +int32_t +mq_xattr_state (xlator_t *, loc_t *, dict_t *, struct iatt); + +int32_t +mq_set_inode_xattr (xlator_t *, loc_t *); + +int +mq_initiate_quota_txn (xlator_t *, loc_t *); + +int32_t +mq_dirty_inode_readdir (call_frame_t *, void *, xlator_t *, + int32_t, int32_t, fd_t *, dict_t *); + +int32_t +mq_reduce_parent_size (xlator_t *, loc_t *, int64_t); + +int32_t +mq_rename_update_newpath (xlator_t *, loc_t *); + +int32_t +mq_inspect_file_xattr (xlator_t *this, loc_t *loc, dict_t *dict, struct iatt buf); + +int32_t +mq_forget (xlator_t *, quota_inode_ctx_t *); +#endif diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c new file mode 100644 index 000000000..6a2c85691 --- /dev/null +++ b/xlators/features/marker/src/marker.c @@ -0,0 +1,2862 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" +#include "libxlator.h" +#include "marker.h" +#include "marker-mem-types.h" +#include "marker-quota.h" +#include "marker-quota-helper.h" +#include "marker-common.h" +#include "byte-order.h" + +#define _GF_UID_GID_CHANGED 1 + +void +fini (xlator_t *this); + +int32_t +marker_start_setxattr (call_frame_t *, xlator_t *); + +marker_local_t * +marker_local_ref (marker_local_t *local) +{ + GF_VALIDATE_OR_GOTO ("marker", local, err); + + LOCK (&local->lock); + { + local->ref++; + } + UNLOCK (&local->lock); + + return local; +err: + return NULL; +} + +int +marker_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path) +{ + int ret = -1; + + if (!loc) + return ret; + + if (inode) { + loc->inode = inode_ref (inode); + if (uuid_is_null (loc->gfid)) { + uuid_copy (loc->gfid, loc->inode->gfid); + } + } + + if (parent) + loc->parent = inode_ref (parent); + + if (path) { + loc->path = gf_strdup (path); + if (!loc->path) { + gf_log ("loc fill", GF_LOG_ERROR, "strdup failed"); + goto loc_wipe; + } + + loc->name = strrchr (loc->path, '/'); + if (loc->name) + loc->name++; + } + + ret = 0; +loc_wipe: + if (ret < 0) + loc_wipe (loc); + + return ret; +} + +int +marker_inode_loc_fill (inode_t *inode, loc_t *loc) +{ + char *resolvedpath = NULL; + int ret = -1; + inode_t *parent = NULL; + + if ((!inode) || (!loc)) + return ret; + + parent = inode_parent (inode, NULL, NULL); + + ret = inode_path (inode, NULL, &resolvedpath); + if (ret < 0) + goto err; + + ret = marker_loc_fill (loc, inode, parent, resolvedpath); + if (ret < 0) + goto err; + +err: + if (parent) + inode_unref (parent); + + GF_FREE (resolvedpath); + + return ret; +} + +int32_t +marker_trav_parent (marker_local_t *local) +{ + int32_t ret = 0; + loc_t loc = {0, }; + inode_t *parent = NULL; + int8_t need_unref = 0; + + if (!local->loc.parent) { + parent = inode_parent (local->loc.inode, NULL, NULL); + if 
(parent) + need_unref = 1; + } else + parent = local->loc.parent; + + ret = marker_inode_loc_fill (parent, &loc); + + if (ret < 0) { + ret = -1; + goto out; + } + + loc_wipe (&local->loc); + + local->loc = loc; +out: + if (need_unref) + inode_unref (parent); + + return ret; +} + +int32_t +marker_error_handler (xlator_t *this, marker_local_t *local, int32_t op_errno) +{ + marker_conf_t *priv = NULL; + const char *path = NULL; + + priv = (marker_conf_t *) this->private; + path = local + ? (local->loc.path + ? local->loc.path : uuid_utoa(local->loc.gfid)) + : "<nul>"; + + gf_log (this->name, GF_LOG_CRITICAL, + "Indexing gone corrupt at %s (reason: %s)." + " Geo-replication slave content needs to be revalidated", + path, strerror (op_errno)); + unlink (priv->timestamp_file); + + return 0; +} + +int32_t +marker_local_unref (marker_local_t *local) +{ + int32_t var = 0; + + if (local == NULL) + return -1; + + LOCK (&local->lock); + { + var = --local->ref; + } + UNLOCK (&local->lock); + + if (var != 0) + goto out; + + loc_wipe (&local->loc); + loc_wipe (&local->parent_loc); + if (local->xdata) + dict_unref (local->xdata); + + if (local->oplocal) { + marker_local_unref (local->oplocal); + local->oplocal = NULL; + } + mem_put (local); +out: + return 0; +} + +int32_t +stat_stampfile (xlator_t *this, marker_conf_t *priv, + struct volume_mark **status) +{ + struct stat buf = {0, }; + struct volume_mark *vol_mark = NULL; + + vol_mark = GF_CALLOC (sizeof (struct volume_mark), 1, + gf_marker_mt_volume_mark); + + vol_mark->major = 1; + vol_mark->minor = 0; + + GF_ASSERT (sizeof (priv->volume_uuid_bin) == 16); + memcpy (vol_mark->uuid, priv->volume_uuid_bin, 16); + + if (stat (priv->timestamp_file, &buf) != -1) { + vol_mark->retval = 0; + vol_mark->sec = htonl (buf.st_ctime); + vol_mark->usec = htonl (ST_CTIM_NSEC (&buf)/1000); + } else + vol_mark->retval = 1; + + *status = vol_mark; + + return 0; +} + +int32_t +marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this, + 
const char *name, struct volume_mark *vol_mark, + dict_t *xdata) +{ + int32_t ret = -1; + dict_t *dict = NULL; + + if (vol_mark == NULL){ + STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); + + goto out; + } + + dict = dict_new (); + + ret = dict_set_bin (dict, (char *)name, vol_mark, + sizeof (struct volume_mark)); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "failed to set key %s", + name); + + STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict, xdata); + + dict_unref (dict); +out: + return 0; +} + +int32_t +call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name) +{ + struct volume_mark *vol_mark = NULL; + marker_conf_t *priv = NULL; + gf_boolean_t ret = _gf_true; + + priv = (marker_conf_t *)this->private; + + if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL || + strcmp (name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) { + ret = _gf_false; + goto out; + } + + stat_stampfile (this, priv, &vol_mark); + + marker_getxattr_stampfile_cbk (frame, this, name, vol_mark, NULL); +out: + return ret; +} + +int32_t +marker_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + if (cookie) { + gf_log (this->name, GF_LOG_DEBUG, + "Filtering the quota extended attributes"); + + dict_foreach_fnmatch (dict, "trusted.glusterfs.quota*", + marker_filter_quota_xattr, NULL); + } + + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; +} + +int32_t +marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + gf_boolean_t ret = _gf_false; + marker_conf_t *priv = NULL; + unsigned long cookie = 0; + + priv = this->private; + + if (priv == NULL || (priv->feature_enabled & GF_XTIME) == 0) + goto wind; + + gf_log (this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid); + + ret = call_from_special_client (frame, this, name); +wind: + if (ret == _gf_false) { + if (name == NULL) { + 
/* Signifies that marker translator + * has to filter the quota's xattr's, + * this is to prevent afr from performing + * self healing on marker-quota xattrs' + */ + cookie = 1; + } + STACK_WIND_COOKIE (frame, marker_getxattr_cbk, (void *)cookie, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, + name, xdata); + } + + return 0; +} + + +int32_t +marker_setxattr_done (call_frame_t *frame) +{ + marker_local_t *local = NULL; + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_DESTROY (frame->root); + + marker_local_unref (local); + + return 0; +} + +int +marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t ret = 0; + int32_t done = 0; + marker_local_t *local = NULL; + + local = (marker_local_t*) frame->local; + + if (op_ret == -1 && op_errno == ENOSPC) { + marker_error_handler (this, local, op_errno); + done = 1; + goto out; + } + + if (local) { + if (local->loc.path && strcmp (local->loc.path, "/") == 0) { + done = 1; + goto out; + } + if (__is_root_gfid (local->loc.gfid)) { + done = 1; + goto out; + } + } + + ret = marker_trav_parent (local); + + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "Error occurred " + "while traversing to the parent, stopping marker"); + + done = 1; + + goto out; + } + + marker_start_setxattr (frame, this); + +out: + if (done) { + marker_setxattr_done (frame); + } + + return 0; +} + +int32_t +marker_start_setxattr (call_frame_t *frame, xlator_t *this) +{ + int32_t ret = -1; + dict_t *dict = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + local = (marker_local_t*) frame->local; + + if (!local) + goto out; + + dict = dict_new (); + + if (!dict) + goto out; + + if (local->loc.inode && uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, local->loc.inode->gfid); + + GF_UUID_ASSERT (local->loc.gfid); + + ret = dict_set_static_bin (dict, 
priv->marker_xattr, + (void *)local->timebuf, 8); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set marker xattr (%s)", local->loc.path); + goto out; + } + + STACK_WIND (frame, marker_specific_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0, + NULL); + + ret = 0; +out: + if (dict) + dict_unref (dict); + + return ret; +} + +void +marker_gettimeofday (marker_local_t *local) +{ + struct timeval tv = {0, }; + + gettimeofday (&tv, NULL); + + local->timebuf [0] = htonl (tv.tv_sec); + local->timebuf [1] = htonl (tv.tv_usec); + + return; +} + +int32_t +marker_create_frame (xlator_t *this, marker_local_t *local) +{ + call_frame_t *frame = NULL; + + frame = create_frame (this, this->ctx->pool); + + frame->local = (void *) local; + + marker_start_setxattr (frame, this); + + return 0; +} + +int32_t +marker_xtime_update_marks (xlator_t *this, marker_local_t *local) +{ + marker_conf_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO (this->name, local, out); + + priv = this->private; + + if ((local->pid == GF_CLIENT_PID_GSYNCD + && !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE)) + || (local->pid == GF_CLIENT_PID_DEFRAG)) + goto out; + + marker_gettimeofday (local); + + marker_local_ref (local); + + marker_create_frame (this, local); +out: + return 0; +} + + +int32_t +marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "error occurred " + "while Creating a file %s", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, + buf, preparent, postparent, xdata); + + if (op_ret == -1 || local == NULL) + goto out; 
+ + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, buf->ia_gfid); + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_set_inode_xattr (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); + +out: + marker_local_unref (local); + + return 0; +} + +int +marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + + return 0; +err: + STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM, NULL, + NULL, NULL, NULL, NULL); + return 0; +} + + +int32_t +marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "error occurred " + "while Creating a file %s", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, buf->ia_gfid); + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_set_inode_xattr (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); + +out: + marker_local_unref (local); + + return 0; +} + 
+int32_t +marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, + fd, xdata); + return 0; +err: + STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL, NULL); + + return 0; +} + + +int32_t +marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "error occurred " + "while write, %s", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); + +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = 
marker_inode_loc_fill (fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + return 0; +err: + STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + + +int32_t +marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "error occurred " + "rmdir %s", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, + postparent, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_reduce_parent_size (this, &local->loc, -1); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); + return 0; +err: + STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + + +int32_t +marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + 
marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, + "%s occurred in unlink", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, + postparent, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if ((priv->feature_enabled & GF_QUOTA) && (local->ia_nlink == 1)) + mq_reduce_parent_size (this, &local->loc, -1); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + + +int32_t +marker_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + marker_local_t *local = NULL; + + local = frame->local; + if (op_ret < 0) { + goto err; + } + + if (local == NULL) { + op_errno = EINVAL; + goto err; + } + + local->ia_nlink = buf->ia_nlink; + + STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, + local->xdata); + return 0; +err: + frame->local = NULL; + STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL, NULL); + marker_local_unref (local); + return 0; +} + + +int32_t +marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto unlink_wind; + + local = mem_get0 (this->local_pool); + local->xflag = xflag; + if (xdata) + local->xdata = dict_ref (xdata); + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + + if (ret == -1) + goto err; + + if (uuid_is_null (loc->gfid) && loc->inode) + uuid_copy (loc->gfid, loc->inode->gfid); + + STACK_WIND (frame, marker_unlink_stat_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; + +unlink_wind: + STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +err: + frame->local = NULL; + STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL); + marker_local_unref (local); + return 0; +} + + +int32_t +marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred while " + "linking a file ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, newloc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; +err: + STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); + + return 0; +} + + +int32_t +marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t 
op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL, *oplocal = NULL; + loc_t newloc = {0, }; + marker_conf_t *priv = NULL; + + local = frame->local; + oplocal = local->oplocal; + + priv = this->private; + + frame->local = NULL; + + if (op_ret < 0) { + if (local->err == 0) { + local->err = op_errno; + } + + gf_log (this->name, GF_LOG_WARNING, + "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)", + local->parent_loc.path, + uuid_utoa (local->parent_loc.inode->gfid), + strerror (op_errno)); + } + + if (local->stub != NULL) { + call_resume (local->stub); + local->stub = NULL; + } else if (local->err != 0) { + STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL, + NULL, NULL, NULL, NULL); + } + + mq_reduce_parent_size (this, &oplocal->loc, oplocal->contribution); + + if (local->loc.inode != NULL) { + mq_reduce_parent_size (this, &local->loc, local->contribution); + } + + newloc.inode = inode_ref (oplocal->loc.inode); + newloc.path = gf_strdup (local->loc.path); + newloc.name = strrchr (newloc.path, '/'); + if (newloc.name) + newloc.name++; + newloc.parent = inode_ref (local->loc.parent); + + mq_rename_update_newpath (this, &newloc); + + loc_wipe (&newloc); + + if (priv->feature_enabled & GF_XTIME) { + //update marks on oldpath + uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid); + marker_xtime_update_marks (this, oplocal); + marker_xtime_update_marks (this, local); + } + + marker_local_unref (local); + marker_local_unref (oplocal); + return 0; +} + + +int32_t +marker_rename_release_newp_lock (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL, *oplocal = NULL; + struct gf_flock lock = {0, }; + + local = frame->local; + oplocal = local->oplocal; + + if (op_ret < 0) { + if (local->err == 0) { + local->err = op_errno; + } + + gf_log (this->name, GF_LOG_WARNING, + "inodelk (UNLOCK) failed on %s (gfid:%s) (%s)", + oplocal->parent_loc.path, + uuid_utoa 
(oplocal->parent_loc.inode->gfid), + strerror (op_errno)); + } + + if (local->next_lock_on == NULL) { + marker_rename_done (frame, NULL, this, 0, 0, NULL); + goto out; + } + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = 0; + + STACK_WIND (frame, + marker_rename_done, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &local->parent_loc, F_SETLKW, &lock, NULL); + +out: + return 0; +} + + +int32_t +marker_rename_release_oldp_lock (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL, *oplocal = NULL; + struct gf_flock lock = {0, }; + + local = frame->local; + oplocal = local->oplocal; + + if ((op_ret < 0) && (op_errno != ENOATTR)) { + local->err = op_errno; + } + + //Reset frame uid and gid if set. + if (cookie == (void *) _GF_UID_GID_CHANGED) + MARKER_RESET_UID_GID (frame, frame->root, local); + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = 0; + + STACK_WIND (frame, + marker_rename_release_newp_lock, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, &oplocal->parent_loc, F_SETLKW, &lock, NULL); + return 0; +} + + +int32_t +marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + call_stub_t *stub = NULL; + int32_t ret = 0; + char contri_key [512] = {0, }; + loc_t newloc = {0, }; + + local = (marker_local_t *) frame->local; + + if (local != NULL) { + oplocal = local->oplocal; + } + + priv = this->private; + + if (op_ret < 0) { + if (local != NULL) { + local->err = op_errno; + } + + gf_log (this->name, GF_LOG_TRACE, "%s occurred 
while " + "renaming a file ", strerror (op_errno)); + } + + if (priv->feature_enabled & GF_QUOTA) { + if ((op_ret < 0) || (local == NULL)) { + goto quota_err; + } + + stub = fop_rename_cbk_stub (frame, default_rename_cbk, op_ret, + op_errno, buf, preoldparent, + postoldparent, prenewparent, + postnewparent, xdata); + if (stub == NULL) { + local->err = ENOMEM; + goto quota_err; + } + + local->stub = stub; + + GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); + if (ret < 0) { + local->err = ENOMEM; + goto quota_err; + } + + /* Removexattr requires uid and gid to be 0, + * reset them in the callback. + */ + MARKER_SET_UID_GID (frame, local, frame->root); + + newloc.inode = inode_ref (oplocal->loc.inode); + newloc.path = gf_strdup (local->loc.path); + newloc.name = strrchr (newloc.path, '/'); + if (newloc.name) + newloc.name++; + newloc.parent = inode_ref (local->loc.parent); + uuid_copy (newloc.gfid, oplocal->loc.inode->gfid); + + STACK_WIND_COOKIE (frame, marker_rename_release_oldp_lock, + frame->cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + &newloc, contri_key, NULL); + + loc_wipe (&newloc); + } else { + frame->local = NULL; + + STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, + preoldparent, postoldparent, prenewparent, + postnewparent, xdata); + + if ((op_ret < 0) || (local == NULL)) { + goto out; + } + + if (priv->feature_enabled & GF_XTIME) { + //update marks on oldpath + uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid); + marker_xtime_update_marks (this, oplocal); + marker_xtime_update_marks (this, local); + } + } + +out: + if (!(priv->feature_enabled & GF_QUOTA)) { + marker_local_unref (local); + marker_local_unref (oplocal); + } + + return 0; + +quota_err: + marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL); + return 0; +} + + +int32_t +marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + marker_local_t *local = 
NULL, *oplocal = NULL; + char contri_key[512] = {0, }; + int32_t ret = 0; + int64_t *contribution = 0; + + local = frame->local; + oplocal = local->oplocal; + + //Reset frame uid and gid if set. + if (cookie == (void *) _GF_UID_GID_CHANGED) + MARKER_RESET_UID_GID (frame, frame->root, local); + + if ((op_ret < 0) && (op_errno != ENOATTR)) { + local->err = op_errno; + gf_log (this->name, GF_LOG_WARNING, + "fetching contribution values from %s (gfid:%s) " + "failed (%s)", local->loc.path, + uuid_utoa (local->loc.inode->gfid), + strerror (op_errno)); + goto err; + } + + if (local->loc.inode != NULL) { + GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); + if (ret < 0) { + local->err = errno; + goto err; + } + + if (dict_get_bin (dict, contri_key, + (void **) &contribution) == 0) { + local->contribution = ntoh64 (*contribution); + } + } + + STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &oplocal->loc, + &local->loc, NULL); + + return 0; + +err: + marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL); + return 0; +} + + +int32_t +marker_get_newpath_contribution (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + marker_local_t *local = NULL, *oplocal = NULL; + char contri_key[512] = {0, }; + int32_t ret = 0; + int64_t *contribution = 0; + + local = frame->local; + oplocal = local->oplocal; + + //Reset frame uid and gid if set. 
+ if (cookie == (void *) _GF_UID_GID_CHANGED) + MARKER_RESET_UID_GID (frame, frame->root, local); + + if ((op_ret < 0) && (op_errno != ENOATTR)) { + local->err = op_errno; + gf_log (this->name, GF_LOG_WARNING, + "fetching contribution values from %s (gfid:%s) " + "failed (%s)", oplocal->loc.path, + uuid_utoa (oplocal->loc.inode->gfid), + strerror (op_errno)); + goto err; + } + + GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); + if (ret < 0) { + local->err = errno; + goto err; + } + + if (dict_get_bin (dict, contri_key, (void **) &contribution) == 0) + oplocal->contribution = ntoh64 (*contribution); + + if (local->loc.inode != NULL) { + GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); + if (ret < 0) { + local->err = errno; + goto err; + } + + /* getxattr requires uid and gid to be 0, + * reset them in the callback. + */ + MARKER_SET_UID_GID (frame, local, frame->root); + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, local->loc.inode->gfid); + + GF_UUID_ASSERT (local->loc.gfid); + + STACK_WIND_COOKIE (frame, marker_do_rename, + frame->cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + &local->loc, contri_key, NULL); + } else { + marker_do_rename (frame, NULL, this, 0, 0, NULL, NULL); + } + + return 0; +err: + marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL); + return 0; +} + + +int32_t +marker_get_oldpath_contribution (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL, *oplocal = NULL; + char contri_key[512] = {0, }; + int32_t ret = 0; + + local = frame->local; + oplocal = local->oplocal; + + if (op_ret < 0) { + local->err = op_errno; + gf_log (this->name, GF_LOG_WARNING, + "cannot hold inodelk on %s (gfid:%s) (%s)", + local->next_lock_on->path, + uuid_utoa (local->next_lock_on->inode->gfid), + strerror (op_errno)); + goto lock_err; + } + + GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); + if 
(ret < 0) { + local->err = errno; + goto quota_err; + } + + /* getxattr requires uid and gid to be 0, + * reset them in the callback. + */ + MARKER_SET_UID_GID (frame, local, frame->root); + + if (uuid_is_null (oplocal->loc.gfid)) + uuid_copy (oplocal->loc.gfid, + oplocal->loc.inode->gfid); + + GF_UUID_ASSERT (oplocal->loc.gfid); + + STACK_WIND_COOKIE (frame, marker_get_newpath_contribution, + frame->cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + &oplocal->loc, contri_key, NULL); + return 0; + +quota_err: + marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL); + return 0; + +lock_err: + if ((local->next_lock_on == NULL) + || (local->next_lock_on == &local->parent_loc)) { + local->next_lock_on = NULL; + marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL); + } else { + marker_rename_release_newp_lock (frame, NULL, this, 0, 0, NULL); + } + + return 0; +} + + +int32_t +marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL, *oplocal = NULL; + loc_t *loc = NULL; + struct gf_flock lock = {0, }; + + local = frame->local; + oplocal = local->oplocal; + + if (op_ret < 0) { + if (local->next_lock_on != &oplocal->parent_loc) { + loc = &oplocal->parent_loc; + } else { + loc = &local->parent_loc; + } + + local->err = op_errno; + gf_log (this->name, GF_LOG_WARNING, + "cannot hold inodelk on %s (gfid:%s) (%s)", + loc->path, uuid_utoa (loc->inode->gfid), + strerror (op_errno)); + goto err; + } + + if (local->next_lock_on != NULL) { + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + + STACK_WIND (frame, + marker_get_oldpath_contribution, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, local->next_lock_on, + F_SETLKW, &lock, NULL); + } else { + marker_get_oldpath_contribution (frame, 0, this, 0, 0, NULL); + } + + return 0; + +err: + marker_rename_done (frame, NULL, this, 
0, 0, NULL); + return 0; +} + + +int32_t +marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + marker_conf_t *priv = NULL; + struct gf_flock lock = {0, }; + loc_t *lock_on = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto rename_wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + oplocal = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, oplocal); + + frame->local = local; + + local->oplocal = marker_local_ref (oplocal); + + ret = loc_copy (&local->loc, newloc); + if (ret < 0) + goto err; + + ret = loc_copy (&oplocal->loc, oldloc); + if (ret < 0) + goto err; + + if (!(priv->feature_enabled & GF_QUOTA)) { + goto rename_wind; + } + + ret = mq_inode_loc_fill (NULL, newloc->parent, &local->parent_loc); + if (ret < 0) + goto err; + + ret = mq_inode_loc_fill (NULL, oldloc->parent, &oplocal->parent_loc); + if (ret < 0) + goto err; + + if ((newloc->inode != NULL) && (newloc->parent != oldloc->parent) + && (uuid_compare (newloc->parent->gfid, + oldloc->parent->gfid) < 0)) { + lock_on = &local->parent_loc; + local->next_lock_on = &oplocal->parent_loc; + } else { + lock_on = &oplocal->parent_loc; + if ((newloc->inode != NULL) && (newloc->parent + != oldloc->parent)) { + local->next_lock_on = &local->parent_loc; + } + } + + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + + STACK_WIND (frame, + marker_rename_inodelk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + this->name, lock_on, + F_SETLKW, &lock, NULL); + + return 0; + +rename_wind: + STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + + return 0; +err: + STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, NULL, + NULL, NULL, NULL, NULL, NULL); + + return 0; +} + + +int32_t +marker_truncate_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred while " + "truncating a file ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); + +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; +err: + STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + + +int32_t +marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred while " + "truncating a file ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + + if 
(op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = marker_inode_loc_fill (fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; +err: + STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + + +int32_t +marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred while " + "creating symlinks ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, buf->ia_gfid); + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_set_inode_xattr (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int +marker_symlink (call_frame_t *frame, xlator_t 
*this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, + xdata); + return 0; +err: + STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM, NULL, + NULL, NULL, NULL, NULL); + return 0; +} + + +int32_t +marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred while " + "creating symlinks ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, + buf, preparent, postparent, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, buf->ia_gfid); + + priv = this->private; + + if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG (local->mode))) { + mq_set_inode_xattr (this, &local->loc); + } + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int +marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, 
local); + + ret = loc_copy (&local->loc, loc); + + local->mode = mode; + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, + xdata); + return 0; +err: + STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM, NULL, + NULL, NULL, NULL, NULL); + return 0; +} + + +int32_t +marker_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred while " + "fallocating a file ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = marker_inode_loc_fill (fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; +err: + STACK_UNWIND_STRICT (fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + + +int32_t +marker_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, 
int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred during discard", + strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = marker_inode_loc_fill (fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; +err: + STACK_UNWIND_STRICT (discard, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + +int32_t +marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred during zerofill", + strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if 
(priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = marker_inode_loc_fill (fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; +err: + STACK_UNWIND_STRICT (zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + + +/* when a call from the special client is received on + * key trusted.glusterfs.volume-mark with value "RESET" + * or if the value is 0length, update the change the + * access time and modification time via touching the + * timestamp file. + */ +int32_t +call_from_sp_client_to_reset_tmfile (call_frame_t *frame, + xlator_t *this, + dict_t *dict) +{ + int32_t fd = 0; + int32_t op_ret = 0; + int32_t op_errno = 0; + data_t *data = NULL; + marker_conf_t *priv = NULL; + + if (frame == NULL || this == NULL || dict == NULL) + return -1; + + priv = this->private; + + data = dict_get (dict, "trusted.glusterfs.volume-mark"); + if (data == NULL) + return -1; + + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + op_ret = -1; + op_errno = EPERM; + + goto out; + } + + if (data->len == 0 || (data->len == 5 && + memcmp (data->data, "RESET", 5) == 0)) { + fd = open (priv->timestamp_file, O_WRONLY|O_TRUNC); + if (fd != -1) { + /* TODO check whether the O_TRUNC would update the + * timestamps on a zero length file on all machies. 
+ */ + close (fd); + } + + if (fd != -1 || errno == ENOENT) { + op_ret = 0; + op_errno = 0; + } else { + op_ret = -1; + op_errno = errno; + } + } else { + op_ret = -1; + op_errno = EINVAL; + } +out: + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL); + + return 0; +} + + +int32_t +marker_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred in " + "setxattr ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + ret = call_from_sp_client_to_reset_tmfile (frame, this, dict); + if (ret == 0) + return 0; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; +err: + STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL); + + return 0; +} + + +int32_t +marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred while " + 
"creating symlinks ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + ret = call_from_sp_client_to_reset_tmfile (frame, this, dict); + if (ret == 0) + return 0; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = marker_inode_loc_fill (fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; +err: + STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL); + + return 0; +} + + +int32_t +marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "%s occurred while " + "creating symlinks ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre, + statpost, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + + +int32_t +marker_fsetattr (call_frame_t *frame, xlator_t *this, 
fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = marker_inode_loc_fill (fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_fsetattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; +err: + STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + + +int32_t +marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + if (op_ret == -1) { + gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG : + GF_LOG_ERROR), + "%s occurred during setattr of %s", + strerror (op_errno), + (local ? 
local->loc.path : "<nul>")); + } + + STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, + statpost, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_setattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; +err: + STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + + +int32_t +marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "%s occurred while " + "creating symlinks ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 
(this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; +err: + STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL); + + return 0; +} + + +int32_t +marker_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, struct iatt *postparent) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "lookup failed with %s", + strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, + dict, postparent); + + if (op_ret == -1 || local == NULL) + goto out; + + /* copy the gfid from the stat structure instead of inode, + * since if the lookup is fresh lookup, then the inode + * would have not yet linked to the inode table which happens + * in protocol/server. 
+ */ + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, buf->ia_gfid); + + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) { + mq_xattr_state (this, &local->loc, dict, *buf); + } + +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = loc_copy (&local->loc, loc); + if (ret == -1) + goto err; + + if ((priv->feature_enabled & GF_QUOTA) && xattr_req) + mq_req_xattr (this, loc, xattr_req); +wind: + STACK_WIND (frame, marker_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + return 0; +err: + STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, NULL, NULL, NULL); + + return 0; +} + +int +marker_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + + if (op_ret <= 0) + goto unwind; + + list_for_each_entry (entry, &entries->list, list) { + /* TODO: fill things */ + } + +unwind: + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + + return 0; +} + +int +marker_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + if ((priv->feature_enabled & GF_QUOTA) && dict) + mq_req_xattr (this, NULL, dict); + +wind: + STACK_WIND (frame, marker_readdirp_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + fd, size, offset, dict); + + return 0; +} + + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, 
gf_marker_mt_end + 1); + + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + + +int32_t +init_xtime_priv (xlator_t *this, dict_t *options) +{ + data_t *data = NULL; + int32_t ret = -1; + marker_conf_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO (this->name, options, out); + GF_VALIDATE_OR_GOTO (this->name, this->private, out); + + priv = this->private; + + if((data = dict_get (options, VOLUME_UUID)) != NULL) { + priv->volume_uuid = data->data; + + ret = uuid_parse (priv->volume_uuid, priv->volume_uuid_bin); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "invalid volume uuid %s", priv->volume_uuid); + goto out; + } + + ret = gf_asprintf (& (priv->marker_xattr), "%s.%s.%s", + MARKER_XATTR_PREFIX, priv->volume_uuid, + XTIME); + + if (ret == -1){ + priv->marker_xattr = NULL; + + gf_log (this->name, GF_LOG_ERROR, + "Failed to allocate memory"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "the volume-uuid = %s", priv->volume_uuid); + } else { + priv->volume_uuid = NULL; + + gf_log (this->name, GF_LOG_ERROR, + "please specify the volume-uuid" + "in the translator options"); + + return -1; + } + + if ((data = dict_get (options, TIMESTAMP_FILE)) != NULL) { + priv->timestamp_file = data->data; + + gf_log (this->name, GF_LOG_DEBUG, + "the timestamp-file is = %s", + priv->timestamp_file); + + } else { + priv->timestamp_file = NULL; + + gf_log (this->name, GF_LOG_ERROR, + "please specify the timestamp-file" + "in the translator options"); + + goto out; + } + + ret = 0; +out: + return ret; +} + +void +marker_xtime_priv_cleanup (xlator_t *this) +{ + marker_conf_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + + priv = (marker_conf_t *) this->private; + + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + GF_FREE (priv->volume_uuid); + + GF_FREE (priv->timestamp_file); + + GF_FREE (priv->marker_xattr); +out: + return; +} + +void 
+marker_priv_cleanup (xlator_t *this) +{ + marker_conf_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("marker", this, out); + + priv = (marker_conf_t *) this->private; + + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + marker_xtime_priv_cleanup (this); + + LOCK_DESTROY (&priv->lock); + + GF_FREE (priv); +out: + return; +} + +int32_t +reconfigure (xlator_t *this, dict_t *options) +{ + int32_t ret = 0; + data_t *data = NULL; + gf_boolean_t flag = _gf_false; + marker_conf_t *priv = NULL; + + GF_ASSERT (this); + GF_ASSERT (this->private); + + priv = this->private; + + priv->feature_enabled = 0; + + GF_VALIDATE_OR_GOTO (this->name, options, out); + + data = dict_get (options, "quota"); + if (data) { + ret = gf_string2boolean (data->data, &flag); + if (ret == 0 && flag == _gf_true) { + ret = init_quota_priv (this); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to initialize quota private"); + } else { + priv->feature_enabled |= GF_QUOTA; + } + } + } + + data = dict_get (options, "xtime"); + if (data) { + ret = gf_string2boolean (data->data, &flag); + if (ret == 0 && flag == _gf_true) { + marker_xtime_priv_cleanup (this); + + ret = init_xtime_priv (this, options); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to initialize xtime private, " + "xtime updation will fail"); + } else { + priv->feature_enabled |= GF_XTIME; + data = dict_get (options, "gsync-force-xtime"); + if (!data) + goto out; + ret = gf_string2boolean (data->data, &flag); + if (ret == 0 && flag) + priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; + } + } + } +out: + return ret; +} + + +int32_t +init (xlator_t *this) +{ + dict_t *options = NULL; + data_t *data = NULL; + int32_t ret = 0; + gf_boolean_t flag = _gf_false; + marker_conf_t *priv = NULL; + + if (!this->children) { + gf_log (this->name, GF_LOG_ERROR, + "marker translator needs subvolume defined."); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "Volume is dangling."); + return -1; + 
} + + options = this->options; + + ALLOCATE_OR_GOTO (this->private, marker_conf_t, err); + + priv = this->private; + + priv->feature_enabled = 0; + + LOCK_INIT (&priv->lock); + + data = dict_get (options, "quota"); + if (data) { + ret = gf_string2boolean (data->data, &flag); + if (ret == 0 && flag == _gf_true) { + ret = init_quota_priv (this); + if (ret < 0) + goto err; + + priv->feature_enabled |= GF_QUOTA; + } + } + + data = dict_get (options, "xtime"); + if (data) { + ret = gf_string2boolean (data->data, &flag); + if (ret == 0 && flag == _gf_true) { + ret = init_xtime_priv (this, options); + if (ret < 0) + goto err; + + priv->feature_enabled |= GF_XTIME; + data = dict_get (options, "gsync-force-xtime"); + if (!data) + goto cont; + ret = gf_string2boolean (data->data, &flag); + if (ret == 0 && flag) + priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; + } + } + + cont: + this->local_pool = mem_pool_new (marker_local_t, 128); + if (!this->local_pool) { + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto err; + } + + return 0; +err: + marker_priv_cleanup (this); + + return -1; +} + +int32_t +marker_forget (xlator_t *this, inode_t *inode) +{ + marker_inode_ctx_t *ctx = NULL; + uint64_t value = 0; + + if (inode_ctx_del (inode, this, &value) != 0) + goto out; + + ctx = (marker_inode_ctx_t *)(unsigned long)value; + if (ctx == NULL) { + goto out; + } + + mq_forget (this, ctx->quota_ctx); + + GF_FREE (ctx); +out: + return 0; +} + +void +fini (xlator_t *this) +{ + marker_priv_cleanup (this); +} + +struct xlator_fops fops = { + .lookup = marker_lookup, + .create = marker_create, + .mkdir = marker_mkdir, + .writev = marker_writev, + .truncate = marker_truncate, + .ftruncate = marker_ftruncate, + .symlink = marker_symlink, + .link = marker_link, + .unlink = marker_unlink, + .rmdir = marker_rmdir, + .rename = marker_rename, + .mknod = marker_mknod, + .setxattr = marker_setxattr, + .fsetxattr = marker_fsetxattr, + .setattr = marker_setattr, + 
.fsetattr = marker_fsetattr, + .removexattr = marker_removexattr, + .getxattr = marker_getxattr, + .readdirp = marker_readdirp, + .fallocate = marker_fallocate, + .discard = marker_discard, + .zerofill = marker_zerofill, +}; + +struct xlator_cbks cbks = { + .forget = marker_forget +}; + +struct volume_options options[] = { + {.key = {"volume-uuid"}}, + {.key = {"timestamp-file"}}, + {.key = {"quota"}}, + {.key = {"xtime"}}, + {.key = {"gsync-force-xtime"}}, + {.key = {NULL}} +}; diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h new file mode 100644 index 000000000..1a58f8cfc --- /dev/null +++ b/xlators/features/marker/src/marker.h @@ -0,0 +1,138 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _MARKER_H +#define _MARKER_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "marker-quota.h" +#include "xlator.h" +#include "defaults.h" +#include "uuid.h" +#include "call-stub.h" + +#define MARKER_XATTR_PREFIX "trusted.glusterfs" +#define XTIME "xtime" +#define VOLUME_MARK "volume-mark" +#define VOLUME_UUID "volume-uuid" +#define TIMESTAMP_FILE "timestamp-file" + +enum { + GF_QUOTA = 1, + GF_XTIME = 2, + GF_XTIME_GSYNC_FORCE = 4, +}; + +/*initialize the local variable*/ +#define MARKER_INIT_LOCAL(_frame,_local) do { \ + _frame->local = _local; \ + _local->pid = _frame->root->pid; \ + memset (&_local->loc, 0, sizeof (loc_t)); \ + _local->ref = 1; \ + _local->uid = -1; \ + _local->gid = -1; \ + LOCK_INIT (&_local->lock); \ + _local->oplocal = NULL; \ + } while (0) + +/* try alloc and if it fails, goto label */ +#define ALLOCATE_OR_GOTO(var, type, label) do { \ + var = GF_CALLOC (sizeof (type), 1, \ + gf_marker_mt_##type); \ + if (!var) { \ + gf_log (this->name, GF_LOG_ERROR, \ + "out of memory :("); \ + goto label; \ + } \ + } while (0) + +#define _MARKER_SET_UID_GID(dest, src) \ + do { \ + if (src->uid != -1 && \ + src->gid != -1) { \ + dest->uid = src->uid; \ + dest->gid = src->gid; \ + } \ + } while (0) + +#define MARKER_SET_UID_GID(frame, dest, src) \ + do { \ + _MARKER_SET_UID_GID (dest, src); \ + frame->root->uid = 0; \ + frame->root->gid = 0; \ + frame->cookie = (void *) _GF_UID_GID_CHANGED; \ + } while (0) + +#define MARKER_RESET_UID_GID(frame, dest, src) \ + do { \ + _MARKER_SET_UID_GID (dest, src); \ + frame->cookie = NULL; \ + } while (0) + +struct marker_local{ + uint32_t timebuf[2]; + pid_t pid; + loc_t loc; + loc_t parent_loc; + loc_t *next_lock_on; + uid_t uid; + gid_t gid; + int32_t ref; + int32_t ia_nlink; + gf_lock_t lock; + mode_t mode; + int32_t err; + call_stub_t *stub; + int64_t contribution; + struct marker_local *oplocal; + + /* marker quota specific */ + int64_t delta; + int64_t d_off; + int64_t 
sum; + int64_t size; + int32_t hl_count; + int32_t dentry_child_count; + + fd_t *fd; + call_frame_t *frame; + + quota_inode_ctx_t *ctx; + inode_contribution_t *contri; + + int xflag; + dict_t *xdata; +}; +typedef struct marker_local marker_local_t; + +#define quota_local_t marker_local_t + +struct marker_inode_ctx { + struct quota_inode_ctx *quota_ctx; +}; +typedef struct marker_inode_ctx marker_inode_ctx_t; + +struct marker_conf{ + char feature_enabled; + char *size_key; + char *dirty_key; + char *volume_uuid; + uuid_t volume_uuid_bin; + char *timestamp_file; + char *marker_xattr; + uint64_t quota_lk_owner; + gf_lock_t lock; +}; +typedef struct marker_conf marker_conf_t; + +#endif diff --git a/xlators/features/path-convertor/src/Makefile.am b/xlators/features/path-convertor/src/Makefile.am index 58cfed0f9..393a7bd08 100644 --- a/xlators/features/path-convertor/src/Makefile.am +++ b/xlators/features/path-convertor/src/Makefile.am @@ -2,13 +2,14 @@ xlator_LTLIBRARIES = path-converter.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features -path_converter_la_LDFLAGS = -module -avoidversion +path_converter_la_LDFLAGS = -module -avoid-version path_converter_la_SOURCES = path.c path_converter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/features/path-convertor/src/path-mem-types.h b/xlators/features/path-convertor/src/path-mem-types.h new file mode 100644 index 000000000..77ada8d53 --- /dev/null +++ b/xlators/features/path-convertor/src/path-mem-types.h @@ -0,0 +1,22 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef __PATH_MEM_TYPES_H__ +#define __PATH_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_path_mem_types_ { + gf_path_mt_path_private_t = gf_common_mt_end + 1, + gf_path_mt_char, + gf_path_mt_regex_t, + gf_path_mt_end +}; +#endif + diff --git a/xlators/features/path-convertor/src/path.c b/xlators/features/path-convertor/src/path.c index 3589e1485..5c52e0a8d 100644 --- a/xlators/features/path-convertor/src/path.c +++ b/xlators/features/path-convertor/src/path.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ /* TODO: add gf_log to all the cases returning errors */ #ifndef _CONFIG_H @@ -35,6 +25,7 @@ #include <errno.h> #include "glusterfs.h" #include "xlator.h" +#include "path-mem-types.h" typedef struct path_private { @@ -51,7 +42,7 @@ static char * name_this_to_that (xlator_t *xl, const char *path, const char *name) { path_private_t *priv = xl->private; - char priv_path[ZR_PATH_MAX] = {0,}; + char priv_path[PATH_MAX] = {0,}; char *tmp_name = NULL; int32_t path_len = strlen (path); int32_t name_len = strlen (name) - ZR_FILE_CONTENT_STRLEN; @@ -63,7 +54,9 @@ name_this_to_that (xlator_t *xl, const char *path, const char *name) if (priv->end_off && (total_len > priv->end_off)) { j = priv->start_off; - tmp_name = CALLOC (1, (total_len + ZR_FILE_CONTENT_STRLEN)); + tmp_name = GF_CALLOC (1, (total_len + + ZR_FILE_CONTENT_STRLEN), + gf_path_mt_char); ERR_ABORT (tmp_name); /* Get the complete path for the file first */ @@ -104,7 +97,7 @@ path_this_to_that (xlator_t *xl, const char *path) int32_t i = 0, j = 0; if (priv->end_off && (path_len > priv->start_off)) { - priv_path = CALLOC (1, path_len); + priv_path = GF_CALLOC (1, path_len, gf_path_mt_char); ERR_ABORT (priv_path); if (priv->start_off && (path_len > priv->start_off)) @@ -378,7 +371,7 @@ path_lookup (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -405,7 +398,7 @@ path_stat (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -434,7 +427,7 @@ path_readlink (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -465,7 +458,7 @@ path_mknod (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -494,7 +487,7 @@ path_mkdir (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); 
return 0; } @@ -521,7 +514,7 @@ path_unlink (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -548,7 +541,7 @@ path_rmdir (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -577,7 +570,7 @@ path_symlink (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -615,11 +608,11 @@ path_rename (call_frame_t *frame, oldloc->path = oldloc_path; if (tmp_oldloc_path != oldloc_path) - FREE (tmp_oldloc_path); + GF_FREE (tmp_oldloc_path); newloc->path = newloc_path; if (tmp_newloc_path != newloc_path) - FREE (tmp_newloc_path); + GF_FREE (tmp_newloc_path); return 0; } @@ -657,11 +650,11 @@ path_link (call_frame_t *frame, oldloc->path = oldloc_path; if (tmp_oldloc_path != oldloc_path) - FREE (tmp_oldloc_path); + GF_FREE (tmp_oldloc_path); newloc->path = newloc_path; if (tmp_newloc_path != newloc_path) - FREE (tmp_newloc_path); + GF_FREE (tmp_newloc_path); return 0; } @@ -704,7 +697,7 @@ path_setattr (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -734,7 +727,7 @@ path_truncate (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -768,7 +761,7 @@ path_open (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -801,7 +794,7 @@ path_create (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -843,10 +836,9 @@ path_setxattr (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); - if (tmp_name) - FREE (tmp_name); + GF_FREE (tmp_name); return 0; } @@ -880,10 +872,10 @@ path_getxattr (call_frame_t *frame, loc->path = 
loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); if (tmp_name != name) - FREE (tmp_name); + GF_FREE (tmp_name); return 0; } @@ -917,10 +909,10 @@ path_removexattr (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); if (tmp_name != name) - FREE (tmp_name); + GF_FREE (tmp_name); return 0; } @@ -949,7 +941,7 @@ path_opendir (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -978,7 +970,7 @@ path_access (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -1020,7 +1012,7 @@ path_checksum (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -1047,14 +1039,14 @@ path_entrylk (call_frame_t *frame, xlator_t *this, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } int32_t path_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct flock *lock) + const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) { char *loc_path = (char *)loc->path; char *tmp_path = NULL; @@ -1073,7 +1065,7 @@ path_inodelk (call_frame_t *frame, xlator_t *this, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } @@ -1105,11 +1097,29 @@ path_xattrop (call_frame_t *frame, loc->path = loc_path; if (tmp_path != loc_path) - FREE (tmp_path); + GF_FREE (tmp_path); return 0; } +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_path_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} int32_t init (xlator_t *this) @@ -1128,7 +1138,7 @@ init (xlator_t *this) "dangling volume. 
check volfile "); } - priv = CALLOC (1, sizeof (*priv)); + priv = GF_CALLOC (1, sizeof (*priv), gf_path_mt_path_private_t); ERR_ABORT (priv); if (dict_get (options, "start-offset")) { priv->start_off = data_to_int32 (dict_get (options, @@ -1141,7 +1151,8 @@ init (xlator_t *this) if (dict_get (options, "regex")) { int32_t ret = 0; - priv->preg = CALLOC (1, sizeof (regex_t)); + priv->preg = GF_CALLOC (1, sizeof (regex_t), + gf_path_mt_regex_t); ERR_ABORT (priv->preg); ret = regcomp (priv->preg, data_to_str (dict_get (options, "regex")), @@ -1149,7 +1160,7 @@ init (xlator_t *this) if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to compile the 'option regex'"); - FREE (priv); + GF_FREE (priv); return -1; } if (dict_get (options, "replace-with")) { @@ -1196,11 +1207,6 @@ struct xlator_fops fops = { .setattr = path_setattr, }; - -struct xlator_mops mops = { -}; - - struct xlator_cbks cbks = { }; diff --git a/xlators/features/protect/Makefile.am b/xlators/features/protect/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/features/protect/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/protect/src/Makefile.am b/xlators/features/protect/src/Makefile.am new file mode 100644 index 000000000..7eb93f32e --- /dev/null +++ b/xlators/features/protect/src/Makefile.am @@ -0,0 +1,21 @@ +xlator_LTLIBRARIES = prot_dht.la prot_client.la prot_server.la + +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +prot_dht_la_LDFLAGS = -module -avoidversion +prot_dht_la_SOURCES = prot_dht.c +prot_dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +prot_client_la_LDFLAGS = -module -avoidversion +prot_client_la_SOURCES = prot_client.c +prot_client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +prot_server_la_LDFLAGS = -module -avoidversion +prot_server_la_SOURCES = prot_server.c +prot_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + 
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/features/protect/src/prot_client.c b/xlators/features/protect/src/prot_client.c new file mode 100644 index 000000000..a27216d0a --- /dev/null +++ b/xlators/features/protect/src/prot_client.c @@ -0,0 +1,215 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" + +#include <execinfo.h> + +#define NUM_FRAMES 20 + +static char PROTECT_KEY[] = "trusted.glusterfs.protect"; + +enum { + PROT_ACT_NONE = 0, + PROT_ACT_LOG, + PROT_ACT_REJECT, +}; + +void +pcli_print_trace (char *name, call_frame_t *frame) +{ + void *frames[NUM_FRAMES]; + char **symbols; + int size; + int i; + + gf_log (name, GF_LOG_INFO, "Translator stack:"); + while (frame) { + gf_log (name, GF_LOG_INFO, "%s (%s)", + frame->wind_from, frame->this->name); + frame = frame->next; + } + + size = backtrace(frames,NUM_FRAMES); + if (size <= 0) { + return; + } + symbols = backtrace_symbols(frames,size); + if (!symbols) { + return; + } + + gf_log(name, GF_LOG_INFO, "Processor stack:"); + for (i = 0; i < size; ++i) { + gf_log (name, GF_LOG_INFO, "%s", symbols[i]); + } + free(symbols); +} + +int32_t +pcli_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + uint64_t value; + + if (newloc->parent == oldloc->parent) { + gf_log (this->name, GF_LOG_DEBUG, "rename in same directory"); + goto simple_unwind; + } + if (!oldloc->parent) { + goto simple_unwind; + } + if (inode_ctx_get(oldloc->parent,this,&value) != 0) { + goto 
simple_unwind; + } + + if (value != PROT_ACT_NONE) { + gf_log (this->name, GF_LOG_WARNING, + "got rename for protected %s", oldloc->path); + pcli_print_trace(this->name,frame->next); + if (value == PROT_ACT_REJECT) { + STACK_UNWIND_STRICT (rename, frame, -1, EPERM, + NULL, NULL, NULL, NULL, NULL, + xdata); + return 0; + } + } + +simple_unwind: + STACK_WIND_TAIL (frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, + xdata); + return 0; +} + +int32_t +pcli_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + data_t *data; + uint64_t value; + + /* + * We can't use dict_get_str and strcmp here, because the value comes + * directly from the user and might not be NUL-terminated (it would + * be if we had set it ourselves. + */ + + data = dict_get(dict,PROTECT_KEY); + if (!data) { + goto simple_wind; + } + + if (dict->count > 1) { + gf_log (this->name, GF_LOG_WARNING, + "attempted to mix %s with other keys", PROTECT_KEY); + goto simple_wind; + } + + gf_log (this->name, GF_LOG_DEBUG, "got %s request", PROTECT_KEY); + if (!strncmp(data->data,"log",data->len)) { + gf_log (this->name, GF_LOG_DEBUG, + "logging removals on %s", loc->path); + value = PROT_ACT_LOG; + } + else if (!strncmp(data->data,"reject",data->len)) { + gf_log (this->name, GF_LOG_DEBUG, + "rejecting removals on %s", loc->path); + value = PROT_ACT_REJECT; + } + else { + gf_log (this->name, GF_LOG_DEBUG, + "removing protection on %s", loc->path); + value = PROT_ACT_NONE; + } + /* Right now the value doesn't matter - just the presence. 
*/ + if (inode_ctx_set(loc->inode,this,&value) != 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set protection status for %s", loc->path); + } + STACK_UNWIND_STRICT (setxattr, frame, 0, 0, NULL); + return 0; + +simple_wind: + STACK_WIND_TAIL (frame, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, + loc, dict, flags, xdata); + return 0; +} + +int32_t +pcli_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + uint64_t value; + + if (!loc->parent || (inode_ctx_get(loc->parent,this,&value) != 0)) { + goto simple_unwind; + } + + if (value != PROT_ACT_NONE) { + gf_log (this->name, GF_LOG_WARNING, + "got unlink for protected %s", loc->path); + pcli_print_trace(this->name,frame->next); + if (value == PROT_ACT_REJECT) { + STACK_UNWIND_STRICT (unlink, frame, -1, EPERM, + NULL, NULL, NULL); + return 0; + } + } + +simple_unwind: + STACK_WIND_TAIL (frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +} + +int32_t +init (xlator_t *this) +{ + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "translator not configured with exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + return 0; +} + + +void +fini (xlator_t *this) +{ + return; +} + + +struct xlator_fops fops = { + .rename = pcli_rename, + .setxattr = pcli_setxattr, + .unlink = pcli_unlink, +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = {NULL} }, +}; diff --git a/xlators/features/protect/src/prot_dht.c b/xlators/features/protect/src/prot_dht.c new file mode 100644 index 000000000..feec6ffd6 --- /dev/null +++ b/xlators/features/protect/src/prot_dht.c @@ -0,0 +1,168 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+/* Memory-accounting tag passed to GF_CALLOC for pdht_coord_t allocations. */
+enum gf_pdht_mem_types_ {
+        gf_pdht_mt_coord_t = gf_common_mt_end + 1,
+        gf_pdht_mt_end
+};
+
+/*
+ * Coordinator shared by every setxattr wound on behalf of one request.
+ * "refs" counts the winds still outstanding plus the reference held by
+ * pdht_setxattr while it is issuing them; the caller is answered when the
+ * count reaches zero.
+ */
+typedef struct {
+        pthread_mutex_t lock;     /* taken around every update of the fields below */
+        uint16_t        refs;
+        int32_t         op_ret;   /* last non-zero op_ret reported, else 0 */
+        int32_t         op_errno; /* errno that came with that op_ret */
+        dict_t         *xdata;    /* most recent xdata reported (ref held) */
+} pdht_coord_t;
+
+static char PROTECT_KEY[] = "trusted.glusterfs.protect";
+
+/*
+ * Drop one reference on coord and release coord->lock, which the caller
+ * must hold on entry.  When the last reference goes away the collected
+ * result is unwound to the original caller and coord is torn down.
+ */
+void
+pdht_unref_and_unlock (call_frame_t *frame, xlator_t *this,
+                       pdht_coord_t *coord)
+{
+        gf_boolean_t    last_ref;
+
+        last_ref = (--(coord->refs) == 0);
+        pthread_mutex_unlock (&coord->lock);
+
+        if (!last_ref) {
+                return;
+        }
+
+        STACK_UNWIND_STRICT (setxattr, frame,
+                             coord->op_ret, coord->op_errno,
+                             coord->xdata);
+        if (coord->xdata) {
+                dict_unref (coord->xdata);
+        }
+        /* The count hit zero, so no other holder remains: release the
+         * mutex before freeing the memory it lives in. */
+        pthread_mutex_destroy (&coord->lock);
+        GF_FREE (coord);
+}
+
+/*
+ * Callback for each setxattr wound by pdht_recurse; cookie is the shared
+ * coordinator.  A failure overwrites the recorded result, a returned xdata
+ * replaces the one kept so far, and the wind's reference is dropped.
+ */
+int32_t
+pdht_recurse_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        pdht_coord_t    *coord = cookie;
+
+        pthread_mutex_lock (&coord->lock);
+        if (op_ret) {
+                coord->op_ret = op_ret;
+                coord->op_errno = op_errno;
+        }
+        if (xdata) {
+                if (coord->xdata) {
+                        dict_unref (coord->xdata);
+                }
+                coord->xdata = dict_ref (xdata);
+        }
+        /* Unlocks coord->lock and may unwind + free coord. */
+        pdht_unref_and_unlock (frame, this, coord);
+
+        return 0;
+}
+
+/*
+ * Walk the translator graph below xl.  Every "features/prot_client"
+ * translator found gets the setxattr wound to it (the walk does not go
+ * below it); any other translator is descended into child by child.
+ */
+void
+pdht_recurse (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+              int32_t flags, dict_t *xdata, xlator_t *xl, pdht_coord_t *coord)
+{
+        xlator_list_t   *iter;
+
+        if (!strcmp (xl->type, "features/prot_client")) {
+                /* Account for this wind before issuing it. */
+                pthread_mutex_lock (&coord->lock);
+                ++(coord->refs);
+                pthread_mutex_unlock (&coord->lock);
+                STACK_WIND_COOKIE (frame, pdht_recurse_cbk, coord, xl,
+                                   xl->fops->setxattr, loc, dict, flags, xdata);
+                return;
+        }
+
+        for (iter = xl->children; iter; iter = iter->next) {
+                pdht_recurse (frame, this, loc, dict, flags, xdata,
+                              iter->xlator, coord);
+        }
+}
+
+/*
+ * setxattr entry point.  A request that sets only PROTECT_KEY is fanned out
+ * to every prot_client translator below this one; anything else (including
+ * PROTECT_KEY mixed with other keys, or a failure to set up the
+ * coordinator) is passed to the first child unchanged.
+ */
+int32_t
+pdht_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+               int32_t flags, dict_t *xdata)
+{
+        pdht_coord_t    *coord;
+
+        if (!dict_get (dict, PROTECT_KEY)) {
+                goto simple_wind;
+        }
+
+        if (dict->count > 1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "attempted to mix %s with other keys", PROTECT_KEY);
+                goto simple_wind;
+        }
+
+        coord = GF_CALLOC (1, sizeof (*coord), gf_pdht_mt_coord_t);
+        if (!coord) {
+                gf_log (this->name, GF_LOG_WARNING, "allocation failed");
+                goto simple_wind;
+        }
+
+        if (pthread_mutex_init (&coord->lock, NULL) != 0) {
+                gf_log (this->name, GF_LOG_WARNING, "mutex init failed");
+                GF_FREE (coord);
+                goto simple_wind;
+        }
+        /* Our own reference keeps coord alive until all winds are issued. */
+        coord->refs = 1;
+        coord->op_ret = 0;
+        coord->xdata = NULL;
+
+        pdht_recurse (frame, this, loc, dict, flags, xdata, this, coord);
+        pthread_mutex_lock (&coord->lock);
+        pdht_unref_and_unlock (frame, this, coord);
+
+        return 0;
+
+simple_wind:
+        STACK_WIND_TAIL (frame,
+                         FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                         loc, dict, flags, xdata);
+        return 0;
+}
+
+/* Translator init: insists on exactly one child, warns when unparented. */
+int32_t
+init (xlator_t *this)
+{
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+
+/* Nothing to release: init allocates no private state. */
+void
+fini (xlator_t *this)
+{
+        return;
+}
+
+struct xlator_fops fops = {
+        .setxattr = pdht_setxattr,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+        { .key = {NULL} },
+};
diff --git a/xlators/features/protect/src/prot_server.c b/xlators/features/protect/src/prot_server.c
new file mode 100644
index 000000000..beaee0889
--- /dev/null
+++ b/xlators/features/protect/src/prot_server.c
@@ -0,0 +1,51 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+/* Translator init: insists on exactly one child, warns when unparented. */
+int32_t
+init (xlator_t *this)
+{
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+
+/* Nothing to release: init allocates no private state. */
+void
+fini (xlator_t *this)
+{
+        return;
+}
+
+
+/* No fops are overridden in this file. */
+struct xlator_fops fops = {
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+        { .key = {NULL} },
+};
diff --git a/xlators/features/qemu-block/Makefile.am b/xlators/features/qemu-block/Makefile.am
new file mode 100644
index 000000000..af437a64d
--- /dev/null
+++ b/xlators/features/qemu-block/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/features/qemu-block/src/Makefile.am b/xlators/features/qemu-block/src/Makefile.am
new file mode 100644
index 000000000..08a7b62a0
--- /dev/null
+++ b/xlators/features/qemu-block/src/Makefile.am
@@ -0,0 +1,155 @@
+if ENABLE_QEMU_BLOCK
+xlator_LTLIBRARIES = qemu-block.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+qemu_block_la_LDFLAGS = -module -avoid-version
+qemu_block_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GLIB_LIBS) -lz -lrt
+
+# Sources taken from the qemu tree under $(CONTRIBDIR), grouped by directory.
+qemu_block_la_SOURCES_qemu = \
+        $(CONTRIBDIR)/qemu/qemu-coroutine.c \
+        $(CONTRIBDIR)/qemu/qemu-coroutine-lock.c \
+        $(CONTRIBDIR)/qemu/qemu-coroutine-sleep.c \
+        $(CONTRIBDIR)/qemu/coroutine-ucontext.c \
+        $(CONTRIBDIR)/qemu/block.c \
+        $(CONTRIBDIR)/qemu/nop-symbols.c
+
+qemu_block_la_SOURCES_qemu_util = \
+        $(CONTRIBDIR)/qemu/util/aes.c \
+        $(CONTRIBDIR)/qemu/util/bitmap.c \
+        $(CONTRIBDIR)/qemu/util/bitops.c \
+        $(CONTRIBDIR)/qemu/util/cutils.c \
+        $(CONTRIBDIR)/qemu/util/error.c \
+        $(CONTRIBDIR)/qemu/util/hbitmap.c \
+        $(CONTRIBDIR)/qemu/util/iov.c \
+        $(CONTRIBDIR)/qemu/util/module.c \
+        $(CONTRIBDIR)/qemu/util/oslib-posix.c \
+        $(CONTRIBDIR)/qemu/util/qemu-option.c \
+        $(CONTRIBDIR)/qemu/util/qemu-error.c \
+        $(CONTRIBDIR)/qemu/util/qemu-thread-posix.c \
+        $(CONTRIBDIR)/qemu/util/unicode.c \
+        $(CONTRIBDIR)/qemu/util/hexdump.c
+
+qemu_block_la_SOURCES_qemu_block = \
+        $(CONTRIBDIR)/qemu/block/snapshot.c \
+        $(CONTRIBDIR)/qemu/block/qcow2-cache.c \
+        $(CONTRIBDIR)/qemu/block/qcow2-cluster.c \
+        $(CONTRIBDIR)/qemu/block/qcow2-refcount.c \
+        $(CONTRIBDIR)/qemu/block/qcow2-snapshot.c \
+        $(CONTRIBDIR)/qemu/block/qcow2.c \
+        $(CONTRIBDIR)/qemu/block/qed-check.c \
+        $(CONTRIBDIR)/qemu/block/qed-cluster.c \
+        $(CONTRIBDIR)/qemu/block/qed-gencb.c \
+        $(CONTRIBDIR)/qemu/block/qed-l2-cache.c \
+        $(CONTRIBDIR)/qemu/block/qed-table.c \
+        $(CONTRIBDIR)/qemu/block/qed.c
+
+qemu_block_la_SOURCES_qemu_qobject = \
+        $(CONTRIBDIR)/qemu/qobject/json-lexer.c \
+        $(CONTRIBDIR)/qemu/qobject/json-parser.c \
+        $(CONTRIBDIR)/qemu/qobject/json-streamer.c \
+        $(CONTRIBDIR)/qemu/qobject/qbool.c \
+        $(CONTRIBDIR)/qemu/qobject/qdict.c \
+        $(CONTRIBDIR)/qemu/qobject/qerror.c \
+        $(CONTRIBDIR)/qemu/qobject/qfloat.c \
+        $(CONTRIBDIR)/qemu/qobject/qint.c \
+        $(CONTRIBDIR)/qemu/qobject/qjson.c \
+        $(CONTRIBDIR)/qemu/qobject/qlist.c \
+        $(CONTRIBDIR)/qemu/qobject/qstring.c
+
+qemu_block_la_SOURCES = \
+        $(qemu_block_la_SOURCES_qemu) \
+        $(qemu_block_la_SOURCES_qemu_util) \
+        $(qemu_block_la_SOURCES_qemu_block) \
+        $(qemu_block_la_SOURCES_qemu_qobject) \
+        bdrv-xlator.c \
+        coroutine-synctask.c \
+        bh-syncop.c \
+        monitor-logging.c \
+        clock-timer.c \
+        qemu-block.c \
+        qb-coroutines.c
+
+# Each header is listed once (include/block/coroutine.h used to appear twice).
+noinst_HEADERS_qemu = \
+        $(CONTRIBDIR)/qemu/config-host.h \
+        $(CONTRIBDIR)/qemu/qapi-types.h \
+        $(CONTRIBDIR)/qemu/qmp-commands.h \
+        $(CONTRIBDIR)/qemu/trace/generated-tracers.h \
+        $(CONTRIBDIR)/qemu/include/config.h \
+        $(CONTRIBDIR)/qemu/include/glib-compat.h \
+        $(CONTRIBDIR)/qemu/include/qemu-common.h \
+        $(CONTRIBDIR)/qemu/include/trace.h \
+        $(CONTRIBDIR)/qemu/include/block/coroutine.h \
+        $(CONTRIBDIR)/qemu/include/block/aio.h \
+        $(CONTRIBDIR)/qemu/include/block/block.h \
+        $(CONTRIBDIR)/qemu/include/block/block_int.h \
+        $(CONTRIBDIR)/qemu/include/block/blockjob.h \
+        $(CONTRIBDIR)/qemu/include/block/coroutine_int.h \
+        $(CONTRIBDIR)/qemu/include/block/snapshot.h \
+        $(CONTRIBDIR)/qemu/include/exec/cpu-common.h \
+        $(CONTRIBDIR)/qemu/include/exec/hwaddr.h \
+        $(CONTRIBDIR)/qemu/include/exec/poison.h \
+        $(CONTRIBDIR)/qemu/include/fpu/softfloat.h \
+        $(CONTRIBDIR)/qemu/include/migration/migration.h \
+        $(CONTRIBDIR)/qemu/include/migration/qemu-file.h \
+        $(CONTRIBDIR)/qemu/include/migration/vmstate.h \
+        $(CONTRIBDIR)/qemu/include/monitor/monitor.h \
+        $(CONTRIBDIR)/qemu/include/monitor/readline.h \
+        $(CONTRIBDIR)/qemu/include/qapi/error.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/json-lexer.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/json-parser.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/json-streamer.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qbool.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qdict.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qerror.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qfloat.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qint.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qjson.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qlist.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qobject.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qstring.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/types.h \
+        $(CONTRIBDIR)/qemu/include/qemu/aes.h \
+        $(CONTRIBDIR)/qemu/include/qemu/atomic.h \
+        $(CONTRIBDIR)/qemu/include/qemu/bitmap.h \
+        $(CONTRIBDIR)/qemu/include/qemu/bitops.h \
+        $(CONTRIBDIR)/qemu/include/qemu/bswap.h \
+        $(CONTRIBDIR)/qemu/include/qemu/compiler.h \
+        $(CONTRIBDIR)/qemu/include/qemu/error-report.h \
+        $(CONTRIBDIR)/qemu/include/qemu/event_notifier.h \
+        $(CONTRIBDIR)/qemu/include/qemu/hbitmap.h \
+        $(CONTRIBDIR)/qemu/include/qemu/host-utils.h \
+        $(CONTRIBDIR)/qemu/include/qemu/iov.h \
+        $(CONTRIBDIR)/qemu/include/qemu/main-loop.h \
+        $(CONTRIBDIR)/qemu/include/qemu/module.h \
+        $(CONTRIBDIR)/qemu/include/qemu/notify.h \
+        $(CONTRIBDIR)/qemu/include/qemu/option.h \
+        $(CONTRIBDIR)/qemu/include/qemu/option_int.h \
+        $(CONTRIBDIR)/qemu/include/qemu/osdep.h \
+        $(CONTRIBDIR)/qemu/include/qemu/queue.h \
+        $(CONTRIBDIR)/qemu/include/qemu/sockets.h \
+        $(CONTRIBDIR)/qemu/include/qemu/thread-posix.h \
+        $(CONTRIBDIR)/qemu/include/qemu/thread.h \
+        $(CONTRIBDIR)/qemu/include/qemu/timer.h \
+        $(CONTRIBDIR)/qemu/include/qemu/typedefs.h \
+        $(CONTRIBDIR)/qemu/include/sysemu/sysemu.h \
+        $(CONTRIBDIR)/qemu/include/sysemu/os-posix.h \
+        $(CONTRIBDIR)/qemu/block/qcow2.h \
+        $(CONTRIBDIR)/qemu/block/qed.h
+
+noinst_HEADERS = \
+        $(noinst_HEADERS_qemu) \
+        qemu-block.h \
+        qemu-block-memory-types.h \
+        qb-coroutines.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+        -I$(CONTRIBDIR)/qemu \
+        -I$(CONTRIBDIR)/qemu/include \
+        -DGLUSTER_XLATOR
+
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) $(GLIB_CFLAGS)
+
+CLEANFILES =
+
+endif
diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c
new file mode 100644
index 000000000..106c59775
--- /dev/null
+++ b/xlators/features/qemu-block/src/bdrv-xlator.c
@@ -0,0 +1,397 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "inode.h"
+#include "syncop.h"
+#include "qemu-block.h"
+#include "block/block_int.h"
+
+/* Prefix of the pseudo-URL that carries a raw inode_t pointer. */
+#define QB_INODEP_PREFIX "gluster://inodep:"
+
+/* Per-BlockDriverState private data: the inode the image lives on. */
+typedef struct BDRVGlusterState {
+        inode_t *inode;
+} BDRVGlusterState;
+
+static QemuOptsList runtime_opts = {
+        .name = "gluster",
+        .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+        .desc = {
+                {
+                        .name = "filename",
+                        .type = QEMU_OPT_STRING,
+                        .help = "GFID of file",
+                },
+                { /* end of list */ }
+        },
+};
+
+/*
+ * Recover the inode pointer that qb_inode_to_filename() encoded.
+ *
+ * Returns NULL when filename is NULL, does not start with
+ * QB_INODEP_PREFIX, or carries no parsable pointer after it.  No reference
+ * is taken on the returned inode.
+ */
+inode_t *
+qb_inode_from_filename (const char *filename)
+{
+        const size_t  prefix_len = sizeof (QB_INODEP_PREFIX) - 1;
+        void         *ptr        = NULL;
+
+        /* Never index past a short or foreign string. */
+        if (!filename || strncmp (filename, QB_INODEP_PREFIX, prefix_len))
+                return NULL;
+
+        if (sscanf (filename + prefix_len, "%p", &ptr) != 1)
+                return NULL;
+
+        return ptr;
+}
+
+
+/*
+ * Encode an inode pointer as "gluster://inodep:<ptr>" into filename (at
+ * most size bytes).  Returns whatever snprintf returns.
+ */
+int
+qb_inode_to_filename (inode_t *inode, char *filename, int size)
+{
+        return snprintf (filename, size, QB_INODEP_PREFIX "%p", inode);
+}
+
+
+/*
+ * Anonymous fd on the inode behind bs.  The result comes straight from
+ * fd_anonymous(): callers check it for NULL and fd_unref() it when done.
+ */
+static fd_t *
+fd_from_bs (BlockDriverState *bs)
+{
+        BDRVGlusterState *s = bs->opaque;
+
+        return fd_anonymous (s->inode);
+}
+
+
+/*
+ * bdrv_file_open handler.  "filename" is either
+ *   gluster://gfid:<gfid>   - a backing image, resolved with a lookup, or
+ *   gluster://inodep:<ptr>  - an inode pointer made by qb_inode_to_filename.
+ * On success a reference on the inode is stored in the driver state.
+ * Returns 0 or a negative errno value.
+ */
+static int
+qemu_gluster_open (BlockDriverState *bs, QDict *options, int bdrv_flags)
+{
+        inode_t          *inode     = NULL;
+        BDRVGlusterState *s         = bs->opaque;
+        QemuOpts         *opts      = NULL;
+        Error            *local_err = NULL;
+        const char       *filename  = NULL;
+        char              gfid_str[128];
+        int               ret       = 0;
+        qb_conf_t        *conf      = THIS->private;
+
+        opts = qemu_opts_create_nofail(&runtime_opts);
+        qemu_opts_absorb_qdict(opts, options, &local_err);
+        if (error_is_set(&local_err)) {
+                qerror_report_err(local_err);
+                error_free(local_err);
+                ret = -EINVAL;
+                goto out;
+        }
+
+        filename = qemu_opt_get(opts, "filename");
+        if (!filename) {
+                ret = -EINVAL;
+                goto out;
+        }
+
+        /*
+         * gfid:<gfid> format means we're opening a backing image.
+         * The field width keeps the conversion inside gfid_str, and only
+         * a return of exactly 1 means the prefix matched and a gfid string
+         * was stored (sscanf may also return EOF).
+         */
+        if (sscanf(filename, "gluster://gfid:%127s", gfid_str) == 1) {
+                loc_t        loc = {0,};
+                struct iatt  buf = {0,};
+                uuid_t       gfid;
+
+                if (uuid_parse(gfid_str, gfid) != 0) {
+                        ret = -EINVAL;
+                        goto out;
+                }
+
+                loc.inode = inode_find(conf->root_inode->table, gfid);
+                if (!loc.inode) {
+                        loc.inode = inode_new(conf->root_inode->table);
+                        if (!loc.inode) {
+                                ret = -ENOMEM;
+                                goto out;
+                        }
+                        uuid_copy(loc.inode->gfid, gfid);
+                }
+
+                uuid_copy(loc.gfid, loc.inode->gfid);
+                ret = syncop_lookup(FIRST_CHILD(THIS), &loc, NULL, &buf, NULL,
+                                    NULL);
+                if (ret) {
+                        /* Capture errno before cleanup can disturb it. */
+                        ret = -errno;
+                        loc_wipe(&loc);
+                        goto out;
+                }
+
+                s->inode = inode_ref(loc.inode);
+                loc_wipe(&loc);
+        } else {
+                inode = qb_inode_from_filename (filename);
+                if (!inode) {
+                        ret = -EINVAL;
+                        goto out;
+                }
+
+                s->inode = inode_ref(inode);
+        }
+
+        ret = 0;
+out:
+        /* filename points into opts, so opts is released only here. */
+        qemu_opts_del(opts);
+        return ret;
+}
+
+
+/*
+ * bdrv_create handler.  Refuses (-EFBIG) to format a file that already has
+ * data, otherwise truncates it according to the BLOCK_OPT_SIZE option.
+ * Returns 0 or a negative errno value.
+ */
+static int
+qemu_gluster_create (const char *filename, QEMUOptionParameter *options)
+{
+        uint64_t      total_size = 0;
+        inode_t      *inode      = NULL;
+        fd_t         *fd         = NULL;
+        struct iatt   stat       = {0, };
+        int           ret        = 0;
+
+        inode = qb_inode_from_filename (filename);
+        if (!inode)
+                return -EINVAL;
+
+        /*
+         * NOTE(review): total_size ends up in BDRV_SECTOR_SIZE units but is
+         * handed to syncop_ftruncate() below as-is - confirm whether a
+         * byte count was intended.
+         */
+        while (options && options->name) {
+                if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+                        total_size = options->value.n / BDRV_SECTOR_SIZE;
+                }
+                options++;
+        }
+
+        fd = fd_anonymous (inode);
+        if (!fd)
+                return -ENOMEM;
+
+        ret = syncop_fstat (FIRST_CHILD(THIS), fd, &stat);
+        if (ret) {
+                ret = -errno;
+                goto out;
+        }
+
+        if (stat.ia_size) {
+                /* format ONLY if the filesize is 0 bytes */
+                ret = -EFBIG;
+                goto out;
+        }
+
+        if (total_size) {
+                ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, total_size);
+                if (ret) {
+                        ret = -errno;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        /* errno was captured above, before this unref can disturb it. */
+        fd_unref (fd);
+        return ret;
+}
+
+
+/*
+ * bdrv_co_readv handler: read nb_sectors starting at sector_num through an
+ * anonymous fd and copy the data into qiov.  Returns the syncop_readv
+ * result on success, a negative errno value on failure.
+ */
+static int
+qemu_gluster_co_readv (BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                       QEMUIOVector *qiov)
+{
+        fd_t          *fd     = NULL;
+        off_t          offset = 0;
+        size_t         size   = 0;
+        struct iovec  *iov    = NULL;
+        int            count  = 0;
+        struct iobref *iobref = NULL;
+        int            ret    = 0;
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -EIO;
+
+        offset = sector_num * BDRV_SECTOR_SIZE;
+        size = nb_sectors * BDRV_SECTOR_SIZE;
+
+        ret = syncop_readv (FIRST_CHILD(THIS), fd, size, offset, 0,
+                            &iov, &count, &iobref);
+        if (ret < 0) {
+                ret = -errno;
+                goto out;
+        }
+
+        iov_copy (qiov->iov, qiov->niov, iov, count); /* *choke!* */
+
+out:
+        GF_FREE (iov);
+        if (iobref)
+                iobref_unref (iobref);
+        fd_unref (fd);
+        return ret;
+}
+
+
+/*
+ * bdrv_co_writev handler: gather qiov into one iobuf and write it at
+ * sector_num.  Returns the syncop_writev result on success, a negative
+ * errno value on failure.
+ */
+static int
+qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                        QEMUIOVector *qiov)
+{
+        fd_t          *fd     = NULL;
+        off_t          offset = 0;
+        size_t         size   = 0;
+        struct iobref *iobref = NULL;
+        struct iobuf  *iobuf  = NULL;
+        struct iovec   iov    = {0, };
+        int            ret    = -ENOMEM;
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -EIO;
+
+        offset = sector_num * BDRV_SECTOR_SIZE;
+        size = nb_sectors * BDRV_SECTOR_SIZE;
+
+        iobuf = iobuf_get2 (THIS->ctx->iobuf_pool, size);
+        if (!iobuf)
+                goto out;
+
+        /* On failure "out" drops our single iobuf reference; unref'ing it
+         * here as well would release it twice. */
+        iobref = iobref_new ();
+        if (!iobref)
+                goto out;
+
+        if (iobref_add (iobref, iobuf) < 0)
+                goto out;
+
+        iov_unload (iobuf_ptr (iobuf), qiov->iov, qiov->niov); /* *choke!* */
+
+        iov.iov_base = iobuf_ptr (iobuf);
+        iov.iov_len = size;
+
+        ret = syncop_writev (FIRST_CHILD(THIS), fd, &iov, 1, offset, iobref, 0);
+        if (ret < 0)
+                ret = -errno;
+
+out:
+        if (iobuf)
+                iobuf_unref (iobuf);
+        if (iobref)
+                iobref_unref (iobref);
+        fd_unref (fd);
+        return ret;
+}
+
+
+/*
+ * bdrv_co_flush_to_os handler.  Returns -EIO when no fd can be obtained,
+ * otherwise the syncop_flush result.
+ */
+static int
+qemu_gluster_co_flush (BlockDriverState *bs)
+{
+        fd_t *fd  = NULL;
+        int   ret = 0;
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -EIO;
+
+        ret = syncop_flush (FIRST_CHILD(THIS), fd);
+
+        fd_unref (fd);
+
+        return ret;
+}
+
+
+/*
+ * bdrv_co_flush_to_disk handler.  Returns -EIO when no fd can be obtained,
+ * otherwise the syncop_fsync result.
+ */
+static int
+qemu_gluster_co_fsync (BlockDriverState *bs)
+{
+        fd_t *fd  = NULL;
+        int   ret = 0;
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -EIO;
+
+        ret = syncop_fsync (FIRST_CHILD(THIS), fd, 0);
+
+        fd_unref (fd);
+
+        return ret;
+}
+
+
+/*
+ * bdrv_truncate handler.  Returns -EIO when no fd can be obtained,
+ * otherwise the syncop_ftruncate result.
+ */
+static int
+qemu_gluster_truncate (BlockDriverState *bs, int64_t offset)
+{
+        fd_t *fd  = NULL;
+        int   ret = 0;
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -EIO;
+
+        ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, offset);
+
+        fd_unref (fd);
+
+        return ret;
+}
+
+
+/*
+ * bdrv_getlength handler: size of the backing file as reported by fstat,
+ * or -1 when no fd can be obtained or the fstat fails.
+ */
+static int64_t
+qemu_gluster_getlength (BlockDriverState *bs)
+{
+        fd_t         *fd   = NULL;
+        int           ret  = 0;
+        struct iatt   iatt = {0, };
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -1;
+
+        ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt);
+
+        /* fd_from_bs() returned a reference; drop it on every path. */
+        fd_unref (fd);
+
+        if (ret < 0)
+                return -1;
+
+        return iatt.ia_size;
+}
+
+
+/*
+ * bdrv_get_allocated_file_size handler: ia_blocks * 512 as reported by
+ * fstat, or -1 when no fd can be obtained or the fstat fails.
+ */
+static int64_t
+qemu_gluster_allocated_file_size (BlockDriverState *bs)
+{
+        fd_t         *fd   = NULL;
+        int           ret  = 0;
+        struct iatt   iatt = {0, };
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -1;
+
+        ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt);
+
+        /* fd_from_bs() returned a reference; drop it on every path. */
+        fd_unref (fd);
+
+        if (ret < 0)
+                return -1;
+
+        return iatt.ia_blocks * 512;
+}
+
+
+/* bdrv_close handler: drop the inode reference taken at open time. */
+static void
+qemu_gluster_close (BlockDriverState *bs)
+{
+        BDRVGlusterState *s = NULL;
+
+        s = bs->opaque;
+
+        inode_unref (s->inode);
+
+        return;
+}
+
+
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+        {
+                .name = BLOCK_OPT_SIZE,
+                .type = OPT_SIZE,
+                .help = "Virtual disk size"
+        },
+        { NULL }
+};
+
+
+/* Driver table wiring the handlers in this file to the "gluster" protocol. */
+static BlockDriver bdrv_gluster = {
+        .format_name                  = "gluster",
+        .protocol_name                = "gluster",
+        .instance_size                = sizeof(BDRVGlusterState),
+        .bdrv_file_open               = qemu_gluster_open,
+        .bdrv_close                   = qemu_gluster_close,
+        .bdrv_create                  = qemu_gluster_create,
+        .bdrv_getlength               = qemu_gluster_getlength,
+        .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+        .bdrv_co_readv                = qemu_gluster_co_readv,
+        .bdrv_co_writev               = qemu_gluster_co_writev,
+        .bdrv_co_flush_to_os          = qemu_gluster_co_flush,
+        .bdrv_co_flush_to_disk        = qemu_gluster_co_fsync,
+        .bdrv_truncate                = qemu_gluster_truncate,
+        .create_options               = qemu_gluster_create_options,
+};
+
+
+/* Registers bdrv_gluster with the block layer; hooked up via block_init. */
+static void bdrv_gluster_init(void)
+{
+        bdrv_register(&bdrv_gluster);
+}
+
+
+block_init(bdrv_gluster_init);
diff --git a/xlators/features/qemu-block/src/bh-syncop.c b/xlators/features/qemu-block/src/bh-syncop.c
new file mode 100644
index 000000000..e8686f6d4
--- /dev/null
+++ b/xlators/features/qemu-block/src/bh-syncop.c
@@ -0,0 +1,48 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "block/aio.h"
+
+/*
+ * The four functions below are empty stand-ins for the qemu bottom-half
+ * entry points: none of them does any work.
+ */
+
+/* Stub: does nothing with bh. */
+void
+qemu_bh_schedule (QEMUBH *bh)
+{
+}
+
+/* Stub: does nothing with bh. */
+void
+qemu_bh_cancel (QEMUBH *bh)
+{
+}
+
+/* Stub: does nothing with bh. */
+void
+qemu_bh_delete (QEMUBH *bh)
+{
+}
+
+/* Stub: creates nothing and always hands back NULL. */
+QEMUBH *
+qemu_bh_new (QEMUBHFunc *cb, void *opaque)
+{
+        QEMUBH *none = NULL;
+
+        return none;
+}
diff --git a/xlators/features/qemu-block/src/clock-timer.c b/xlators/features/qemu-block/src/clock-timer.c
new file mode 100644
index 000000000..fcbec6ad1
--- /dev/null
+++ b/xlators/features/qemu-block/src/clock-timer.c
@@ -0,0 +1,60 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "qemu/timer.h"
+
+QEMUClock *vm_clock;
+int use_rt_clock = 0;
+
+/* Stub: no timer is created; always returns NULL. */
+QEMUTimer *qemu_new_timer (QEMUClock *clock, int scale,
+                           QEMUTimerCB *cb, void *opaque)
+{
+        QEMUTimer *none = NULL;
+
+        return none;
+}
+
+/* Stub: always reports a clock value of 0. */
+int64_t qemu_get_clock_ns (QEMUClock *clock)
+{
+        int64_t now = 0;
+
+        return now;
+}
+
+/* Stub: ignores both arguments. */
+void qemu_mod_timer (QEMUTimer *ts, int64_t expire_time)
+{
+}
+
+/* Stub: does nothing with ts. */
+void qemu_free_timer (QEMUTimer *ts)
+{
+}
+
+/* Stub: does nothing with ts. */
+void qemu_del_timer (QEMUTimer *ts)
+{
+}
+
+/*
+ * Wakes the current synctask and then yields it, so the task is put back
+ * on the run queue before giving up the thread.  Always returns 0.
+ */
+bool qemu_aio_wait()
+{
+        struct synctask *self = synctask_get();
+
+        synctask_wake (self);
+        synctask_yield (self);
+        return 0;
+}
diff --git a/xlators/features/qemu-block/src/coroutine-synctask.c b/xlators/features/qemu-block/src/coroutine-synctask.c
new file mode 100644
index 000000000..e43988a95
--- /dev/null
+++ b/xlators/features/qemu-block/src/coroutine-synctask.c
@@ -0,0 +1,116 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "qemu-block.h"
+
+/*
+ * This code serves as the bridge from the main glusterfs context to the qemu
+ * coroutine context via synctask. We create a single threaded syncenv with a
+ * single synctask responsible for processing a queue of coroutines. The qemu
+ * code invoked from within the synctask function handlers uses the ucontext
+ * coroutine implementation and scheduling logic internal to qemu. This
+ * effectively donates a thread of execution to qemu and its internal coroutine
+ * management.
+ *
+ * NOTE: The existence of concurrent synctasks has proven quite racy with regard
+ * to qemu coroutine management, particularly related to the lifecycle
+ * differences with top-level synctasks and internally created coroutines and
+ * interactions with qemu-internal queues (and locks, in turn). We explicitly
+ * disallow this scenario, via the queue, until it is better supported.
+ */
+
+/* Pending requests, the lock guarding them, and the task draining them. */
+static struct {
+        struct list_head queue;
+        gf_lock_t        lock;
+        struct synctask *task;
+} qb_co;
+
+/* One-time set-up of the queue head and its lock. */
+static void
+init_qbco()
+{
+        INIT_LIST_HEAD(&qb_co.queue);
+        LOCK_INIT(&qb_co.lock);
+}
+
+/* Completion callback for the worker synctask; nothing to do. */
+static int
+synctask_nop_cbk (int ret, call_frame_t *frame, void *opaque)
+{
+        return 0;
+}
+
+/*
+ * Body of the worker synctask: pop requests off the head of the queue one
+ * at a time and run each request's synctask_fn with the lock released.
+ * When the queue is found empty the task slot is cleared (still under the
+ * lock) so the next qb_coroutine() call starts a fresh worker.
+ */
+static int
+qb_synctask_wrap (void *opaque)
+{
+        qb_local_t *next = NULL;
+
+        LOCK(&qb_co.lock);
+
+        for (;;) {
+                if (list_empty(&qb_co.queue))
+                        break;
+
+                /* Detach the oldest entry. */
+                next = list_entry(qb_co.queue.next, qb_local_t, list);
+                list_del_init(&next->list);
+
+                UNLOCK(&qb_co.lock);
+
+                next->synctask_fn(next);
+                /* qb_local is now unwound and gone! */
+
+                LOCK(&qb_co.lock);
+        }
+
+        qb_co.task = NULL;
+
+        UNLOCK(&qb_co.lock);
+
+        return 0;
+}
+
+/*
+ * Queue fn to be run for the request in frame->local, starting the worker
+ * synctask on the translator's syncenv if none is active.  Always
+ * returns 0.
+ *
+ * NOTE(review): the first-call initialisation below is not itself
+ * protected by a lock - confirm callers cannot race on the first request.
+ */
+int
+qb_coroutine (call_frame_t *frame, synctask_fn_t fn)
+{
+        static int  initialized = 0;
+        qb_local_t *request     = frame->local;
+        qb_conf_t  *conf        = frame->this->private;
+
+        request->synctask_fn = fn;
+
+        if (!initialized) {
+                initialized = 1;
+                init_qbco();
+        }
+
+        LOCK(&qb_co.lock);
+
+        if (!qb_co.task) {
+                qb_co.task = synctask_create(conf->env, qb_synctask_wrap,
+                                             synctask_nop_cbk, frame, NULL);
+        }
+
+        list_add_tail(&request->list, &qb_co.queue);
+
+        UNLOCK(&qb_co.lock);
+
+        return 0;
+}
diff --git a/xlators/features/qemu-block/src/monitor-logging.c b/xlators/features/qemu-block/src/monitor-logging.c
new file mode 100644
index 000000000..d37c37f0f
--- /dev/null
+++ b/xlators/features/qemu-block/src/monitor-logging.c
@@ -0,0 +1,50 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "qemu-block-memory-types.h"
+
+#include "block/block_int.h"
+
+Monitor *cur_mon;
+
+/* Always answers "no": there is no QMP support here. */
+int
+monitor_cur_is_qmp()
+{
+        int is_qmp = 0;
+
+        return is_qmp;
+}
+
+/* NOP here: the error is not recorded anywhere. */
+void
+monitor_set_error (Monitor *mon, QError *qerror)
+{
+}
+
+
+/*
+ * Format the message into a fixed 4096-byte buffer (vsnprintf bounds the
+ * write to the buffer size) and send it to the gluster log at ERROR level
+ * under the current translator's name.
+ */
+void
+monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+{
+        char message[4096];
+
+        vsnprintf(message, sizeof(message), fmt, ap);
+
+        gf_log (THIS->name, GF_LOG_ERROR, "%s", message);
+}
diff --git a/xlators/features/qemu-block/src/qb-coroutines.c b/xlators/features/qemu-block/src/qb-coroutines.c
new file mode 100644
index 000000000..7c52adb21
--- /dev/null
+++ b/xlators/features/qemu-block/src/qb-coroutines.c
@@ -0,0 +1,662 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "inode.h" +#include "call-stub.h" +#include "defaults.h" +#include "qemu-block-memory-types.h" +#include "qemu-block.h" +#include "qb-coroutines.h" + + +int +qb_format_and_resume (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + char filename[64]; + char base_filename[128]; + int use_base = 0; + qb_inode_t *qb_inode = NULL; + Error *local_err = NULL; + fd_t *fd = NULL; + dict_t *xattr = NULL; + qb_conf_t *qb_conf = NULL; + int ret = -1; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + qb_conf = frame->this->private; + + qb_inode_to_filename (inode, filename, 64); + + qb_inode = qb_inode_ctx_get (frame->this, inode); + + /* + * See if the caller specified a backing image. + */ + if (!uuid_is_null(qb_inode->backing_gfid) || qb_inode->backing_fname) { + loc_t loc = {0,}; + char gfid_str[64]; + struct iatt buf; + + if (!uuid_is_null(qb_inode->backing_gfid)) { + loc.inode = inode_find(qb_conf->root_inode->table, + qb_inode->backing_gfid); + if (!loc.inode) { + loc.inode = inode_new(qb_conf->root_inode->table); + uuid_copy(loc.inode->gfid, + qb_inode->backing_gfid); + } + uuid_copy(loc.gfid, loc.inode->gfid); + } else if (qb_inode->backing_fname) { + loc.inode = inode_new(qb_conf->root_inode->table); + loc.name = qb_inode->backing_fname; + loc.parent = inode_parent(inode, NULL, NULL); + loc_path(&loc, loc.name); + } + + /* + * Lookup the backing image. Verify existence and/or get the + * gfid if we don't already have it. 
+ */ + ret = syncop_lookup(FIRST_CHILD(frame->this), &loc, NULL, &buf, + NULL, NULL); + GF_FREE(qb_inode->backing_fname); + if (ret) { + loc_wipe(&loc); + ret = errno; + goto err; + } + + uuid_copy(qb_inode->backing_gfid, buf.ia_gfid); + loc_wipe(&loc); + + /* + * We pass the filename of the backing image into the qemu block + * subsystem as the associated gfid. This is embedded into the + * clone image and passed along to the gluster bdrv backend when + * the block subsystem needs to operate on the backing image on + * behalf of the clone. + */ + uuid_unparse(qb_inode->backing_gfid, gfid_str); + snprintf(base_filename, sizeof(base_filename), + "gluster://gfid:%s", gfid_str); + use_base = 1; + } + + bdrv_img_create (filename, qb_inode->fmt, + use_base ? base_filename : NULL, 0, 0, qb_inode->size, + 0, &local_err, true); + + if (error_is_set (&local_err)) { + gf_log (frame->this->name, GF_LOG_ERROR, "%s", + error_get_pretty (local_err)); + error_free (local_err); + QB_STUB_UNWIND (stub, -1, EIO); + return 0; + } + + fd = fd_anonymous (inode); + if (!fd) { + gf_log (frame->this->name, GF_LOG_ERROR, + "could not create anonymous fd for %s", + uuid_utoa (inode->gfid)); + QB_STUB_UNWIND (stub, -1, ENOMEM); + return 0; + } + + xattr = dict_new (); + if (!xattr) { + gf_log (frame->this->name, GF_LOG_ERROR, + "could not allocate xattr dict for %s", + uuid_utoa (inode->gfid)); + QB_STUB_UNWIND (stub, -1, ENOMEM); + fd_unref (fd); + return 0; + } + + ret = dict_set_str (xattr, qb_conf->qb_xattr_key, local->fmt); + if (ret) { + gf_log (frame->this->name, GF_LOG_ERROR, + "could not dict_set for %s", + uuid_utoa (inode->gfid)); + QB_STUB_UNWIND (stub, -1, ENOMEM); + fd_unref (fd); + dict_unref (xattr); + return 0; + } + + ret = syncop_fsetxattr (FIRST_CHILD(THIS), fd, xattr, 0); + if (ret) { + ret = errno; + gf_log (frame->this->name, GF_LOG_ERROR, + "failed to setxattr for %s", + uuid_utoa (inode->gfid)); + QB_STUB_UNWIND (stub, -1, ret); + fd_unref (fd); + dict_unref (xattr); 
+ return 0; + } + + fd_unref (fd); + dict_unref (xattr); + + QB_STUB_UNWIND (stub, 0, 0); + + return 0; + +err: + QB_STUB_UNWIND(stub, -1, ret); + return 0; +} + + +static BlockDriverState * +qb_bs_create (inode_t *inode, const char *fmt) +{ + char filename[64]; + BlockDriverState *bs = NULL; + BlockDriver *drv = NULL; + int op_errno = 0; + int ret = 0; + + bs = bdrv_new (uuid_utoa (inode->gfid)); + if (!bs) { + op_errno = ENOMEM; + gf_log (THIS->name, GF_LOG_ERROR, + "could not allocate @bdrv for gfid:%s", + uuid_utoa (inode->gfid)); + goto err; + } + + drv = bdrv_find_format (fmt); + if (!drv) { + op_errno = EINVAL; + gf_log (THIS->name, GF_LOG_ERROR, + "Unknown file format: %s for gfid:%s", + fmt, uuid_utoa (inode->gfid)); + goto err; + } + + qb_inode_to_filename (inode, filename, 64); + + ret = bdrv_open (bs, filename, NULL, BDRV_O_RDWR, drv); + if (ret < 0) { + op_errno = -ret; + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to bdrv_open() gfid:%s (%s)", + uuid_utoa (inode->gfid), strerror (op_errno)); + goto err; + } + + return bs; +err: + errno = op_errno; + return NULL; +} + + +int +qb_co_open (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + qb_inode->refcnt++; + + QB_STUB_RESUME (stub); + + return 0; +} + + +int +qb_co_writev (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + QEMUIOVector qiov = {0, }; + int ret = 0; + + local = opaque; 
+ frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + qemu_iovec_init_external (&qiov, stub->args.vector, stub->args.count); + + ret = bdrv_pwritev (qb_inode->bs, stub->args.offset, &qiov); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +int +qb_co_readv (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + struct iovec iov = {0, }; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + if (stub->args.offset >= qb_inode->size) { + QB_STUB_UNWIND (stub, 0, 0); + return 0; + } + + iobuf = iobuf_get2 (frame->this->ctx->iobuf_pool, stub->args.size); + if (!iobuf) { + QB_STUB_UNWIND (stub, -1, ENOMEM); + return 0; + } + + iobref = iobref_new (); + if (!iobref) { + QB_STUB_UNWIND (stub, -1, ENOMEM); + iobuf_unref (iobuf); + return 0; + } + + if (iobref_add (iobref, iobuf) < 0) { + iobuf_unref (iobuf); + iobref_unref (iobref); + QB_STUB_UNWIND (stub, -1, ENOMEM); + return 0; + } + + ret = bdrv_pread (qb_inode->bs, stub->args.offset, iobuf_ptr (iobuf), + stub->args.size); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + 
iobref_unref (iobref); + return 0; + } + + iov.iov_base = iobuf_ptr (iobuf); + iov.iov_len = ret; + + stub->args_cbk.vector = iov_dup (&iov, 1); + stub->args_cbk.count = 1; + stub->args_cbk.iobref = iobref; + + QB_STUB_UNWIND (stub, ret, 0); + + return 0; +} + + +int +qb_co_fsync (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + ret = bdrv_flush (qb_inode->bs); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +static void +qb_update_size_xattr (xlator_t *this, fd_t *fd, const char *fmt, off_t offset) +{ + char val[QB_XATTR_VAL_MAX]; + qb_conf_t *qb_conf = NULL; + dict_t *xattr = NULL; + + qb_conf = this->private; + + snprintf (val, QB_XATTR_VAL_MAX, "%s:%llu", + fmt, (long long unsigned) offset); + + xattr = dict_new (); + if (!xattr) + return; + + if (dict_set_str (xattr, qb_conf->qb_xattr_key, val) != 0) { + dict_unref (xattr); + return; + } + + syncop_fsetxattr (FIRST_CHILD(this), fd, xattr, 0); + dict_unref (xattr); +} + + +int +qb_co_truncate (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + int ret = 0; + off_t offset = 0; + xlator_t *this = NULL; + + this = THIS; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need 
locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.prestat); + stub->args_cbk.prestat.ia_size = qb_inode->size; + + ret = bdrv_truncate (qb_inode->bs, stub->args.offset); + if (ret < 0) + goto out; + + offset = bdrv_getlength (qb_inode->bs); + + qb_inode->size = offset; + + syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.poststat); + stub->args_cbk.poststat.ia_size = qb_inode->size; + + qb_update_size_xattr (this, local->fd, qb_inode->fmt, qb_inode->size); + +out: + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +int +qb_co_close (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + BlockDriverState *bs = NULL; + + local = opaque; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (THIS, inode); + + if (!--qb_inode->refcnt) { + bs = qb_inode->bs; + qb_inode->bs = NULL; + bdrv_delete (bs); + } + + frame = local->frame; + frame->local = NULL; + qb_local_free (THIS, local); + STACK_DESTROY (frame->root); + + return 0; +} + + +int +qb_snapshot_create (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + QEMUSnapshotInfo sn; + struct timeval tv = {0, }; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + 
} + + memset (&sn, 0, sizeof (sn)); + pstrcpy (sn.name, sizeof(sn.name), local->name); + gettimeofday (&tv, NULL); + sn.date_sec = tv.tv_sec; + sn.date_nsec = tv.tv_usec * 1000; + + ret = bdrv_snapshot_create (qb_inode->bs, &sn); + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +int +qb_snapshot_delete (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + ret = bdrv_snapshot_delete (qb_inode->bs, local->name); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +int +qb_snapshot_goto (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + ret = bdrv_snapshot_goto (qb_inode->bs, local->name); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} diff --git a/xlators/features/qemu-block/src/qb-coroutines.h 
b/xlators/features/qemu-block/src/qb-coroutines.h new file mode 100644 index 000000000..583319f3b --- /dev/null +++ b/xlators/features/qemu-block/src/qb-coroutines.h @@ -0,0 +1,30 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __QB_COROUTINES_H +#define __QB_COROUTINES_H + +#include "syncop.h" +#include "call-stub.h" +#include "block/block_int.h" +#include "monitor/monitor.h" + +int qb_format_and_resume (void *opaque); +int qb_snapshot_create (void *opaque); +int qb_snapshot_delete (void *opaque); +int qb_snapshot_goto (void *opaque); +int qb_co_open (void *opaque); +int qb_co_close (void *opaque); +int qb_co_writev (void *opaque); +int qb_co_readv (void *opaque); +int qb_co_fsync (void *opaque); +int qb_co_truncate (void *opaque); + +#endif /* __QB_COROUTINES_H */ diff --git a/xlators/features/qemu-block/src/qemu-block-memory-types.h b/xlators/features/qemu-block/src/qemu-block-memory-types.h new file mode 100644 index 000000000..267b3893f --- /dev/null +++ b/xlators/features/qemu-block/src/qemu-block-memory-types.h @@ -0,0 +1,25 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + + +#ifndef __QB_MEM_TYPES_H__ +#define __QB_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_qb_mem_types_ { + gf_qb_mt_qb_conf_t = gf_common_mt_end + 1, + gf_qb_mt_qb_inode_t, + gf_qb_mt_qb_local_t, + gf_qb_mt_coroutinesynctask_t, + gf_qb_mt_end +}; +#endif + diff --git a/xlators/features/qemu-block/src/qemu-block.c b/xlators/features/qemu-block/src/qemu-block.c new file mode 100644 index 000000000..48bbf3140 --- /dev/null +++ b/xlators/features/qemu-block/src/qemu-block.c @@ -0,0 +1,1140 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "inode.h" +#include "call-stub.h" +#include "defaults.h" +#include "qemu-block-memory-types.h" +#include "qemu-block.h" +#include "qb-coroutines.h" + + +qb_inode_t * +__qb_inode_ctx_get (xlator_t *this, inode_t *inode) +{ + uint64_t value = 0; + qb_inode_t *qb_inode = NULL; + + __inode_ctx_get (inode, this, &value); + qb_inode = (qb_inode_t *)(unsigned long) value; + + return qb_inode; +} + + +qb_inode_t * +qb_inode_ctx_get (xlator_t *this, inode_t *inode) +{ + qb_inode_t *qb_inode = NULL; + + LOCK (&inode->lock); + { + qb_inode = __qb_inode_ctx_get (this, inode); + } + UNLOCK (&inode->lock); + + return qb_inode; +} + + +qb_inode_t * +qb_inode_ctx_del (xlator_t *this, inode_t *inode) +{ + uint64_t value = 0; + qb_inode_t *qb_inode = NULL; + + inode_ctx_del (inode, this, &value); + qb_inode = (qb_inode_t *)(unsigned long) value; + + return qb_inode; +} + + +int +qb_inode_cleanup (xlator_t *this, inode_t *inode, int warn) +{ + qb_inode_t *qb_inode = 
NULL; + + qb_inode = qb_inode_ctx_del (this, inode); + + if (!qb_inode) + return 0; + + if (warn) + gf_log (this->name, GF_LOG_WARNING, + "inode %s no longer block formatted", + uuid_utoa (inode->gfid)); + + /* free (qb_inode->bs); */ + + GF_FREE (qb_inode); + + return 0; +} + + +int +qb_iatt_fixup (xlator_t *this, inode_t *inode, struct iatt *iatt) +{ + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, inode); + if (!qb_inode) + return 0; + + iatt->ia_size = qb_inode->size; + + return 0; +} + + +int +qb_format_extract (xlator_t *this, char *format, inode_t *inode) +{ + char *s, *save; + uint64_t size = 0; + char fmt[QB_XATTR_VAL_MAX+1] = {0, }; + qb_inode_t *qb_inode = NULL; + char *formatstr = NULL; + uuid_t gfid = {0,}; + char gfid_str[64] = {0,}; + int ret; + + strncpy(fmt, format, QB_XATTR_VAL_MAX); + + s = strtok_r(fmt, ":", &save); + if (!s) + goto invalid; + formatstr = gf_strdup(s); + + s = strtok_r(NULL, ":", &save); + if (!s) + goto invalid; + if (gf_string2bytesize (s, &size)) + goto invalid; + if (!size) + goto invalid; + + s = strtok_r(NULL, "\0", &save); + if (s && !strncmp(s, "<gfid:", strlen("<gfid:"))) { + /* + * Check for valid gfid backing image specifier. + */ + if (strlen(s) + 1 > sizeof(gfid_str)) + goto invalid; + ret = sscanf(s, "<gfid:%[^>]s", gfid_str); + if (ret == 1) { + ret = uuid_parse(gfid_str, gfid); + if (ret < 0) + goto invalid; + } + } + + qb_inode = qb_inode_ctx_get (this, inode); + if (!qb_inode) + qb_inode = GF_CALLOC (1, sizeof (*qb_inode), + gf_qb_mt_qb_inode_t); + if (!qb_inode) { + GF_FREE(formatstr); + return ENOMEM; + } + + strncpy(qb_inode->fmt, formatstr, QB_XATTR_VAL_MAX); + qb_inode->size = size; + + /* + * If a backing gfid was not specified, interpret any remaining bytes + * associated with a backing image as a filename local to the parent + * directory. The format processing will validate further. 
+ */ + if (!uuid_is_null(gfid)) + uuid_copy(qb_inode->backing_gfid, gfid); + else if (s) + qb_inode->backing_fname = gf_strdup(s); + + inode_ctx_set (inode, this, (void *)&qb_inode); + + GF_FREE(formatstr); + + return 0; + +invalid: + GF_FREE(formatstr); + + gf_log (this->name, GF_LOG_WARNING, + "invalid format '%s' in inode %s", format, + uuid_utoa (inode->gfid)); + return EINVAL; +} + + +void +qb_local_free (xlator_t *this, qb_local_t *local) +{ + if (local->inode) + inode_unref (local->inode); + if (local->fd) + fd_unref (local->fd); + GF_FREE (local); +} + + +int +qb_local_init (call_frame_t *frame) +{ + qb_local_t *qb_local = NULL; + + qb_local = GF_CALLOC (1, sizeof (*qb_local), gf_qb_mt_qb_local_t); + if (!qb_local) + return -1; + INIT_LIST_HEAD(&qb_local->list); + + qb_local->frame = frame; + frame->local = qb_local; + + return 0; +} + + +int +qb_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct iatt *buf, + dict_t *xdata, struct iatt *postparent) +{ + char *format = NULL; + qb_conf_t *conf = NULL; + + conf = this->private; + + if (op_ret == -1) + goto out; + + /* + * Cache the root inode for dealing with backing images. The format + * coroutine and the gluster qemu backend driver both use the root inode + * table to verify and/or redirect I/O to the backing image via + * anonymous fd's. 
+ */ + if (!conf->root_inode && __is_root_gfid(inode->gfid)) + conf->root_inode = inode_ref(inode); + + if (!xdata) + goto out; + + if (dict_get_str (xdata, conf->qb_xattr_key, &format)) + goto out; + + if (!format) { + qb_inode_cleanup (this, inode, 1); + goto out; + } + + op_errno = qb_format_extract (this, format, inode); + if (op_errno) + op_ret = -1; + + qb_iatt_fixup (this, inode, buf); +out: + QB_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, + xdata, postparent); + return 0; +} + + +int +qb_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + qb_conf_t *conf = NULL; + + conf = this->private; + + xdata = xdata ? dict_ref (xdata) : dict_new (); + + if (!xdata) + goto enomem; + + if (dict_set_int32 (xdata, conf->qb_xattr_key, 0)) + goto enomem; + + STACK_WIND (frame, qb_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + dict_unref (xdata); + return 0; +enomem: + QB_STACK_UNWIND (lookup, frame, -1, ENOMEM, 0, 0, 0, 0); + if (xdata) + dict_unref (xdata); + return 0; +} + + +int +qb_setxattr_format (call_frame_t *frame, xlator_t *this, call_stub_t *stub, + dict_t *xattr, inode_t *inode) +{ + char *format = NULL; + int op_errno = 0; + qb_local_t *qb_local = NULL; + data_t *data = NULL; + qb_inode_t *qb_inode; + + if (!(data = dict_get (xattr, "trusted.glusterfs.block-format"))) { + QB_STUB_RESUME (stub); + return 0; + } + + format = alloca (data->len + 1); + memcpy (format, data->data, data->len); + format[data->len] = 0; + + op_errno = qb_format_extract (this, format, inode); + if (op_errno) { + QB_STUB_UNWIND (stub, -1, op_errno); + return 0; + } + qb_inode = qb_inode_ctx_get(this, inode); + + qb_local = frame->local; + + qb_local->stub = stub; + qb_local->inode = inode_ref (inode); + + snprintf(qb_local->fmt, QB_XATTR_VAL_MAX, "%s:%lu", qb_inode->fmt, + qb_inode->size); + + qb_coroutine (frame, qb_format_and_resume); + + return 0; +} + + +int +qb_setxattr_snapshot_create (call_frame_t *frame, 
xlator_t *this, + call_stub_t *stub, dict_t *xattr, inode_t *inode) +{ + qb_local_t *qb_local = NULL; + char *name = NULL; + data_t *data = NULL; + + if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))) { + QB_STUB_RESUME (stub); + return 0; + } + + name = alloca (data->len + 1); + memcpy (name, data->data, data->len); + name[data->len] = 0; + + qb_local = frame->local; + + qb_local->stub = stub; + qb_local->inode = inode_ref (inode); + strncpy (qb_local->name, name, 128); + + qb_coroutine (frame, qb_snapshot_create); + + return 0; +} + + +int +qb_setxattr_snapshot_delete (call_frame_t *frame, xlator_t *this, + call_stub_t *stub, dict_t *xattr, inode_t *inode) +{ + qb_local_t *qb_local = NULL; + char *name = NULL; + data_t *data = NULL; + + if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))) { + QB_STUB_RESUME (stub); + return 0; + } + + name = alloca (data->len + 1); + memcpy (name, data->data, data->len); + name[data->len] = 0; + + qb_local = frame->local; + + qb_local->stub = stub; + qb_local->inode = inode_ref (inode); + strncpy (qb_local->name, name, 128); + + qb_coroutine (frame, qb_snapshot_delete); + + return 0; +} + +int +qb_setxattr_snapshot_goto (call_frame_t *frame, xlator_t *this, + call_stub_t *stub, dict_t *xattr, inode_t *inode) +{ + qb_local_t *qb_local = NULL; + char *name = NULL; + data_t *data = NULL; + + if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))) { + QB_STUB_RESUME (stub); + return 0; + } + + name = alloca (data->len + 1); + memcpy (name, data->data, data->len); + name[data->len] = 0; + + qb_local = frame->local; + + qb_local->stub = stub; + qb_local->inode = inode_ref (inode); + strncpy (qb_local->name, name, 128); + + qb_coroutine (frame, qb_snapshot_goto); + + return 0; +} + + +int +qb_setxattr_common (call_frame_t *frame, xlator_t *this, call_stub_t *stub, + dict_t *xattr, inode_t *inode) +{ + data_t *data = NULL; + + if ((data = dict_get (xattr, 
"trusted.glusterfs.block-format"))) { + qb_setxattr_format (frame, this, stub, xattr, inode); + return 0; + } + + if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))) { + qb_setxattr_snapshot_create (frame, this, stub, xattr, inode); + return 0; + } + + if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))) { + qb_setxattr_snapshot_delete (frame, this, stub, xattr, inode); + return 0; + } + + if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))) { + qb_setxattr_snapshot_goto (frame, this, stub, xattr, inode); + return 0; + } + + QB_STUB_RESUME (stub); + + return 0; +} + + +int +qb_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, + int flags, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (qb_local_init (frame) != 0) + goto enomem; + + stub = fop_setxattr_stub (frame, default_setxattr_resume, loc, xattr, + flags, xdata); + if (!stub) + goto enomem; + + qb_setxattr_common (frame, this, stub, xattr, loc->inode); + + return 0; +enomem: + QB_STACK_UNWIND (setxattr, frame, -1, ENOMEM, 0); + return 0; +} + + +int +qb_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, + int flags, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (qb_local_init (frame) != 0) + goto enomem; + + stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, fd, xattr, + flags, xdata); + if (!stub) + goto enomem; + + qb_setxattr_common (frame, this, stub, xattr, fd->inode); + + return 0; +enomem: + QB_STACK_UNWIND (fsetxattr, frame, -1, ENOMEM, 0); + return 0; +} + + +int +qb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + qb_local_t *qb_local = NULL; + + qb_local = frame->local; + + if (op_ret < 0) + goto unwind; + + if (!qb_inode_ctx_get (this, qb_local->inode)) + goto unwind; + + stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno, fd, xdata); + if (!stub) { + op_ret = -1; + 
op_errno = ENOMEM; + goto unwind; + } + + qb_local->stub = stub; + + qb_coroutine (frame, qb_co_open); + + return 0; +unwind: + QB_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + + +int +qb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, loc->inode); + if (!qb_inode) { + STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, + xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (loc->inode); + qb_local->fd = fd_ref (fd); + + STACK_WIND (frame, qb_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +enomem: + QB_STACK_UNWIND (open, frame, -1, ENOMEM, 0, 0); + return 0; +} + + +int +qb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, + offset, flags, iobref, xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_writev_stub (frame, NULL, fd, vector, count, + offset, flags, iobref, xdata); + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_writev); + + return 0; +enomem: + QB_STACK_UNWIND (writev, frame, -1, ENOMEM, 0, 0, 0); + return 0; +} + + +int +qb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + 
qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, + flags, xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_readv_stub (frame, NULL, fd, size, offset, + flags, xdata); + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_readv); + + return 0; +enomem: + QB_STACK_UNWIND (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); + return 0; +} + + +int +qb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int dsync, + dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, dsync, xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_fsync_stub (frame, NULL, fd, dsync, xdata); + + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_fsync); + + return 0; +enomem: + QB_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; +} + + +int +qb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_flush_stub (frame, NULL, fd, xdata); + + if (!qb_local->stub) + goto 
enomem; + + qb_coroutine (frame, qb_co_fsync); + + return 0; +enomem: + QB_STACK_UNWIND (flush, frame, -1, ENOMEM, 0); + return 0; +} + +static int32_t +qb_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + qb_conf_t *conf = this->private; + gf_dirent_t *entry; + char *format; + + list_for_each_entry(entry, &entries->list, list) { + if (!entry->inode || !entry->dict) + continue; + + format = NULL; + if (dict_get_str(entry->dict, conf->qb_xattr_key, &format)) + continue; + + if (!format) { + qb_inode_cleanup(this, entry->inode, 1); + continue; + } + + if (qb_format_extract(this, format, entry->inode)) + continue; + + qb_iatt_fixup(this, entry->inode, &entry->d_stat); + } + + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; +} + +static int32_t +qb_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + qb_conf_t *conf = this->private; + + xdata = xdata ? 
dict_ref(xdata) : dict_new(); + if (!xdata) + goto enomem; + + if (dict_set_int32 (xdata, conf->qb_xattr_key, 0)) + goto enomem; + + STACK_WIND(frame, qb_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); + + dict_unref(xdata); + return 0; + +enomem: + QB_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); + if (xdata) + dict_unref(xdata); + return 0; +} + +int +qb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, loc->inode); + if (!qb_inode) { + STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, + xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (loc->inode); + qb_local->fd = fd_anonymous (loc->inode); + + qb_local->stub = fop_truncate_stub (frame, NULL, loc, offset, xdata); + + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_truncate); + + return 0; +enomem: + QB_STACK_UNWIND (truncate, frame, -1, ENOMEM, 0, 0, 0); + return 0; +} + + +int +qb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, + xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_ftruncate_stub (frame, NULL, fd, offset, xdata); + + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_truncate); + + return 0; +enomem: + QB_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, 0, 0, 0); + return 0; +} + + +int 
+qb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = frame->local; + frame->local = NULL; + + if (inode) { + qb_iatt_fixup (this, inode, iatt); + inode_unref (inode); + } + + QB_STACK_UNWIND (stat, frame, op_ret, op_errno, iatt, xdata); + + return 0; +} + +int +qb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + if (qb_inode_ctx_get (this, loc->inode)) + frame->local = inode_ref (loc->inode); + + STACK_WIND (frame, qb_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; +} + + +int +qb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = frame->local; + frame->local = NULL; + + if (inode) { + qb_iatt_fixup (this, inode, iatt); + inode_unref (inode); + } + + QB_STACK_UNWIND (fstat, frame, op_ret, op_errno, iatt, xdata); + + return 0; +} + + +int +qb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + if (qb_inode_ctx_get (this, fd->inode)) + frame->local = inode_ref (fd->inode); + + STACK_WIND (frame, qb_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; +} + + +int +qb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *pre, struct iatt *post, + dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = frame->local; + frame->local = NULL; + + if (inode) { + qb_iatt_fixup (this, inode, pre); + qb_iatt_fixup (this, inode, post); + inode_unref (inode); + } + + QB_STACK_UNWIND (setattr, frame, op_ret, op_errno, pre, post, xdata); + + return 0; +} + + +int +qb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf, + int valid, dict_t *xdata) +{ + if (qb_inode_ctx_get (this, loc->inode)) + frame->local = inode_ref (loc->inode); + + STACK_WIND (frame, 
qb_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, buf, valid, xdata); + return 0; +} + + +int +qb_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *pre, struct iatt *post, + dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = frame->local; + frame->local = NULL; + + if (inode) { + qb_iatt_fixup (this, inode, pre); + qb_iatt_fixup (this, inode, post); + inode_unref (inode); + } + + QB_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, pre, post, xdata); + + return 0; +} + + +int +qb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf, + int valid, dict_t *xdata) +{ + if (qb_inode_ctx_get (this, fd->inode)) + frame->local = inode_ref (fd->inode); + + STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, buf, valid, xdata); + return 0; +} + + +int +qb_forget (xlator_t *this, inode_t *inode) +{ + return qb_inode_cleanup (this, inode, 0); +} + + +int +qb_release (xlator_t *this, fd_t *fd) +{ + call_frame_t *frame = NULL; + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + gf_log (this->name, GF_LOG_ERROR, + "Could not allocate frame. " + "Leaking QEMU BlockDriverState"); + return -1; + } + + if (qb_local_init (frame) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "Could not allocate local. " + "Leaking QEMU BlockDriverState"); + STACK_DESTROY (frame->root); + return -1; + } + + if (qb_coroutine (frame, qb_co_close) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "Could not allocate coroutine. 
" + "Leaking QEMU BlockDriverState"); + qb_local_free (this, frame->local); + frame->local = NULL; + STACK_DESTROY (frame->root); + } + + return 0; +} + +int +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + ret = xlator_mem_acct_init (this, gf_qb_mt_end + 1); + + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init " + "failed"); + return ret; +} + + +int +reconfigure (xlator_t *this, dict_t *options) +{ + return 0; +} + + +int +init (xlator_t *this) +{ + qb_conf_t *conf = NULL; + int32_t ret = -1; + static int bdrv_inited = 0; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: qemu-block (%s) not configured with exactly " + "one child", this->name); + goto out; + } + + conf = GF_CALLOC (1, sizeof (*conf), gf_qb_mt_qb_conf_t); + if (!conf) + goto out; + + /* configure 'option window-size <size>' */ + GF_OPTION_INIT ("default-password", conf->default_password, str, out); + + /* qemu coroutines use "co_mutex" for synchronizing among themselves. + However "co_mutex" itself is not threadsafe if the coroutine framework + is multithreaded (which usually is not). However synctasks are + fundamentally multithreaded, so for now create a syncenv which has + scaling limits set to max 1 thread so that the qemu coroutines can + execute "safely". + + Future work: provide an implementation of "co_mutex" which is + threadsafe and use the global multithreaded ctx->env syncenv. 
+ */ + conf->env = syncenv_new (0, 1, 1); + + this->private = conf; + + ret = 0; + + snprintf (conf->qb_xattr_key, QB_XATTR_KEY_MAX, QB_XATTR_KEY_FMT, + this->name); + + cur_mon = (void *) 1; + + if (!bdrv_inited) { + bdrv_init (); + bdrv_inited = 1; + } + +out: + if (ret) + GF_FREE (conf); + + return ret; +} + + +void +fini (xlator_t *this) +{ + qb_conf_t *conf = NULL; + + conf = this->private; + + this->private = NULL; + + if (conf->root_inode) + inode_unref(conf->root_inode); + GF_FREE (conf); + + return; +} + + +struct xlator_fops fops = { + .lookup = qb_lookup, + .fsetxattr = qb_fsetxattr, + .setxattr = qb_setxattr, + .open = qb_open, + .writev = qb_writev, + .readv = qb_readv, + .fsync = qb_fsync, + .truncate = qb_truncate, + .ftruncate = qb_ftruncate, + .stat = qb_stat, + .fstat = qb_fstat, + .setattr = qb_setattr, + .fsetattr = qb_fsetattr, + .flush = qb_flush, +/* + .getxattr = qb_getxattr, + .fgetxattr = qb_fgetxattr +*/ + .readdirp = qb_readdirp, +}; + + +struct xlator_cbks cbks = { + .forget = qb_forget, + .release = qb_release, +}; + + +struct xlator_dumpops dumpops = { +}; + + +struct volume_options options[] = { + { .key = {"default-password"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "", + .description = "Default password for the AES encrypted block images." + }, + { .key = {NULL} }, +}; diff --git a/xlators/features/qemu-block/src/qemu-block.h b/xlators/features/qemu-block/src/qemu-block.h new file mode 100644 index 000000000..c95f2799a --- /dev/null +++ b/xlators/features/qemu-block/src/qemu-block.h @@ -0,0 +1,109 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __QEMU_BLOCK_H +#define __QEMU_BLOCK_H + +#include "syncop.h" +#include "call-stub.h" +#include "block/block_int.h" +#include "monitor/monitor.h" + +/* QB_XATTR_KEY_FMT is the on-disk xattr stored in the inode which + indicates that the file must be "interpreted" by the block format + logic. The value of the key is of the pattern: + + "format:virtual_size" + + e.g + + "qcow2:20GB" or "qed:100GB" + + The format and virtual size are colon separated. The format is + a case sensitive string which qemu recognizes. virtual_size is + specified as a size which glusterfs recognizes as size (i.e., + value accepted by gf_string2bytesize()) +*/ +#define QB_XATTR_KEY_FMT "trusted.glusterfs.%s.format" + +#define QB_XATTR_KEY_MAX 64 + +#define QB_XATTR_VAL_MAX 64 + + +typedef struct qb_inode { + char fmt[QB_XATTR_VAL_MAX]; /* this is only the format, not "format:size" */ + size_t size; /* virtual size in bytes */ + BlockDriverState *bs; + int refcnt; + uuid_t backing_gfid; + char *backing_fname; +} qb_inode_t; + + +typedef struct qb_conf { + Monitor *mon; + struct syncenv *env; + char qb_xattr_key[QB_XATTR_KEY_MAX]; + char *default_password; + inode_t *root_inode; +} qb_conf_t; + + +typedef struct qb_local { + call_frame_t *frame; /* backpointer */ + call_stub_t *stub; + inode_t *inode; + fd_t *fd; + char fmt[QB_XATTR_VAL_MAX+1]; + char name[256]; + synctask_fn_t synctask_fn; + struct list_head list; +} qb_local_t; + +void qb_local_free (xlator_t *this, qb_local_t *local); +int qb_coroutine (call_frame_t *frame, synctask_fn_t fn); +inode_t *qb_inode_from_filename (const char *filename); +int qb_inode_to_filename (inode_t *inode, char *filename, int size); +int qb_format_extract (xlator_t *this, char *format, inode_t *inode); + +qb_inode_t *qb_inode_ctx_get (xlator_t *this, inode_t *inode); + +#define QB_STACK_UNWIND(typ, frame, args ...) 
do { \ + qb_local_t *__local = frame->local; \ + xlator_t *__this = frame->this; \ + \ + frame->local = NULL; \ + STACK_UNWIND_STRICT (typ, frame, args); \ + if (__local) \ + qb_local_free (__this, __local); \ + } while (0) + +#define QB_STUB_UNWIND(stub, op_ret, op_errno) do { \ + qb_local_t *__local = stub->frame->local; \ + xlator_t *__this = stub->frame->this; \ + \ + stub->frame->local = NULL; \ + call_unwind_error (stub, op_ret, op_errno); \ + if (__local) \ + qb_local_free (__this, __local); \ + } while (0) + +#define QB_STUB_RESUME(stub_errno) do { \ + qb_local_t *__local = stub->frame->local; \ + xlator_t *__this = stub->frame->this; \ + \ + stub->frame->local = NULL; \ + call_resume (stub); \ + if (__local) \ + qb_local_free (__this, __local); \ + } while (0) + +#endif /* !__QEMU_BLOCK_H */ diff --git a/xlators/features/quiesce/Makefile.am b/xlators/features/quiesce/Makefile.am new file mode 100644 index 000000000..a985f42a8 --- /dev/null +++ b/xlators/features/quiesce/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/quiesce/src/Makefile.am b/xlators/features/quiesce/src/Makefile.am new file mode 100644 index 000000000..15e46629e --- /dev/null +++ b/xlators/features/quiesce/src/Makefile.am @@ -0,0 +1,15 @@ +xlator_LTLIBRARIES = quiesce.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +quiesce_la_LDFLAGS = -module -avoid-version + +quiesce_la_SOURCES = quiesce.c +quiesce_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = quiesce.h quiesce-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h new file mode 100644 index 000000000..6e582f424 --- /dev/null +++ b/xlators/features/quiesce/src/quiesce-mem-types.h @@ -0,0 +1,20 @@ +/* + Copyright (c) 2010-2012 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __QUIESCE_MEM_TYPES_H__ +#define __QUIESCE_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_quiesce_mem_types_ { + gf_quiesce_mt_priv_t = gf_common_mt_end + 1, + gf_quiesce_mt_end +}; +#endif diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c new file mode 100644 index 000000000..24c7dc6ed --- /dev/null +++ b/xlators/features/quiesce/src/quiesce.c @@ -0,0 +1,2610 @@ +/* + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "quiesce.h" +#include "defaults.h" +#include "call-stub.h" + +/* TODO: */ +/* Think about 'writev/_*_lk/setattr/xattrop/' fops to do re-transmittion */ + + +/* Quiesce Specific Functions */ +void +gf_quiesce_local_wipe (xlator_t *this, quiesce_local_t *local) +{ + if (!local || !this || !this->private) + return; + + if (local->loc.inode) + loc_wipe (&local->loc); + if (local->fd) + fd_unref (local->fd); + GF_FREE (local->name); + GF_FREE (local->volname); + if (local->dict) + dict_unref (local->dict); + if (local->iobref) + iobref_unref (local->iobref); + GF_FREE (local->vector); + + mem_put (local); +} + +call_stub_t * +gf_quiesce_dequeue (xlator_t *this) +{ + call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + + priv = this->private; + + if (!priv || list_empty (&priv->req)) + return NULL; + + LOCK (&priv->lock); + { + stub = list_entry (priv->req.next, call_stub_t, list); + list_del_init (&stub->list); + priv->queue_size--; + } + UNLOCK (&priv->lock); + + return stub; +} + +void * +gf_quiesce_dequeue_start (void *data) +{ + xlator_t *this = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + this = data; + priv = this->private; + THIS = this; + + while (!list_empty (&priv->req)) { + stub = gf_quiesce_dequeue (this); + if (stub) { + call_resume (stub); + } + } + + return 0; +} + + +void +gf_quiesce_timeout (void *data) +{ + xlator_t *this = NULL; + quiesce_priv_t *priv = NULL; + + this = data; + priv = this->private; + THIS = this; + + LOCK (&priv->lock); + { + priv->pass_through = _gf_true; + } + UNLOCK (&priv->lock); + + gf_quiesce_dequeue_start (this); + + return; +} + +void +gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub) +{ + quiesce_priv_t *priv = NULL; + struct timespec timeout = {0,}; + + priv = this->private; + if (!priv) { + gf_log_callingfn (this->name, GF_LOG_ERROR, + "this->private == NULL"); + return; + } + + LOCK (&priv->lock); + { + 
list_add_tail (&stub->list, &priv->req); + priv->queue_size++; + } + UNLOCK (&priv->lock); + + if (!priv->timer) { + timeout.tv_sec = 20; + timeout.tv_nsec = 0; + + priv->timer = gf_timer_call_after (this->ctx, + timeout, + gf_quiesce_timeout, + (void *) this); + } + + return; +} + + + +/* _CBK function section */ + +int32_t +quiesce_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, struct iatt *postparent) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_lookup_stub (frame, default_lookup_resume, + &local->loc, local->dict); + if (!stub) { + STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, + NULL, NULL, NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, + dict, postparent); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_stat_stub (frame, default_stat_resume, + &local->loc, xdata); + if (!stub) { + STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + call_stub_t *stub = NULL; + 
quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_access_stub (frame, default_access_resume, + &local->loc, local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *path, + struct iatt *buf, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_readlink_stub (frame, default_readlink_resume, + &local->loc, local->size, xdata); + if (!stub) { + STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, + NULL, NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_open_stub (frame, default_open_resume, + &local->loc, local->flag, local->fd, + xdata); + if (!stub) { + STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); +out: + 
gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_readv_stub (frame, default_readv_resume, + local->fd, local->size, local->offset, + local->io_flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, + NULL, 0, NULL, NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, + stbuf, iobref, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_flush_stub (frame, default_flush_resume, + local->fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + + + +int32_t +quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + 
stub = fop_fsync_stub (frame, default_fsync_resume, + local->fd, local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, + NULL, NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fstat_stub (frame, default_fstat_resume, + local->fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_opendir_stub (frame, default_opendir_resume, + &local->loc, local->fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + call_stub_t *stub = NULL; + 
quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, + local->fd, local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_statfs_stub (frame, default_statfs_resume, + &local->loc, xdata); + if (!stub) { + STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, + local->fd, local->name, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata); +out: + gf_quiesce_local_wipe (this, local); + + 
return 0; +} + + +int32_t +quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_getxattr_stub (frame, default_getxattr_resume, + &local->loc, local->name, xdata); + if (!stub) { + STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + + +int32_t +quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uint32_t weak_checksum, + uint8_t *strong_checksum, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_rchecksum_stub (frame, default_rchecksum_resume, + local->fd, local->offset, local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, + 0, NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum, + strong_checksum, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + + +int32_t +quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_readdir_stub (frame, 
default_readdir_resume, + local->fd, local->size, local->offset, xdata); + if (!stub) { + STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + + +int32_t +quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_readdirp_stub (frame, default_readdirp_resume, + local->fd, local->size, local->offset, + local->dict); + if (!stub) { + STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + + +#if 0 + +int32_t +quiesce_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_writev_stub (frame, default_writev_resume, + local->fd, local->vector, local->flag, + local->offset, local->io_flags, + local->iobref, xdata); + if (!stub) { + STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, + NULL, NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); +out: + 
gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_xattrop_stub (frame, default_xattrop_resume, + &local->loc, local->xattrop_flags, + local->dict, xdata); + if (!stub) { + STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fxattrop_stub (frame, default_fxattrop_resume, + local->fd, local->xattrop_flags, + local->dict, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + local = frame->local; + 
frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_lk_stub (frame, default_lk_resume, + local->fd, local->flag, &local->flock, xdata); + if (!stub) { + STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, + NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_inodelk_stub (frame, default_inodelk_resume, + local->volname, &local->loc, + local->flag, &local->flock, xdata); + if (!stub) { + STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + + +int32_t +quiesce_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_finodelk_stub (frame, default_finodelk_resume, + local->volname, local->fd, + local->flag, &local->flock, xdata); + if (!stub) { + STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (finodelk, 
frame, op_ret, op_errno, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_entrylk_stub (frame, default_entrylk_resume, + local->volname, &local->loc, + local->name, local->cmd, local->type, xdata); + if (!stub) { + STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fentrylk_stub (frame, default_fentrylk_resume, + local->volname, local->fd, + local->name, local->cmd, local->type, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + 
priv = this->private; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_setattr_stub (frame, default_setattr_resume, + &local->loc, &local->stbuf, local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, + NULL, NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, + statpost, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +int32_t +quiesce_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + local = frame->local; + frame->local = NULL; + + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fsetattr_stub (frame, default_fsetattr_resume, + local->fd, &local->stbuf, local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, + NULL, NULL, NULL); + goto out; + } + + gf_quiesce_enqueue (this, stub); + goto out; + } + + STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre, + statpost, xdata); +out: + gf_quiesce_local_wipe (this, local); + + return 0; +} + +#endif /* if 0 */ + + +/* FOP */ + +/* No retransmittion */ + +int32_t +quiesce_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, + default_removexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, + name, xdata); + return 0; + } + + stub = fop_removexattr_stub (frame, default_removexattr_resume, + loc, name, xdata); + if 
(!stub) { + STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, + default_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + loc, + offset, xdata); + return 0; + } + + stub = fop_truncate_stub (frame, default_truncate_resume, loc, offset, xdata); + if (!stub) { + STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_fsetxattr (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + dict_t *dict, + int32_t flags, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, + default_fsetxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, + fd, + dict, + flags, xdata); + return 0; + } + + stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, + fd, dict, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, + default_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, + dict, + flags, xdata); + return 0; + } + + stub = fop_setxattr_stub (frame, default_setxattr_resume, + loc, dict, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue 
(this, stub); + + return 0; +} + +int32_t +quiesce_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, + mode_t umask, fd_t *fd, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + /* Don't send O_APPEND below, as write() re-transmittions can + fail with O_APPEND */ + STACK_WIND (frame, default_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, (flags & ~O_APPEND), mode, umask, fd, xdata); + return 0; + } + + stub = fop_create_stub (frame, default_create_resume, + loc, (flags & ~O_APPEND), mode, umask, fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, + NULL, NULL, NULL, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, + default_link_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + return 0; + } + + stub = fop_link_stub (frame, default_link_resume, oldloc, newloc, xdata); + if (!stub) { + STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, + NULL, NULL, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, + default_rename_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, + oldloc, newloc, xdata); + return 0; + } + + stub = fop_rename_stub (frame, default_rename_resume, oldloc, newloc, xdata); + if (!stub) { + STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, + NULL, NULL, NULL, NULL, 
NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int +quiesce_symlink (call_frame_t *frame, xlator_t *this, + const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, default_symlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, + linkpath, loc, umask, xdata); + return 0; + } + + stub = fop_symlink_stub (frame, default_symlink_resume, + linkpath, loc, umask, xdata); + if (!stub) { + STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM, + NULL, NULL, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int +quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, default_rmdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, + loc, flags, xdata); + return 0; + } + + stub = fop_rmdir_stub (frame, default_rmdir_resume, loc, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, int xflag, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, + default_unlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + loc, xflag, xdata); + return 0; + } + + stub = fop_unlink_stub (frame, default_unlink_resume, loc, xflag, xdata); + if (!stub) { + STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int +quiesce_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, 
mode_t umask, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, default_mkdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, + loc, mode, umask, xdata); + return 0; + } + + stub = fop_mkdir_stub (frame, default_mkdir_resume, + loc, mode, umask, xdata); + if (!stub) { + STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM, + NULL, NULL, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int +quiesce_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, default_mknod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, + loc, mode, rdev, umask, xdata); + return 0; + } + + stub = fop_mknod_stub (frame, default_mknod_resume, + loc, mode, rdev, umask, xdata); + if (!stub) { + STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM, + NULL, NULL, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND (frame, + default_ftruncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, + fd, + offset, xdata); + return 0; + } + + stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset, xdata); + if (!stub) { + STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +/* Re-transmittion */ + +int32_t +quiesce_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = 
NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + loc_dup (loc, &local->loc); + local->size = size; + frame->local = local; + + STACK_WIND (frame, + quiesce_readlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, + loc, + size, xdata); + return 0; + } + + stub = fop_readlink_stub (frame, default_readlink_resume, loc, size, xdata); + if (!stub) { + STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int32_t +quiesce_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + loc_dup (loc, &local->loc); + local->flag = mask; + frame->local = local; + + STACK_WIND (frame, + quiesce_access_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->access, + loc, + mask, xdata); + return 0; + } + + stub = fop_access_stub (frame, default_access_resume, loc, mask, xdata); + if (!stub) { + STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_fgetxattr (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + const char *name, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + local->fd = fd_ref (fd); + if (name) + local->name = gf_strdup (name); + + frame->local = local; + + STACK_WIND (frame, + quiesce_fgetxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, + fd, + name, xdata); + return 0; + } + + stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name, xdata); + if (!stub) 
{ + STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + loc_dup (loc, &local->loc); + frame->local = local; + + STACK_WIND (frame, + quiesce_statfs_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->statfs, + loc, xdata); + return 0; + } + + stub = fop_statfs_stub (frame, default_statfs_resume, loc, xdata); + if (!stub) { + STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + local->fd = fd_ref (fd); + local->flag = flags; + frame->local = local; + + STACK_WIND (frame, + quiesce_fsyncdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsyncdir, + fd, + flags, xdata); + return 0; + } + + stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, fd, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, fd_t *fd, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + loc_dup (loc, &local->loc); + local->fd = fd_ref (fd); + frame->local = local; + + STACK_WIND (frame, 
+ quiesce_opendir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, + loc, fd, xdata); + return 0; + } + + stub = fop_opendir_stub (frame, default_opendir_resume, loc, fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + local->fd = fd_ref (fd); + frame->local = local; + + STACK_WIND (frame, + quiesce_fstat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, + fd, xdata); + return 0; + } + + stub = fop_fstat_stub (frame, default_fstat_resume, fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + local->fd = fd_ref (fd); + local->flag = flags; + frame->local = local; + + STACK_WIND (frame, + quiesce_fsync_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, + fd, + flags, xdata); + return 0; + } + + stub = fop_fsync_stub (frame, default_fsync_resume, fd, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if 
(priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + local->fd = fd_ref (fd); + frame->local = local; + + STACK_WIND (frame, + quiesce_flush_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd, xdata); + return 0; + } + + stub = fop_flush_stub (frame, default_flush_resume, fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t off, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND (frame, + default_writev_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, + vector, + count, + off, flags, + iobref, xdata); + return 0; + } + + stub = fop_writev_stub (frame, default_writev_resume, + fd, vector, count, off, flags, iobref, xdata); + if (!stub) { + STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + local->fd = fd_ref (fd); + local->size = size; + local->offset = offset; + local->io_flag = flags; + frame->local = local; + + STACK_WIND (frame, + quiesce_readv_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, + fd, + size, + offset, flags, xdata); + return 0; + } + + stub = fop_readv_stub (frame, default_readv_resume, fd, size, offset, + flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, + NULL, 0, NULL, 
NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int32_t +quiesce_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, fd_t *fd, + dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + loc_dup (loc, &local->loc); + local->fd = fd_ref (fd); + + /* Don't send O_APPEND below, as write() re-transmittions can + fail with O_APPEND */ + local->flag = (flags & ~O_APPEND); + frame->local = local; + + STACK_WIND (frame, + quiesce_open_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, + loc, (flags & ~O_APPEND), fd, xdata); + return 0; + } + + stub = fop_open_stub (frame, default_open_resume, loc, + (flags & ~O_APPEND), fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + loc_dup (loc, &local->loc); + if (name) + local->name = gf_strdup (name); + + frame->local = local; + + STACK_WIND (frame, + quiesce_getxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + loc, + name, xdata); + return 0; + } + + stub = fop_getxattr_stub (frame, default_getxattr_resume, loc, name, xdata); + if (!stub) { + STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int32_t +quiesce_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = 
NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND (frame, + default_xattrop_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, + loc, + flags, + dict, xdata); + return 0; + } + + stub = fop_xattrop_stub (frame, default_xattrop_resume, + loc, flags, dict, xdata); + if (!stub) { + STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_fxattrop (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + gf_xattrop_flags_t flags, + dict_t *dict, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND (frame, + default_fxattrop_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, + fd, + flags, + dict, xdata); + return 0; + } + + stub = fop_fxattrop_stub (frame, default_fxattrop_resume, + fd, flags, dict, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct gf_flock *lock, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND (frame, + default_lk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, + fd, + cmd, + lock, xdata); + return 0; + } + + stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, lock, xdata); + if (!stub) { + STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int32_t +quiesce_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv && priv->pass_through) 
{ + STACK_WIND (frame, + default_inodelk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + volume, loc, cmd, lock, xdata); + return 0; + } + + stub = fop_inodelk_stub (frame, default_inodelk_resume, + volume, loc, cmd, lock, xdata); + if (!stub) { + STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND (frame, + default_finodelk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->finodelk, + volume, fd, cmd, lock, xdata); + return 0; + } + + stub = fop_finodelk_stub (frame, default_finodelk_resume, + volume, fd, cmd, lock, xdata); + if (!stub) { + STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND (frame, default_entrylk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, + volume, loc, basename, cmd, type, xdata); + return 0; + } + + stub = fop_entrylk_stub (frame, default_entrylk_resume, + volume, loc, basename, cmd, type, xdata); + if (!stub) { + STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; 
+ + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND (frame, default_fentrylk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fentrylk, + volume, fd, basename, cmd, type, xdata); + return 0; + } + + stub = fop_fentrylk_stub (frame, default_fentrylk_resume, + volume, fd, basename, cmd, type, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_rchecksum (call_frame_t *frame, + xlator_t *this, + fd_t *fd, off_t offset, + int32_t len, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + local->fd = fd_ref (fd); + local->offset = offset; + local->flag = len; + frame->local = local; + + STACK_WIND (frame, + quiesce_rchecksum_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rchecksum, + fd, offset, len, xdata); + return 0; + } + + stub = fop_rchecksum_stub (frame, default_rchecksum_resume, + fd, offset, len, xdata); + if (!stub) { + STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, 0, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int32_t +quiesce_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t off, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + local->fd = fd_ref (fd); + local->size = size; + local->offset = off; + frame->local = local; + + STACK_WIND (frame, + quiesce_readdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, + fd, size, off, xdata); + return 0; + } + + stub = fop_readdir_stub (frame, default_readdir_resume, fd, size, off, xdata); + if (!stub) { + STACK_UNWIND_STRICT (readdir, frame, -1, 
ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int32_t +quiesce_readdirp (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t off, dict_t *dict) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + local->fd = fd_ref (fd); + local->size = size; + local->offset = off; + local->dict = dict_ref (dict); + frame->local = local; + + STACK_WIND (frame, + quiesce_readdirp_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, + fd, size, off, dict); + return 0; + } + + stub = fop_readdirp_stub (frame, default_readdirp_resume, fd, size, + off, dict); + if (!stub) { + STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_setattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND (frame, + default_setattr_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->setattr, + loc, stbuf, valid, xdata); + return 0; + } + + stub = fop_setattr_stub (frame, default_setattr_resume, + loc, stbuf, valid, xdata); + if (!stub) { + STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + + +int32_t +quiesce_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + loc_dup (loc, &local->loc); + frame->local = local; + + STACK_WIND (frame, + quiesce_stat_cbk, + FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->stat, + loc, xdata); + return 0; + } + + stub = fop_stat_stub (frame, default_stat_resume, loc, xdata); + if (!stub) { + STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + local = mem_get0 (priv->local_pool); + loc_dup (loc, &local->loc); + local->dict = dict_ref (xattr_req); + frame->local = local; + + STACK_WIND (frame, + quiesce_lookup_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + loc, xattr_req); + return 0; + } + + stub = fop_lookup_stub (frame, default_lookup_resume, loc, xattr_req); + if (!stub) { + STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, + NULL, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +quiesce_fsetattr (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND (frame, + default_fsetattr_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fsetattr, + fd, stbuf, valid, xdata); + return 0; + } + + stub = fop_fsetattr_stub (frame, default_fsetattr_resume, + fd, stbuf, valid, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue (this, stub); + + return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + ret = xlator_mem_acct_init (this, gf_quiesce_mt_end + 1); + + return ret; +} + +int +init (xlator_t *this) +{ + int ret = -1; + quiesce_priv_t *priv = NULL; + + if (!this->children || this->children->next) { + gf_log (this->name, 
GF_LOG_ERROR, + "'quiesce' not configured with exactly one child"); + goto out; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + priv = GF_CALLOC (1, sizeof (*priv), gf_quiesce_mt_priv_t); + if (!priv) + goto out; + + priv->local_pool = mem_pool_new (quiesce_local_t, + GF_FOPS_EXPECTED_IN_PARALLEL); + + LOCK_INIT (&priv->lock); + priv->pass_through = _gf_false; + + INIT_LIST_HEAD (&priv->req); + + this->private = priv; + ret = 0; +out: + return ret; +} + +void +fini (xlator_t *this) +{ + quiesce_priv_t *priv = NULL; + + priv = this->private; + if (!priv) + goto out; + this->private = NULL; + + mem_pool_destroy (priv->local_pool); + LOCK_DESTROY (&priv->lock); + GF_FREE (priv); +out: + return; +} + +int +notify (xlator_t *this, int event, void *data, ...) +{ + int ret = 0; + quiesce_priv_t *priv = NULL; + struct timespec timeout = {0,}; + + priv = this->private; + if (!priv) + goto out; + + switch (event) { + case GF_EVENT_CHILD_UP: + { + ret = pthread_create (&priv->thr, NULL, gf_quiesce_dequeue_start, + this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to create the quiesce-dequeue thread"); + } + + LOCK (&priv->lock); + { + priv->pass_through = _gf_true; + } + UNLOCK (&priv->lock); + break; + } + case GF_EVENT_CHILD_DOWN: + LOCK (&priv->lock); + { + priv->pass_through = _gf_false; + } + UNLOCK (&priv->lock); + + if (priv->timer) + break; + timeout.tv_sec = 20; + timeout.tv_nsec = 0; + + priv->timer = gf_timer_call_after (this->ctx, + timeout, + gf_quiesce_timeout, + (void *) this); + + if (priv->timer == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Cannot create timer"); + } + + break; + default: + break; + } + + ret = default_notify (this, event, data); +out: + return ret; +} + + +struct xlator_fops fops = { + /* write/modifying fops */ + .mknod = quiesce_mknod, + .create = quiesce_create, + .truncate = quiesce_truncate, + .ftruncate = quiesce_ftruncate, + .setxattr = 
quiesce_setxattr, + .removexattr = quiesce_removexattr, + .symlink = quiesce_symlink, + .unlink = quiesce_unlink, + .link = quiesce_link, + .mkdir = quiesce_mkdir, + .rmdir = quiesce_rmdir, + .rename = quiesce_rename, + + /* The below calls are known to change state, hence + re-transmittion is not advised */ + .lk = quiesce_lk, + .inodelk = quiesce_inodelk, + .finodelk = quiesce_finodelk, + .entrylk = quiesce_entrylk, + .fentrylk = quiesce_fentrylk, + .xattrop = quiesce_xattrop, + .fxattrop = quiesce_fxattrop, + .setattr = quiesce_setattr, + .fsetattr = quiesce_fsetattr, + + /* Special case, re-transmittion is not harmful * + * as offset is properly sent from above layers */ + /* TODO: not re-transmitted as of now */ + .writev = quiesce_writev, + + /* re-transmittable fops */ + .lookup = quiesce_lookup, + .stat = quiesce_stat, + .fstat = quiesce_fstat, + .access = quiesce_access, + .readlink = quiesce_readlink, + .getxattr = quiesce_getxattr, + .open = quiesce_open, + .readv = quiesce_readv, + .flush = quiesce_flush, + .fsync = quiesce_fsync, + .statfs = quiesce_statfs, + .opendir = quiesce_opendir, + .readdir = quiesce_readdir, + .readdirp = quiesce_readdirp, + .fsyncdir = quiesce_fsyncdir, + +}; + +struct xlator_dumpops dumpops; + + +struct xlator_cbks cbks; + + +struct volume_options options[] = { + { .key = {NULL} }, +}; diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h new file mode 100644 index 000000000..878ed77e9 --- /dev/null +++ b/xlators/features/quiesce/src/quiesce.h @@ -0,0 +1,51 @@ +/* + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __QUIESCE_H__ +#define __QUIESCE_H__ + +#include "quiesce-mem-types.h" +#include "xlator.h" +#include "timer.h" + +#define GF_FOPS_EXPECTED_IN_PARALLEL 512 + +typedef struct { + gf_timer_t *timer; + gf_boolean_t pass_through; + gf_lock_t lock; + struct list_head req; + int queue_size; + pthread_t thr; + struct mem_pool *local_pool; +} quiesce_priv_t; + +typedef struct { + fd_t *fd; + char *name; + char *volname; + loc_t loc; + off_t size; + off_t offset; + mode_t mode; + int32_t flag; + struct iatt stbuf; + struct iovec *vector; + struct iobref *iobref; + dict_t *dict; + struct gf_flock flock; + entrylk_cmd cmd; + entrylk_type type; + gf_xattrop_flags_t xattrop_flags; + int32_t wbflags; + uint32_t io_flag; +} quiesce_local_t; + +#endif diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am index 886d83964..9546f4276 100644 --- a/xlators/features/quota/src/Makefile.am +++ b/xlators/features/quota/src/Makefile.am @@ -1,13 +1,17 @@ xlator_LTLIBRARIES = quota.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -quota_la_LDFLAGS = -module -avoidversion +quota_la_LDFLAGS = -module -avoid-version quota_la_SOURCES = quota.c -quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +noinst_HEADERS = quota-mem-types.h quota.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/xlators/cluster/dht/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/features/quota/src/quota-mem-types.h b/xlators/features/quota/src/quota-mem-types.h new file mode 100644 index 000000000..3082865da --- /dev/null +++ b/xlators/features/quota/src/quota-mem-types.h @@ -0,0 +1,27 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef __QUOTA_MEM_TYPES_H__ +#define __QUOTA_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_quota_mem_types_ { + gf_quota_mt_quota_priv_t = gf_common_mt_end + 1, + gf_quota_mt_quota_inode_ctx_t, + gf_quota_mt_loc_t, + gf_quota_mt_char, + gf_quota_mt_int64_t, + gf_quota_mt_int32_t, + gf_quota_mt_limits_t, + gf_quota_mt_quota_dentry_t, + gf_quota_mt_end +}; +#endif + diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 822c29bd3..c527e7ca7 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -1,1132 +1,3503 @@ /* - Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ +#include <fnmatch.h> -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif +#include "quota.h" +#include "common-utils.h" +#include "defaults.h" -#include <sys/time.h> +int32_t +quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, + char *name, uuid_t par); +struct volume_options options[]; -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" +int +quota_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path) +{ + int ret = -1; -#ifndef MAX_IOVEC -#define MAX_IOVEC 16 -#endif - -struct quota_local { - struct iatt stbuf; - inode_t *inode; - char *path; - fd_t *fd; - off_t offset; - int32_t count; - struct iovec vector[MAX_IOVEC]; - struct iobref *iobref; - loc_t loc; -}; + if (!loc) { + return ret; + } + if (inode) { + loc->inode = inode_ref (inode); + } -struct quota_priv { - char only_first_time; /* Used to make sure a call is done only one time */ - gf_lock_t lock; /* Used while updating variables */ + if (parent) { + loc->parent = inode_ref (parent); + } - uint64_t disk_usage_limit; /* Used for Disk usage quota */ - uint64_t current_disk_usage; /* Keep the current usage value */ + loc->path = gf_strdup (path); + if (!loc->path) { + goto loc_wipe; + } - uint32_t min_free_disk_limit; /* user specified limit, in %*/ - uint32_t current_free_disk; /* current free disk space available, in % */ - uint32_t refresh_interval; /* interval in seconds */ - uint32_t min_disk_last_updated_time; /* used for interval calculation */ + loc->name = strrchr (loc->path, '/'); + if (loc->name) { + loc->name++; + } else { + goto loc_wipe; + } - loc_t root_loc; /* Store '/' loc_t to make xattr calls */ -}; + ret = 0; + +loc_wipe: + if (ret < 0) { + loc_wipe (loc); + 
} + + return ret; +} int -quota_statvfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *stbuf) -{ - struct quota_priv *priv = this->private; - - if (op_ret >= 0) { - priv->current_free_disk = - (stbuf->f_bavail * 100) / stbuf->f_blocks; - } +quota_inode_loc_fill (inode_t *inode, loc_t *loc) +{ + char *resolvedpath = NULL; + inode_t *parent = NULL; + int ret = -1; + xlator_t *this = NULL; + + if ((!inode) || (!loc)) { + return ret; + } + + this = THIS; + + if ((inode) && __is_root_gfid (inode->gfid)) { + loc->parent = NULL; + goto ignore_parent; + } + + parent = inode_parent (inode, 0, NULL); + if (!parent) { + gf_log (this->name, GF_LOG_DEBUG, + "cannot find parent for inode (gfid:%s)", + uuid_utoa (inode->gfid)); + goto err; + } + +ignore_parent: + ret = inode_path (inode, NULL, &resolvedpath); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "cannot construct path for inode (gfid:%s)", + uuid_utoa (inode->gfid)); + goto err; + } - STACK_DESTROY (frame->root); - return 0; + ret = quota_loc_fill (loc, inode, parent, resolvedpath); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "cannot fill loc"); + goto err; + } + +err: + if (parent) { + inode_unref (parent); + } + + GF_FREE (resolvedpath); + + return ret; } -void -gf_quota_usage_subtract (xlator_t *this, size_t size) +int32_t +quota_local_cleanup (xlator_t *this, quota_local_t *local) { - struct quota_priv *priv = NULL; + if (local == NULL) { + goto out; + } - priv = this->private; + loc_wipe (&local->loc); + loc_wipe (&local->newloc); + loc_wipe (&local->oldloc); + loc_wipe (&local->validate_loc); - LOCK (&priv->lock); - { - if (priv->current_disk_usage < size) - priv->current_disk_usage = 0; - else - priv->current_disk_usage -= size; - } - UNLOCK (&priv->lock); + inode_unref (local->inode); + LOCK_DESTROY (&local->lock); + + mem_put (local); +out: + return 0; +} + + +static inline quota_local_t * +quota_local_new () +{ + quota_local_t 
*local = NULL; + local = mem_get0 (THIS->local_pool); + if (local) + LOCK_INIT (&local->lock); + return local; +} + + +quota_dentry_t * +__quota_dentry_new (quota_inode_ctx_t *ctx, char *name, uuid_t par) +{ + quota_dentry_t *dentry = NULL; + GF_UNUSED int32_t ret = 0; + + QUOTA_ALLOC_OR_GOTO (dentry, quota_dentry_t, err); + + INIT_LIST_HEAD (&dentry->next); + + dentry->name = gf_strdup (name); + if (dentry->name == NULL) { + GF_FREE (dentry); + goto err; + } + + uuid_copy (dentry->par, par); + + list_add_tail (&dentry->next, &ctx->parents); +err: + return dentry; } void -gf_quota_usage_add (xlator_t *this, size_t size) +__quota_dentry_free (quota_dentry_t *dentry) { - struct quota_priv *priv = this->private; + if (dentry == NULL) { + goto out; + } - LOCK (&priv->lock); - { - priv->current_disk_usage += size; - } - UNLOCK (&priv->lock); + list_del_init (&dentry->next); + + GF_FREE (dentry->name); + GF_FREE (dentry); +out: + return; } -void -gf_quota_update_current_free_disk (xlator_t *this) +int32_t +quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - call_frame_t *frame = NULL; - call_pool_t *pool = NULL; + quota_local_t *local = NULL; + uint32_t validate_count = 0, link_count = 0; + int32_t ret = 0; + quota_inode_ctx_t *ctx = NULL; + int64_t *size = 0; + uint64_t value = 0; + call_stub_t *stub = NULL; + + local = frame->local; + + if (op_ret < 0) { + goto unwind; + } - struct quota_priv *priv = NULL; + GF_ASSERT (local); + GF_ASSERT (frame); + GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, unwind, op_errno, + EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, dict, unwind, op_errno, + EINVAL); + + ret = inode_ctx_get (local->validate_loc.inode, this, &value); + + ctx = (quota_inode_ctx_t *)(unsigned long)value; + if ((ret == -1) || (ctx == NULL)) { + gf_log (this->name, GF_LOG_WARNING, + "quota context is not present in inode (gfid:%s)", + uuid_utoa 
(local->validate_loc.inode->gfid)); + op_errno = EINVAL; + goto unwind; + } - pool = this->ctx->pool; - frame = create_frame (this, pool); - - priv = this->private; + ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "size key not present in dict"); + op_errno = EINVAL; + goto unwind; + } + + local->just_validated = 1; /* so that we don't go into infinite + * loop of validation and checking + * limit when timeout is zero. + */ + LOCK (&ctx->lock); + { + ctx->size = ntoh64 (*size); + gettimeofday (&ctx->tv, NULL); + } + UNLOCK (&ctx->lock); + + quota_check_limit (frame, local->validate_loc.inode, this, NULL, NULL); + return 0; - STACK_WIND (frame, quota_statvfs_cbk, - this->children->xlator, - this->children->xlator->fops->statfs, &(priv->root_loc)); +unwind: + LOCK (&local->lock); + { + local->op_ret = -1; + local->op_errno = op_errno; + + validate_count = --local->validate_count; + link_count = local->link_count; - return ; + if ((validate_count == 0) && (link_count == 0)) { + stub = local->stub; + local->stub = NULL; + } + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); + } + + return 0; } -int -gf_quota_check_free_disk (xlator_t *this) -{ - struct quota_priv * priv = NULL; - struct timeval tv = {0, 0}; - - priv = this->private; - if (priv->min_free_disk_limit) { - gettimeofday (&tv, NULL); - if (tv.tv_sec > (priv->refresh_interval + - priv->min_disk_last_updated_time)) { - priv->min_disk_last_updated_time = tv.tv_sec; - gf_quota_update_current_free_disk (this); - } - if (priv->current_free_disk <= priv->min_free_disk_limit) - return -1; - } +static inline uint64_t +quota_time_elapsed (struct timeval *now, struct timeval *then) +{ + return (now->tv_sec - then->tv_sec); +} - return 0; + +int32_t +quota_timeout (struct timeval *tv, int32_t timeout) +{ + struct timeval now = {0,}; + int32_t timed_out = 0; + + gettimeofday (&now, NULL); + + if (quota_time_elapsed (&now, tv) >= 
timeout) { + timed_out = 1; + } + + return timed_out; } -int -quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +int32_t +quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, + char *name, uuid_t par) { - struct quota_priv *priv = this->private; - struct quota_local *local = NULL; + int32_t ret = -1; + inode_t *_inode = NULL, *parent = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; + char need_validate = 0, need_unwind = 0; + int64_t delta = 0; + call_stub_t *stub = NULL; + int32_t validate_count = 0, link_count = 0; + uint64_t value = 0; + char just_validated = 0; + uuid_t trav_uuid = {0,}; - local = frame->local; + GF_VALIDATE_OR_GOTO ("quota", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + GF_VALIDATE_OR_GOTO (this->name, inode, out); - if ((op_ret >= 0) && priv->disk_usage_limit) { - gf_quota_usage_subtract (this, (local->stbuf.ia_blocks - - postbuf->ia_blocks) * 512); - loc_wipe (&local->loc); - } + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, local, out); + + delta = local->delta; + + GF_VALIDATE_OR_GOTO (this->name, local->stub, out); + + priv = this->private; + + inode_ctx_get (inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; + + _inode = inode_ref (inode); + + LOCK (&local->lock); + { + just_validated = local->just_validated; + local->just_validated = 0; + + if (just_validated) { + local->validate_count--; + } + } + UNLOCK (&local->lock); + + if ( par != NULL ) { + uuid_copy (trav_uuid, par); + } + + do { + if (ctx != NULL) { + LOCK (&ctx->lock); + { + if (ctx->limit >= 0) { + if (!just_validated + && quota_timeout (&ctx->tv, + priv->timeout)) { + need_validate = 1; + } else if ((ctx->size + delta) + >= ctx->limit) { + local->op_ret = -1; + local->op_errno = EDQUOT; + need_unwind = 1; + } + } + } + UNLOCK (&ctx->lock); + + if 
(need_validate) { + goto validate; + } + + if (need_unwind) { + break; + } + } + + if (__is_root_gfid (_inode->gfid)) { + break; + } + + parent = inode_parent (_inode, trav_uuid, name); + + if (name != NULL) { + name = NULL; + uuid_clear (trav_uuid); + } + + if (parent == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "cannot find parent for inode (gfid:%s), hence " + "aborting enforcing quota-limits and continuing" + " with the fop", uuid_utoa (_inode->gfid)); + } + + inode_unref (_inode); + _inode = parent; + just_validated = 0; + + if (_inode == NULL) { + break; + } + + value = 0; + inode_ctx_get (_inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; + } while (1); + + ret = 0; + + if (_inode != NULL) { + inode_unref (_inode); + } + + LOCK (&local->lock); + { + validate_count = local->validate_count; + link_count = local->link_count; + if ((validate_count == 0) && (link_count == 0)) { + stub = local->stub; + local->stub = NULL; + } + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); + } + +out: + return ret; - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, - prebuf, postbuf); - return 0; +validate: + LOCK (&local->lock); + { + loc_wipe (&local->validate_loc); + + if (just_validated) { + local->validate_count--; + } + + local->validate_count++; + ret = quota_inode_loc_fill (_inode, &local->validate_loc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "cannot fill loc for inode (gfid:%s), hence " + "aborting quota-checks and continuing with fop", + uuid_utoa (_inode->gfid)); + local->validate_count--; + } + } + UNLOCK (&local->lock); + + if (ret < 0) { + goto loc_fill_failed; + } + + STACK_WIND (frame, quota_validate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, &local->validate_loc, + QUOTA_SIZE_KEY, NULL); + +loc_fill_failed: + inode_unref (_inode); + return 0; } -int -quota_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct 
iatt *buf) +int32_t +quota_get_limit_value (inode_t *inode, xlator_t *this, int64_t *n) { - struct quota_local *local = NULL; - struct quota_priv *priv = NULL; + int32_t ret = 0; + char *path = NULL; + limits_t *limit_node = NULL; + quota_priv_t *priv = NULL; + + if (inode == NULL || n == NULL) { + ret = -1; + goto out; + } - priv = this->private; - local = frame->local; + *n = 0; - if (op_ret >= 0) { - local->stbuf = *buf; - } + ret = inode_path (inode, NULL, &path); + if (ret < 0) { + ret = -1; + goto out; + } + + priv = this->private; - STACK_WIND (frame, quota_truncate_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate, - &local->loc, local->offset); - return 0; + list_for_each_entry (limit_node, &priv->limit_head, limit_list) { + if (strcmp (limit_node->path, path) == 0) { + *n = limit_node->value; + break; + } + } + +out: + GF_FREE (path); + + return ret; } -int -quota_truncate (call_frame_t *frame, xlator_t *this, - loc_t *loc, off_t offset) +static int32_t +__quota_init_inode_ctx (inode_t *inode, int64_t limit, xlator_t *this, + dict_t *dict, struct iatt *buf, + quota_inode_ctx_t **context) { - struct quota_local *local = NULL; - struct quota_priv *priv = NULL; + int32_t ret = -1; + int64_t *size = 0; + quota_inode_ctx_t *ctx = NULL; - priv = this->private; + if (inode == NULL) { + goto out; + } - if (priv->disk_usage_limit) { - local = CALLOC (1, sizeof (struct quota_local)); - frame->local = local; + QUOTA_ALLOC_OR_GOTO (ctx, quota_inode_ctx_t, out); - loc_copy (&local->loc, loc); - local->offset = offset; + ctx->limit = limit; + if (buf) + ctx->buf = *buf; - STACK_WIND (frame, quota_truncate_stat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc); - return 0; - } + LOCK_INIT(&ctx->lock); - STACK_WIND (frame, quota_truncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, - loc, offset); - return 0; + if (context != NULL) { + *context = ctx; + } + + INIT_LIST_HEAD (&ctx->parents); + + if (dict != NULL) { + ret = 
dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); + if (ret == 0) { + ctx->size = ntoh64 (*size); + gettimeofday (&ctx->tv, NULL); + } + } + + ret = __inode_ctx_put (inode, this, (uint64_t )(long)ctx); + if (ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "cannot set quota context in inode (gfid:%s)", + uuid_utoa (inode->gfid)); + } +out: + return ret; } -int -quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +static int32_t +quota_inode_ctx_get (inode_t *inode, int64_t limit, xlator_t *this, + dict_t *dict, struct iatt *buf, quota_inode_ctx_t **ctx, + char create_if_absent) { - struct quota_priv *priv = NULL; - struct quota_local *local = NULL; + int32_t ret = 0; + uint64_t ctx_int; + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx_int); + + if ((ret == 0) && (ctx != NULL)) { + *ctx = (quota_inode_ctx_t *) (unsigned long)ctx_int; + } else if (create_if_absent) { + ret = __quota_init_inode_ctx (inode, limit, this, dict, + buf, ctx); + } + } + UNLOCK (&inode->lock); + + return ret; +} - local = frame->local; - priv = this->private; - if ((op_ret >= 0) && priv->disk_usage_limit) { - gf_quota_usage_subtract (this, (local->stbuf.ia_blocks - - postbuf->ia_blocks) * 512); - fd_unref (local->fd); - } +int32_t +quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, struct iatt *postparent) +{ + int32_t ret = -1; + char found = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + int64_t *size = 0; + uint64_t value = 0; + limits_t *limit_node = NULL; + quota_priv_t *priv = NULL; + + local = frame->local; + + priv = this->private; + + inode_ctx_get (inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; + + if ((op_ret < 0) || (local == NULL) + || (((ctx == NULL) || (ctx->limit 
== local->limit)) + && (local->limit < 0) && !((IA_ISREG (buf->ia_type)) + || (IA_ISLNK (buf->ia_type))))) { + goto unwind; + } + + LOCK (&priv->lock); + { + list_for_each_entry (limit_node, &priv->limit_head, + limit_list) { + if (strcmp (local->loc.path, limit_node->path) == 0) { + uuid_copy (limit_node->gfid, buf->ia_gfid); + break; + } + } + } + UNLOCK (&priv->lock); + + ret = quota_inode_ctx_get (local->loc.inode, local->limit, this, dict, + buf, &ctx, 1); + if ((ret == -1) || (ctx == NULL)) { + gf_log (this->name, GF_LOG_WARNING, "cannot create quota " + "context in inode(gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + LOCK (&ctx->lock); + { + + if (dict != NULL) { + ret = dict_get_bin (dict, QUOTA_SIZE_KEY, + (void **) &size); + if (ret == 0) { + ctx->size = ntoh64 (*size); + gettimeofday (&ctx->tv, NULL); + } + } + + if (local->limit != ctx->limit) { + ctx->limit = local->limit; + } + + ctx->buf = *buf; - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, - prebuf, postbuf); - return 0; + if (!(IA_ISREG (buf->ia_type) || IA_ISLNK (buf->ia_type))) { + goto unlock; + } + + if (local->loc.name == NULL) + goto unlock; + + list_for_each_entry (dentry, &ctx->parents, next) { + if ((strcmp (dentry->name, local->loc.name) == 0) && + (uuid_compare (local->loc.parent->gfid, + dentry->par) == 0)) { + found = 1; + break; + } + } + + if (!found) { + dentry = __quota_dentry_new (ctx, + (char *)local->loc.name, + local->loc.parent->gfid); + if (dentry == NULL) { + /* + gf_log (this->name, GF_LOG_WARNING, + "cannot create a new dentry (par:%" + PRId64", name:%s) for inode(ino:%" + PRId64", gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + */ + op_ret = -1; + op_errno = ENOMEM; + goto unlock; + } + } + } +unlock: + UNLOCK (&ctx->lock); + +unwind: + QUOTA_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, + dict, postparent); + return 0; } -int -quota_ftruncate_fstat_cbk (call_frame_t *frame, void 
*cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) +int32_t +quota_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) { - struct quota_local *local = NULL; - struct quota_priv *priv = NULL; + int32_t ret = -1; + int64_t limit = -1; + limits_t *limit_node = NULL; + gf_boolean_t dict_newed = _gf_false; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; + + priv = this->private; + + list_for_each_entry (limit_node, &priv->limit_head, limit_list) { + if (strcmp (limit_node->path, loc->path) == 0) { + limit = limit_node->value; + } + } - priv = this->private; - local = frame->local; + local = quota_local_new (); + if (local == NULL) { + goto err; + } - if (op_ret >= 0) { - local->stbuf = *buf; - } + ret = loc_copy (&local->loc, loc); + if (ret == -1) { + goto err; + } + + frame->local = local; + + local->limit = limit; + + if (limit < 0) { + goto wind; + } + + if (xattr_req == NULL) { + xattr_req = dict_new (); + dict_newed = _gf_true; + } + + ret = dict_set_uint64 (xattr_req, QUOTA_SIZE_KEY, 0); + if (ret < 0) { + goto err; + } + +wind: + STACK_WIND (frame, quota_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + + ret = 0; - STACK_WIND (frame, quota_ftruncate_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate, - local->fd, local->offset); - return 0; +err: + if (ret < 0) { + QUOTA_STACK_UNWIND (lookup, frame, -1, ENOMEM, + NULL, NULL, NULL, NULL); + } + + if (dict_newed == _gf_true) { + dict_unref (xattr_req); + } + + return 0; } -int -quota_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) +void +quota_update_size (xlator_t *this, inode_t *inode, char *name, uuid_t par, + int64_t delta) { - struct quota_local *local = NULL; - struct quota_priv *priv = NULL; + inode_t *_inode = NULL; + inode_t *parent = NULL; + uint64_t value = 0; + quota_inode_ctx_t *ctx = NULL; + uuid_t trav_uuid = {0,}; + GF_VALIDATE_OR_GOTO ("quota", this, out); + 
GF_VALIDATE_OR_GOTO (this->name, inode, out); - priv = this->private; + inode_ctx_get (inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; - if (priv->disk_usage_limit) { - local = CALLOC (1, sizeof (struct quota_local)); - frame->local = local; + _inode = inode_ref (inode); - local->fd = fd_ref (fd); - local->offset = offset; + if ( par != NULL ) { + uuid_copy (trav_uuid, par); + } - STACK_WIND (frame, quota_ftruncate_fstat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd); - return 0; - } + do { + if ((ctx != NULL) && (ctx->limit >= 0)) { + LOCK (&ctx->lock); + { + ctx->size += delta; + } + UNLOCK (&ctx->lock); + } + + if (__is_root_gfid (_inode->gfid)) { + break; + } + + parent = inode_parent (_inode, trav_uuid, name); + if (parent == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "cannot find parent for inode (gfid:%s), hence " + "aborting size updation of parents", + uuid_utoa (_inode->gfid)); + } + + if (name != NULL) { + name = NULL; + uuid_clear (trav_uuid); + } - STACK_WIND (frame, quota_ftruncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, - fd, offset); - return 0; + inode_unref (_inode); + _inode = parent; + + if (_inode == NULL) { + break; + } + + inode_ctx_get (_inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; + } while (1); + +out: + return; } -int -quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) +int32_t +quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - struct quota_priv *priv = NULL; + int32_t ret = 0; + uint64_t ctx_int = 0; + quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_dentry_t *dentry = NULL; + int64_t delta = 0; - priv = this->private; + local = frame->local; - if ((op_ret >= 0) && 
priv->disk_usage_limit) { - gf_quota_usage_add (this, buf->ia_blocks * 512); - } + if ((op_ret < 0) || (local == NULL)) { + goto out; + } + + ret = inode_ctx_get (local->loc.inode, this, &ctx_int); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to get the context", local->loc.path); + goto out; + } + + ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int; + + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in %s (gfid:%s)", + local->loc.path, uuid_utoa (local->loc.inode->gfid)); + goto out; + } - STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf, - preparent, postparent); - return 0; + LOCK (&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK (&ctx->lock); + + list_for_each_entry (dentry, &ctx->parents, next) { + delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512; + quota_update_size (this, local->loc.inode, + dentry->name, dentry->par, delta); + } + +out: + QUOTA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; } -int -quota_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t rdev) +int32_t +quota_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off, + uint32_t flags, struct iobref *iobref, dict_t *xdata) { - struct quota_priv *priv = NULL; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; - priv = this->private; + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto unwind; + } - if (gf_quota_check_free_disk (this) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "min-free-disk limit (%u) crossed, current available is %u", - priv->min_free_disk_limit, priv->current_free_disk); - STACK_UNWIND_STRICT (mknod, frame, -1, ENOSPC, NULL, NULL, - NULL, NULL); - return 0; - } + if (local->op_ret == -1) { + op_errno = local->op_errno; + goto unwind; + } + + STACK_WIND (frame, quota_writev_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->writev, fd, vector, count, off, + flags, iobref, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} - if (priv->current_disk_usage > priv->disk_usage_limit) { - gf_log (this->name, GF_LOG_ERROR, - "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"", - priv->disk_usage_limit, priv->current_disk_usage); - STACK_UNWIND_STRICT (mknod, frame, -1, ENOSPC, NULL, NULL, - NULL, NULL); - return 0; + +int32_t +quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off, + uint32_t flags, struct iobref *iobref, dict_t *xdata) +{ + int32_t ret = -1, op_errno = EINVAL; + int32_t parents = 0; + uint64_t size = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quota_dentry_t *dentry = NULL; + + GF_ASSERT (frame); + GF_VALIDATE_OR_GOTO ("quota", this, unwind); + GF_VALIDATE_OR_GOTO (this->name, fd, unwind); + + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } + + frame->local = local; + local->loc.inode = inode_ref (fd->inode); + + ret = quota_inode_ctx_get (fd->inode, -1, this, NULL, NULL, &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (fd->inode->gfid)); + goto unwind; + } + + stub = fop_writev_stub (frame, quota_writev_helper, fd, vector, count, + off, flags, iobref, xdata); + if (stub == NULL) { + op_errno = ENOMEM; + goto unwind; + } + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, unwind); + + size = iov_length (vector, count); + LOCK (&ctx->lock); + { + list_for_each_entry (dentry, &ctx->parents, next) { + parents++; + } + } + UNLOCK (&ctx->lock); + + local->delta = size; + local->stub = stub; + local->link_count = parents; + + list_for_each_entry (dentry, &ctx->parents, next) { + ret = quota_check_limit (frame, fd->inode, this, 
dentry->name, + dentry->par); + if (ret == -1) { + break; + } + } + + stub = NULL; + + LOCK (&local->lock); + { + local->link_count = 0; + if (local->validate_count == 0) { + stub = local->stub; + local->stub = NULL; + } + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); } - STACK_WIND (frame, quota_mknod_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev); - return 0; + return 0; + +unwind: + QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } -int +int32_t quota_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, + int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { - struct quota_priv *priv = NULL; + QUOTA_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, + buf, preparent, postparent, xdata); + return 0; +} - priv = this->private; - if ((op_ret >= 0) && priv->disk_usage_limit) { - gf_quota_usage_subtract (this, buf->ia_blocks * 512); - } +int32_t +quota_mkdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, + mode_t mode, mode_t umask, dict_t *xdata) +{ + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto unwind; + } + + op_errno = local->op_errno; - STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf, - preparent, postparent); - return 0; + if (local->op_ret == -1) { + goto unwind; + } + + STACK_WIND (frame, quota_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, + NULL, NULL, NULL); + return 0; } -int -quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode) +int32_t +quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, 
mode_t mode, + mode_t umask, dict_t *xdata) { - struct quota_priv *priv = NULL; + int32_t ret = 0, op_errno = 0; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + + local = quota_local_new (); + if (local == NULL) { + op_errno = ENOMEM; + goto err; + } - priv = this->private; + frame->local = local; - if (gf_quota_check_free_disk (this) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "min-free-disk limit (%u) crossed, current available is %u", - priv->min_free_disk_limit, priv->current_free_disk); - STACK_UNWIND_STRICT (mkdir, frame, -1, ENOSPC, NULL, NULL, - NULL, NULL); - return 0; - - } + local->link_count = 1; + + ret = loc_copy (&local->loc, loc); + if (ret) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + stub = fop_mkdir_stub (frame, quota_mkdir_helper, loc, mode, umask, + xdata); + if (stub == NULL) { + op_errno = ENOMEM; + goto err; + } + + local->stub = stub; + local->delta = 0; + + quota_check_limit (frame, loc->parent, this, NULL, NULL); + + stub = NULL; + + LOCK (&local->lock); + { + if (local->validate_count == 0) { + stub = local->stub; + local->stub = NULL; + } + + local->link_count = 0; + } + UNLOCK (&local->lock); - if (priv->current_disk_usage > priv->disk_usage_limit) { - gf_log (this->name, GF_LOG_ERROR, - "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"", - priv->disk_usage_limit, priv->current_disk_usage); - STACK_UNWIND_STRICT (mkdir, frame, -1, ENOSPC, NULL, NULL, - NULL, NULL); - return 0; + if (stub != NULL) { + call_resume (stub); } - STACK_WIND (frame, quota_mkdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, - loc, mode); + return 0; +err: + QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL); - return 0; + return 0; } -int +int32_t +quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct 
iatt *postparent, dict_t *xdata) +{ + int32_t ret = -1; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + + local = frame->local; + if (op_ret < 0) { + goto unwind; + } + + ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1); + if ((ret == -1) || (ctx == NULL)) { + gf_log (this->name, GF_LOG_WARNING, "cannot create quota " + "context in inode(gfid:%s)", + uuid_utoa (inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + LOCK (&ctx->lock); + { + ctx->buf = *buf; + + dentry = __quota_dentry_new (ctx, (char *)local->loc.name, + local->loc.parent->gfid); + if (dentry == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "cannot create a new dentry (name:%s) for " + "inode(gfid:%s)", local->loc.name, + uuid_utoa (local->loc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unlock; + } + } +unlock: + UNLOCK (&ctx->lock); + +unwind: + QUOTA_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; +} + + +int32_t +quota_create_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) +{ + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto unwind; + } + + if (local->op_ret == -1) { + op_errno = local->op_errno; + goto unwind; + } + + STACK_WIND (frame, quota_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, + fd, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, + NULL, NULL, NULL, NULL); + return 0; +} + + +int32_t +quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + int32_t ret = -1; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + + local = quota_local_new (); + if (local 
== NULL) { + goto err; + } + + frame->local = local; + + ret = loc_copy (&local->loc, loc); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + stub = fop_create_stub (frame, quota_create_helper, loc, flags, mode, + umask, fd, xdata); + if (stub == NULL) { + goto err; + } + + local->link_count = 1; + local->stub = stub; + local->delta = 0; + + quota_check_limit (frame, loc->parent, this, NULL, NULL); + + stub = NULL; + + LOCK (&local->lock); + { + local->link_count = 0; + if (local->validate_count == 0) { + stub = local->stub; + local->stub = NULL; + } + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); + } + + return 0; +err: + QUOTA_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL, NULL); + + return 0; +} + + +int32_t quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - struct quota_local *local = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + uint64_t value = 0; - local = frame->local; + if (op_ret < 0) { + goto out; + } - if (local) { - if (op_ret >= 0) { - gf_quota_usage_subtract (this, - local->stbuf.ia_blocks * 512); - } - loc_wipe (&local->loc); - } + local = (quota_local_t *) frame->local; - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, postparent); - return 0; + inode_ctx_get (local->loc.inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; + + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + quota_update_size (this, local->loc.inode, (char *)local->loc.name, + local->loc.parent->gfid, + (-(ctx->buf.ia_blocks * 512))); + +out: + QUOTA_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, + 
postparent, xdata); + return 0; } -int -quota_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) +int32_t +quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - struct quota_local *local = NULL; + int32_t ret = 0; + quota_local_t *local = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto err; + } - local = frame->local; + frame->local = local; - if (op_ret >= 0) { - if (buf->ia_nlink == 1) { - local->stbuf = *buf; - } - } + ret = loc_copy (&local->loc, loc); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + STACK_WIND (frame, quota_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - STACK_WIND (frame, quota_unlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - &local->loc); + ret = 0; - return 0; +err: + if (ret == -1) { + QUOTA_STACK_UNWIND (unlink, frame, -1, 0, NULL, NULL, NULL); + } + + return 0; } -int -quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +int32_t +quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - struct quota_local *local = NULL; - struct quota_priv *priv = NULL; + int32_t ret = -1; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + char found = 0; + + if (op_ret < 0) { + goto out; + } - priv = this->private; + local = (quota_local_t *) frame->local; - if (priv->disk_usage_limit) { - local = CALLOC (1, sizeof (struct quota_local)); - frame->local = local; + quota_update_size (this, local->loc.parent, NULL, NULL, + (buf->ia_blocks * 512)); - loc_copy (&local->loc, loc); + ret = quota_inode_ctx_get (inode, -1, this, NULL, NULL, &ctx, 0); + if ((ret == -1) || (ctx == NULL)) { + gf_log (this->name, GF_LOG_WARNING, 
"cannot find quota " + "context in %s (gfid:%s)", local->loc.path, + uuid_utoa (inode->gfid)); + op_ret = -1; + op_errno = EINVAL; + goto out; + } - STACK_WIND (frame, - quota_unlink_stat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, - loc); - return 0; - } + LOCK (&ctx->lock); + { + list_for_each_entry (dentry, &ctx->parents, next) { + if ((strcmp (dentry->name, local->loc.name) == 0) && + (uuid_compare (local->loc.parent->gfid, + dentry->par) == 0)) { + found = 1; + gf_log (this->name, GF_LOG_WARNING, + "new entry being linked (name:%s) for " + "inode (gfid:%s) is already present " + "in inode-dentry-list", dentry->name, + uuid_utoa (local->loc.inode->gfid)); + break; + } + } + + if (!found) { + dentry = __quota_dentry_new (ctx, + (char *)local->loc.name, + local->loc.parent->gfid); + if (dentry == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "cannot create a new dentry (name:%s) " + "for inode(gfid:%s)", local->loc.name, + uuid_utoa (local->loc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unlock; + } + } - STACK_WIND (frame, quota_unlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - loc); - return 0; + ctx->buf = *buf; + } +unlock: + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + + return 0; } -int -quota_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) +int32_t +quota_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) { - struct quota_local *local = NULL; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; - local = frame->local; + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto unwind; + } - if (local) { - if (op_ret >= 0) { - gf_quota_usage_subtract (this, local->stbuf.ia_blocks * 512); - } - loc_wipe (&local->loc); - } + 
op_errno = local->op_errno; + + if (local->op_ret == -1) { + goto unwind; + } + + STACK_WIND (frame, quota_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, postparent); - return 0; +unwind: + QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, + NULL, NULL, NULL); + return 0; } -int -quota_rmdir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) +int32_t +quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - struct quota_local *local = NULL; + int32_t ret = -1, op_errno = ENOMEM; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + quota_inode_ctx_t *ctx = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto err; + } - local = frame->local; + frame->local = (void *) local; - if (op_ret >= 0) { - local->stbuf = *buf; - } + ret = loc_copy (&local->loc, newloc); + if (ret == -1) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } - STACK_WIND (frame, quota_rmdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, - &local->loc); + stub = fop_link_stub (frame, quota_link_helper, oldloc, newloc, xdata); + if (stub == NULL) { + goto err; + } + + local->link_count = 1; + local->stub = stub; + + ret = quota_inode_ctx_get (oldloc->inode, -1, this, NULL, NULL, &ctx, + 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + oldloc->inode ? 
uuid_utoa (oldloc->inode->gfid) : "0"); + op_errno = EINVAL; + goto err; + } + + local->delta = ctx->buf.ia_blocks * 512; - return 0; + quota_check_limit (frame, newloc->parent, this, NULL, NULL); + + stub = NULL; + + LOCK (&local->lock); + { + if (local->validate_count == 0) { + stub = local->stub; + local->stub = NULL; + } + + local->link_count = 0; + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); + } + + ret = 0; +err: + if (ret < 0) { + QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, + NULL, NULL, NULL); + } + + return 0; } -int -quota_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc) +int32_t +quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - struct quota_local *local = NULL; - struct quota_priv *priv = NULL; + int32_t ret = -1; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *old_dentry = NULL, *dentry = NULL; + char new_dentry_found = 0; + int64_t size = 0; + + if (op_ret < 0) { + goto out; + } - priv = this->private; + local = frame->local; + if (local == NULL) { + op_ret = -1; + op_errno = EINVAL; + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } - if (priv->disk_usage_limit) { - local = CALLOC (1, sizeof (struct quota_local)); - frame->local = local; + if (IA_ISREG (local->oldloc.inode->ia_type) + || IA_ISLNK (local->oldloc.inode->ia_type)) { + size = buf->ia_blocks * 512; + } - loc_copy (&local->loc, loc); + if (local->oldloc.parent != local->newloc.parent) { + quota_update_size (this, local->oldloc.parent, NULL, NULL, (-size)); + quota_update_size (this, local->newloc.parent, NULL, NULL, size); + } - STACK_WIND (frame, quota_rmdir_stat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc); - return 0; - } + if (!(IA_ISREG 
(local->oldloc.inode->ia_type) + || IA_ISLNK (local->oldloc.inode->ia_type))) { + goto out; + } - STACK_WIND (frame, quota_rmdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, - loc); - return 0; + ret = quota_inode_ctx_get (local->oldloc.inode, -1, this, NULL, NULL, + &ctx, 0); + if ((ret == -1) || (ctx == NULL)) { + gf_log (this->name, GF_LOG_WARNING, "quota context not" + "set in inode(gfid:%s)", + uuid_utoa (local->oldloc.inode->gfid)); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + LOCK (&ctx->lock); + { + /* decision of whether to create a context in newloc->inode + * depends on fuse_rename_cbk's choice of inode it retains + * after rename. currently it just associates oldloc->inode + * with new parent and name. If this changes, following code + * should be changed to set a new context in newloc->inode. + */ + list_for_each_entry (dentry, &ctx->parents, next) { + if ((strcmp (dentry->name, local->oldloc.name) == 0) && + (uuid_compare (local->oldloc.parent->gfid, + dentry->par) == 0)) { + old_dentry = dentry; + } else if ((strcmp (dentry->name, + local->newloc.name) == 0) && + (uuid_compare (local->oldloc.parent->gfid, + dentry->par) == 0)) { + new_dentry_found = 1; + gf_log (this->name, GF_LOG_WARNING, + "new entry being linked (name:%s) for " + "inode (gfid:%s) is already present " + "in inode-dentry-list", dentry->name, + uuid_utoa (local->newloc.inode->gfid)); + break; + } + } + + if (old_dentry != NULL) { + __quota_dentry_free (old_dentry); + } else { + gf_log (this->name, GF_LOG_WARNING, + "dentry corresponding to the path just renamed " + "(name:%s) is not present", local->oldloc.name); + } + + if (!new_dentry_found) { + dentry = __quota_dentry_new (ctx, + (char *)local->newloc.name, + local->newloc.parent->gfid); + if (dentry == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "cannot create a new dentry (name:%s) " + "for inode(gfid:%s)", local->newloc.name, + uuid_utoa (local->newloc.inode->gfid)); + op_ret = -1; + op_errno = 
ENOMEM; + goto unlock; + } + } + + ctx->buf = *buf; + } +unlock: + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); + + return 0; } -int +int32_t +quota_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto unwind; + } + + op_errno = local->op_errno; + + if (local->op_ret == -1) { + goto unwind; + } + + STACK_WIND (frame, quota_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, + NULL, NULL, NULL, NULL); + return 0; +} + + +int32_t +quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + int32_t ret = -1, op_errno = ENOMEM; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + quota_inode_ctx_t *ctx = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto err; + } + + frame->local = local; + + ret = loc_copy (&local->oldloc, oldloc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + ret = loc_copy (&local->newloc, newloc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc, + xdata); + if (stub == NULL) { + goto err; + } + + local->link_count = 1; + local->stub = stub; + + if (IA_ISREG (oldloc->inode->ia_type) + || IA_ISLNK (oldloc->inode->ia_type)) { + ret = quota_inode_ctx_get (oldloc->inode, -1, this, NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + oldloc->inode ? 
uuid_utoa (oldloc->inode->gfid) + : "0"); + op_errno = EINVAL; + goto err; + } + local->delta = ctx->buf.ia_blocks * 512; + } else { + local->delta = 0; + } + + quota_check_limit (frame, newloc->parent, this, NULL, NULL); + + stub = NULL; + + LOCK (&local->lock); + { + if (local->validate_count == 0) { + stub = local->stub; + local->stub = NULL; + } + + local->link_count = 0; + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); + } + + ret = 0; +err: + if (ret == -1) { + QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, + NULL, NULL, NULL, NULL, NULL); + } + + return 0; +} + + +int32_t quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, + int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { - struct quota_priv *priv = NULL; + int64_t size = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + + if (op_ret < 0) { + goto out; + } - priv = this->private; + local = frame->local; + size = buf->ia_blocks * 512; - if ((op_ret >= 0) && priv->disk_usage_limit) { - gf_quota_usage_add (this, buf->ia_blocks * 512); - } + quota_update_size (this, local->loc.parent, NULL, NULL, size); - STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, - preparent, postparent); - return 0; + quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL, + &ctx, 1); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + ctx->buf = *buf; + + dentry = __quota_dentry_new (ctx, (char *)local->loc.name, + local->loc.parent->gfid); + if (dentry == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "cannot create a new dentry (name:%s) for " + "inode(gfid:%s)", local->loc.name, + uuid_utoa 
(local->loc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + + return 0; } int -quota_symlink (call_frame_t *frame, xlator_t *this, - const char *linkpath, loc_t *loc) +quota_symlink_helper (call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - struct quota_priv *priv = NULL; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; - priv = this->private; + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto unwind; + } - if (gf_quota_check_free_disk (this) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "min-free-disk limit (%u) crossed, current available is %u", - priv->min_free_disk_limit, priv->current_free_disk); - STACK_UNWIND_STRICT (symlink, frame, -1, ENOSPC, NULL, NULL, - NULL, NULL); - return 0; - - } - if (priv->current_disk_usage > priv->disk_usage_limit) { - gf_log (this->name, GF_LOG_ERROR, - "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"", - priv->disk_usage_limit, priv->current_disk_usage); - STACK_UNWIND_STRICT (symlink, frame, -1, ENOSPC, NULL, NULL, - NULL, NULL); - return 0; + if (local->op_ret == -1) { + op_errno = local->op_errno; + goto unwind; } - STACK_WIND (frame, quota_symlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, - linkpath, loc); - return 0; + STACK_WIND (frame, quota_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, + xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, + NULL, NULL, NULL); + return 0; } int -quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - fd_t *fd, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent) +quota_symlink (call_frame_t *frame, xlator_t *this, 
const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - struct quota_priv *priv = this->private; - int ret = 0; + int32_t ret = -1; + int32_t op_errno = ENOMEM; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto err; + } - if ((op_ret >= 0) && priv->disk_usage_limit) { - gf_quota_usage_add (this, buf->ia_blocks * 512); + frame->local = local; - ret = fd_ctx_set (fd, this, 1); - } + ret = loc_copy (&local->loc, loc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + local->link_count = 1; + + stub = fop_symlink_stub (frame, quota_symlink_helper, linkpath, loc, + umask, xdata); + if (stub == NULL) { + goto err; + } - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); - return 0; + local->stub = stub; + local->delta = strlen (linkpath); + + quota_check_limit (frame, loc->parent, this, NULL, NULL); + + stub = NULL; + + LOCK (&local->lock); + { + if (local->validate_count == 0) { + stub = local->stub; + local->stub = NULL; + } + + local->link_count = 0; + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); + } + + return 0; + +err: + QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL); + + return 0; } -int -quota_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +int32_t +quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - struct quota_priv *priv = NULL; + quota_local_t *local = NULL; + int64_t delta = 0; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } - priv = this->private; + delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512; - if 
(gf_quota_check_free_disk (this) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "min-free-disk limit (%u) crossed, current available is %u", - priv->min_free_disk_limit, priv->current_free_disk); - STACK_UNWIND_STRICT (create, frame, -1, ENOSPC, NULL, NULL, NULL, - NULL, NULL); - return 0; - - } - if (priv->current_disk_usage > priv->disk_usage_limit) { - gf_log (this->name, GF_LOG_ERROR, - "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"", - priv->disk_usage_limit, priv->current_disk_usage); - STACK_UNWIND_STRICT (create, frame, -1, ENOSPC, NULL, NULL, NULL, - NULL, NULL); - return 0; + quota_update_size (this, local->loc.inode, NULL, NULL, delta); + + quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + ctx->buf = *postbuf; } + UNLOCK (&ctx->lock); - STACK_WIND (frame, quota_create_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, - loc, flags, mode, fd); - return 0; +out: + QUOTA_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; } -int -quota_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) +int32_t +quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - int ret = 0; + int32_t ret = -1; + quota_local_t *local = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto err; + } + + frame->local = local; + + ret = loc_copy (&local->loc, loc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + STACK_WIND (frame, quota_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); - if (op_ret >= 0) - ret = fd_ctx_set (fd, this, 1); + return 0; +err: + QUOTA_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, 
NULL); - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); - return 0; + return 0; } -int -quota_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) +int32_t +quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - STACK_WIND (frame, quota_open_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - loc, flags, fd, wbflags); - return 0; + quota_local_t *local = NULL; + int64_t delta = 0; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } + + delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512; + + quota_update_size (this, local->loc.inode, NULL, NULL, delta); + + quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; } -int -quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +int32_t +quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - struct quota_priv *priv = NULL; - struct quota_local *local = NULL; + quota_local_t *local = NULL; + local = quota_local_new (); + if (local == NULL) + goto err; - priv = this->private; - local = frame->local; + frame->local = local; - if (priv->disk_usage_limit) { - if (op_ret >= 0) { - gf_quota_usage_add (this, (postbuf->ia_blocks - - prebuf->ia_blocks) * 512); - } - fd_unref (local->fd); - iobref_unref (local->iobref); - 
} + local->loc.inode = inode_ref (fd->inode); + + STACK_WIND (frame, quota_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); - return 0; + return 0; +err: + QUOTA_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; } -int -quota_writev_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) +int32_t +quota_send_dir_limit_to_cli (call_frame_t *frame, xlator_t *this, + inode_t *inode, const char *name) { - struct quota_local *local = NULL; - struct quota_priv *priv = NULL; - int iovlen = 0; + int32_t ret = 0; + char dir_limit [1024] = {0, }; + dict_t *dict = NULL; + quota_inode_ctx_t *ctx = NULL; + uint64_t value = 0; + + ret = inode_ctx_get (inode, this, &value); + if (ret < 0) + goto out; + + ctx = (quota_inode_ctx_t *)(unsigned long)value; + snprintf (dir_limit, 1024, "%"PRId64",%"PRId64, ctx->size, ctx->limit); + + dict = dict_new (); + if (dict == NULL) { + ret = -1; + goto out; + } + ret = dict_set_str (dict, (char *) name, dir_limit); + if (ret < 0) + goto out; - local = frame->local; - priv = this->private; + gf_log (this->name, GF_LOG_INFO, "str = %s", dir_limit); - if (op_ret >= 0) { - if (priv->current_disk_usage > priv->disk_usage_limit) { - iovlen = iov_length (local->vector, local->count); + QUOTA_STACK_UNWIND (getxattr, frame, 0, 0, dict, NULL); - if (iovlen > (buf->ia_blksize - (buf->ia_size % buf->ia_blksize))) { - fd_unref (local->fd); - iobref_unref (local->iobref); - STACK_UNWIND_STRICT (writev, frame, -1, ENOSPC, - NULL, NULL); - return 0; - } - } - local->stbuf = *buf; - } - - STACK_WIND (frame, quota_writev_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, - local->fd, local->vector, local->count, local->offset, - local->iobref); + ret = 0; - return 0; +out: + return ret; } -int -quota_writev (call_frame_t *frame, xlator_t *this, fd_t 
*fd, - struct iovec *vector, int32_t count, off_t off, - struct iobref *iobref) -{ - struct quota_local *local = NULL; - struct quota_priv *priv = NULL; - int i = 0; - - priv = this->private; - - if (gf_quota_check_free_disk (this) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "min-free-disk limit (%u) crossed, current available is %u", - priv->min_free_disk_limit, priv->current_free_disk); - STACK_UNWIND_STRICT (writev, frame, -1, ENOSPC, - NULL, NULL); - return 0; - } +int32_t +quota_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + int32_t ret = 0; - if (priv->disk_usage_limit) { - local = CALLOC (1, sizeof (struct quota_local)); - local->fd = fd_ref (fd); - local->iobref = iobref_ref (iobref); - for (i = 0; i < count; i++) { - local->vector[i].iov_base = vector[i].iov_base; - local->vector[i].iov_len = vector[i].iov_len; + if (name && strcasecmp (name, "trusted.limit.list") == 0) { + ret = quota_send_dir_limit_to_cli (frame, this, fd->inode, + name); + if (ret == 0) { + return 0; } + } + + STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; +} - local->count = count; - local->offset = off; - frame->local = local; - STACK_WIND (frame, quota_writev_fstat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd); - return 0; - } +int32_t +quota_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int32_t ret = 0; + + if ((name != NULL) && strcasecmp (name, "trusted.limit.list") == 0) { + ret = quota_send_dir_limit_to_cli (frame, this, loc->inode, + name); + if (ret == 0) + return 0; + } - STACK_WIND (frame, quota_writev_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, - fd, vector, count, off, iobref); - return 0; + STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; } -int -quota_removexattr_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int32_t +quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - if (op_ret == -1) { - gf_log (this->name, GF_LOG_CRITICAL, - "failed to remove the disk-usage value: %s", - strerror (op_errno)); - } - - STACK_DESTROY (frame->root); - return 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } + + quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + if (buf) + ctx->buf = *buf; + } + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata); + return 0; } -int -quota_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int32_t +quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - dict_t *dict = NULL; + quota_local_t *local = NULL; + int32_t ret = -1; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_CRITICAL, - "failed to set the disk-usage value: %s", - strerror (op_errno)); - } + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } - if (cookie) { - dict = (dict_t *) cookie; - dict_unref (dict); + frame->local = local; + ret = loc_copy (&local->loc, loc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto unwind; } - STACK_DESTROY (frame->root); - return 0; + STACK_WIND (frame, quota_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL); + return 0; } -int 
-quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *statvfs) +int32_t +quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - struct quota_priv *priv = NULL; - uint64_t f_blocks = 0; - int64_t f_bfree = 0; - uint64_t f_bused = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + if (op_ret < 0) { + goto out; + } - priv = this->private; + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } - if (op_ret != 0) - goto unwind; + quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + if (buf) + ctx->buf = *buf; + } + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + + +int32_t +quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + quota_local_t *local = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } + + frame->local = local; + + local->loc.inode = inode_ref (fd->inode); + + STACK_WIND (frame, quota_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL); + return 0; +} + + +int32_t +quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *path, + struct iatt *buf, dict_t *xdata) +{ + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } + + quota_inode_ctx_get (local->loc.inode, -1, this, 
NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + ctx->buf = *buf; + } + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf, xdata); + return 0; +} + + +int32_t +quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) +{ + quota_local_t *local = NULL; + int32_t ret = -1; + + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } + + frame->local = local; + + ret = loc_copy (&local->loc, loc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto unwind; + } + + STACK_WIND (frame, quota_readlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, loc, size, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + + +int32_t +quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *buf, struct iobref *iobref, + dict_t *xdata) +{ + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } + + quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + ctx->buf = *buf; + } + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, + buf, iobref, xdata); + return 0; +} - f_blocks = priv->disk_usage_limit / statvfs->f_frsize; - f_bused = priv->current_disk_usage / statvfs->f_frsize; - if (f_blocks && (f_blocks < 
statvfs->f_blocks)) - statvfs->f_blocks = f_blocks; +int32_t +quota_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + quota_local_t *local = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } - f_bfree = (statvfs->f_blocks - f_bused); + frame->local = local; - if (f_bfree >= 0) - statvfs->f_bfree = statvfs->f_bavail = f_bfree; - else - statvfs->f_bfree = statvfs->f_bavail = 0; + local->loc.inode = inode_ref (fd->inode); + + STACK_WIND (frame, quota_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + return 0; unwind: - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, statvfs); - return 0; + QUOTA_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL, NULL); + return 0; +} + + +int32_t +quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } + + quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} + + +int32_t +quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) +{ + quota_local_t *local = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } + + local->loc.inode = inode_ref (fd->inode); + + frame->local = local; + + STACK_WIND (frame, quota_fsync_cbk, 
FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + +} + + +int32_t +quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } + + quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + if (statpost) + ctx->buf = *statpost; + } + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre, + statpost, xdata); + return 0; +} + + +int32_t +quota_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + quota_local_t *local = NULL; + int32_t ret = -1; + + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } + + frame->local = local; + + ret = loc_copy (&local->loc, loc); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto unwind; + } + + STACK_WIND (frame, quota_setattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + + +int32_t +quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + if (local 
== NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto out; + } + + quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL, + &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + ctx->buf = *statpost; + } + UNLOCK (&ctx->lock); + +out: + QUOTA_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, statpre, + statpost, xdata); + return 0; +} + + +int32_t +quota_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + quota_local_t *local = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } + + frame->local = local; + + local->loc.inode = inode_ref (fd->inode); + + STACK_WIND (frame, quota_fsetattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + + +int32_t +quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int32_t ret = -1; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + + local = frame->local; + if (op_ret < 0) { + goto unwind; + } + + ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1); + if ((ret == -1) || (ctx == NULL)) { + gf_log (this->name, GF_LOG_WARNING, "cannot create quota " + "context in inode (gfid:%s)", uuid_utoa (inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + LOCK (&ctx->lock); + { + ctx->buf = *buf; + + dentry = __quota_dentry_new (ctx, (char *)local->loc.name, + local->loc.parent->gfid); + if (dentry == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "cannot create a new dentry (name:%s) for " + 
"inode(gfid:%s)", local->loc.name, + uuid_utoa (local->loc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unlock; + } + } +unlock: + UNLOCK (&ctx->lock); + +unwind: + QUOTA_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, + buf, preparent, postparent, xdata); + return 0; } int -quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +quota_mknod_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, + mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) { - STACK_WIND (frame, quota_statfs_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->statfs, loc); + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto unwind; + } + + if (local->op_ret == -1) { + op_errno = local->op_errno; + goto unwind; + } - return 0; + STACK_WIND (frame, quota_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, + xdata); + + return 0; + +unwind: + QUOTA_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, + NULL, NULL, NULL); + return 0; } int -quota_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *value) -{ - data_t *data = NULL; - struct quota_priv *priv = this->private; - - if (op_ret >= 0) { - data = dict_get (value, "trusted.glusterfs-quota-du"); - if (data) { - LOCK (&priv->lock); - { - priv->current_disk_usage = data_to_uint64 (data); - } - UNLOCK (&priv->lock); +quota_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + int32_t ret = -1; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + + local = quota_local_new (); + if (local == NULL) { + goto err; + } + + frame->local = local; + + ret = loc_copy (&local->loc, loc); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "loc_copy failed"); + goto err; + } + + stub = fop_mknod_stub (frame, quota_mknod_helper, loc, mode, rdev, 
+ umask, xdata); + if (stub == NULL) { + goto err; + } + + local->link_count = 1; + local->stub = stub; + local->delta = 0; + + quota_check_limit (frame, loc->parent, this, NULL, NULL); + + stub = NULL; - return 0; - } - } + LOCK (&local->lock); + { + local->link_count = 0; + if (local->validate_count == 0) { + stub = local->stub; + local->stub = NULL; + } + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); + } - STACK_DESTROY (frame->root); + return 0; +err: + QUOTA_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } +int +quota_setxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, dict_t *xdata) +{ + QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); + return 0; +} -void -gf_quota_get_disk_usage (xlator_t *this) +int +quota_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int flags, dict_t *xdata) +{ + int op_errno = EINVAL; + int op_ret = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict, + op_errno, err); + + STACK_WIND (frame, quota_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, dict, flags, xdata); + return 0; +err: + QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int +quota_fsetxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, dict_t *xdata) +{ + QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int +quota_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int flags, dict_t *xdata) { - call_frame_t *frame = NULL; - call_pool_t *pool = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + GF_IF_INTERNAL_XATTR_GOTO 
("trusted.glusterfs.quota*", dict, + op_errno, err); + + STACK_WIND (frame, quota_fsetxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, + fd, dict, flags, xdata); + return 0; + err: + QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); + return 0; +} - struct quota_priv *priv = NULL; - pool = this->ctx->pool; - frame = create_frame (this, pool); - priv = this->private; +int +quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + QUOTA_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); + return 0; +} - STACK_WIND (frame, quota_getxattr_cbk, - this->children->xlator, - this->children->xlator->fops->getxattr, - &(priv->root_loc), - "trusted.glusterfs-quota-du"); - return ; +int +quota_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO (this, err); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.quota*", + name, op_errno, err); + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (loc, err); + + STACK_WIND (frame, quota_removexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, name, xdata); + return 0; +err: + QUOTA_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); + return 0; } -void -gf_quota_cache_sync (xlator_t *this) +int +quota_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - struct quota_priv *priv = NULL; - call_frame_t *frame = NULL; - dict_t *dict = get_new_dict (); + QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int +quota_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.quota*", + 
name, op_errno, err); + + STACK_WIND (frame, quota_fremovexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, + fd, name, xdata); + return 0; + err: + QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int32_t +quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf, + dict_t *xdata) +{ + inode_t *root_inode = NULL; + quota_priv_t *priv = NULL; + uint64_t value = 0; + quota_inode_ctx_t *ctx = NULL; + limits_t *limit_node = NULL; + int64_t usage = -1; + int64_t avail = -1; + int64_t blocks = 0; + + root_inode = cookie; + + /* This fop will fail mostly in case of client disconnect's, + * which is already logged. Hence, not logging here */ + if (op_ret == -1) + goto unwind; + /* + * We should never get here unless quota_statfs (below) sent us a + * cookie, and it would only do so if the value was non-NULL. This + * check is therefore just routine defensive coding. + */ + if (!root_inode) { + gf_log(this->name,GF_LOG_WARNING, + "null inode, cannot adjust for quota"); + goto unwind; + } + if (!root_inode->table || (root_inode != root_inode->table->root)) { + gf_log(this->name,GF_LOG_WARNING, + "non-root inode, cannot adjust for quota"); + goto unwind; + } + + inode_ctx_get (root_inode, this, &value); + if (!value) { + goto unwind; + } + ctx = (quota_inode_ctx_t *)(unsigned long)value; + usage = (ctx->size) / buf->f_bsize; + priv = this->private; + + list_for_each_entry (limit_node, &priv->limit_head, limit_list) { + /* Notice that this only works for volume-level quota. 
*/ + if (strcmp (limit_node->path, "/") == 0) { + blocks = limit_node->value / buf->f_bsize; + if (usage > blocks) { + break; + } + + buf->f_blocks = blocks; + avail = buf->f_blocks - usage; + if (buf->f_bfree > avail) { + buf->f_bfree = avail; + } + /* + * We have to assume that the total assigned quota + * won't cause us to dip into the reserved space, + * because dealing with the overcommitted cases is + * just too hairy (especially when different bricks + * might be using different reserved percentages and + * such). + */ + buf->f_bavail = buf->f_bfree; + break; + } + } +unwind: + if (root_inode) { + inode_unref(root_inode); + } + STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata); + return 0; +} - priv = this->private; - frame = create_frame (this, this->ctx->pool); - dict_set (dict, "trusted.glusterfs-quota-du", - data_from_uint64 (priv->current_disk_usage)); +int32_t +quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + inode_t *root_inode = NULL; + quota_priv_t *priv = NULL; - dict_ref (dict); + priv = this->private; - STACK_WIND_COOKIE (frame, quota_setxattr_cbk, - (void *) (dict_t *) dict, - this->children->xlator, - this->children->xlator->fops->setxattr, - &(priv->root_loc), dict, 0); + if (priv->consider_statfs && loc->inode) { + root_inode = loc->inode->table->root; + inode_ref(root_inode); + STACK_WIND_COOKIE (frame, quota_statfs_cbk, root_inode, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->statfs, loc, xdata); + } + else { + /* + * We have to make sure that we never get to quota_statfs_cbk + * with a cookie that points to something other than an inode, + * which is exactly what would happen with STACK_UNWIND using + * that as a callback. Therefore, use default_statfs_cbk in + * this case instead. + * + * Also if the option deem-statfs is not set to "on" don't + * bother calculating quota limit on / in statfs_cbk. 
+ */ + if (priv->consider_statfs) + gf_log(this->name,GF_LOG_WARNING, + "missing inode, cannot adjust for quota"); + STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->statfs, loc, xdata); + } + return 0; } int -quota_release (xlator_t *this, fd_t *fd) +quota_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) { - gf_quota_cache_sync (this); + gf_dirent_t *entry = NULL; + + if (op_ret <= 0) + goto unwind; - return 0; + list_for_each_entry (entry, &entries->list, list) { + /* TODO: fill things */ + } + +unwind: + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + + return 0; } +int +quota_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + int ret = 0; + + if (dict) { + ret = dict_set_uint64 (dict, QUOTA_SIZE_KEY, 0); + if (ret < 0) { + goto err; + } + } + STACK_WIND (frame, quota_readdirp_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + fd, size, offset, dict); + return 0; +err: + STACK_UNWIND_STRICT (readdirp, frame, -1, EINVAL, NULL, NULL); + return 0; +} -/* notify */ int32_t -notify (xlator_t *this, - int32_t event, - void *data, - ...) 
+quota_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - default_notify (this, event, data); - return 0; + int32_t ret = 0; + uint64_t ctx_int = 0; + quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_dentry_t *dentry = NULL; + int64_t delta = 0; + + local = frame->local; + + if ((op_ret < 0) || (local == NULL)) { + goto out; + } + + ret = inode_ctx_get (local->loc.inode, this, &ctx_int); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to get the context", local->loc.path); + goto out; + } + + ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int; + + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in %s (gfid:%s)", + local->loc.path, uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK (&ctx->lock); + + list_for_each_entry (dentry, &ctx->parents, next) { + delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512; + quota_update_size (this, local->loc.inode, + dentry->name, dentry->par, delta); + } + +out: + QUOTA_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; } int32_t -quota_lookup_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - dict_t *dict, - struct iatt *postparent) -{ - STACK_UNWIND (frame, - op_ret, - op_errno, - inode, - buf, - dict, - postparent); - return 0; +quota_fallocate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata) +{ + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto unwind; + } + + if (local->op_ret == -1) { + op_errno = local->op_errno; + goto unwind; + } + + STACK_WIND (frame, 
quota_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int32_t -quota_lookup (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *xattr_req) -{ - struct quota_priv *priv = NULL; - - priv = this->private; - - if (priv->only_first_time) { - if (strcmp (loc->path, "/") == 0) { - loc_copy(&(priv->root_loc), loc); - priv->only_first_time = 0; - if (priv->disk_usage_limit) - gf_quota_get_disk_usage (this); - } - } +quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret = -1, op_errno = EINVAL; + int32_t parents = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quota_dentry_t *dentry = NULL; + + GF_ASSERT (frame); + GF_VALIDATE_OR_GOTO ("quota", this, unwind); + GF_VALIDATE_OR_GOTO (this->name, fd, unwind); + + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } + + frame->local = local; + local->loc.inode = inode_ref (fd->inode); + + ret = quota_inode_ctx_get (fd->inode, -1, this, NULL, NULL, &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (fd->inode->gfid)); + goto unwind; + } + + stub = fop_fallocate_stub(frame, quota_fallocate_helper, fd, mode, offset, len, + xdata); + if (stub == NULL) { + op_errno = ENOMEM; + goto unwind; + } + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, unwind); + + LOCK (&ctx->lock); + { + list_for_each_entry (dentry, &ctx->parents, next) { + parents++; + } + } + UNLOCK (&ctx->lock); + + /* + * Note that by using len as the delta we're assuming the range from + * offset to offset+len has not already been allocated. This can result + * in ENOSPC errors attempting to allocate an already allocated range. 
+ */ + local->delta = len; + local->stub = stub; + local->link_count = parents; + + list_for_each_entry (dentry, &ctx->parents, next) { + ret = quota_check_limit (frame, fd->inode, this, dentry->name, + dentry->par); + if (ret == -1) { + break; + } + } + + stub = NULL; + + LOCK (&local->lock); + { + local->link_count = 0; + if (local->validate_count == 0) { + stub = local->stub; + local->stub = NULL; + } + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); + } + + return 0; + +unwind: + QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_quota_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_WARNING, "Memory accounting" + "init failed"); + return ret; + } + + return ret; +} + + +int32_t +quota_forget (xlator_t *this, inode_t *inode) +{ + int32_t ret = 0; + uint64_t ctx_int = 0; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL, *tmp; + + ret = inode_ctx_del (inode, this, &ctx_int); + + if (ret < 0) { + return 0; + } + + ctx = (quota_inode_ctx_t *) (long)ctx_int; + + LOCK (&ctx->lock); + { + list_for_each_entry_safe (dentry, tmp, &ctx->parents, next) { + __quota_dentry_free (dentry); + } + } + UNLOCK (&ctx->lock); + + LOCK_DESTROY (&ctx->lock); + + GF_FREE (ctx); + + return 0; +} + + +int +quota_parse_limits (quota_priv_t *priv, xlator_t *this, dict_t *xl_options, + struct list_head *old_list) +{ + int32_t ret = -1; + char *str = NULL; + char *str_val = NULL; + char *path = NULL, *saveptr = NULL; + uint64_t value = 0; + limits_t *quota_lim = NULL, *old = NULL; + char *last_colon= NULL; + + ret = dict_get_str (xl_options, "limit-set", &str); + + if (str) { + path = strtok_r (str, ",", &saveptr); + + while (path) { + last_colon = strrchr (path, ':'); + *last_colon = '\0'; + str_val = last_colon + 1; + + ret = gf_string2bytesize (str_val, &value); + if 
(ret != 0) + goto err; + + QUOTA_ALLOC_OR_GOTO (quota_lim, limits_t, err); + + quota_lim->path = path; + + quota_lim->value = value; + + gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64, + quota_lim->path, quota_lim->value); + + if (old_list != NULL) { + list_for_each_entry (old, old_list, + limit_list) { + if (strcmp (old->path, quota_lim->path) + == 0) { + uuid_copy (quota_lim->gfid, + old->gfid); + break; + } + } + } + + LOCK (&priv->lock); + { + list_add_tail (&quota_lim->limit_list, + &priv->limit_head); + } + UNLOCK (&priv->lock); + + path = strtok_r (NULL, ",", &saveptr); + } + } else { + gf_log (this->name, GF_LOG_INFO, + "no \"limit-set\" option provided"); + } + + LOCK (&priv->lock); + { + list_for_each_entry (quota_lim, &priv->limit_head, limit_list) { + gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64, + quota_lim->path, quota_lim->value); + } + } + UNLOCK (&priv->lock); + + ret = 0; +err: + return ret; +} - STACK_WIND (frame, - quota_lookup_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - loc, - xattr_req); - return 0; - } -int32_t +int32_t init (xlator_t *this) { - int ret = 0; - data_t *data = NULL; - struct quota_priv *_private = NULL; + int32_t ret = -1; + quota_priv_t *priv = NULL; + + if ((this->children == NULL) + || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: quota (%s) not configured with " + "exactly one child", this->name); + return -1; + } - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "FATAL: quota should have exactly one child"); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } + if (this->parents == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile"); + } + + QUOTA_ALLOC_OR_GOTO (priv, quota_priv_t, err); + + INIT_LIST_HEAD (&priv->limit_head); + + LOCK_INIT (&priv->lock); + + this->private = priv; + + ret = quota_parse_limits (priv, this, this->options, NULL); + + if (ret) { + goto err; + } + + GF_OPTION_INIT ("timeout", priv->timeout, int64, err); + GF_OPTION_INIT ("deem-statfs", priv->consider_statfs, bool, err); + + this->local_pool = mem_pool_new (quota_local_t, 64); + if (!this->local_pool) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto err; + } + + ret = 0; +err: + return ret; +} + + +void +__quota_reconfigure_inode_ctx (xlator_t *this, inode_t *inode, limits_t *limit) +{ + int ret = -1; + quota_inode_ctx_t *ctx = NULL; + + GF_VALIDATE_OR_GOTO ("quota", this, out); + GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO (this->name, limit, out); + + ret = quota_inode_ctx_get (inode, limit->value, this, NULL, NULL, &ctx, + 1); + if ((ret == -1) || (ctx == NULL)) { + gf_log (this->name, GF_LOG_WARNING, "cannot create quota " + "context in inode(gfid:%s)", + uuid_utoa (inode->gfid)); + goto out; + } - _private = CALLOC (1, sizeof (struct quota_priv)); - _private->disk_usage_limit = 0; - data = dict_get (this->options, "disk-usage-limit"); - if (data) { - if (gf_string2bytesize (data->data, &_private->disk_usage_limit) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number '%s' for disk-usage limit", data->data); - ret = -1; - goto out; + LOCK (&ctx->lock); + { + ctx->limit = limit->value; + } + UNLOCK (&ctx->lock); + +out: + return; +} + + +void +__quota_reconfigure (xlator_t *this, inode_table_t *itable, limits_t *limit) +{ + inode_t *inode = NULL; + + if ((this == NULL) || (itable == NULL) || (limit == NULL)) { + goto out; + } + + if (!uuid_is_null (limit->gfid)) { + inode = inode_find (itable, limit->gfid); + } else { + inode = inode_resolve (itable, limit->path); + } + + if (inode != NULL) { + 
__quota_reconfigure_inode_ctx (this, inode, limit); + } + +out: + return; +} + + +int +reconfigure (xlator_t *this, dict_t *options) +{ + int32_t ret = -1; + quota_priv_t *priv = NULL; + limits_t *limit = NULL, *next = NULL, *new = NULL; + struct list_head head = {0, }; + xlator_t *top = NULL; + char found = 0; + + priv = this->private; + + INIT_LIST_HEAD (&head); + + LOCK (&priv->lock); + { + list_splice_init (&priv->limit_head, &head); + } + UNLOCK (&priv->lock); + + ret = quota_parse_limits (priv, this, options, &head); + if (ret == -1) { + gf_log ("quota", GF_LOG_WARNING, + "quota reconfigure failed, " + "new changes will not take effect"); + goto out; + } + + LOCK (&priv->lock); + { + top = ((glusterfs_ctx_t *)this->ctx)->active->top; + GF_ASSERT (top); + + list_for_each_entry (limit, &priv->limit_head, limit_list) { + __quota_reconfigure (this, top->itable, limit); } - LOCK_INIT (&_private->lock); - _private->current_disk_usage = 0; - } - - _private->min_free_disk_limit = 0; - data = dict_get (this->options, "min-free-disk-limit"); - if (data) { - if (gf_string2percent (data->data, &_private->min_free_disk_limit) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid percent '%s' for min-free-disk limit", data->data); - ret = -1; - goto out; + list_for_each_entry_safe (limit, next, &head, limit_list) { + found = 0; + list_for_each_entry (new, &priv->limit_head, + limit_list) { + if (strcmp (new->path, limit->path) == 0) { + found = 1; + break; + } + } + + if (!found) { + limit->value = -1; + __quota_reconfigure (this, top->itable, limit); + } + + list_del_init (&limit->limit_list); + GF_FREE (limit); } - _private->refresh_interval = 20; /* 20seconds is default */ - data = dict_get (this->options, "refresh-interval"); - if (data) { - if (gf_string2time (data->data, - &_private->refresh_interval)!= 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid time '%s' for refresh " - "interval", data->data); - ret = -1; - goto out; - } - } } + UNLOCK (&priv->lock); + + 
GF_OPTION_RECONF ("timeout", priv->timeout, options, int64, out); + GF_OPTION_RECONF ("deem-statfs", priv->consider_statfs, options, bool, + out); - _private->only_first_time = 1; - this->private = (void *)_private; - ret = 0; - out: - return ret; + ret = 0; +out: + return ret; } -void + +void fini (xlator_t *this) { - struct quota_priv *_private = this->private; - - if (_private) { - gf_quota_cache_sync (this); - this->private = NULL; - } - - return ; + return; } -struct xlator_fops fops = { - .create = quota_create, - .open = quota_open, - .lookup = quota_lookup, - .truncate = quota_truncate, - .ftruncate = quota_ftruncate, - .writev = quota_writev, - .unlink = quota_unlink, - .rmdir = quota_rmdir, - .mknod = quota_mknod, - .mkdir = quota_mkdir, - .symlink = quota_symlink, - .statfs = quota_statfs, -}; -struct xlator_mops mops = { +struct xlator_fops fops = { + .statfs = quota_statfs, + .lookup = quota_lookup, + .writev = quota_writev, + .create = quota_create, + .mkdir = quota_mkdir, + .truncate = quota_truncate, + .ftruncate = quota_ftruncate, + .unlink = quota_unlink, + .symlink = quota_symlink, + .link = quota_link, + .rename = quota_rename, + .getxattr = quota_getxattr, + .fgetxattr = quota_fgetxattr, + .stat = quota_stat, + .fstat = quota_fstat, + .readlink = quota_readlink, + .readv = quota_readv, + .fsync = quota_fsync, + .setattr = quota_setattr, + .fsetattr = quota_fsetattr, + .mknod = quota_mknod, + .setxattr = quota_setxattr, + .fsetxattr = quota_fsetxattr, + .removexattr = quota_removexattr, + .fremovexattr = quota_fremovexattr, + .readdirp = quota_readdirp, + .fallocate = quota_fallocate, }; struct xlator_cbks cbks = { - .release = quota_release + .forget = quota_forget }; struct volume_options options[] = { - { .key = {"min-free-disk-limit"}, - .type = GF_OPTION_TYPE_PERCENT - }, - { .key = {"refresh-interval"}, - .type = GF_OPTION_TYPE_TIME - }, - { .key = {"disk-usage-limit"}, - .type = GF_OPTION_TYPE_SIZET - }, - { .key = {NULL} }, + {.key = 
{"limit-set"}}, + {.key = {"timeout"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 60, + .default_value = "0", + .description = "quota caches the directory sizes on client. Timeout " + "indicates the timeout for the cache to be revalidated." + }, + {.key = {"deem-statfs"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "If set to on, it takes quota limits into" + "consideration while estimating fs size. (df command)" + " (Default is off)." + }, + {.key = {NULL}} }; diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h new file mode 100644 index 000000000..84ecbb308 --- /dev/null +++ b/xlators/features/quota/src/quota.h @@ -0,0 +1,151 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "call-stub.h" +#include "defaults.h" +#include "byte-order.h" +#include "common-utils.h" +#include "quota-mem-types.h" + +#define QUOTA_XATTR_PREFIX "trusted." +#define DIRTY "dirty" +#define SIZE "size" +#define CONTRIBUTION "contri" +#define VAL_LENGTH 8 +#define READDIR_BUF 4096 + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK (lock); \ + var ++; \ + UNLOCK (lock); \ + } while (0) + +#define QUOTA_SAFE_DECREMENT(lock, var) \ + do { \ + LOCK (lock); \ + var --; \ + UNLOCK (lock); \ + } while (0) + +#define QUOTA_ALLOC_OR_GOTO(var, type, label) \ + do { \ + var = GF_CALLOC (sizeof (type), 1, \ + gf_quota_mt_##type); \ + if (!var) { \ + gf_log ("", GF_LOG_ERROR, \ + "out of memory :("); \ + ret = -1; \ + goto label; \ + } \ + } while (0); + +#define QUOTA_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + quota_local_t *_local = NULL; \ + xlator_t *_this = NULL; \ + if (frame) { \ + _local = frame->local; \ + _this = frame->this; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT (fop, frame, params); \ + quota_local_cleanup (_this, _local); \ + } while (0) + +#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \ + do { \ + list_del (&_contribution->contri_list); \ + GF_FREE (_contribution); \ + } while (0) + +#define GET_CONTRI_KEY(var, _vol_name, _gfid, _ret) \ + do { \ + char _gfid_unparsed[40]; \ + uuid_unparse (_gfid, _gfid_unparsed); \ + _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \ + "%s.%s." CONTRIBUTION, \ + _vol_name, _gfid_unparsed); \ + } while (0) + + +#define GET_CONTRI_KEY_OR_GOTO(var, _vol_name, _gfid, label) \ + do { \ + GET_CONTRI_KEY(var, _vol_name, _gfid, ret); \ + if (ret == -1) \ + goto label; \ + } while (0) + +#define GET_DIRTY_KEY_OR_GOTO(var, _vol_name, label) \ + do { \ + ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \ + "%s." DIRTY, _vol_name); \ + if (ret == -1) \ + goto label; \ + } while (0) + +struct quota_dentry { + char *name; + uuid_t par; + struct list_head next; +}; +typedef struct quota_dentry quota_dentry_t; + +struct quota_inode_ctx { + int64_t size; + int64_t limit; + struct iatt buf; + struct list_head parents; + struct timeval tv; + gf_lock_t lock; +}; +typedef struct quota_inode_ctx quota_inode_ctx_t; + +struct quota_local { + gf_lock_t lock; + uint32_t validate_count; + uint32_t link_count; + loc_t loc; + loc_t oldloc; + loc_t newloc; + loc_t validate_loc; + int64_t delta; + int32_t op_ret; + int32_t op_errno; + int64_t size; + int64_t limit; + char just_validated; + inode_t *inode; + call_stub_t *stub; +}; +typedef struct quota_local quota_local_t; + +struct quota_priv { + int64_t timeout; + gf_boolean_t consider_statfs; + struct list_head limit_head; + gf_lock_t lock; +}; +typedef struct quota_priv quota_priv_t; + +struct limits { + struct list_head limit_list; + char *path; + int64_t value; + uuid_t gfid; 
+}; +typedef struct limits limits_t; + +uint64_t cn = 1; diff --git a/xlators/features/read-only/src/Makefile.am b/xlators/features/read-only/src/Makefile.am index 15f49966f..4c1462137 100644 --- a/xlators/features/read-only/src/Makefile.am +++ b/xlators/features/read-only/src/Makefile.am @@ -1,13 +1,22 @@ -xlator_LTLIBRARIES = read-only.la +xlator_LTLIBRARIES = read-only.la worm.la + xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -read_only_la_LDFLAGS = -module -avoidversion +noinst_HEADERS = read-only-common.h + +read_only_la_LDFLAGS = -module -avoid-version -read_only_la_SOURCES = read-only.c +read_only_la_SOURCES = read-only.c read-only-common.c read_only_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +worm_la_LDFLAGS = -module -avoid-version + +worm_la_SOURCES = read-only-common.c worm.c +worm_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/features/read-only/src/read-only-common.c b/xlators/features/read-only/src/read-only-common.c new file mode 100644 index 000000000..56a7a7176 --- /dev/null +++ b/xlators/features/read-only/src/read-only-common.c @@ -0,0 +1,239 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" + +int32_t +ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +{ + STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL, xdata); + return 0; +} + +int32_t +ro_fxattrop (call_frame_t *frame, xlator_t *this, + fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +{ + STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL, xdata); + return 0; +} + +int32_t +ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) +{ + STACK_UNWIND_STRICT (entrylk, frame, -1, EROFS, xdata); + return 0; +} + +int32_t +ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +{ + STACK_UNWIND_STRICT (fentrylk, frame, -1, EROFS, xdata); + return 0; +} + +int32_t +ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +{ + STACK_UNWIND_STRICT (inodelk, frame, -1, EROFS, xdata); + return 0; +} + +int32_t +ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +{ + STACK_UNWIND_STRICT (finodelk, frame, -1, EROFS, xdata); + return 0; +} + +int32_t +ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata) +{ + STACK_UNWIND_STRICT (lk, frame, -1, EROFS, NULL, xdata); + return 0; +} + +int32_t +ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL, xdata); + return 0; +} + +int32_t +ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t 
*xdata) +{ + STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL, xdata); + return 0; +} + + +int32_t +ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) +{ + STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL, xdata); + return 0; +} + +int32_t +ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) +{ + STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL, xdata); + return 0; +} + +int +ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata); + return 0; +} + + +int +ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata); + return 0; +} + +int32_t +ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL, xdata); + return 0; +} + + +int +ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) +{ + STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL, xdata); + return 0; +} + + +int +ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, NULL, + NULL, xdata); + return 0; +} + + + +int32_t +ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ + STACK_UNWIND_STRICT (rename, frame, -1, EROFS, NULL, NULL, NULL, NULL, + NULL, xdata); + return 0; +} + + +int32_t +ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ + STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata); + return 0; +} + +int32_t +ro_create (call_frame_t *frame, xlator_t *this, loc_t 
*loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + STACK_UNWIND_STRICT (create, frame, -1, EROFS, NULL, NULL, NULL, + NULL, NULL, xdata); + return 0; +} + + +static int32_t +ro_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + if (((flags & O_ACCMODE) == O_WRONLY) || + ((flags & O_ACCMODE) == O_RDWR)) { + STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, xdata); + return 0; + } + + STACK_WIND (frame, ro_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int32_t +ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS, xdata); + return 0; +} + +int32_t +ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata) +{ + STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS, xdata); + return 0; +} + +int32_t +ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata) +{ + STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL, xdata); + return 0; +} + + +int32_t +ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS, xdata); + return 0; +} + +int32_t +ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS, xdata); + return 0; +} diff --git a/xlators/features/read-only/src/read-only-common.h b/xlators/features/read-only/src/read-only-common.h new file mode 100644 index 
000000000..5d4c7e260 --- /dev/null +++ b/xlators/features/read-only/src/read-only-common.h @@ -0,0 +1,115 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" + +int32_t +ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); + +int32_t +ro_fxattrop (call_frame_t *frame, xlator_t *this, + fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); + +int32_t +ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata); + +int32_t +ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type + type, dict_t *xdata); + +int32_t +ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int32_t +ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int32_t +ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata); + +int32_t +ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + +int32_t +ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + + +int32_t +ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata); + +int32_t +ro_ftruncate (call_frame_t 
*frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata); + +int +ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata); + +int +ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata); + +int32_t +ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata); + +int +ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata); + + +int +ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata); + +int32_t +ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata); + +int32_t +ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata); + +int32_t +ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); + +int32_t +ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata); + +int32_t +ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata); + +int32_t +ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata); + +int32_t +ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata); + +int32_t +ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata); + +int32_t +ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata); diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c index 08412bf04..e49e54a1b 100644 --- a/xlators/features/read-only/src/read-only.c +++ b/xlators/features/read-only/src/read-only.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2008-2009 
Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -24,223 +14,7 @@ #include "xlator.h" #include "defaults.h" - -int32_t -ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t flags, dict_t *dict) -{ - STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL); - return 0; -} - -int32_t -ro_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict) -{ - STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL); - return 0; -} - -int32_t -ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, const char *basename, entrylk_cmd cmd, - entrylk_type type) -{ - STACK_UNWIND_STRICT (entrylk, frame, -1, EROFS); - return 0; -} - -int32_t -ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type) -{ - STACK_UNWIND_STRICT (fentrylk, frame, -1, EROFS); - return 0; -} - -int32_t -ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, int32_t cmd, struct flock *lock) -{ - STACK_UNWIND_STRICT (inodelk, frame, -1, EROFS); - return 0; -} - -int32_t -ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, int32_t cmd, struct flock *lock) -{ - STACK_UNWIND_STRICT (finodelk, frame, -1, EROFS); - return 0; -} - -int32_t -ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, - struct flock *flock) -{ - STACK_UNWIND_STRICT (lk, frame, -1, EROFS, NULL); - return 0; -} - -int32_t -ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) -{ - STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL); - return 0; -} - -int32_t -ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid) -{ - STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL); - return 0; -} - - -int32_t -ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) -{ - 
STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL); - return 0; -} - -int32_t -ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) -{ - STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL); - return 0; -} - -int32_t -ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev) -{ - STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL); - return 0; -} - - -int32_t -ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode) -{ - STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL); - return 0; -} - -int32_t -ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL); - return 0; -} - -int32_t -ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL); - return 0; -} - -int32_t -ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc) -{ - STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, NULL, NULL); - return 0; -} - - - -int32_t -ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) -{ - STACK_UNWIND_STRICT (rename, frame, -1, EROFS, NULL, NULL, NULL, NULL, - NULL); - return 0; -} - - -int32_t -ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) -{ - STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, NULL); - return 0; -} - -int32_t -ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd) -{ - STACK_UNWIND_STRICT (create, frame, -1, EROFS, NULL, NULL, NULL, - NULL, NULL); - return 0; -} - - -static int32_t -ro_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd) -{ - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); - return 0; -} - -int32_t -ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, 
int32_t wbflags) -{ - if (((flags & O_ACCMODE) == O_WRONLY) || - ((flags & O_ACCMODE) == O_RDWR)) { - STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL); - return 0; - } - - STACK_WIND (frame, ro_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags); - return 0; -} - -int32_t -ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags) -{ - STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS); - return 0; -} - -int32_t -ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) -{ - STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS); - return 0; -} - -int32_t -ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t off, struct iobref *iobref) -{ - STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL); - return 0; -} - - -int32_t -ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags) -{ - STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS); - return 0; -} - -int32_t -ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) -{ - STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS); - return 0; -} +#include "read-only-common.h" int32_t init (xlator_t *this) @@ -295,9 +69,6 @@ struct xlator_fops fops = { .lk = ro_lk, }; -struct xlator_mops mops = { -}; - struct xlator_cbks cbks = { }; diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c new file mode 100644 index 000000000..16c3eb3da --- /dev/null +++ b/xlators/features/read-only/src/worm.c @@ -0,0 +1,89 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "defaults.h" +#include "read-only-common.h" + +static int32_t +worm_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +worm_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + if ((((flags & O_ACCMODE) == O_WRONLY) || + ((flags & O_ACCMODE) == O_RDWR)) && + !(flags & O_APPEND)) { + STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, NULL); + return 0; + } + + STACK_WIND (frame, worm_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int32_t +init (xlator_t *this) +{ + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "translator not configured with exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + return 0; +} + + +void +fini (xlator_t *this) +{ + return; +} + +struct xlator_fops fops = { + .open = worm_open, + + .unlink = ro_unlink, + .rmdir = ro_rmdir, + .rename = ro_rename, + .truncate = ro_truncate, + .removexattr = ro_removexattr, + .fsyncdir = ro_fsyncdir, + .xattrop = ro_xattrop, + .inodelk = ro_inodelk, + .finodelk = ro_finodelk, + .entrylk = ro_entrylk, + .fentrylk = ro_fentrylk, + .lk = ro_lk, +}; + +struct xlator_cbks cbks; + +struct volume_options options[] = { + { .key = {NULL} }, +}; + diff --git a/xlators/features/trash/src/Makefile.am b/xlators/features/trash/src/Makefile.am index 15998a56e..5251eb082 100644 --- a/xlators/features/trash/src/Makefile.am +++ b/xlators/features/trash/src/Makefile.am @@ -1,15 +1,16 @@ xlator_LTLIBRARIES = trash.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features -trash_la_LDFLAGS = -module -avoidversion +trash_la_LDFLAGS = -module -avoid-version trash_la_SOURCES = trash.c trash_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = trash.h +noinst_HEADERS = trash.h trash-mem-types.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/features/trash/src/trash-mem-types.h b/xlators/features/trash/src/trash-mem-types.h new file mode 100644 index 000000000..0e6ef572f --- /dev/null +++ b/xlators/features/trash/src/trash-mem-types.h @@ -0,0 +1,22 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef __TRASH_MEM_TYPES_H__ +#define __TRASH_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_trash_mem_types_ { + gf_trash_mt_trash_private_t = gf_common_mt_end + 1, + gf_trash_mt_char, + gf_trash_mt_trash_elim_pattern_t, + gf_trash_mt_end +}; +#endif + diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c index 5ee3da2c8..addeb66a0 100644 --- a/xlators/features/trash/src/trash.c +++ b/xlators/features/trash/src/trash.c @@ -1,29 +1,19 @@ /* - Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" #endif #include "trash.h" - +#include "trash-mem-types.h" int32_t trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -63,7 +53,7 @@ trash_local_wipe (trash_local_t *local) if (local->newfd) fd_unref (local->newfd); - FREE (local); + mem_put (local); out: return; } @@ -73,7 +63,7 @@ trash_common_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent) { - TRASH_STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent); + TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, postparent); return 0; } @@ -94,9 +84,10 @@ trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loc_t tmp_loc = {0,}; local = frame->local; - tmp_str = strdup (local->newpath); + tmp_str = gf_strdup (local->newpath); if (!tmp_str) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); + gf_log (this->name, GF_LOG_ERROR, "out of memory"); + goto out; } loop_count = local->loop_count; @@ -113,7 +104,8 @@ trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } tmp_path = memdup (local->newpath, count); if (!tmp_path) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); + gf_log (this->name, GF_LOG_ERROR, "out of memory"); + goto out; } tmp_loc.path = tmp_path; @@ -122,7 +114,7 @@ trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path, this->children->xlator, this->children->xlator->fops->mkdir, - &tmp_loc, 0755); + &tmp_loc, 0755, NULL); goto out; } @@ -156,18 +148,19 @@ trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } tmp_path = memdup (local->newpath, count); if (!tmp_path) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); + gf_log (this->name, GF_LOG_ERROR, "out of memory"); + goto out; } tmp_loc.path = tmp_path; STACK_WIND_COOKIE (frame, 
trash_unlink_mkdir_cbk, tmp_path, this->children->xlator, this->children->xlator->fops->mkdir, - &tmp_loc, 0755); + &tmp_loc, 0755, NULL); out: - free (cookie); - free (tmp_str); + GF_FREE (cookie); + GF_FREE (tmp_str); return 0; } @@ -185,17 +178,15 @@ trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *prenewparent, struct iatt *postnewparent) { trash_local_t *local = NULL; - trash_private_t *priv = NULL; char *tmp_str = NULL; char *dir_name = NULL; char *tmp_cookie = NULL; loc_t tmp_loc = {0,}; - priv = this->private; local = frame->local; if ((op_ret == -1) && (op_errno == ENOENT)) { - tmp_str = strdup (local->newpath); + tmp_str = gf_strdup (local->newpath); if (!tmp_str) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); } @@ -203,7 +194,7 @@ trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, tmp_loc.path = dir_name; - tmp_cookie = strdup (dir_name); + tmp_cookie = gf_strdup (dir_name); if (!tmp_cookie) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); } @@ -211,9 +202,9 @@ trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_cookie, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, - &tmp_loc, 0755); + &tmp_loc, 0755, NULL); - free (tmp_str); + GF_FREE (tmp_str); return 0; } @@ -243,7 +234,7 @@ trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } /* All other cases, unlink should return success */ - TRASH_STACK_UNWIND (frame, 0, op_errno, &local->preparent, + TRASH_STACK_UNWIND (unlink, frame, 0, op_errno, &local->preparent, &local->postparent); return 0; @@ -256,7 +247,7 @@ trash_common_unwind_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf) { - TRASH_STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf); + TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf); return 0; } @@ -266,7 
+257,7 @@ trash_common_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *preoldparent, struct iatt *postoldparent, struct iatt *prenewparent, struct iatt *postnewparent) { - TRASH_STACK_UNWIND (frame, op_ret, op_errno, stbuf, preoldparent, + TRASH_STACK_UNWIND (rename, frame, op_ret, op_errno, stbuf, preoldparent, postoldparent, prenewparent, postnewparent); return 0; } @@ -316,8 +307,8 @@ trash_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; fail: - TRASH_STACK_UNWIND (frame, op_ret, op_errno, buf, - NULL, NULL, NULL, NULL); + TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, buf, + NULL); return 0; @@ -337,7 +328,7 @@ trash_rename_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; if ((op_ret == -1) && (op_errno == ENOENT)) { - tmp_str = strdup (local->newpath); + tmp_str = gf_strdup (local->newpath); if (!tmp_str) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); } @@ -346,7 +337,7 @@ trash_rename_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* check for the errno, if its ENOENT create directory and call * rename later */ - tmp_path = strdup (dir_name); + tmp_path = gf_strdup (dir_name); if (!tmp_path) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); } @@ -356,9 +347,9 @@ trash_rename_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_rename_mkdir_cbk, tmp_path, this->children->xlator, this->children->xlator->fops->mkdir, - &tmp_loc, 0755); + &tmp_loc, 0755, NULL); - free (tmp_str); + GF_FREE (tmp_str); return 0; } @@ -396,9 +387,10 @@ trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loc_t tmp_loc = {0,}; local = frame->local; - tmp_str = strdup (local->newpath); + tmp_str = gf_strdup (local->newpath); if (!tmp_str) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); + goto out; } if ((op_ret == -1) && (op_errno == ENOENT)) { @@ -421,7 +413,7 @@ trash_rename_mkdir_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_rename_mkdir_cbk, tmp_path, this->children->xlator, this->children->xlator->fops->mkdir, - &tmp_loc, 0755); + &tmp_loc, 0755, NULL); } goto out; @@ -438,8 +430,8 @@ trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } out: - free (cookie); /* strdup (dir_name) was sent here :) */ - free (tmp_str); + GF_FREE (cookie); /* strdup (dir_name) was sent here :) */ + GF_FREE (tmp_str); return 0; } @@ -500,9 +492,7 @@ trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, trash_elim_pattern_t *trav = NULL; trash_private_t *priv = NULL; trash_local_t *local = NULL; - struct tm *tm = NULL; - char timestr[256] = {0,}; - time_t utime = 0; + char timestr[64] = {0,}; int32_t match = 0; priv = this->private; @@ -529,10 +519,10 @@ trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, return 0; } - local = CALLOC (1, sizeof (trash_local_t)); + local = mem_get0 (this->local_pool); if (!local) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); - TRASH_STACK_UNWIND (frame, -1, ENOMEM, + TRASH_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); return 0; } @@ -549,9 +539,8 @@ trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, { /* append timestamp to file name */ /* TODO: can we make it optional? 
*/ - utime = time (NULL); - tm = localtime (&utime); - strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm); + gf_time_ftm (timestr, sizeof timestr, time (NULL), + gf_timefmt_F_HMS); strcat (local->newpath, timestr); } @@ -570,9 +559,7 @@ trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) trash_elim_pattern_t *trav = NULL; trash_private_t *priv = NULL; trash_local_t *local = NULL; - struct tm *tm = NULL; - char timestr[256] = {0,}; - time_t utime = 0; + char timestr[64] = {0,}; int32_t match = 0; priv = this->private; @@ -605,10 +592,10 @@ trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) return 0; } - local = CALLOC (1, sizeof (trash_local_t)); + local = mem_get0 (this->local_pool); if (!local) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - TRASH_STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL); + TRASH_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL); return 0; } frame->local = local; @@ -621,9 +608,8 @@ trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) { /* append timestamp to file name */ /* TODO: can we make it optional? 
*/ - utime = time (NULL); - tm = localtime (&utime); - strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm); + gf_time_fmt (timestr, sizeof timestr, time (NULL), + gf_timefmt_F_HMS); strcat (local->newpath, timestr); } @@ -684,7 +670,7 @@ trash_truncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->fsize = stbuf->ia_size; STACK_WIND (frame, trash_truncate_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, - local->newfd, vector, count, local->cur_offset, iobuf); + local->newfd, vector, count, local->cur_offset, 0, iobuf); out: return 0; @@ -717,7 +703,7 @@ trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND (frame, trash_truncate_readv_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, local->fd, (size_t)GF_BLOCK_READV_SIZE, - local->cur_offset); + local->cur_offset, 0); goto out; } @@ -757,7 +743,7 @@ trash_truncate_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND (frame, trash_truncate_readv_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, - local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset); + local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0); out: return 0; @@ -781,13 +767,13 @@ trash_truncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if ((op_ret == -1) && (op_errno == ENOENT)) { //Creating the directory structure here. 
- tmp_str = strdup (local->newpath); + tmp_str = gf_strdup (local->newpath); if (!tmp_str) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); } dir_name = dirname (tmp_str); - tmp_path = strdup (dir_name); + tmp_path = gf_strdup (dir_name); if (!tmp_path) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); } @@ -797,8 +783,8 @@ trash_truncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, tmp_path, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, - &tmp_loc, 0755); - free (tmp_str); + &tmp_loc, 0755, NULL); + GF_FREE (tmp_str); goto out; } @@ -846,13 +832,14 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; if (!local) - return 0; + goto out; loop_count = local->loop_count; - tmp_str = strdup (local->newpath); + tmp_str = gf_strdup (local->newpath); if (!tmp_str) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); + goto out; } if ((op_ret == -1) && (op_errno == ENOENT)) { @@ -874,7 +861,7 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, tmp_path, this->children->xlator, this->children->xlator->fops->mkdir, - &tmp_loc, 0755); + &tmp_loc, 0755, NULL); goto out; } @@ -890,7 +877,7 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, &local->newloc, flags, st_mode_from_ia (prot, local->loc.inode->ia_type), - local->newfd); + local->newfd, NULL); goto out; } } @@ -900,6 +887,7 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loop_count = ++local->loop_count; } UNLOCK (&frame->lock); + tmp_dirname = strchr (tmp_str, '/'); while (tmp_dirname) { count = tmp_dirname - tmp_str; @@ -920,11 +908,11 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, tmp_path, 
this->children->xlator, this->children->xlator->fops->mkdir, - &tmp_loc, 0755); + &tmp_loc, 0755, NULL); out: - free (cookie); /* strdup (dir_name) was sent here :) */ - free (tmp_str); + GF_FREE (cookie); /* strdup (dir_name) was sent here :) */ + GF_FREE (tmp_str); return 0; } @@ -936,10 +924,8 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { trash_private_t *priv = NULL; trash_local_t *local = NULL; - struct tm *tm = NULL; - char timestr[256] = {0,}; + char timestr[64] = {0,}; char loc_newname[PATH_MAX] = {0,}; - time_t utime = 0; int32_t flags = 0; priv = this->private; @@ -950,7 +936,7 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, "fstat on the file failed: %s", strerror (op_errno)); - TRASH_STACK_UNWIND (frame, op_ret, op_errno, buf); + TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, buf, NULL); return 0; } @@ -971,18 +957,16 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, strcat (local->newpath, local->loc.path); { - utime = time (NULL); - tm = localtime (&utime); - strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm); + gf_time_fmt (timestr, sizeof timestr, time (NULL), + gf_timefmt_F_HMS); strcat (local->newpath, timestr); } strcpy (loc_newname,local->loc.name); strcat (loc_newname,timestr); - local->newloc.name = strdup (loc_newname); - local->newloc.path = strdup (local->newpath); + local->newloc.name = gf_strdup (loc_newname); + local->newloc.path = gf_strdup (local->newpath); local->newloc.inode = inode_new (local->loc.inode->table); - local->newloc.ino = local->newloc.inode->ino; local->newfd = fd_create (local->newloc.inode, frame->root->pid); flags = O_CREAT|O_EXCL|O_WRONLY; @@ -992,7 +976,7 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, FIRST_CHILD(this)->fops->create, &local->newloc, flags, st_mode_from_ia (buf->ia_prot, local->loc.inode->ia_type), - local->newfd); + local->newfd, NULL); return 0; } @@ -1036,10 +1020,10 
@@ trash_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, LOCK_INIT (&frame->lock); - local = CALLOC (1, sizeof (trash_local_t)); + local = mem_get0 (this->local_pool); if (!local) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - TRASH_STACK_UNWIND (frame, -1, ENOMEM, NULL); + TRASH_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL); return 0; } @@ -1101,7 +1085,7 @@ trash_ftruncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND (frame, trash_ftruncate_readv_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, local->fd, (size_t)GF_BLOCK_READV_SIZE, - local->cur_offset); + local->cur_offset, 0); return 0; } @@ -1133,7 +1117,7 @@ trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND (frame, trash_ftruncate_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, - local->newfd, vector, count, local->cur_offset, NULL); + local->newfd, vector, count, local->cur_offset, 0, NULL); return 0; } @@ -1154,13 +1138,13 @@ trash_ftruncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; if ((op_ret == -1) && (op_errno == ENOENT)) { - tmp_str = strdup (local->newpath); + tmp_str = gf_strdup (local->newpath); if (!tmp_str) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); } dir_name = dirname (tmp_str); - tmp_path = strdup (dir_name); + tmp_path = gf_strdup (dir_name); if (!tmp_path) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); } @@ -1170,8 +1154,8 @@ trash_ftruncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, tmp_path, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, - &tmp_loc, 0755); - free (tmp_str); + &tmp_loc, 0755, NULL); + GF_FREE (tmp_str); return 0; } @@ -1185,7 +1169,7 @@ trash_ftruncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND (frame, trash_ftruncate_readv_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, local->fd, - 
(size_t)GF_BLOCK_READV_SIZE, local->cur_offset); + (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0); return 0; } @@ -1210,13 +1194,14 @@ trash_ftruncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; if (!local) - return 0; + goto out; loop_count = local->loop_count; - tmp_str = strdup (local->newpath); + tmp_str = gf_strdup (local->newpath); if (!tmp_str) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); + goto out; } if ((op_ret == -1) && (op_errno == ENOENT)) { @@ -1238,7 +1223,7 @@ trash_ftruncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_ftruncate_mkdir_cbk, tmp_path, this->children->xlator, this->children->xlator->fops->mkdir, - &tmp_loc, 0755); + &tmp_loc, 0755, NULL); goto out; } @@ -1255,7 +1240,7 @@ trash_ftruncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, FIRST_CHILD(this)->fops->create, &local->newloc, flags, st_mode_from_ia (prot, local->loc.inode->ia_type), - local->newfd); + local->newfd, NULL); goto out; } } @@ -1285,11 +1270,11 @@ trash_ftruncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_WIND_COOKIE (frame, trash_ftruncate_mkdir_cbk, tmp_path, this->children->xlator, this->children->xlator->fops->mkdir, - &tmp_loc, 0755); + &tmp_loc, 0755, NULL); out: - free (cookie); /* strdup (dir_name) was sent here :) */ - free (tmp_str); + GF_FREE (cookie); /* strdup (dir_name) was sent here :) */ + GF_FREE (tmp_str); return 0; } @@ -1309,7 +1294,7 @@ trash_ftruncate_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, gf_log (this->name, GF_LOG_DEBUG, "%s: %s",local->newloc.path, strerror(op_errno)); - TRASH_STACK_UNWIND (frame, -1, op_errno, buf, NULL); + TRASH_STACK_UNWIND (ftruncate, frame, -1, op_errno, buf, NULL); return 0; } if ((buf->ia_size == 0) || (buf->ia_size > priv->max_trash_file_size)) @@ -1326,7 +1311,7 @@ trash_ftruncate_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, 
FIRST_CHILD(this)->fops->create, &local->newloc, ( O_CREAT | O_EXCL | O_WRONLY ), st_mode_from_ia (buf->ia_prot, local->loc.inode->ia_type), - local->newfd); + local->newfd, NULL); return 0; } @@ -1338,11 +1323,9 @@ trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) trash_private_t *priv = NULL; trash_local_t *local = NULL; dentry_t *dir_entry = NULL; - struct tm *tm = NULL; char *pathbuf = NULL; inode_t *newinode = NULL; - time_t utime = 0; - char timestr[256]; + char timestr[64]; int32_t retval = 0; int32_t match = 0; @@ -1374,17 +1357,14 @@ trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) return 0; } - local = CALLOC (1, sizeof (trash_local_t)); + local = mem_get0 (this->local_pool); if (!local) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - TRASH_STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL); + TRASH_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL); return 0; } - utime = time (NULL); - tm = localtime (&utime); - strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm); - + gf_time_fmt (timestr, sizeof timestr, time (NULL), gf_timefmt_F_HMS); strcpy (local->newpath, priv->trash_dir); strcat (local->newpath, pathbuf); strcat (local->newpath, timestr); @@ -1398,7 +1378,6 @@ trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) local->newloc.path = local->newpath; local->loc.inode = inode_ref (fd->inode); - local->loc.ino = fd->inode->ino; local->loc.path = pathbuf; local->fop_offset = offset; @@ -1416,7 +1395,6 @@ trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) int32_t init (xlator_t *this) { - int32_t ret = 0; data_t *data = NULL; trash_private_t *_priv = NULL; trash_elim_pattern_t *trav = NULL; @@ -1438,7 +1416,7 @@ init (xlator_t *this) "dangling volume. 
check volfile "); } - _priv = CALLOC (1, sizeof (*_priv)); + _priv = GF_CALLOC (1, sizeof (*_priv), gf_trash_mt_trash_private_t); if (!_priv) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); return -1; @@ -1446,20 +1424,20 @@ init (xlator_t *this) data = dict_get (this->options, "trash-dir"); if (!data) { - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_INFO, "no option specified for 'trash-dir', " "using \"/.trashcan/\""); - _priv->trash_dir = strdup ("/.trashcan"); + _priv->trash_dir = gf_strdup ("/.trashcan"); } else { /* Need a path with '/' as the first char, if not given, append it */ if (data->data[0] == '/') { - _priv->trash_dir = strdup (data->data); + _priv->trash_dir = gf_strdup (data->data); } else { /* TODO: Make sure there is no ".." in the path */ strcpy (trash_dir, "/"); strcat (trash_dir, data->data); - _priv->trash_dir = strdup (trash_dir); + _priv->trash_dir = gf_strdup (trash_dir); } } @@ -1468,7 +1446,7 @@ init (xlator_t *this) gf_log (this->name, GF_LOG_TRACE, "no option specified for 'eliminate', using NULL"); } else { - tmp_str = strdup (data->data); + tmp_str = gf_strdup (data->data); if (!tmp_str) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); } @@ -1476,9 +1454,11 @@ init (xlator_t *this) /* Match Filename to option specified in eliminate. 
*/ component = strtok_r (tmp_str, "|", &strtokptr); while (component) { - trav = CALLOC (1, sizeof (*trav)); + trav = GF_CALLOC (1, sizeof (*trav), + gf_trash_mt_trash_elim_pattern_t); if (!trav) { gf_log (this->name, GF_LOG_DEBUG, "out of memory"); + break; } trav->pattern = component; trav->next = _priv->eliminate; @@ -1497,7 +1477,7 @@ init (xlator_t *this) GF_DEFAULT_MAX_FILE_SIZE / GF_UNIT_MB); _priv->max_trash_file_size = GF_DEFAULT_MAX_FILE_SIZE; } else { - ret = gf_string2bytesize (data->data, + (void)gf_string2bytesize (data->data, &max_trash_file_size64); if( max_trash_file_size64 > GF_ALLOWED_MAX_FILE_SIZE ) { gf_log (this->name, GF_LOG_DEBUG, @@ -1510,6 +1490,14 @@ init (xlator_t *this) _priv->max_trash_file_size); } + this->local_pool = mem_pool_new (trash_local_t, 64); + if (!this->local_pool) { + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + return -1; + } + + this->private = (void *)_priv; return 0; } @@ -1520,8 +1508,7 @@ fini (xlator_t *this) trash_private_t *priv = NULL; priv = this->private; - if (priv) - FREE (priv); + GF_FREE (priv); return; } @@ -1533,9 +1520,6 @@ struct xlator_fops fops = { .ftruncate = trash_ftruncate, }; -struct xlator_mops mops = { -}; - struct xlator_cbks cbks = { }; diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h index e1a1c314d..9a7c03361 100644 --- a/xlators/features/trash/src/trash.h +++ b/xlators/features/trash/src/trash.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. 
- - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - #ifndef __TRASH_H__ #define __TRASH_H__ @@ -59,8 +49,8 @@ struct trash_struct { char origpath[PATH_MAX]; char newpath[PATH_MAX]; int32_t loop_count; - struct stat preparent; - struct stat postparent; + struct iatt preparent; + struct iatt postparent; }; typedef struct trash_struct trash_local_t; @@ -77,11 +67,11 @@ struct trash_priv { }; typedef struct trash_priv trash_private_t; -#define TRASH_STACK_UNWIND(frame, params ...) do { \ +#define TRASH_STACK_UNWIND(op, frame, params ...) do { \ trash_local_t *__local = NULL; \ __local = frame->local; \ frame->local = NULL; \ - STACK_UNWIND (frame, params); \ + STACK_UNWIND_STRICT (op, frame, params); \ trash_local_wipe (__local); \ } while (0) |
