diff options
Diffstat (limited to 'xlators/features/marker')
29 files changed, 5284 insertions, 7782 deletions
diff --git a/xlators/features/marker/Makefile.am b/xlators/features/marker/Makefile.am index a6ba2de16ae..a985f42a877 100644 --- a/xlators/features/marker/Makefile.am +++ b/xlators/features/marker/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = src @SYNCDAEMON_SUBDIR@ +SUBDIRS = src CLEANFILES = diff --git a/xlators/features/marker/src/Makefile.am b/xlators/features/marker/src/Makefile.am index 501586a76b6..58056b36511 100644 --- a/xlators/features/marker/src/Makefile.am +++ b/xlators/features/marker/src/Makefile.am @@ -1,15 +1,24 @@ +if WITH_SERVER xlator_LTLIBRARIES = marker.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -marker_la_LDFLAGS = -module -avoidversion +marker_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c \ + marker-common.c -marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c marker-common.c marker_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h marker-quota-helper.h marker-common.h $(top_builddir)/xlators/lib/src/libxlator.h +noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h \ + marker-quota-helper.h marker-common.h \ + $(top_builddir)/xlators/lib/src/libxlator.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/xlators/lib/src -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src $(GF_CFLAGS) -shared -nostartfiles +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c index a413781bc0c..9c9047005d6 100644 --- a/xlators/features/marker/src/marker-common.c +++ b/xlators/features/marker/src/marker-common.c @@ -1,86 +1,57 @@ -/*Copyright (c) 
2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif #include <fnmatch.h> #include "marker-common.h" marker_inode_ctx_t * -marker_inode_ctx_new () +marker_inode_ctx_new() { - marker_inode_ctx_t *ctx = NULL; + marker_inode_ctx_t *ctx = NULL; - ctx = GF_CALLOC (1, sizeof (marker_inode_ctx_t), - gf_marker_mt_marker_inode_ctx_t); - if (ctx == NULL) - goto out; + ctx = GF_CALLOC(1, sizeof(marker_inode_ctx_t), + gf_marker_mt_marker_inode_ctx_t); + if (ctx == NULL) + goto out; - ctx->quota_ctx = NULL; + ctx->quota_ctx = NULL; out: - return ctx; + return ctx; } int32_t -marker_force_inode_ctx_get (inode_t *inode, xlator_t *this, - marker_inode_ctx_t **ctx) +marker_force_inode_ctx_get(inode_t *inode, xlator_t *this, + marker_inode_ctx_t **ctx) { - int32_t ret = -1; - uint64_t ctx_int = 0; + int32_t ret = -1; + uint64_t ctx_int = 0; - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx_int); - if (ret == 0) - *ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int; - else { - *ctx = marker_inode_ctx_new (); - if (*ctx == NULL) - goto unlock; - - ret = __inode_ctx_put (inode, this, - (uint64_t )(unsigned long) *ctx); - if (ret == -1) { - GF_FREE (*ctx); - goto unlock; - } - ret = 0; - } + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &ctx_int); + if (ret == 0) + *ctx = (marker_inode_ctx_t *)(unsigned long)ctx_int; + else { + *ctx = marker_inode_ctx_new(); + if (*ctx == NULL) + goto unlock; + + ret = __inode_ctx_put(inode, this, (uint64_t)(unsigned long)*ctx); + if (ret == -1) { + GF_FREE(*ctx); + goto unlock; + } + ret = 0; } -unlock: UNLOCK (&inode->lock); + } +unlock: + UNLOCK(&inode->lock); - return ret; -} - -void -marker_filter_quota_xattr (dict_t *dict, char *key, - data_t *value, void *data) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("marker", dict, out); - GF_VALIDATE_OR_GOTO ("marker", key, out); - - ret = fnmatch ("trusted.glusterfs.quota*", key, 0); - if (ret == 0) - dict_del (dict, key); -out: - return; + return ret; 
} diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h index 0a7ee261948..7f8cffe7d35 100644 --- a/xlators/features/marker/src/marker-common.h +++ b/xlators/features/marker/src/marker-common.h @@ -1,36 +1,19 @@ -/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - #ifndef _MARKER_COMMON_H #define _MARKER_COMMON_H -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "inode.h" -#include "xlator.h" +#include <glusterfs/xlator.h> #include "marker.h" int32_t -marker_force_inode_ctx_get (inode_t *, xlator_t *, marker_inode_ctx_t **); +marker_force_inode_ctx_get(inode_t *, xlator_t *, marker_inode_ctx_t **); -void -marker_filter_quota_xattr (dict_t *, char *, data_t *, void *); #endif diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h index f2723dc26e9..aedfdb4a1b7 100644 --- a/xlators/features/marker/src/marker-mem-types.h +++ b/xlators/features/marker/src/marker-mem-types.h @@ -1,37 +1,28 @@ /* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - #ifndef __MARKER_MEM_TYPES_H__ #define __MARKER_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_marker_mem_types_ { - gf_marker_mt_marker_local_t = gf_common_mt_end + 1, - gf_marker_mt_marker_conf_t, - gf_marker_mt_loc_t, - gf_marker_mt_volume_mark, - gf_marker_mt_int64_t, - gf_marker_mt_quota_inode_ctx_t, - gf_marker_mt_marker_inode_ctx_t, - gf_marker_mt_quota_local_t, - gf_marker_mt_inode_contribution_t, - gf_marker_mt_end + /* Those are used by ALLOCATE_OR_GOTO macro */ + gf_marker_mt_marker_conf_t = gf_common_mt_end + 1, + gf_marker_mt_loc_t, + gf_marker_mt_volume_mark, + gf_marker_mt_int64_t, + gf_marker_mt_quota_inode_ctx_t, + gf_marker_mt_marker_inode_ctx_t, + gf_marker_mt_inode_contribution_t, + gf_marker_mt_quota_meta_t, + gf_marker_mt_quota_synctask_t, + gf_marker_mt_end }; #endif diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c index 03a8d78ba16..ecd85d67b2b 100644 --- a/xlators/features/marker/src/marker-quota-helper.c +++ b/xlators/features/marker/src/marker-quota-helper.c @@ -1,416 +1,380 @@ -/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "locking.h" +#include <glusterfs/locking.h> #include "marker-quota.h" #include "marker-common.h" #include "marker-quota-helper.h" #include "marker-mem-types.h" int -mq_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path) +mq_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", inode, out); - GF_VALIDATE_OR_GOTO ("marker", path, out); - /* Not checking for parent because while filling - * loc of root, parent will be NULL - */ + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", inode, out); + GF_VALIDATE_OR_GOTO("marker", path, out); + /* Not checking for parent because while filling + * loc of root, parent will be NULL + */ - if (inode) { - loc->inode = inode_ref (inode); - } + if (inode) { + loc->inode = inode_ref(inode); + } - if (parent) - loc->parent = inode_ref (parent); + if (parent) + loc->parent = inode_ref(parent); - loc->path = gf_strdup (path); - if (!loc->path) { - gf_log ("loc fill", GF_LOG_ERROR, "strdup failed"); - goto loc_wipe; - } + if (!gf_uuid_is_null(inode->gfid)) + gf_uuid_copy(loc->gfid, inode->gfid); - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; - else - goto loc_wipe; + loc->path = gf_strdup(path); + if (!loc->path) { + gf_log("loc fill", GF_LOG_ERROR, "strdup failed"); + goto out; + } + + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + else + goto out; + + ret = 0; - ret = 0; -loc_wipe: - if (ret < 0) - loc_wipe (loc); out: - return ret; -} + if (ret < 0) + loc_wipe(loc); + return ret; +} 
int32_t -mq_inode_loc_fill (const char *parent_gfid, inode_t *inode, loc_t *loc) +mq_inode_loc_fill(const char *parent_gfid, inode_t *inode, loc_t *loc) { - char *resolvedpath = NULL; - inode_t *parent = NULL; - int ret = -1; + char *resolvedpath = NULL; + inode_t *parent = NULL; + quota_inode_ctx_t *ctx = NULL; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + if (inode == NULL) { + gf_log_callingfn("marker", GF_LOG_ERROR, + "loc fill failed, " + "inode is NULL"); + return ret; + } - if ((!inode) || (!loc)) - return ret; + if (loc == NULL) + return ret; - if ((inode) && __is_root_gfid (inode->gfid)) { - loc->parent = NULL; - goto ignore_parent; - } + if ((inode) && __is_root_gfid(inode->gfid)) { + loc->parent = NULL; + goto ignore_parent; + } - if (parent_gfid == NULL) - parent = inode_parent (inode, 0, NULL); - else - parent = inode_find (inode->table, - (unsigned char *) parent_gfid); + if (parent_gfid == NULL) + parent = inode_parent(inode, 0, NULL); + else + parent = inode_find(inode->table, (unsigned char *)parent_gfid); - if (parent == NULL) - goto err; + if (parent == NULL) { + gf_log("marker", GF_LOG_ERROR, "parent is NULL for %s", + uuid_utoa(inode->gfid)); + goto err; + } ignore_parent: - ret = inode_path (inode, NULL, &resolvedpath); - if (ret < 0) - goto err; - - ret = mq_loc_fill (loc, inode, parent, resolvedpath); - if (ret < 0) - goto err; + ret = inode_path(inode, NULL, &resolvedpath); + if (ret < 0) { + gf_log("marker", GF_LOG_ERROR, "failed to resolve path for %s", + uuid_utoa(inode->gfid)); + goto err; + } + + ret = mq_loc_fill(loc, inode, parent, resolvedpath); + if (ret < 0) + goto err; + + ret = mq_inode_ctx_get(inode, this, &ctx); + if (ret < 0 || ctx == NULL) + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + ret = -1; + goto err; + } + ret = 0; err: - if (parent) - inode_unref (parent); + if (parent) + 
inode_unref(parent); - GF_FREE (resolvedpath); + GF_FREE(resolvedpath); - return ret; + return ret; } - quota_inode_ctx_t * -mq_alloc_inode_ctx () -{ - int32_t ret = -1; - quota_inode_ctx_t *ctx = NULL; - - QUOTA_ALLOC (ctx, quota_inode_ctx_t, ret); - if (ret == -1) - goto out; - - ctx->size = 0; - ctx->dirty = 0; - ctx->updation_status = _gf_false; - LOCK_INIT (&ctx->lock); - INIT_LIST_HEAD (&ctx->contribution_head); -out: - return ctx; -} - -inode_contribution_t * -mq_get_contribution_node (inode_t *inode, quota_inode_ctx_t *ctx) +mq_alloc_inode_ctx() { - inode_contribution_t *contri = NULL; - inode_contribution_t *temp = NULL; - - if (!inode || !ctx) - goto out; - - list_for_each_entry (temp, &ctx->contribution_head, contri_list) { - if (uuid_compare (temp->gfid, inode->gfid) == 0) { - contri = temp; - goto out; - } - } + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + + QUOTA_ALLOC(ctx, quota_inode_ctx_t, ret); + if (ret == -1) + goto out; + + ctx->size = 0; + ctx->dirty = 0; + ctx->updation_status = _gf_false; + LOCK_INIT(&ctx->lock); + INIT_LIST_HEAD(&ctx->contribution_head); out: - return contri; + return ctx; } - -int32_t -mq_delete_contribution_node (dict_t *dict, char *key, - inode_contribution_t *contribution) +static void +mq_contri_fini(inode_contribution_t *contri) { - if (dict_get (dict, key) != NULL) - goto out; - - QUOTA_FREE_CONTRIBUTION_NODE (contribution); -out: - return 0; + LOCK_DESTROY(&contri->lock); + GF_FREE(contri); } - inode_contribution_t * -__mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) +mq_contri_init(inode_t *inode) { - int32_t ret = 0; - inode_contribution_t *contribution = NULL; - - list_for_each_entry (contribution, &ctx->contribution_head, contri_list) { - if (uuid_compare (contribution->gfid, loc->parent->gfid) == 0) { - goto out; - } - } - - QUOTA_ALLOC (contribution, inode_contribution_t, ret); - if (ret == -1) - goto out; + inode_contribution_t *contri = NULL; + int32_t ret = 0; - 
contribution->contribution = 0; + QUOTA_ALLOC(contri, inode_contribution_t, ret); + if (ret == -1) + goto out; - uuid_copy (contribution->gfid, loc->parent->gfid); + GF_REF_INIT(contri, mq_contri_fini); - LOCK_INIT (&contribution->lock); - INIT_LIST_HEAD (&contribution->contri_list); + contri->contribution = 0; + contri->file_count = 0; + contri->dir_count = 0; + gf_uuid_copy(contri->gfid, inode->gfid); - list_add_tail (&contribution->contri_list, &ctx->contribution_head); + LOCK_INIT(&contri->lock); + INIT_LIST_HEAD(&contri->contri_list); out: - return contribution; + return contri; } - inode_contribution_t * -mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) +mq_get_contribution_node(inode_t *inode, quota_inode_ctx_t *ctx) { - inode_contribution_t *contribution = NULL; + inode_contribution_t *contri = NULL; + inode_contribution_t *temp = NULL; - if ((ctx == NULL) || (loc == NULL)) - return NULL; + if (!inode || !ctx) + goto out; - if (strcmp (loc->path, "/") == 0) - return NULL; + LOCK(&ctx->lock); + { + if (list_empty(&ctx->contribution_head)) + goto unlock; - LOCK (&ctx->lock); + list_for_each_entry(temp, &ctx->contribution_head, contri_list) { - contribution = __mq_add_new_contribution_node (this, ctx, loc); + if (gf_uuid_compare(temp->gfid, inode->gfid) == 0) { + contri = temp; + GF_REF_GET(contri); + break; + } } - UNLOCK (&ctx->lock); + } +unlock: + UNLOCK(&ctx->lock); - return contribution; +out: + return contri; } - -int32_t -mq_dict_set_contribution (xlator_t *this, dict_t *dict, - loc_t *loc) +inode_contribution_t * +__mq_add_new_contribution_node(xlator_t *this, quota_inode_ctx_t *ctx, + loc_t *loc) { - int32_t ret = -1; - char contri_key [512] = {0, }; - - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", dict, out); - GF_VALIDATE_OR_GOTO ("marker", loc, out); - - GET_CONTRI_KEY (contri_key, loc->parent->gfid, ret); - if (ret < 0) { - ret = -1; - goto out; + inode_contribution_t *contribution = 
NULL; + + if (!loc->parent) { + if (!gf_uuid_is_null(loc->pargfid)) + loc->parent = inode_find(loc->inode->table, loc->pargfid); + + if (!loc->parent) + loc->parent = inode_parent(loc->inode, loc->pargfid, loc->name); + if (!loc->parent) + goto out; + } + + list_for_each_entry(contribution, &ctx->contribution_head, contri_list) + { + if (loc->parent && + gf_uuid_compare(contribution->gfid, loc->parent->gfid) == 0) { + goto out; } + } - ret = dict_set_int64 (dict, contri_key, 0); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "unable to set dict value on %s.", - loc->path); - goto out; - } + contribution = mq_contri_init(loc->parent); + if (contribution == NULL) + goto out; + + list_add_tail(&contribution->contri_list, &ctx->contribution_head); - ret = 0; out: - return ret; + return contribution; } - -int32_t -mq_inode_ctx_get (inode_t *inode, xlator_t *this, - quota_inode_ctx_t **ctx) +inode_contribution_t * +mq_add_new_contribution_node(xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) { - int32_t ret = -1; - uint64_t ctx_int = 0; - marker_inode_ctx_t *mark_ctx = NULL; + inode_contribution_t *contribution = NULL; - GF_VALIDATE_OR_GOTO ("marker", inode, out); - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", ctx, out); + if ((ctx == NULL) || (loc == NULL)) + return NULL; - ret = inode_ctx_get (inode, this, &ctx_int); - if (ret < 0) { - ret = -1; - *ctx = NULL; - goto out; - } - - mark_ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int; - if (mark_ctx->quota_ctx == NULL) { - ret = -1; - goto out; - } + if (((loc->path) && (strcmp(loc->path, "/") == 0)) || + (!loc->path && gf_uuid_is_null(loc->pargfid))) + return NULL; - *ctx = mark_ctx->quota_ctx; + LOCK(&ctx->lock); + { + contribution = __mq_add_new_contribution_node(this, ctx, loc); + if (contribution) + GF_REF_GET(contribution); + } + UNLOCK(&ctx->lock); - ret = 0; - -out: - return ret; + return contribution; } - -quota_inode_ctx_t * -__mq_inode_ctx_new (inode_t *inode, 
xlator_t *this) +int32_t +mq_dict_set_contribution(xlator_t *this, dict_t *dict, loc_t *loc, uuid_t gfid, + char *contri_key) { - int32_t ret = -1; - quota_inode_ctx_t *quota_ctx = NULL; - marker_inode_ctx_t *mark_ctx = NULL; - - ret = marker_force_inode_ctx_get (inode, this, &mark_ctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "marker_force_inode_ctx_get() failed"); - goto out; + int32_t ret = -1; + char key[QUOTA_KEY_MAX] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", dict, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + + if (gfid && !gf_uuid_is_null(gfid)) { + GET_CONTRI_KEY(this, key, gfid, ret); + } else if (loc->parent) { + GET_CONTRI_KEY(this, key, loc->parent->gfid, ret); + } else { + /* nameless lookup, fetch contributions to all parents */ + GET_CONTRI_KEY(this, key, NULL, ret); + } + + if (ret < 0) + goto out; + + ret = dict_set_int64(dict, key, 0); + if (ret < 0) + goto out; + + if (contri_key) + if (snprintf(contri_key, QUOTA_KEY_MAX, "%s", key) >= QUOTA_KEY_MAX) { + ret = -1; + goto out; } - LOCK (&inode->lock); - { - if (mark_ctx->quota_ctx == NULL) { - quota_ctx = mq_alloc_inode_ctx (); - if (quota_ctx == NULL) { - ret = -1; - goto unlock; - } - mark_ctx->quota_ctx = quota_ctx; - } else { - quota_ctx = mark_ctx->quota_ctx; - } - - ret = 0; - } -unlock: - UNLOCK (&inode->lock); out: - return quota_ctx; -} - + if (ret < 0) + gf_log_callingfn(this ? 
this->name : "Marker", GF_LOG_ERROR, + "dict set failed"); -quota_inode_ctx_t * -mq_inode_ctx_new (inode_t * inode, xlator_t *this) -{ - return __mq_inode_ctx_new (inode, this); + return ret; } -quota_local_t * -mq_local_new () +int32_t +mq_inode_ctx_get(inode_t *inode, xlator_t *this, quota_inode_ctx_t **ctx) { - int32_t ret = -1; - quota_local_t *local = NULL; + int32_t ret = -1; + uint64_t ctx_int = 0; + marker_inode_ctx_t *mark_ctx = NULL; - QUOTA_ALLOC (local, quota_local_t, ret); - if (ret < 0) - goto out; + GF_VALIDATE_OR_GOTO("marker", inode, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - local->ref = 1; - local->delta = 0; - local->err = 0; - LOCK_INIT (&local->lock); + ret = inode_ctx_get(inode, this, &ctx_int); + if (ret < 0) { + ret = -1; + *ctx = NULL; + goto out; + } - memset (&local->loc, 0, sizeof (loc_t)); - memset (&local->parent_loc, 0, sizeof (loc_t)); + mark_ctx = (marker_inode_ctx_t *)(unsigned long)ctx_int; + if (mark_ctx->quota_ctx == NULL) { + ret = -1; + goto out; + } - local->ctx = NULL; - local->contri = NULL; + *ctx = mark_ctx->quota_ctx; + + ret = 0; out: - return local; + return ret; } -quota_local_t * -mq_local_ref (quota_local_t *local) +quota_inode_ctx_t * +__mq_inode_ctx_new(inode_t *inode, xlator_t *this) { - LOCK (&local->lock); - { - local->ref ++; + int32_t ret = -1; + quota_inode_ctx_t *quota_ctx = NULL; + marker_inode_ctx_t *mark_ctx = NULL; + + ret = marker_force_inode_ctx_get(inode, this, &mark_ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "marker_force_inode_ctx_get() failed"); + goto out; + } + + LOCK(&inode->lock); + { + if (mark_ctx->quota_ctx == NULL) { + quota_ctx = mq_alloc_inode_ctx(); + if (quota_ctx == NULL) { + ret = -1; + goto unlock; + } + mark_ctx->quota_ctx = quota_ctx; + } else { + quota_ctx = mark_ctx->quota_ctx; } - UNLOCK (&local->lock); - - return local; -} - - -int32_t -mq_local_unref (xlator_t *this, quota_local_t *local) -{ - int32_t ref = 
0; - if (local == NULL) - goto out; - - QUOTA_SAFE_DECREMENT (&local->lock, local->ref, ref); - - if (ref != 0) - goto out; - - if (local->fd != NULL) - fd_unref (local->fd); - - loc_wipe (&local->loc); - loc_wipe (&local->parent_loc); - - LOCK_DESTROY (&local->lock); - - GF_FREE (local); + ret = 0; + } +unlock: + UNLOCK(&inode->lock); out: - return 0; + return quota_ctx; } - -inode_contribution_t * -mq_get_contribution_from_loc (xlator_t *this, loc_t *loc) +quota_inode_ctx_t * +mq_inode_ctx_new(inode_t *inode, xlator_t *this) { - int32_t ret = 0; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contribution = NULL; - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "cannot get marker-quota context from inode " - "(gfid:%s, path:%s)", - uuid_utoa (loc->inode->gfid), loc->path); - goto err; - } - - contribution = mq_get_contribution_node (loc->parent, ctx); - if (contribution == NULL) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "inode (gfid:%s, path:%s) has " - "no contribution towards parent (gfid:%s)", - uuid_utoa (loc->inode->gfid), - loc->path, uuid_utoa (loc->parent->gfid)); - goto err; - } - -err: - return contribution; + return __mq_inode_ctx_new(inode, this); } diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h index 56080f0dc21..d4091dd2180 100644 --- a/xlators/features/marker/src/marker-quota-helper.h +++ b/xlators/features/marker/src/marker-quota-helper.h @@ -1,83 +1,66 @@ -/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. 
- - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ -#ifndef _MARKER_QUOTA_HELPER_H -#define _MARKER_QUOTA_HELPER - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "marker-quota.h" -#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \ - do { \ - list_del (&_contribution->contri_list); \ - GF_FREE (_contribution); \ - } while (0) - -#define QUOTA_SAFE_INCREMENT(lock, var) \ - do { \ - LOCK (lock); \ - var ++; \ - UNLOCK (lock); \ - } while (0) - -#define QUOTA_SAFE_DECREMENT(lock, var, value) \ - do { \ - LOCK (lock); \ - { \ - value = --var; \ - } \ - UNLOCK (lock); \ - } while (0) +#ifndef _MARKER_QUOTA_HELPER_H +#define _MARKER_QUOTA_HELPER_H + +#include "marker.h" + +#define QUOTA_FREE_CONTRIBUTION_NODE(ctx, _contribution) \ + do { \ + LOCK(&ctx->lock); \ + { \ + list_del_init(&_contribution->contri_list); \ + GF_REF_PUT(_contribution); \ + } \ + UNLOCK(&ctx->lock); \ + } while (0) + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK(lock); \ + var++; \ + UNLOCK(lock); \ + } while (0) + +#define QUOTA_SAFE_DECREMENT(lock, var, value) \ + do { \ + LOCK(lock); \ + { \ + value = --var; \ + } \ + UNLOCK(lock); \ + } while (0) inode_contribution_t * -mq_add_new_contribution_node (xlator_t *, quota_inode_ctx_t *, loc_t *); 
+mq_add_new_contribution_node(xlator_t *, quota_inode_ctx_t *, loc_t *); int32_t -mq_dict_set_contribution (xlator_t *, dict_t *, loc_t *); +mq_dict_set_contribution(xlator_t *, dict_t *, loc_t *, uuid_t, char *); quota_inode_ctx_t * -mq_inode_ctx_new (inode_t *, xlator_t *); +mq_inode_ctx_new(inode_t *, xlator_t *); int32_t -mq_inode_ctx_get (inode_t *, xlator_t *, quota_inode_ctx_t **); +mq_inode_ctx_get(inode_t *, xlator_t *, quota_inode_ctx_t **); int32_t -mq_delete_contribution_node (dict_t *, char *, inode_contribution_t *); - -int32_t -mq_inode_loc_fill (const char *, inode_t *, loc_t *); - -quota_local_t * -mq_local_new (); - -quota_local_t * -mq_local_ref (quota_local_t *); +mq_delete_contribution_node(dict_t *, char *, inode_contribution_t *); int32_t -mq_local_unref (xlator_t *, quota_local_t *); +mq_inode_loc_fill(const char *, inode_t *, loc_t *); inode_contribution_t * -mq_get_contribution_node (inode_t *, quota_inode_ctx_t *); +mq_contri_init(inode_t *inode); inode_contribution_t * -mq_get_contribution_from_loc (xlator_t *this, loc_t *loc); +mq_get_contribution_node(inode_t *, quota_inode_ctx_t *); #endif diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c index 57e8470d67a..3de2ea1c92c 100644 --- a/xlators/features/marker/src/marker-quota.c +++ b/xlators/features/marker/src/marker-quota.c @@ -1,2457 +1,2297 @@ -/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "dict.h" -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "libxlator.h" -#include "common-utils.h" -#include "byte-order.h" +#include <glusterfs/common-utils.h> +#include <glusterfs/byte-order.h> #include "marker-quota.h" #include "marker-quota-helper.h" +#include <glusterfs/syncop.h> +#include <glusterfs/quota-common-utils.h> int -mq_loc_copy (loc_t *dst, loc_t *src) +mq_loc_copy(loc_t *dst, loc_t *src) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("marker", dst, out); - GF_VALIDATE_OR_GOTO ("marker", src, out); + GF_VALIDATE_OR_GOTO("marker", dst, out); + GF_VALIDATE_OR_GOTO("marker", src, out); - if (src->inode == NULL || - src->path == NULL) { - gf_log ("marker", GF_LOG_WARNING, - "src loc is not valid"); - goto out; - } + if (src->inode == NULL || + ((src->parent == NULL) && (gf_uuid_is_null(src->pargfid)) && + !__is_root_gfid(src->inode->gfid))) { + gf_log("marker", GF_LOG_WARNING, "src loc is not valid"); + goto out; + } - ret = loc_copy (dst, src); + ret = loc_copy(dst, src); out: - return ret; + return ret; } -int32_t -mq_get_local_err (quota_local_t *local, - int32_t *val) +static void +mq_set_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag, + gf_boolean_t status) { - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("marker", local, out); - GF_VALIDATE_OR_GOTO 
("marker", val, out); - - LOCK (&local->lock); - { - *val = local->err; - } - UNLOCK (&local->lock); - - ret = 0; -out: - return ret; + LOCK(&ctx->lock); + { + *flag = status; + } + UNLOCK(&ctx->lock); } -int32_t -mq_get_ctx_updation_status (quota_inode_ctx_t *ctx, - gf_boolean_t *status) +static void +mq_test_and_set_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag, + gf_boolean_t *status) { - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("marker", ctx, out); - GF_VALIDATE_OR_GOTO ("marker", status, out); - - LOCK (&ctx->lock); - { - *status = ctx->updation_status; - } - UNLOCK (&ctx->lock); - - ret = 0; -out: - return ret; + gf_boolean_t temp = _gf_false; + + LOCK(&ctx->lock); + { + temp = *status; + *status = *flag; + *flag = temp; + } + UNLOCK(&ctx->lock); } +static void +mq_get_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag, + gf_boolean_t *status) +{ + LOCK(&ctx->lock); + { + *status = *flag; + } + UNLOCK(&ctx->lock); +} int32_t -mq_set_ctx_updation_status (quota_inode_ctx_t *ctx, - gf_boolean_t status) +mq_get_ctx_updation_status(quota_inode_ctx_t *ctx, gf_boolean_t *status) { - int32_t ret = -1; - - if (ctx == NULL) - goto out; - - LOCK (&ctx->lock); - { - ctx->updation_status = status; - } - UNLOCK (&ctx->lock); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", status, out); - ret = 0; + mq_get_ctx_status(ctx, &ctx->updation_status, status); + return 0; out: - return ret; + return -1; } int32_t -mq_test_and_set_ctx_updation_status (quota_inode_ctx_t *ctx, - gf_boolean_t *status) +mq_set_ctx_updation_status(quota_inode_ctx_t *ctx, gf_boolean_t status) { - int32_t ret = -1; - gf_boolean_t temp = _gf_false; - - GF_VALIDATE_OR_GOTO ("marker", ctx, out); - GF_VALIDATE_OR_GOTO ("marker", status, out); - - LOCK (&ctx->lock); - { - temp = *status; - *status = ctx->updation_status; - ctx->updation_status = temp; - } - UNLOCK (&ctx->lock); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - ret = 0; + mq_set_ctx_status(ctx, 
&ctx->updation_status, status); + return 0; out: - return ret; + return -1; } -void -mq_assign_lk_owner (xlator_t *this, call_frame_t *frame) +int32_t +mq_test_and_set_ctx_updation_status(quota_inode_ctx_t *ctx, + gf_boolean_t *status) { - marker_conf_t *conf = NULL; - uint64_t lk_owner = 0; - - conf = this->private; - - LOCK (&conf->lock); - { - if (++conf->quota_lk_owner == 0) { - ++conf->quota_lk_owner; - } - - lk_owner = conf->quota_lk_owner; - } - UNLOCK (&conf->lock); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", status, out); - frame->root->lk_owner = lk_owner; - - return; + mq_test_and_set_ctx_status(ctx, &ctx->updation_status, status); + return 0; +out: + return -1; } - int32_t -mq_loc_fill_from_name (xlator_t *this, loc_t *newloc, loc_t *oldloc, - uint64_t ino, char *name) +mq_set_ctx_create_status(quota_inode_ctx_t *ctx, gf_boolean_t status) { - int32_t ret = -1; - int32_t len = 0; - char *path = NULL; - - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", newloc, out); - GF_VALIDATE_OR_GOTO ("marker", oldloc, out); - GF_VALIDATE_OR_GOTO ("marker", name, out); - - newloc->inode = inode_new (oldloc->inode->table); - - if (!newloc->inode) { - ret = -1; - goto out; - } - - newloc->parent = inode_ref (oldloc->inode); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - len = strlen (oldloc->path); - - if (oldloc->path [len - 1] == '/') - ret = gf_asprintf ((char **) &path, "%s%s", - oldloc->path, name); - else - ret = gf_asprintf ((char **) &path, "%s/%s", - oldloc->path, name); - - if (ret < 0) - goto out; - - newloc->path = path; - - newloc->name = strrchr (newloc->path, '/'); - - if (newloc->name) - newloc->name++; - - gf_log (this->name, GF_LOG_DEBUG, "path = %s name =%s", - newloc->path, newloc->name); + mq_set_ctx_status(ctx, &ctx->create_status, status); + return 0; out: - return ret; + return -1; } int32_t -mq_dirty_inode_updation_done (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, 
int32_t op_errno) +mq_test_and_set_ctx_create_status(quota_inode_ctx_t *ctx, gf_boolean_t *status) { - QUOTA_STACK_DESTROY (frame, this); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", status, out); - return 0; + mq_test_and_set_ctx_status(ctx, &ctx->create_status, status); + return 0; +out: + return -1; } -int32_t -mq_release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +static void +mq_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t status) { - struct gf_flock lock = {0, }; - quota_local_t *local = NULL; - loc_t loc = {0, }; - int ret = -1; - - local = frame->local; - - if (op_ret == -1) { - local->err = -1; - - mq_dirty_inode_updation_done (frame, NULL, this, 0, 0); - - return 0; - } - - if (op_ret == 0) - local->ctx->dirty = 0; - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - ret = loc_copy (&loc, &local->loc); - if (ret == -1) { - local->err = -1; - frame->local = NULL; - mq_dirty_inode_updation_done (frame, NULL, this, 0, 0); - return 0; - } - - if (local->loc.inode == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "Inode is NULL, so can't stackwind."); - goto out; - } - - STACK_WIND (frame, - mq_dirty_inode_updation_done, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &loc, F_SETLKW, &lock); - - loc_wipe (&loc); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - return 0; + mq_set_ctx_status(ctx, &ctx->dirty_status, status); out: - mq_dirty_inode_updation_done (frame, NULL, this, -1, 0); - - return 0; + return; } -int32_t -mq_mark_inode_undirty (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) +int +mq_build_ancestry(xlator_t *this, loc_t *loc) { - int32_t ret = -1; - int64_t *size = NULL; - dict_t *newdict = NULL; - quota_local_t *local = NULL; - - local = (quota_local_t *) frame->local; - - if (op_ret == -1) - goto err; - 
- if (!dict) - goto wind; - - ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); - if (ret) - goto wind; + int32_t ret = -1; + fd_t *fd = NULL; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + dict_t *xdata = NULL; + inode_t *tmp_parent = NULL; + inode_t *tmp_inode = NULL; + inode_t *linked_inode = NULL; + quota_inode_ctx_t *ctx = NULL; + + INIT_LIST_HEAD(&entries.list); + + xdata = dict_new(); + if (xdata == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -ENOMEM; + goto out; + } + + ret = dict_set_int8(xdata, GET_ANCESTRY_DENTRY_KEY, 1); + if (ret < 0) + goto out; + + fd = fd_anonymous(loc->inode); + if (fd == NULL) { + gf_log(this->name, GF_LOG_ERROR, "fd creation failed"); + ret = -ENOMEM; + goto out; + } + + fd_bind(fd); + + ret = syncop_readdirp(this, fd, 131072, 0, &entries, xdata, NULL); + if (ret < 0) { + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "readdirp failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + if (list_empty(&entries.list)) { + ret = -1; + goto out; + } + + list_for_each_entry(entry, &entries.list, list) + { + if (__is_root_gfid(entry->inode->gfid)) { + /* The list contains a sub-list for each possible path + * to the target inode. Each sub-list starts with the + * root entry of the tree and is followed by the child + * entries for a particular path to the target entry. + * The root entry is an implied sub-list delimiter, + * as it denotes we have started processing a new path. 
+ * Reset the parent pointer and continue + */ + + tmp_parent = NULL; + } else { + linked_inode = inode_link(entry->inode, tmp_parent, entry->d_name, + &entry->d_stat); + if (linked_inode) { + tmp_inode = entry->inode; + entry->inode = linked_inode; + inode_unref(tmp_inode); + } else { + gf_log(this->name, GF_LOG_ERROR, "inode link failed"); + ret = -EINVAL; + goto out; + } + } - LOCK (&local->ctx->lock); - { - local->ctx->size = ntoh64 (*size); + ctx = mq_inode_ctx_new(entry->inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(entry->inode->gfid)); + ret = -ENOMEM; + goto out; } - UNLOCK (&local->ctx->lock); -wind: - newdict = dict_new (); - if (!newdict) - goto err; + /* For non-directory, posix_get_ancestry_non_directory returns + * all hard-links that are represented by nodes adjacent to + * each other in the dentry-list. + * (Unlike the directory case where adjacent nodes either have + * a parent/child relationship or belong to different paths). 
+ */ + if (entry->inode->ia_type == IA_IFDIR) + tmp_parent = entry->inode; + } + + if (loc->parent) + inode_unref(loc->parent); - ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0); - if (ret) - goto err; + loc->parent = inode_parent(loc->inode, 0, NULL); + if (loc->parent == NULL) { + ret = -1; + goto out; + } - STACK_WIND (frame, mq_release_lock_on_dirty_inode, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - &local->loc, newdict, 0); - ret = 0; + ret = 0; -err: - if (op_ret == -1 || ret == -1) { - local->err = -1; +out: + gf_dirent_free(&entries); - mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0); - } + if (fd) + fd_unref(fd); - if (newdict) - dict_unref (newdict); + if (xdata) + dict_unref(xdata); - return 0; + return ret; } -int32_t -mq_update_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) +/* This function should be used only in inspect_directory and inspect_file + * function to heal quota xattrs. + * Inode quota feature is introduced in 3.7. + * If gluster setup is upgraded from 3.6 to 3.7, there can be + * getxattr and setxattr spikes with quota heal as inode quota is missing. + * So this wrapper function is to avoid xattrs spikes during upgrade. + * This function returns success even if inode-quota xattrs are missing and + * hence no healing is performed. 
+ */ +static int32_t +_quota_dict_get_meta(xlator_t *this, dict_t *dict, char *key, const int keylen, + quota_meta_t *meta, ia_type_t ia_type, + gf_boolean_t add_delta) { - int32_t ret = -1; - dict_t *new_dict = NULL; - int64_t *size = NULL; - int64_t *delta = NULL; - quota_local_t *local = NULL; - - local = frame->local; - - if (op_ret == -1) - goto err; - - if (dict == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "Dict is null while updating the size xattr %s", - local->loc.path?local->loc.path:""); - goto err; - } - - ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); - if (!size) { - gf_log (this->name, GF_LOG_WARNING, - "failed to get the size, %s", - local->loc.path?local->loc.path:""); - goto err; - } - - QUOTA_ALLOC_OR_GOTO (delta, int64_t, ret, err); + int32_t ret = 0; + marker_conf_t *priv = NULL; - *delta = hton64 (local->sum - ntoh64 (*size)); + priv = this->private; - gf_log (this->name, GF_LOG_DEBUG, "calculated size = %"PRId64", " - "original size = %"PRIu64 - " path = %s diff = %"PRIu64, local->sum, ntoh64 (*size), - local->loc.path, ntoh64 (*delta)); - - new_dict = dict_new (); - if (!new_dict); - - ret = dict_set_bin (new_dict, QUOTA_SIZE_KEY, delta, 8); - if (ret) - goto err; - - STACK_WIND (frame, mq_mark_inode_undirty, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, &local->loc, - GF_XATTROP_ADD_ARRAY64, new_dict); + ret = quota_dict_get_inode_meta(dict, key, keylen, meta); + if (ret == -2 && (priv->feature_enabled & GF_INODE_QUOTA) == 0) { + /* quota_dict_get_inode_meta returns -2 if + * inode quota xattrs are not present. + * if inode quota self heal is turned off, + * then we should skip healing inode quotas + */ + gf_log(this->name, GF_LOG_DEBUG, + "inode quota disabled. 
" + "inode quota self heal will not be performed"); ret = 0; - -err: - if (op_ret == -1 || ret == -1) { - local->err = -1; - - mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0); + if (add_delta) { + if (ia_type == IA_IFDIR) + meta->dir_count = 1; + else + meta->file_count = 1; } + } - if (new_dict) - dict_unref (new_dict); - - return 0; + return ret; } int32_t -mq_test_and_set_local_err(quota_local_t *local, - int32_t *val) +quota_dict_set_size_meta(xlator_t *this, dict_t *dict, const quota_meta_t *meta) { - int tmp = 0; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("marker", local, out); - GF_VALIDATE_OR_GOTO ("marker", val, out); - - LOCK (&local->lock); - { - tmp = local->err; - local->err = *val; - *val = tmp; - } - UNLOCK (&local->lock); - - ret = 0; + int32_t ret = -ENOMEM; + quota_meta_t *value = NULL; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + + value = GF_MALLOC(2 * sizeof(quota_meta_t), gf_common_quota_meta_t); + if (value == NULL) { + goto out; + } + value[0].size = hton64(meta->size); + value[0].file_count = hton64(meta->file_count); + value[0].dir_count = hton64(meta->dir_count); + + value[1].size = 0; + value[1].file_count = 0; + value[1].dir_count = hton64(1); + + GET_SIZE_KEY(this, size_key, ret); + if (ret < 0) + goto out; + ret = dict_set_bin(dict, size_key, value, (sizeof(quota_meta_t) * 2)); + if (ret < 0) { + gf_log_callingfn("quota", GF_LOG_ERROR, "dict set failed"); + GF_FREE(value); + } out: - return ret; + return ret; } -int32_t -mq_get_dirty_inode_size (call_frame_t *frame, xlator_t *this) +void +mq_compute_delta(quota_meta_t *delta, const quota_meta_t *op1, + const quota_meta_t *op2) { - int32_t ret = -1; - dict_t *dict = NULL; - quota_local_t *local = NULL; - - local = (quota_local_t *) frame->local; - - dict = dict_new (); - if (!dict) { - ret = -1; - goto err; - } - - ret = dict_set_int64 (dict, QUOTA_SIZE_KEY, 0); - if (ret) - goto err; - - STACK_WIND (frame, mq_update_size_xattr, FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->lookup, &local->loc, dict); - ret =0; - -err: - if (ret) { - local->err = -1; - - mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0); - } + delta->size = op1->size - op2->size; + delta->file_count = op1->file_count - op2->file_count; + delta->dir_count = op1->dir_count - op2->dir_count; +} - if (dict) - dict_unref (dict); +void +mq_add_meta(quota_meta_t *dst, const quota_meta_t *src) +{ + dst->size += src->size; + dst->file_count += src->file_count; + dst->dir_count += src->dir_count; +} - return 0; +void +mq_sub_meta(quota_meta_t *dst, const quota_meta_t *src) +{ + if (src == NULL) { + dst->size = -dst->size; + dst->file_count = -dst->file_count; + dst->dir_count = -dst->dir_count; + } else { + dst->size = src->size - dst->size; + dst->file_count = src->file_count - dst->file_count; + dst->dir_count = src->dir_count - dst->dir_count; + } } int32_t -mq_get_child_contribution (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - dict_t *dict, - struct iatt *postparent) +mq_are_xattrs_set(xlator_t *this, loc_t *loc, gf_boolean_t *contri_set, + gf_boolean_t *size_set) { - int32_t ret = -1; - int32_t val = 0; - char contri_key [512] = {0, }; - int64_t *contri = NULL; - quota_local_t *local = NULL; - - local = frame->local; - - frame->local = NULL; - - QUOTA_STACK_DESTROY (frame, this); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s", - strerror (op_errno)); - val = -2; - if (!mq_test_and_set_local_err (local, &val) && - val != -2) - mq_release_lock_on_dirty_inode (local->frame, NULL, this, 0, 0); - - goto exit; - } - - ret = mq_get_local_err (local, &val); - if (!ret && val == -2) - goto exit; - - GET_CONTRI_KEY (contri_key, local->loc.inode->gfid, ret); + int32_t ret = -1; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + quota_meta_t meta = { + 0, + }; + struct iatt stbuf = { + 0, + }; + dict_t 
*dict = NULL; + dict_t *rsp_dict = NULL; + + dict = dict_new(); + if (dict == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } + + ret = mq_req_xattr(this, loc, dict, contri_key, size_key); + if (ret < 0) + goto out; + + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "lookup failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + if (rsp_dict == NULL) + goto out; + + *contri_set = _gf_true; + *size_set = _gf_true; + if (loc->inode->ia_type == IA_IFDIR) { + ret = quota_dict_get_inode_meta(rsp_dict, size_key, strlen(size_key), + &meta); + if (ret < 0 || meta.dir_count == 0) + *size_set = _gf_false; + } + + if (!loc_is_root(loc)) { + ret = quota_dict_get_inode_meta(rsp_dict, contri_key, + strlen(contri_key), &meta); if (ret < 0) - goto out; - - if (!dict) - goto out; - - if (dict_get_bin (dict, contri_key, (void **) &contri) == 0) - local->sum += ntoh64 (*contri); + *contri_set = _gf_false; + } + ret = 0; out: - LOCK (&local->lock); - { - val = --local->dentry_child_count; - } - UNLOCK (&local->lock); + if (dict) + dict_unref(dict); - if (val == 0) { - mq_dirty_inode_readdir (local->frame, NULL, this, - 0, 0, NULL); - } - mq_local_unref (this, local); + if (rsp_dict) + dict_unref(rsp_dict); - return 0; -exit: - mq_local_unref (this, local); - return 0; + return ret; } int32_t -mq_readdir_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - gf_dirent_t *entries) +mq_create_size_xattrs(xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) { - char contri_key [512] = {0, }; - int32_t ret = 0; - int32_t val = 0; - off_t offset = 0; - int32_t count = 0; - dict_t *dict = NULL; - quota_local_t *local = NULL; - gf_dirent_t *entry = NULL; - call_frame_t *newframe = NULL; - loc_t loc = {0, }; - - local = mq_local_ref (frame->local); - - 
if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "readdir failed %s", strerror (op_errno)); - local->err = -1; - - mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0); - - goto end; - } else if (op_ret == 0) { - mq_get_dirty_inode_size (frame, this); - - goto end; - } - - local->dentry_child_count = 0; - - list_for_each_entry (entry, (&entries->list), list) { - gf_log (this->name, GF_LOG_DEBUG, "entry = %s", entry->d_name); - - if ((!strcmp (entry->d_name, ".")) || (!strcmp (entry->d_name, - ".."))) { - gf_log (this->name, GF_LOG_DEBUG, "entry = %s", - entry->d_name); - continue; - } - - offset = entry->d_off; - count++; - } - - if (count == 0) { - mq_get_dirty_inode_size (frame, this); - goto end; - - } + int32_t ret = -1; + quota_meta_t size = { + 0, + }; + dict_t *dict = NULL; - local->frame = frame; + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - LOCK (&local->lock); - { - local->dentry_child_count = count; - local->d_off = offset; - } - UNLOCK (&local->lock); - - - list_for_each_entry (entry, (&entries->list), list) { - gf_log (this->name, GF_LOG_DEBUG, "entry = %s", entry->d_name); - - if ((!strcmp (entry->d_name, ".")) || (!strcmp (entry->d_name, - ".."))) { - gf_log (this->name, GF_LOG_DEBUG, "entry = %s", - entry->d_name); - continue; - } - - ret = mq_loc_fill_from_name (this, &loc, &local->loc, - entry->d_ino, entry->d_name); - if (ret < 0) - goto out; - - ret = 0; - - LOCK (&local->lock); - { - if (local->err != -2) { - newframe = copy_frame (frame); - if (!newframe) { - ret = -1; - } - } else - ret = -1; - } - UNLOCK (&local->lock); - - if (ret == -1) - goto out; - - newframe->local = mq_local_ref (local); - - dict = dict_new (); - if (!dict) { - ret = -1; - goto out; - } - - GET_CONTRI_KEY (contri_key, local->loc.inode->gfid, ret); - if (ret < 0) - goto out; - - ret = dict_set_int64 (dict, contri_key, 0); - if (ret) - goto out; - - STACK_WIND (newframe, - mq_get_child_contribution, - 
FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - &loc, dict); - - offset = entry->d_off; - - loc_wipe (&loc); - - newframe = NULL; - - out: - if (dict) { - dict_unref (dict); - dict = NULL; - } - - if (ret) { - val = -2; - mq_test_and_set_local_err (local, &val); - - if (newframe) { - newframe->local = NULL; - mq_local_unref(this, local); - QUOTA_STACK_DESTROY (newframe, this); - } - - break; - } - } + if (loc->inode->ia_type != IA_IFDIR) { + ret = 0; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = quota_dict_set_size_meta(this, dict, &size); + if (ret < 0) + goto out; + + ret = syncop_xattrop(FIRST_CHILD(this), loc, + GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, NULL, + NULL); + + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } - if (ret && val != -2) { - mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0); - } -end: - mq_local_unref (this, local); +out: + if (dict) + dict_unref(dict); - return 0; + return ret; } int32_t -mq_dirty_inode_readdir (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd) +mq_lock(xlator_t *this, loc_t *loc, short l_type) { - quota_local_t *local = NULL; - - local = frame->local; - - if (op_ret == -1) { - local->err = -1; - mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0); - return 0; - } - - if (local->fd == NULL) - local->fd = fd_ref (fd); + struct gf_flock lock = { + 0, + }; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + gf_log(this->name, GF_LOG_DEBUG, "set lock type %d on %s", l_type, + loc->path); + + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = l_type; + lock.l_whence = SEEK_SET; + + ret = syncop_inodelk(FIRST_CHILD(this), this->name, 
loc, F_SETLKW, &lock, + NULL, NULL); + if (ret < 0) + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "inodelk failed " + "for %s: %s", + loc->path, strerror(-ret)); - STACK_WIND (frame, - mq_readdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir, - local->fd, READDIR_BUF, local->d_off); +out: - return 0; + return ret; } int32_t -mq_check_if_still_dirty (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - dict_t *dict, - struct iatt *postparent) +mq_get_dirty(xlator_t *this, loc_t *loc, int32_t *dirty) { - int8_t dirty = -1; - int32_t ret = -1; - fd_t *fd = NULL; - quota_local_t *local = NULL; - - local = frame->local; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "failed to get " - "the dirty xattr for %s", local->loc.path); - goto err; - } - - if (!dict) { - ret = -1; - goto err; - } - - ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty); - if (ret) - goto err; - - //the inode is not dirty anymore - if (dirty == 0) { - mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0); - - return 0; - } - - fd = fd_create (local->loc.inode, frame->root->pid); - - local->d_off = 0; + int32_t ret = -1; + int8_t value = 0; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + struct iatt stbuf = { + 0, + }; + + dict = dict_new(); + if (dict == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } + + ret = dict_set_int64(dict, QUOTA_DIRTY_KEY, 0); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "dict set failed"); + goto out; + } + + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "lookup failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + ret = dict_get_int8(rsp_dict, QUOTA_DIRTY_KEY, &value); + if (ret < 0) + goto out; + + *dirty = value; - STACK_WIND(frame, - mq_dirty_inode_readdir, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, - &local->loc, fd); - - ret = 0; - -err: - if (op_ret == -1 || ret == -1) { - local->err = -1; - mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0); - } +out: + if (dict) + dict_unref(dict); - if (fd != NULL) { - fd_unref (fd); - } + if (rsp_dict) + dict_unref(rsp_dict); - return 0; + return ret; } int32_t -mq_get_dirty_xattr (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) +mq_get_set_dirty(xlator_t *this, loc_t *loc, int32_t dirty, int32_t *prev_dirty) { - int32_t ret = -1; - dict_t *xattr_req = NULL; - quota_local_t *local = NULL; - - if (op_ret == -1) { - mq_dirty_inode_updation_done (frame, NULL, this, 0, 0); - return 0; - } - - local = frame->local; + int32_t ret = -1; + int8_t value = 0; + quota_inode_ctx_t *ctx = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", prev_dirty, out); + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get inode ctx for " + "%s", + loc->path); + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, dirty); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed"); + goto out; + } + + ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_GET_AND_SET, dict, + NULL, NULL, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + *prev_dirty = 0; + if (rsp_dict) { + ret = dict_get_int8(rsp_dict, QUOTA_DIRTY_KEY, &value); + if (ret == 0) + *prev_dirty = value; + } + + LOCK(&ctx->lock); + { + ctx->dirty = dirty; + } + UNLOCK(&ctx->lock); + ret = 0; +out: + if (dict) + dict_unref(dict); - xattr_req = dict_new (); - if (xattr_req == NULL) { - ret = -1; - goto err; - } + if (rsp_dict) + dict_unref(rsp_dict); - ret = dict_set_int8 (xattr_req, QUOTA_DIRTY_KEY, 0); - if (ret) - goto err; + return ret; +} - STACK_WIND (frame, - mq_check_if_still_dirty, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - &local->loc, - xattr_req); +int32_t +mq_mark_dirty(xlator_t *this, loc_t *loc, int32_t dirty) +{ + int32_t ret = -1; + dict_t *dict = NULL; + quota_inode_ctx_t *ctx = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get inode ctx for " + "%s", + loc->path); ret = 0; + goto out; + } + + dict = dict_new(); + if (!dict) { + ret = -1; + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } + + ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, dirty); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed"); + goto out; + } + + ret = syncop_setxattr(FIRST_CHILD(this), loc, dict, 0, NULL, NULL); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "setxattr dirty = %d " + "failed for %s: %s", + dirty, loc->path, strerror(-ret)); + goto out; + } + + LOCK(&ctx->lock); + { + ctx->dirty = dirty; + } + UNLOCK(&ctx->lock); -err: - if (ret) { - local->err = -1; - mq_release_lock_on_dirty_inode(frame, NULL, this, 0, 0); - } - - if (xattr_req) - dict_unref (xattr_req); +out: + if (dict) + dict_unref(dict); - return 0; + return ret; } -/* return 1 when dirty updation started - * 0 other wise - */ int32_t -mq_update_dirty_inode (xlator_t *this, - loc_t *loc, - quota_inode_ctx_t *ctx, - inode_contribution_t *contribution) +_mq_get_metadata(xlator_t *this, loc_t *loc, quota_meta_t *contri, + quota_meta_t *size, uuid_t contri_gfid) { - int32_t ret = -1; - quota_local_t *local = NULL; - gf_boolean_t status = _gf_false; - struct gf_flock lock = {0, }; - call_frame_t *frame = NULL; - - ret = mq_get_ctx_updation_status (ctx, &status); - if (ret == -1 || status == _gf_true) { - ret = 0; - goto out; - } - - frame = create_frame (this, this->ctx->pool); - if (frame == NULL) { - ret = -1; - goto out; + int32_t ret = -1; + quota_meta_t meta = { + 0, + }; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + struct iatt stbuf = { + 0, + }; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + if (size == NULL && contri == NULL) + goto out; + + dict = dict_new(); + if (dict == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } + + if (size && loc->inode->ia_type == IA_IFDIR) { + GET_SIZE_KEY(this, size_key, keylen); + if (keylen < 0) + goto out; + ret = dict_set_int64(dict, size_key, 0); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed."); + goto out; } + } - mq_assign_lk_owner (this, frame); - - local = mq_local_new (); - if (local == NULL) - goto fr_destroy; - - frame->local = local; - ret = mq_loc_copy 
(&local->loc, loc); + if (contri && !loc_is_root(loc)) { + ret = mq_dict_set_contribution(this, dict, loc, contri_gfid, + contri_key); if (ret < 0) - goto fr_destroy; - - local->ctx = ctx; - - local->contri = contribution; + goto out; + } + + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "lookup failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + if (size) { + if (loc->inode->ia_type == IA_IFDIR) { + ret = quota_dict_get_meta(rsp_dict, size_key, keylen, &meta); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_get failed."); + goto out; + } - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; + size->size = meta.size; + size->file_count = meta.file_count; + size->dir_count = meta.dir_count; + } else { + size->size = stbuf.ia_blocks * 512; + size->file_count = 1; + size->dir_count = 0; + } + } - if (local->loc.inode == NULL) { - ret = -1; - gf_log (this->name, GF_LOG_WARNING, - "Inode is NULL, so can't stackwind."); - goto fr_destroy; + if (contri && !loc_is_root(loc)) { + ret = quota_dict_get_meta(rsp_dict, contri_key, strlen(contri_key), + &meta); + if (ret < 0) { + contri->size = 0; + contri->file_count = 0; + contri->dir_count = 0; + } else { + contri->size = meta.size; + contri->file_count = meta.file_count; + contri->dir_count = meta.dir_count; } + } - STACK_WIND (frame, - mq_get_dirty_xattr, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &local->loc, F_SETLKW, &lock); - return 1; + ret = 0; -fr_destroy: - QUOTA_STACK_DESTROY (frame, this); out: + if (dict) + dict_unref(dict); - return 0; -} + if (rsp_dict) + dict_unref(rsp_dict); + return ret; +} int32_t -mq_inode_creation_done (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +mq_get_metadata(xlator_t *this, loc_t *loc, 
quota_meta_t *contri, + quota_meta_t *size, quota_inode_ctx_t *ctx, + inode_contribution_t *contribution) { - quota_local_t *local = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", contribution, out); - if (frame == NULL) - return 0; + if (size == NULL && contri == NULL) { + ret = 0; + goto out; + } - local = frame->local; + ret = _mq_get_metadata(this, loc, contri, size, contribution->gfid); + if (ret < 0) + goto out; - if (local != NULL) { - mq_initiate_quota_txn (this, &local->loc); + if (size) { + LOCK(&ctx->lock); + { + ctx->size = size->size; + ctx->file_count = size->file_count; + ctx->dir_count = size->dir_count; } + UNLOCK(&ctx->lock); + } - QUOTA_STACK_DESTROY (frame, this); + if (contri) { + LOCK(&contribution->lock); + { + contribution->contribution = contri->size; + contribution->file_count = contri->file_count; + contribution->dir_count = contri->dir_count; + } + UNLOCK(&contribution->lock); + } - return 0; +out: + return ret; } - int32_t -mq_xattr_creation_release_lock (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno) +mq_get_delta(xlator_t *this, loc_t *loc, quota_meta_t *delta, + quota_inode_ctx_t *ctx, inode_contribution_t *contribution) { - struct gf_flock lock = {0, }; - quota_local_t *local = NULL; + int32_t ret = -1; + quota_meta_t size = { + 0, + }; + quota_meta_t contri = { + 0, + }; - local = frame->local; + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", contribution, out); - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; + ret = mq_get_metadata(this, loc, &contri, &size, ctx, contribution); + if (ret < 0) + goto out; - STACK_WIND (frame, - mq_inode_creation_done, - 
FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &local->loc, - F_SETLKW, &lock); + mq_compute_delta(delta, &size, &contri); - return 0; +out: + return ret; } - int32_t -mq_create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) +mq_remove_contri(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx, + inode_contribution_t *contri, quota_meta_t *delta, + uint32_t nlink) { - int32_t ret = -1; - dict_t *newdict = NULL; - quota_local_t *local = NULL; - - if (op_ret < 0) { - goto err; - } + int32_t ret = -1; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; - local = frame->local; - - if (local->loc.inode->ia_type == IA_IFDIR) { - newdict = dict_new (); - if (!newdict) { - goto err; - } - - ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0); - if (ret == -1) { - goto err; - } - - STACK_WIND (frame, mq_xattr_creation_release_lock, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - &local->loc, newdict, 0); + if (nlink == 1) { + /*File was a last link and has been deleted */ + ret = 0; + goto done; + } + + GET_CONTRI_KEY(this, contri_key, contri->gfid, ret); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "get contri_key " + "failed for %s", + uuid_utoa(contri->gfid)); + goto out; + } + + ret = syncop_removexattr(FIRST_CHILD(this), loc, contri_key, 0, NULL); + if (ret < 0) { + if (-ret == ENOENT || -ret == ESTALE || -ret == ENODATA || + -ret == ENOATTR) { + /* Remove contri in done when unlink operation is + * performed, so return success on ENOENT/ESTALE + * rename operation removes xattr earlier, + * so return success on ENODATA + */ + ret = 0; } else { - mq_xattr_creation_release_lock (frame, NULL, this, 0, 0); + gf_log_callingfn(this->name, GF_LOG_ERROR, + "removexattr %s failed for %s: %s", contri_key, + loc->path, strerror(-ret)); + goto out; } + } - ret = 0; +done: + LOCK(&contri->lock); + { + contri->contribution += delta->size; + contri->file_count += delta->file_count; 
+ contri->dir_count += delta->dir_count; + } + UNLOCK(&contri->lock); -err: - if (ret < 0) { - mq_xattr_creation_release_lock (frame, NULL, this, 0, 0); - } + ret = 0; - if (newdict != NULL) - dict_unref (newdict); +out: + QUOTA_FREE_CONTRIBUTION_NODE(ctx, contri); - return 0; + return ret; } - int32_t -mq_create_xattr (xlator_t *this, call_frame_t *frame) +mq_update_contri(xlator_t *this, loc_t *loc, inode_contribution_t *contri, + quota_meta_t *delta) { - int32_t ret = 0; - int64_t *value = NULL; - int64_t *size = NULL; - dict_t *dict = NULL; - char key[512] = {0, }; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contri = NULL; - - if (frame == NULL || this == NULL) - return 0; - - local = frame->local; - - ret = mq_inode_ctx_get (local->loc.inode, this, &ctx); - if (ret < 0) { - ctx = mq_inode_ctx_new (local->loc.inode, this); - if (ctx == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "mq_inode_ctx_new failed"); - ret = -1; - goto out; - } - } - - dict = dict_new (); - if (!dict) - goto out; - - if (local->loc.inode->ia_type == IA_IFDIR) { - QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err); - ret = dict_set_bin (dict, QUOTA_SIZE_KEY, size, 8); - if (ret < 0) - goto free_size; - } - - if (strcmp (local->loc.path, "/") != 0) { - contri = mq_add_new_contribution_node (this, ctx, &local->loc); - if (contri == NULL) - goto err; - - QUOTA_ALLOC_OR_GOTO (value, int64_t, ret, err); - GET_CONTRI_KEY (key, local->loc.parent->gfid, ret); - - ret = dict_set_bin (dict, key, value, 8); - if (ret < 0) - goto free_value; - } - - STACK_WIND (frame, mq_create_dirty_xattr, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, &local->loc, - GF_XATTROP_ADD_ARRAY64, dict); + int32_t ret = -1; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + dict_t *dict = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", delta, out); + GF_VALIDATE_OR_GOTO("marker", contri, 
out); + + if (quota_meta_is_null(delta)) { ret = 0; - -free_size: - if (ret < 0) { - GF_FREE (size); - } - -free_value: - if (ret < 0) { - GF_FREE (value); - } - -err: - dict_unref (dict); + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + GET_CONTRI_KEY(this, contri_key, contri->gfid, ret); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "get contri_key " + "failed for %s", + uuid_utoa(contri->gfid)); + goto out; + } + + ret = quota_dict_set_meta(dict, contri_key, delta, loc->inode->ia_type); + if (ret < 0) + goto out; + + ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_ADD_ARRAY64, dict, + NULL, NULL, NULL); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + LOCK(&contri->lock); + { + contri->contribution += delta->size; + contri->file_count += delta->file_count; + contri->dir_count += delta->dir_count; + } + UNLOCK(&contri->lock); out: - if (ret < 0) { - mq_xattr_creation_release_lock (frame, NULL, this, 0, 0); - } + if (dict) + dict_unref(dict); - return 0; + return ret; } - int32_t -mq_check_n_set_inode_xattr (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *dict, - struct iatt *postparent) +mq_update_size(xlator_t *this, loc_t *loc, quota_meta_t *delta) { - quota_local_t *local = NULL; - int64_t *size = NULL, *contri = NULL; - int8_t dirty = 0; - int32_t ret = 0; - char contri_key[512] = {0, }; - - if (op_ret < 0) { - goto out; - } - - local = frame->local; - - ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); - if (ret < 0) - goto create_xattr; - - ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty); - if (ret < 0) - goto create_xattr; + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + dict_t *dict = NULL; - 
//check contribution xattr if not root - if (strcmp (local->loc.path, "/") != 0) { - GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); - if (ret < 0) - goto out; + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", delta, out); - ret = dict_get_bin (dict, contri_key, (void **) &contri); - if (ret < 0) - goto create_xattr; - } + if (quota_meta_is_null(delta)) { + ret = 0; + goto out; + } + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get inode ctx for " + "%s", + loc->path); + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = quota_dict_set_size_meta(this, dict, delta); + if (ret < 0) + goto out; + + ret = syncop_xattrop(FIRST_CHILD(this), loc, + GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, NULL, + NULL); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + LOCK(&ctx->lock); + { + ctx->size += delta->size; + ctx->file_count += delta->file_count; + if (ctx->dir_count == 0) + ctx->dir_count += delta->dir_count + 1; + else + ctx->dir_count += delta->dir_count; + } + UNLOCK(&ctx->lock); out: - mq_xattr_creation_release_lock (frame, NULL, this, 0, 0); - return 0; + if (dict) + dict_unref(dict); -create_xattr: - mq_create_xattr (this, frame); - return 0; + return ret; } - -int32_t -mq_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int +mq_synctask_cleanup(int ret, call_frame_t *frame, void *opaque) { - dict_t *xattr_req = NULL; - quota_local_t *local = NULL; - int32_t ret = 0; - - if (op_ret < 0) { - goto lock_err; - } + quota_synctask_t *args = NULL; - local = frame->local; + GF_ASSERT(opaque); - xattr_req = dict_new (); - if (xattr_req == NULL) { - goto err; - } - - ret = mq_req_xattr (this, &local->loc, xattr_req); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "cannot request xattr"); - goto err; - } - - STACK_WIND (frame, mq_check_n_set_inode_xattr, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, &local->loc, xattr_req); + args = (quota_synctask_t *)opaque; + loc_wipe(&args->loc); - dict_unref (xattr_req); + if (args->stub) + call_resume(args->stub); - return 0; + if (!args->is_static) + GF_FREE(args); -err: - mq_xattr_creation_release_lock (frame, NULL, this, 0, 0); - - if (xattr_req) - dict_unref (xattr_req); - return 0; - -lock_err: - mq_inode_creation_done (frame, NULL, this, 0, 0); - return 0; + return 0; } - -int32_t -mq_set_inode_xattr (xlator_t *this, loc_t *loc) +int +mq_synctask1(xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, loc_t *loc, + quota_meta_t *contri, uint32_t nlink, call_stub_t *stub) { - struct gf_flock lock = {0, }; - quota_local_t *local = NULL; - int32_t ret = 0; - call_frame_t *frame = NULL; - - frame = 
create_frame (this, this->ctx->pool); - if (!frame) { - ret = -1; - goto err; - } - - local = mq_local_new (); - if (local == NULL) { - goto err; - } - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret < 0) { - goto err; + int32_t ret = -1; + quota_synctask_t *args = NULL; + quota_synctask_t static_args = { + 0, + }; + + if (spawn) { + QUOTA_ALLOC_OR_GOTO(args, quota_synctask_t, ret, out); + args->is_static = _gf_false; + } else { + args = &static_args; + args->is_static = _gf_true; + } + + args->this = this; + args->stub = stub; + loc_copy(&args->loc, loc); + args->ia_nlink = nlink; + + if (contri) { + args->contri = *contri; + } else { + args->contri.size = -1; + args->contri.file_count = -1; + args->contri.dir_count = -1; + } + + if (spawn) { + ret = synctask_new1(this->ctx->env, 1024 * 16, task, + mq_synctask_cleanup, NULL, args); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to spawn " + "new synctask"); + mq_synctask_cleanup(ret, NULL, args); } + } else { + ret = task(args); + mq_synctask_cleanup(ret, NULL, args); + } - frame->local = local; - - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - - STACK_WIND (frame, - mq_get_xattr, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &local->loc, F_SETLKW, &lock); - - return 0; - -err: - QUOTA_STACK_DESTROY (frame, this); - - return 0; +out: + return ret; } +int +mq_synctask(xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, loc_t *loc) +{ + return mq_synctask1(this, task, spawn, loc, NULL, -1, NULL); +} int32_t -mq_get_parent_inode_local (xlator_t *this, quota_local_t *local) +mq_prevalidate_txn(xlator_t *this, loc_t *origin_loc, loc_t *loc, + quota_inode_ctx_t **ctx, struct iatt *buf) { - int32_t ret = -1; - quota_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + quota_inode_ctx_t *ctxtmp = NULL; + + if (buf) { + if (buf->ia_type == IA_IFREG && IS_DHT_LINKFILE_MODE(buf)) + goto out; + + if (buf->ia_type != 
IA_IFREG && buf->ia_type != IA_IFLNK && + buf->ia_type != IA_IFDIR) + goto out; + } + + if (origin_loc == NULL || origin_loc->inode == NULL || + gf_uuid_is_null(origin_loc->inode->gfid)) + goto out; + + loc_copy(loc, origin_loc); + + if (gf_uuid_is_null(loc->gfid)) + gf_uuid_copy(loc->gfid, loc->inode->gfid); + + if (!loc_is_root(loc) && loc->parent == NULL) + loc->parent = inode_parent(loc->inode, 0, NULL); + + ret = mq_inode_ctx_get(loc->inode, this, &ctxtmp); + if (ret < 0) { + gf_log_callingfn(this->name, GF_LOG_WARNING, + "inode ctx for " + "is NULL for %s", + loc->path); + goto out; + } + if (ctx) + *ctx = ctxtmp; + + ret = 0; +out: + return ret; +} - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", local, out); +int +mq_create_xattrs_task(void *opaque) +{ + int32_t ret = -1; + gf_boolean_t locked = _gf_false; + gf_boolean_t contri_set = _gf_false; + gf_boolean_t size_set = _gf_false; + gf_boolean_t need_txn = _gf_false; + quota_synctask_t *args = NULL; + quota_inode_ctx_t *ctx = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + gf_boolean_t status = _gf_false; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + this = args->this; + THIS = this; + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to" + "get inode ctx, aborting quota create txn"); + goto out; + } + + if (loc->inode->ia_type == IA_IFDIR) { + /* lock not required for files */ + ret = mq_lock(this, loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + } - local->contri = NULL; + ret = mq_are_xattrs_set(this, loc, &contri_set, &size_set); + if (ret < 0 || (contri_set && size_set)) + goto out; - loc_wipe (&local->loc); + mq_set_ctx_create_status(ctx, _gf_false); + status = _gf_true; - ret = mq_loc_copy (&local->loc, &local->parent_loc); - if (ret < 0) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "loc copy failed"); - goto out; - } + if (loc->inode->ia_type 
== IA_IFDIR && size_set == _gf_false) { + ret = mq_create_size_xattrs(this, ctx, loc); + if (ret < 0) + goto out; + } - loc_wipe (&local->parent_loc); + need_txn = _gf_true; +out: + if (locked) + ret = mq_lock(this, loc, F_UNLCK); - ret = mq_inode_loc_fill (NULL, local->loc.parent, - &local->parent_loc); - if (ret < 0) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "failed to build parent loc of %s", - local->loc.path); - goto out; - } + if (status == _gf_false) + mq_set_ctx_create_status(ctx, _gf_false); - ret = mq_inode_ctx_get (local->loc.inode, this, &ctx); - if (ret < 0) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "inode ctx get failed"); - goto out; - } + if (need_txn) + ret = mq_initiate_quota_blocking_txn(this, loc, NULL); - local->ctx = ctx; + return ret; +} - if (list_empty (&ctx->contribution_head)) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "contribution node list is empty which " - "is an error"); - ret = -1; - goto out; +static int +_mq_create_xattrs_txn(xlator_t *this, loc_t *origin_loc, struct iatt *buf, + gf_boolean_t spawn) +{ + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + gf_boolean_t status = _gf_true; + loc_t loc = { + 0, + }; + inode_contribution_t *contribution = NULL; + + ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf); + if (ret < 0) + goto out; + + ret = mq_test_and_set_ctx_create_status(ctx, &status); + if (ret < 0 || status == _gf_true) + goto out; + + if (!loc_is_root(&loc) && loc.parent) { + contribution = mq_add_new_contribution_node(this, ctx, &loc); + if (contribution == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "cannot add a new contribution node " + "(%s)", + uuid_utoa(loc.gfid)); + ret = -1; + goto out; + } else { + GF_REF_PUT(contribution); } + } - local->contri = (inode_contribution_t *) ctx->contribution_head.next; - - ret = 0; + ret = mq_synctask(this, mq_create_xattrs_task, spawn, &loc); out: - return ret; -} + if (ret < 0 && status == _gf_false) + mq_set_ctx_create_status(ctx, 
_gf_false); - -int32_t -mq_xattr_updation_done (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - dict_t *dict) -{ - QUOTA_STACK_DESTROY (frame, this); - return 0; + loc_wipe(&loc); + return ret; } - -int32_t -mq_inodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) +int +mq_create_xattrs_txn(xlator_t *this, loc_t *loc, struct iatt *buf) { - int32_t ret = 0; - gf_boolean_t status = _gf_false; - quota_local_t *local = NULL; - - local = frame->local; + int32_t ret = -1; - if (op_ret == -1 || local->err) { - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "unlocking failed on path (%s)(%s)", - local->parent_loc.path, strerror (op_errno)); - } - mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - return 0; - } - - gf_log (this->name, GF_LOG_DEBUG, - "inodelk released on %s", local->parent_loc.path); - - if ((strcmp (local->parent_loc.path, "/") == 0) - || (local->delta == 0)) { - mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL); - } else { - ret = mq_get_parent_inode_local (this, local); - if (ret < 0) { - mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL); - goto out; - } - status = _gf_true; - - ret = mq_test_and_set_ctx_updation_status (local->ctx, &status); - if (ret == 0 && status == _gf_false) { - mq_get_lock_on_parent (frame, this); - } else { - mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL); - } - } + ret = _mq_create_xattrs_txn(this, loc, buf, _gf_true); out: - return 0; + return ret; } - -//now release lock on the parent inode int32_t -mq_release_parent_lock (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno) +mq_reduce_parent_size_task(void *opaque) { - int32_t ret = 0; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - struct gf_flock lock = {0, }; - - local = frame->local; - - if 
(local->err != 0) { - gf_log_callingfn (this->name, - (local->err == ENOENT) ? GF_LOG_DEBUG - : GF_LOG_WARNING, - "An operation during quota updation " - "of path (%s) failed (%s)", local->loc.path, - strerror (local->err)); + int32_t ret = -1; + int32_t prev_dirty = 0; + quota_inode_ctx_t *ctx = NULL; + quota_inode_ctx_t *parent_ctx = NULL; + inode_contribution_t *contribution = NULL; + quota_meta_t delta = { + 0, + }; + quota_meta_t contri = { + 0, + }; + loc_t parent_loc = { + 0, + }; + gf_boolean_t locked = _gf_false; + gf_boolean_t dirty = _gf_false; + quota_synctask_t *args = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + gf_boolean_t remove_xattr = _gf_true; + uint32_t nlink = 0; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + contri = args->contri; + nlink = args->ia_nlink; + this = args->this; + THIS = this; + + ret = mq_inode_loc_fill(NULL, loc->parent, &parent_loc); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "parent_loc fill failed for " + "child inode %s: ", + uuid_utoa(loc->inode->gfid)); + goto out; + } + + ret = mq_lock(this, &parent_loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + + if (contri.size >= 0) { + /* contri parameter is supplied only for rename operation. 
+ * remove xattr is alreday performed, we need to skip + * removexattr for rename operation + */ + remove_xattr = _gf_false; + delta.size = contri.size; + delta.file_count = contri.file_count; + delta.dir_count = contri.dir_count; + } else { + remove_xattr = _gf_true; + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log_callingfn(this->name, GF_LOG_WARNING, + "ctx for" + " the node %s is NULL", + loc->path); + goto out; } - ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); - if (ret < 0) - goto wind; - - LOCK (&ctx->lock); - { - ctx->dirty = 0; + contribution = mq_get_contribution_node(loc->parent, ctx); + if (contribution == NULL) { + ret = -1; + gf_log(this->name, GF_LOG_DEBUG, + "contribution for the node %s is NULL", loc->path); + goto out; } - UNLOCK (&ctx->lock); - if (local->parent_loc.inode == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "Invalid parent inode."); - goto err; + LOCK(&contribution->lock); + { + delta.size = contribution->contribution; + delta.file_count = contribution->file_count; + delta.dir_count = contribution->dir_count; } + UNLOCK(&contribution->lock); + } -wind: - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - STACK_WIND (frame, - mq_inodelk_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &local->parent_loc, - F_SETLKW, &lock); - - return 0; -err: - mq_xattr_updation_done (frame, NULL, this, - 0, 0 , NULL); - return 0; -} - - -int32_t -mq_mark_undirty (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - dict_t *dict) -{ - int32_t ret = -1; - int64_t *size = NULL; - dict_t *newdict = NULL; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - - local = frame->local; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "%s occurred while" - " updating the size of %s", strerror (op_errno), - local->parent_loc.path); + ret = mq_get_set_dirty(this, 
&parent_loc, 1, &prev_dirty); + if (ret < 0) + goto out; + dirty = _gf_true; - goto err; - } + mq_sub_meta(&delta, NULL); - //update the size of the parent inode - if (dict != NULL) { - ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); - if (ret < 0) { - op_errno = EINVAL; - goto err; - } - - ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); - if (ret < 0) { - op_errno = EINVAL; - goto err; - } - - LOCK (&ctx->lock); - { - if (size) - ctx->size = ntoh64 (*size); - gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64, - local->parent_loc.path, ctx->size); - } - UNLOCK (&ctx->lock); - } + if (remove_xattr) { + ret = mq_remove_contri(this, loc, ctx, contribution, &delta, nlink); + if (ret < 0) + goto out; + } - newdict = dict_new (); - if (!newdict) { - op_errno = ENOMEM; - goto err; - } + if (quota_meta_is_null(&delta)) + goto out; - ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0); + ret = mq_update_size(this, &parent_loc, &delta); + if (ret < 0) + goto out; - if (ret == -1) { - op_errno = -ret; - goto err; +out: + if (dirty) { + if (ret < 0 || prev_dirty) { + /* On failure clear dirty status flag. + * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory. 
+ * Do the same if dir was dirty before + * the txn + */ + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); + } else { + ret = mq_mark_dirty(this, &parent_loc, 0); } + } - STACK_WIND (frame, mq_release_parent_lock, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - &local->parent_loc, newdict, 0); + if (locked) + ret = mq_lock(this, &parent_loc, F_UNLCK); - ret = 0; -err: - if (op_ret == -1 || ret == -1) { - local->err = op_errno; + if (ret >= 0) + ret = mq_initiate_quota_blocking_txn(this, &parent_loc, NULL); - mq_release_parent_lock (frame, NULL, this, 0, 0); - } + loc_wipe(&parent_loc); - if (newdict) - dict_unref (newdict); + if (contribution) + GF_REF_PUT(contribution); - return 0; + return ret; } - int32_t -mq_update_parent_size (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - dict_t *dict) +mq_reduce_parent_size_txn(xlator_t *this, loc_t *origin_loc, + quota_meta_t *contri, uint32_t nlink, + call_stub_t *stub) { - int64_t *size = NULL; - int32_t ret = -1; - dict_t *newdict = NULL; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - - local = frame->local; - - if (op_ret == -1) { - gf_log (this->name, ((op_errno == ENOENT) ? 
GF_LOG_DEBUG : - GF_LOG_WARNING), - "xattrop call failed: %s", strerror (op_errno)); - - goto err; - } - - LOCK (&local->contri->lock); - { - local->contri->contribution += local->delta; - } - UNLOCK (&local->contri->lock); - - gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64, - local->loc.path, local->ctx->size, - local->contri->contribution); - - if (dict == NULL) { - op_errno = EINVAL; - goto err; - } - - ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); - if (ret < 0) { - op_errno = EINVAL; - goto err; - } + int32_t ret = -1; + loc_t loc = { + 0, + }; + gf_boolean_t resume_stub = _gf_true; - newdict = dict_new (); - if (!newdict) { - op_errno = ENOMEM; - ret = -1; - goto err; - } + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", origin_loc, out); - QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err); + ret = mq_prevalidate_txn(this, origin_loc, &loc, NULL, NULL); + if (ret < 0) + goto out; - *size = hton64 (local->delta); + if (loc_is_root(&loc)) { + ret = 0; + goto out; + } - ret = dict_set_bin (newdict, QUOTA_SIZE_KEY, size, 8); - if (ret < 0) { - op_errno = -ret; - goto err; - } + resume_stub = _gf_false; + ret = mq_synctask1(this, mq_reduce_parent_size_task, _gf_true, &loc, contri, + nlink, stub); +out: + loc_wipe(&loc); - STACK_WIND (frame, - mq_mark_undirty, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, - &local->parent_loc, - GF_XATTROP_ADD_ARRAY64, - newdict); - ret = 0; -err: - if (op_ret == -1 || ret < 0) { - local->err = op_errno; - mq_release_parent_lock (frame, NULL, this, 0, 0); - } + if (resume_stub && stub) + call_resume(stub); - if (newdict) - dict_unref (newdict); + if (ret) + gf_log_callingfn(this ? 
this->name : "Marker", GF_LOG_ERROR, + "mq_reduce_parent_size_txn failed"); - return 0; + return ret; } -int32_t -mq_update_inode_contribution (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, - struct iatt *postparent) +int +mq_initiate_quota_task(void *opaque) { - int32_t ret = -1; - int64_t *size = NULL, size_int = 0, contri_int = 0; - int64_t *contri = NULL; - int64_t *delta = NULL; - char contri_key [512] = {0, }; - dict_t *newdict = NULL; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contribution = NULL; - - local = frame->local; - - if (op_ret == -1) { - gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG : - GF_LOG_WARNING), - "failed to get size and contribution of path (%s)(%s)", - local->loc.path, strerror (op_errno)); - goto err; - } - - ctx = local->ctx; - contribution = local->contri; - - //prepare to update size & contribution of the inode - GET_CONTRI_KEY (contri_key, contribution->gfid, ret); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } - - LOCK (&ctx->lock); - { - if (local->loc.inode->ia_type == IA_IFDIR ) { - ret = dict_get_bin (dict, QUOTA_SIZE_KEY, - (void **) &size); - if (ret < 0) { - op_errno = EINVAL; - goto unlock; - } - - ctx->size = ntoh64 (*size); - } else - ctx->size = buf->ia_blocks * 512; - - size_int = ctx->size; - } -unlock: - UNLOCK (&ctx->lock); - + int32_t ret = -1; + int32_t prev_dirty = 0; + loc_t child_loc = { + 0, + }; + loc_t parent_loc = { + 0, + }; + gf_boolean_t locked = _gf_false; + gf_boolean_t dirty = _gf_false; + gf_boolean_t status = _gf_false; + quota_meta_t delta = { + 0, + }; + quota_synctask_t *args = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + inode_contribution_t *contri = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_inode_ctx_t *parent_ctx = NULL; + inode_t *tmp_parent = NULL; + + GF_VALIDATE_OR_GOTO("marker", opaque, out); + + args = 
(quota_synctask_t *)opaque; + loc = &args->loc; + this = args->this; + + GF_VALIDATE_OR_GOTO("marker", this, out); + THIS = this; + + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = mq_loc_copy(&child_loc, loc); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "loc copy failed"); + goto out; + } + + while (!__is_root_gfid(child_loc.gfid)) { + ret = mq_inode_ctx_get(child_loc.inode, this, &ctx); if (ret < 0) { - goto err; + gf_log(this->name, GF_LOG_WARNING, + "inode ctx get failed for %s, " + "aborting update txn", + child_loc.path); + goto out; } - ret = dict_get_bin (dict, contri_key, (void **) &contri); - - LOCK (&contribution->lock); - { - if (ret < 0) - contribution->contribution = 0; - else - contribution->contribution = ntoh64 (*contri); - - contri_int = contribution->contribution; - } - UNLOCK (&contribution->lock); - - gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64, - local->loc.path, size_int, contri_int); - - local->delta = size_int - contri_int; - - if (local->delta == 0) { - mq_mark_undirty (frame, NULL, this, 0, 0, NULL); - return 0; + /* To improve performance, abort current transaction + * if one is already in progress for same inode + */ + if (status == _gf_true) { + /* status will already set before txn start, + * so it should not be set in first + * loop iteration + */ + ret = mq_test_and_set_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) + goto out; } - newdict = dict_new (); - if (newdict == NULL) { - op_errno = ENOMEM; + if (child_loc.parent == NULL) { + ret = mq_build_ancestry(this, &child_loc); + if (ret < 0 || child_loc.parent == NULL) { + /* If application performs parallel remove + * operations on same set of files/directories + * then we may get ENOENT/ESTALE + */ + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG + : GF_LOG_ERROR, + "build ancestry failed for inode %s", + uuid_utoa(child_loc.inode->gfid)); ret = -1; - goto err; + goto out; + } } - QUOTA_ALLOC_OR_GOTO (delta, int64_t, ret, err); - - *delta = hton64 (local->delta); - - ret = dict_set_bin (newdict, contri_key, delta, 8); + ret = mq_inode_loc_fill(NULL, child_loc.parent, &parent_loc); if (ret < 0) { - op_errno = -ret; - ret = -1; - goto err; + gf_log(this->name, GF_LOG_ERROR, + "parent_loc fill " + "failed for child inode %s: ", + uuid_utoa(child_loc.inode->gfid)); + goto out; } - STACK_WIND (frame, - mq_update_parent_size, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, - &local->loc, - GF_XATTROP_ADD_ARRAY64, - newdict); - ret = 0; + ret = mq_lock(this, &parent_loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; -err: - if (op_ret == -1 || ret < 0) { - local->err = op_errno; + mq_set_ctx_updation_status(ctx, _gf_false); + status = _gf_true; - mq_release_parent_lock (frame, NULL, this, 0, 0); + /* Contribution node can be NULL in below scenarios and + create if needed: + + Scenario 1) + In this case create a new contribution node + Suppose hard link for a file f1 present in a directory d1 is + created in the directory d2 (as f2). Now, since d2's + contribution is not there in f1's inode ctx, d2's + contribution xattr won't be created and will create problems + for quota operations. + + Don't create contribution if parent has been changed after + taking a lock, this can happen when rename is performed + and writes is still in-progress for the same file + + Scenario 2) + When a rename operation is performed, contribution node + for olp path will be removed. + + Create contribution node only if oldparent is same as + newparent. 
+ Consider below example + 1) rename FOP invoked on file 'x' + 2) write is still in progress for file 'x' + 3) rename takes a lock on old-parent + 4) write-update txn blocked on old-parent to acquire lock + 5) in rename_cbk, contri xattrs are removed and contribution + is deleted and lock is released + 6) now write-update txn gets the lock and updates the + wrong parent as it was holding lock on old parent + so validate parent once the lock is acquired + + For more information on this problem, please see + doc for marker_rename in file marker.c + */ + contri = mq_get_contribution_node(child_loc.parent, ctx); + if (contri == NULL) { + tmp_parent = inode_parent(child_loc.inode, 0, NULL); + if (tmp_parent == NULL) { + /* This can happen if application performs + * parallel remove operations on same set + * of files/directories + */ + gf_log(this->name, GF_LOG_WARNING, + "parent is " + "NULL for inode %s", + uuid_utoa(child_loc.inode->gfid)); + ret = -1; + goto out; + } + if (gf_uuid_compare(tmp_parent->gfid, parent_loc.gfid)) { + /* abort txn if parent has changed */ + ret = 0; + goto out; + } + + inode_unref(tmp_parent); + tmp_parent = NULL; + + contri = mq_add_new_contribution_node(this, ctx, &child_loc); + if (contri == NULL) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to " + "create contribution node for %s, " + "abort update txn", + child_loc.path); + ret = -1; + goto out; + } } - if (newdict) - dict_unref (newdict); - - return 0; -} - -int32_t -mq_fetch_child_size_and_contri (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno) -{ - int32_t ret = -1; - char contri_key [512] = {0, }; - dict_t *newdict = NULL; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - - local = frame->local; - - if (op_ret == -1) { - gf_log (this->name, (op_errno == ENOENT) ? 
GF_LOG_DEBUG - : GF_LOG_WARNING, - "couldnt mark inode corresponding to path (%s) dirty " - "(%s)", local->parent_loc.path, strerror (op_errno)); - goto err; - } + ret = mq_get_delta(this, &child_loc, &delta, ctx, contri); + if (ret < 0) + goto out; - VALIDATE_OR_GOTO (local->ctx, err); - VALIDATE_OR_GOTO (local->contri, err); + if (quota_meta_is_null(&delta)) + goto out; - gf_log (this->name, GF_LOG_DEBUG, "%s marked dirty", local->parent_loc.path); + ret = mq_get_set_dirty(this, &parent_loc, 1, &prev_dirty); + if (ret < 0) + goto out; + dirty = _gf_true; - //update parent ctx - ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); - if (ret == -1) { - op_errno = EINVAL; - goto err; - } + ret = mq_update_contri(this, &child_loc, contri, &delta); + if (ret < 0) + goto out; - LOCK (&ctx->lock); - { - ctx->dirty = 1; + ret = mq_update_size(this, &parent_loc, &delta); + if (ret < 0) { + gf_log(this->name, GF_LOG_DEBUG, + "rollback " + "contri updation"); + mq_sub_meta(&delta, NULL); + mq_update_contri(this, &child_loc, contri, &delta); + goto out; } - UNLOCK (&ctx->lock); - newdict = dict_new (); - if (newdict == NULL) { - op_errno = ENOMEM; - goto err; + if (prev_dirty == 0) { + ret = mq_mark_dirty(this, &parent_loc, 0); + } else { + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); } + dirty = _gf_false; + prev_dirty = 0; - if (local->loc.inode->ia_type == IA_IFDIR) { - ret = dict_set_int64 (newdict, QUOTA_SIZE_KEY, 0); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "dict_set failed."); - goto err; - } - } + ret = mq_lock(this, &parent_loc, F_UNLCK); + locked = _gf_false; - GET_CONTRI_KEY (contri_key, local->contri->gfid, ret); - if (ret < 0) { - op_errno = ENOMEM; - goto err; - } + if (__is_root_gfid(parent_loc.gfid)) + break; - ret = dict_set_int64 (newdict, contri_key, 0); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "dict_set failed."); - goto err; - } + /* 
Repeate above steps upwards till the root */ + loc_wipe(&child_loc); + ret = mq_loc_copy(&child_loc, &parent_loc); + if (ret < 0) + goto out; - mq_set_ctx_updation_status (local->ctx, _gf_false); + loc_wipe(&parent_loc); + GF_REF_PUT(contri); + contri = NULL; + } - STACK_WIND (frame, mq_update_inode_contribution, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, &local->loc, newdict); +out: + if ((dirty) && (ret < 0)) { + /* On failure clear dirty status flag. + * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory. + * Do the same if the dir was dirty before + * txn + */ + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); + } - ret = 0; + if (locked) + ret = mq_lock(this, &parent_loc, F_UNLCK); -err: - if ((op_ret == -1) || (ret < 0)) { - local->err = op_errno; + if (ctx && status == _gf_false) + mq_set_ctx_updation_status(ctx, _gf_false); - mq_set_ctx_updation_status (local->ctx, _gf_false); + loc_wipe(&child_loc); + loc_wipe(&parent_loc); - mq_release_parent_lock (frame, NULL, this, 0, 0); - } + if (tmp_parent) + inode_unref(tmp_parent); - if (newdict) - dict_unref (newdict); + if (contri) + GF_REF_PUT(contri); - return 0; + return 0; } -int32_t -mq_markdirty (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) +int +_mq_initiate_quota_txn(xlator_t *this, loc_t *origin_loc, struct iatt *buf, + gf_boolean_t spawn) { - int32_t ret = -1; - dict_t *dict = NULL; - quota_local_t *local = NULL; - - local = frame->local; - - if (op_ret == -1){ - gf_log (this->name, (op_errno == ENOENT) ? 
GF_LOG_DEBUG - : GF_LOG_WARNING, "acquiring locks failed on %s (%s)", - local->parent_loc.path, strerror (op_errno)); - - local->err = op_errno; - - mq_set_ctx_updation_status (local->ctx, _gf_false); - - mq_inodelk_cbk (frame, NULL, this, 0, 0); - - return 0; - } - - gf_log (this->name, GF_LOG_TRACE, - "inodelk succeeded on %s", local->parent_loc.path); - - dict = dict_new (); - if (!dict) { - ret = -1; - goto err; - } - - ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 1); - if (ret == -1) - goto err; - - STACK_WIND (frame, mq_fetch_child_size_and_contri, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - &local->parent_loc, dict, 0); - + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + gf_boolean_t status = _gf_true; + loc_t loc = { + 0, + }; + + ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf); + if (ret < 0) + goto out; + + if (loc_is_root(&loc)) { ret = 0; -err: - if (ret == -1) { - local->err = 1; + goto out; + } - mq_set_ctx_updation_status (local->ctx, _gf_false); + ret = mq_test_and_set_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) + goto out; - mq_release_parent_lock (frame, NULL, this, 0, 0); - } + ret = mq_synctask(this, mq_initiate_quota_task, spawn, &loc); - if (dict) - dict_unref (dict); +out: + if (ret < 0 && status == _gf_false) + mq_set_ctx_updation_status(ctx, _gf_false); - return 0; + loc_wipe(&loc); + return ret; } - -int32_t -mq_get_lock_on_parent (call_frame_t *frame, xlator_t *this) +int +mq_initiate_quota_txn(xlator_t *this, loc_t *loc, struct iatt *buf) { - struct gf_flock lock = {0, }; - quota_local_t *local = NULL; - - GF_VALIDATE_OR_GOTO ("marker", frame, fr_destroy); - - local = frame->local; - gf_log (this->name, GF_LOG_DEBUG, "taking lock on %s", - local->parent_loc.path); - - if (local->parent_loc.inode == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "parent inode is not valid, aborting " - "transaction."); - goto fr_destroy; - } - - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = 
F_WRLCK; - lock.l_whence = SEEK_SET; - - STACK_WIND (frame, - mq_markdirty, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &local->parent_loc, F_SETLKW, &lock); + int32_t ret = -1; - return 0; + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); -fr_destroy: - QUOTA_STACK_DESTROY (frame, this); - - return -1; + ret = _mq_initiate_quota_txn(this, loc, buf, _gf_true); +out: + return ret; } - int -mq_start_quota_txn (xlator_t *this, loc_t *loc, - quota_inode_ctx_t *ctx, - inode_contribution_t *contri) +mq_initiate_quota_blocking_txn(xlator_t *this, loc_t *loc, struct iatt *buf) { - int32_t ret = -1; - call_frame_t *frame = NULL; - quota_local_t *local = NULL; - - frame = create_frame (this, this->ctx->pool); - if (frame == NULL) - goto err; - - mq_assign_lk_owner (this, frame); + int32_t ret = -1; - local = mq_local_new (); - if (local == NULL) - goto fr_destroy; + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - frame->local = local; - - ret = mq_loc_copy (&local->loc, loc); - if (ret < 0) - goto fr_destroy; - - ret = mq_inode_loc_fill (NULL, local->loc.parent, - &local->parent_loc); - if (ret < 0) - goto fr_destroy; - - local->ctx = ctx; - local->contri = contri; - - ret = mq_get_lock_on_parent (frame, this); - if (ret == -1) - goto err; - - return 0; - -fr_destroy: - QUOTA_STACK_DESTROY (frame, this); -err: - mq_set_ctx_updation_status (ctx, _gf_false); - - return -1; + ret = _mq_initiate_quota_txn(this, loc, buf, _gf_false); +out: + return ret; } - int -mq_initiate_quota_txn (xlator_t *this, loc_t *loc) +mq_update_dirty_inode_task(void *opaque) { - int32_t ret = -1; - gf_boolean_t status = _gf_false; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contribution = NULL; - - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", loc, out); - 
GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "inode ctx get failed, aborting quota txn"); - ret = -1; - goto out; - } - - contribution = mq_get_contribution_node (loc->parent, ctx); - if (contribution == NULL) - goto out; - - /* To improve performance, donot start another transaction - * if one is already in progress for same inode - */ - status = _gf_true; - - ret = mq_test_and_set_ctx_updation_status (ctx, &status); - if (ret < 0) - goto out; - - if (status == _gf_false) { - mq_start_quota_txn (this, loc, ctx, contribution); - } - + int32_t ret = -1; + fd_t *fd = NULL; + off_t offset = 0; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + gf_boolean_t locked = _gf_false; + gf_boolean_t updated = _gf_false; + int32_t dirty = 0; + quota_meta_t contri = { + 0, + }; + quota_meta_t size = { + 0, + }; + quota_meta_t contri_sum = { + 0, + }; + quota_meta_t delta = { + 0, + }; + quota_synctask_t *args = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + quota_inode_ctx_t *ctx = NULL; + dict_t *xdata = NULL; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + this = args->this; + THIS = this; + INIT_LIST_HEAD(&entries.list); + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) + goto out; + + GET_CONTRI_KEY(this, contri_key, loc->gfid, keylen); + if (keylen < 0) { + ret = keylen; + goto out; + } + + xdata = dict_new(); + if (xdata == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = dict_set_int64(xdata, contri_key, 0); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed"); + goto out; + } + + ret = mq_lock(this, loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + + ret = mq_get_dirty(this, loc, &dirty); + if (ret < 0 || dirty == 0) { ret = 0; -out: - return ret; 
-} - + goto out; + } + + fd = fd_create(loc->inode, 0); + if (!fd) { + gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); + ret = -1; + goto out; + } + + ret = syncop_opendir(this, loc, fd, NULL, NULL); + if (ret < 0) { + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "opendir failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + fd_bind(fd); + while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, xdata, + NULL)) != 0) { + if (ret < 0) { + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG + : GF_LOG_ERROR, + "readdirp failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } -/* int32_t */ -/* validate_inode_size_contribution (xlator_t *this, loc_t *loc, int64_t size, */ -/* int64_t contribution) */ -/* { */ -/* if (size != contribution) { */ -/* mq_initiate_quota_txn (this, loc); */ -/* } */ + if (list_empty(&entries.list)) + break; -/* return 0; */ -/* } */ + list_for_each_entry(entry, &entries.list, list) + { + offset = entry->d_off; + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; -int32_t -mq_inspect_directory_xattr (xlator_t *this, - loc_t *loc, - dict_t *dict, - struct iatt buf) -{ - int32_t ret = 0; - int8_t dirty = -1; - int64_t *size = NULL, size_int = 0; - int64_t *contri = NULL, contri_int = 0; - char contri_key [512] = {0, }; - gf_boolean_t not_root = _gf_false; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contribution = NULL; - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) { - ctx = mq_inode_ctx_new (loc->inode, this); - if (ctx == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "mq_inode_ctx_new failed"); - ret = -1; - goto err; - } - } + memset(&contri, 0, sizeof(contri)); + quota_dict_get_meta(entry->dict, contri_key, keylen, &contri); + if (quota_meta_is_null(&contri)) + continue; - if (strcmp (loc->path, "/") != 0) { - contribution = mq_add_new_contribution_node (this, 
ctx, loc); - if (contribution == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "cannot add a new contribution node"); - ret = -1; - goto err; - } + mq_add_meta(&contri_sum, &contri); } - ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size); - if (ret < 0) - goto out; - - ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty); - if (ret < 0) - goto out; + gf_dirent_free(&entries); + } + /* Inculde for self */ + contri_sum.dir_count++; - if (strcmp (loc->path, "/") != 0) { - not_root = _gf_true; + ret = _mq_get_metadata(this, loc, NULL, &size, 0); + if (ret < 0) + goto out; - GET_CONTRI_KEY (contri_key, contribution->gfid, ret); - if (ret < 0) - goto out; + mq_compute_delta(&delta, &contri_sum, &size); - ret = dict_get_bin (dict, contri_key, (void **) &contri); - if (ret < 0) - goto out; + if (quota_meta_is_null(&delta)) + goto out; - LOCK (&contribution->lock); - { - contribution->contribution = ntoh64 (*contri); - contri_int = contribution->contribution; - } - UNLOCK (&contribution->lock); - } + gf_log(this->name, GF_LOG_INFO, + "calculated size = %" PRId64 ", original size = %" PRIu64 + ", diff = %" PRIu64 ", path = %s ", + contri_sum.size, size.size, delta.size, loc->path); - LOCK (&ctx->lock); - { - ctx->size = ntoh64 (*size); - ctx->dirty = dirty; - size_int = ctx->size; - } - UNLOCK (&ctx->lock); + gf_log(this->name, GF_LOG_INFO, + "calculated f_count = %" PRId64 ", original f_count = %" PRIu64 + ", diff = %" PRIu64 ", path = %s ", + contri_sum.file_count, size.file_count, delta.file_count, loc->path); - gf_log (this->name, GF_LOG_DEBUG, "size=%"PRId64 - " contri=%"PRId64, size_int, contri_int); + gf_log(this->name, GF_LOG_INFO, + "calculated d_count = %" PRId64 ", original d_count = %" PRIu64 + ", diff = %" PRIu64 ", path = %s ", + contri_sum.dir_count, size.dir_count, delta.dir_count, loc->path); - if (dirty) { - ret = mq_update_dirty_inode (this, loc, ctx, contribution); - } + ret = mq_update_size(this, loc, &delta); + if (ret < 0) + goto out; - if 
((!dirty || ret == 0) && (not_root == _gf_true) && - (size_int != contri_int)) { - mq_initiate_quota_txn (this, loc); - } + updated = _gf_true; - ret = 0; out: - if (ret) - mq_set_inode_xattr (this, loc); -err: - return ret; -} + gf_dirent_free(&entries); -int32_t -mq_inspect_file_xattr (xlator_t *this, - loc_t *loc, - dict_t *dict, - struct iatt buf) -{ - int32_t ret = -1; - uint64_t contri_int = 0, size = 0; - int64_t *contri_ptr = NULL; - char contri_key [512] = {0, }; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contribution = NULL; - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) { - ctx = mq_inode_ctx_new (loc->inode, this); - if (ctx == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "mq_inode_ctx_new failed"); - ret = -1; - goto out; - } - } + if (fd) + fd_unref(fd); - contribution = mq_add_new_contribution_node (this, ctx, loc); - if (contribution == NULL) - goto out; + if (xdata) + dict_unref(xdata); - LOCK (&ctx->lock); - { - ctx->size = 512 * buf.ia_blocks; - size = ctx->size; - } - UNLOCK (&ctx->lock); - - list_for_each_entry (contribution, &ctx->contribution_head, - contri_list) { - GET_CONTRI_KEY (contri_key, contribution->gfid, ret); - if (ret < 0) - continue; - - ret = dict_get_bin (dict, contri_key, (void **) &contri_int); - if (ret == 0) { - contri_ptr = (int64_t *)(unsigned long)contri_int; - - LOCK (&contribution->lock); - { - contribution->contribution = ntoh64 (*contri_ptr); - contri_int = contribution->contribution; - } - UNLOCK (&contribution->lock); - - gf_log (this->name, GF_LOG_DEBUG, - "size=%"PRId64 " contri=%"PRId64, size, contri_int); - - if (size != contri_int) { - mq_initiate_quota_txn (this, loc); - } - } else - mq_initiate_quota_txn (this, loc); - } + if (ret < 0) { + /* On failure clear dirty status flag. 
+ * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory + */ + if (ctx) + mq_set_ctx_dirty_status(ctx, _gf_false); + } else if (dirty) { + mq_mark_dirty(this, loc, 0); + } -out: - return ret; -} + if (locked) + mq_lock(this, loc, F_UNLCK); -int32_t -mq_xattr_state (xlator_t *this, - loc_t *loc, - dict_t *dict, - struct iatt buf) -{ - if (buf.ia_type == IA_IFREG || - buf.ia_type == IA_IFLNK) { - mq_inspect_file_xattr (this, loc, dict, buf); - } else if (buf.ia_type == IA_IFDIR) - mq_inspect_directory_xattr (this, loc, dict, buf); + if (updated) + mq_initiate_quota_blocking_txn(this, loc, NULL); - return 0; + return ret; } int32_t -mq_req_xattr (xlator_t *this, - loc_t *loc, - dict_t *dict) +mq_update_dirty_inode_txn(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx) { - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", dict, out); + int32_t ret = -1; + gf_boolean_t status = _gf_true; - //if not "/" then request contribution - if (strcmp (loc->path, "/") == 0) - goto set_size; + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - ret = mq_dict_set_contribution (this, dict, loc); - if (ret == -1) - goto out; - -set_size: - ret = dict_set_uint64 (dict, QUOTA_SIZE_KEY, 0); - if (ret < 0) { - ret = -1; - goto out; - } - - ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 0); - if (ret < 0) { - ret = -1; - goto out; - } - - ret = 0; + mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, &status); + if (status == _gf_true) + goto out; + ret = mq_synctask(this, mq_update_dirty_inode_task, _gf_true, loc); out: - return ret; -} - + if (ret < 0 && status == _gf_false) + mq_set_ctx_dirty_status(ctx, _gf_false); -int32_t -mq_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - QUOTA_STACK_DESTROY (frame, this); - - return 0; + return ret; } int32_t 
-_mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +mq_inspect_directory_xattr(xlator_t *this, quota_inode_ctx_t *ctx, + inode_contribution_t *contribution, loc_t *loc, + dict_t *dict) { - int32_t ret = 0; - char contri_key [512] = {0, }; - quota_local_t *local = NULL; + int32_t ret = -1; + int8_t dirty = -1; + quota_meta_t size = { + 0, + }; + quota_meta_t contri = { + 0, + }; + quota_meta_t delta = { + 0, + }; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + gf_boolean_t status = _gf_false; + + ret = dict_get_int8(dict, QUOTA_DIRTY_KEY, &dirty); + if (ret < 0) { + /* dirty is set only on the first file write operation + * so ignore this error + */ + ret = 0; + dirty = 0; + } + + GET_SIZE_KEY(this, size_key, keylen); + if (keylen < 0) { + ret = -1; + goto out; + } + ret = _quota_dict_get_meta(this, dict, size_key, keylen, &size, IA_IFDIR, + _gf_false); + if (ret < 0) + goto create_xattr; + + if (!contribution) + goto create_xattr; + + if (!loc_is_root(loc)) { + GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen); + if (keylen < 0) { + ret = -1; + goto out; + } + ret = _quota_dict_get_meta(this, dict, contri_key, keylen, &contri, + IA_IFDIR, _gf_false); + if (ret < 0) + goto create_xattr; - local = (quota_local_t *) frame->local; + LOCK(&contribution->lock); + { + contribution->contribution = contri.size; + contribution->file_count = contri.file_count; + contribution->dir_count = contri.dir_count; + } + UNLOCK(&contribution->lock); + } + + LOCK(&ctx->lock); + { + ctx->size = size.size; + ctx->file_count = size.file_count; + ctx->dir_count = size.dir_count; + ctx->dirty = dirty; + } + UNLOCK(&ctx->lock); + + ret = mq_get_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) { + /* If the update txn is in progress abort inspection */ + ret = 0; + goto out; + } - if (op_ret == -1 || local->err == -1) { - 
mq_removexattr_cbk (frame, NULL, this, -1, 0); - return 0; - } + mq_compute_delta(&delta, &size, &contri); - frame->local = NULL; + if (dirty) { + ret = mq_update_dirty_inode_txn(this, loc, ctx); + goto out; + } - if (local->hl_count > 1) { - GET_CONTRI_KEY (contri_key, local->contri->gfid, ret); + if (!loc_is_root(loc) && !quota_meta_is_null(&delta)) + mq_initiate_quota_txn(this, loc, NULL); - STACK_WIND (frame, mq_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - &local->loc, contri_key); - ret = 0; - } else { - mq_removexattr_cbk (frame, NULL, this, 0, 0); - } + ret = 0; + goto out; - if (strcmp (local->parent_loc.path, "/") != 0) { - ret = mq_get_parent_inode_local (this, local); - if (ret < 0) - goto out; +create_xattr: + if (ret < 0) + ret = mq_create_xattrs_txn(this, loc, NULL); - mq_start_quota_txn (this, &local->loc, local->ctx, local->contri); - } out: - mq_local_unref (this, local); - - return 0; + return ret; } int32_t -mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) +mq_inspect_file_xattr(xlator_t *this, quota_inode_ctx_t *ctx, + inode_contribution_t *contribution, loc_t *loc, + dict_t *dict, struct iatt *buf) { - int32_t ret = -1; - struct gf_flock lock = {0, }; - quota_inode_ctx_t *ctx = NULL; - quota_local_t *local = NULL; - int64_t contribution = 0; - - local = frame->local; - if (op_ret == -1) - local->err = -1; - - ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx); - - LOCK (&local->contri->lock); + int32_t ret = -1; + quota_meta_t size = { + 0, + }; + quota_meta_t contri = { + 0, + }; + quota_meta_t delta = { + 0, + }; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + gf_boolean_t status = _gf_false; + + if (!buf || !contribution || !ctx) + goto out; + + LOCK(&ctx->lock); + { + ctx->size = 512 * buf->ia_blocks; + ctx->file_count = 1; + ctx->dir_count = 0; + + size.size = ctx->size; + size.file_count = 
ctx->file_count; + size.dir_count = ctx->dir_count; + } + UNLOCK(&ctx->lock); + + GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen); + if (keylen < 0) { + ret = -1; + goto out; + } + + ret = _quota_dict_get_meta(this, dict, contri_key, keylen, &contri, + IA_IFREG, _gf_true); + if (ret < 0) { + ret = mq_create_xattrs_txn(this, loc, NULL); + } else { + LOCK(&contribution->lock); { - contribution = local->contri->contribution; - } - UNLOCK (&local->contri->lock); - - if (contribution == local->size) { - if (ret == 0) { - LOCK (&ctx->lock); - { - ctx->size -= contribution; - } - UNLOCK (&ctx->lock); - - LOCK (&local->contri->lock); - { - local->contri->contribution = 0; - } - UNLOCK (&local->contri->lock); - } - } - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - STACK_WIND (frame, - _mq_inode_remove_done, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &local->parent_loc, - F_SETLKW, &lock); - return 0; -} - -int32_t -mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) -{ - int32_t ret = -1; - int64_t *size = NULL; - dict_t *dict = NULL; - quota_local_t *local = NULL; - - local = frame->local; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "inodelk set failed on %s", local->parent_loc.path); - QUOTA_STACK_DESTROY (frame, this); - return 0; + contribution->contribution = contri.size; + contribution->file_count = contri.file_count; + contribution->dir_count = contri.dir_count; } + UNLOCK(&contribution->lock); - VALIDATE_OR_GOTO (local->contri, err); - - dict = dict_new (); - if (dict == NULL) { - ret = -1; - goto err; + ret = mq_get_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) { + /* If the update txn is in progress abort inspection */ + ret = 0; + goto out; } - QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err); - - *size = hton64 (-local->size); - - ret = dict_set_bin 
(dict, QUOTA_SIZE_KEY, size, 8); - if (ret < 0) - goto err; - - - STACK_WIND (frame, mq_inode_remove_done, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, &local->parent_loc, - GF_XATTROP_ADD_ARRAY64, dict); - dict_unref (dict); - return 0; + mq_compute_delta(&delta, &size, &contri); + if (!quota_meta_is_null(&delta)) + mq_initiate_quota_txn(this, loc, NULL); + } + /* TODO: revist this code when fixing hardlinks */ -err: - local->err = 1; - mq_inode_remove_done (frame, NULL, this, -1, 0, NULL); - if (dict) - dict_unref (dict); - return 0; +out: + return ret; } int32_t -mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri) +mq_xattr_state(xlator_t *this, loc_t *origin_loc, dict_t *dict, + struct iatt *buf) { - int32_t ret = -1; - struct gf_flock lock = {0,}; - call_frame_t *frame = NULL; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contribution = NULL; - - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", loc, out); - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) - goto out; - - contribution = mq_get_contribution_node (loc->parent, ctx); - if (contribution == NULL) - goto out; - - local = mq_local_new (); - if (local == NULL) { - ret = -1; - goto out; - } - - if (contri >= 0) { - local->size = contri; - } else { - LOCK (&contribution->lock); - { - local->size = contribution->contribution; - } - UNLOCK (&contribution->lock); - } - - if (local->size == 0) { - ret = 0; - goto out; - } - - ret = mq_loc_copy (&local->loc, loc); - if (ret < 0) - goto out; - - local->ctx = ctx; - local->contri = contribution; - - ret = mq_inode_loc_fill (NULL, loc->parent, &local->parent_loc); - if (ret < 0) - goto out; - - frame = create_frame (this, this->ctx->pool); - if (!frame) { - ret = -1; - goto out; - } - - mq_assign_lk_owner (this, frame); - - frame->local = local; - - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - - if 
(local->parent_loc.inode == NULL) { - ret = -1; - gf_log (this->name, GF_LOG_WARNING, - "Inode is NULL, so can't stackwind."); - goto out; - } - - STACK_WIND (frame, - mq_reduce_parent_size_xattr, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &local->parent_loc, F_SETLKW, &lock); - local = NULL; - ret = 0; + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + loc_t loc = { + 0, + }; + inode_contribution_t *contribution = NULL; + + ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf); + if (ret < 0 || loc.parent == NULL) + goto out; + + if (!loc_is_root(&loc)) { + contribution = mq_add_new_contribution_node(this, ctx, &loc); + if (contribution == NULL) { + if (!gf_uuid_is_null(loc.inode->gfid)) + gf_log(this->name, GF_LOG_WARNING, + "cannot add a new contribution node " + "(%s)", + uuid_utoa(loc.gfid)); + ret = -1; + goto out; + } + if (buf->ia_type == IA_IFDIR) + mq_inspect_directory_xattr(this, ctx, contribution, &loc, dict); + else + mq_inspect_file_xattr(this, ctx, contribution, &loc, dict, buf); + } else { + mq_inspect_directory_xattr(this, ctx, 0, &loc, dict); + } out: - if (local != NULL) - mq_local_unref (this, local); + loc_wipe(&loc); - return ret; -} + if (contribution) + GF_REF_PUT(contribution); - -int32_t -init_quota_priv (xlator_t *this) -{ - return 0; + return ret; } - int32_t -mq_rename_update_newpath (xlator_t *this, loc_t *loc) +mq_req_xattr(xlator_t *this, loc_t *loc, dict_t *dict, char *contri_key, + char *size_key) { - int32_t ret = -1; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contribution = NULL; + int32_t ret = -1; + char key[QUOTA_KEY_MAX] = { + 0, + }; - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", dict, out); - ret = mq_inode_ctx_get (loc->inode, this, &ctx); + if (!loc_is_root(loc)) 
{ + ret = mq_dict_set_contribution(this, dict, loc, NULL, contri_key); if (ret < 0) - goto out; + goto out; + } - contribution = mq_add_new_contribution_node (this, ctx, loc); - if (contribution == NULL) { - ret = -1; - goto out; + GET_SIZE_KEY(this, key, ret); + if (ret < 0) + goto out; + if (size_key) + if (snprintf(size_key, QUOTA_KEY_MAX, "%s", key) >= QUOTA_KEY_MAX) { + ret = -1; + goto out; } - mq_initiate_quota_txn (this, loc); + ret = dict_set_uint64(dict, key, 0); + if (ret < 0) + goto out; + + ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, 0); + out: - return ret; + if (ret < 0) + gf_log_callingfn(this ? this->name : "Marker", GF_LOG_ERROR, + "dict set failed"); + return ret; } int32_t -mq_forget (xlator_t *this, quota_inode_ctx_t *ctx) +mq_forget(xlator_t *this, quota_inode_ctx_t *ctx) { - inode_contribution_t *contri = NULL; - inode_contribution_t *next = NULL; + inode_contribution_t *contri = NULL; + inode_contribution_t *next = NULL; - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - list_for_each_entry_safe (contri, next, &ctx->contribution_head, - contri_list) { - list_del (&contri->contri_list); - GF_FREE (contri); - } + list_for_each_entry_safe(contri, next, &ctx->contribution_head, contri_list) + { + list_del_init(&contri->contri_list); + GF_REF_PUT(contri); + } - LOCK_DESTROY (&ctx->lock); - GF_FREE (ctx); + LOCK_DESTROY(&ctx->lock); + GF_FREE(ctx); out: - return 0; + return 0; } diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h index 7a90b28b7ed..4bbf6878b22 100644 --- a/xlators/features/marker/src/marker-quota.h +++ b/xlators/features/marker/src/marker-quota.h @@ -1,162 +1,140 @@ -/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. 
- - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - #ifndef _MARKER_QUOTA_H #define _MARKER_QUOTA_H -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "marker.h" -#include "xlator.h" +#include <glusterfs/xlator.h> #include "marker-mem-types.h" +#include <glusterfs/refcount.h> +#include <glusterfs/quota-common-utils.h> +#include <glusterfs/call-stub.h> #define QUOTA_XATTR_PREFIX "trusted.glusterfs" #define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty" #define CONTRIBUTION "contri" -#define CONTRI_KEY_MAX 512 +#define QUOTA_KEY_MAX 512 #define READDIR_BUF 4096 - -#define QUOTA_STACK_DESTROY(_frame, _this) \ - do { \ - quota_local_t *_local = NULL; \ - _local = _frame->local; \ - _frame->local = NULL; \ - STACK_DESTROY (_frame->root); \ - mq_local_unref (_this, _local); \ - } while (0) - - -#define QUOTA_ALLOC(var, type, ret) \ - do { \ - ret = 0; \ - var = GF_CALLOC (sizeof (type), 1, \ - gf_marker_mt_##type); \ - if (!var) { \ - gf_log ("", GF_LOG_ERROR, \ - "out of memory"); \ - ret = -1; \ - } \ - } while 
(0); - -#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label) \ - do { \ - var = GF_CALLOC (sizeof (type), 1, \ - gf_marker_mt_##type); \ - if (!var) { \ - gf_log ("", GF_LOG_ERROR, \ - "out of memory"); \ - ret = -1; \ - goto label; \ - } \ - ret = 0; \ - } while (0); - -#define GET_CONTRI_KEY(var, _gfid, _ret) \ - do { \ - char _gfid_unparsed[40]; \ - uuid_unparse (_gfid, _gfid_unparsed); \ - _ret = snprintf (var, CONTRI_KEY_MAX, QUOTA_XATTR_PREFIX \ - ".%s.%s." CONTRIBUTION, "quota", \ - _gfid_unparsed); \ - } while (0); - -#define QUOTA_SAFE_INCREMENT(lock, var) \ - do { \ - LOCK (lock); \ - var ++; \ - UNLOCK (lock); \ - } while (0) +#define QUOTA_ALLOC(var, type, ret) \ + do { \ + ret = 0; \ + var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \ + if (!var) { \ + ret = -1; \ + } \ + } while (0); + +#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label) \ + do { \ + var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \ + if (!var) { \ + gf_log("", GF_LOG_ERROR, "out of memory"); \ + ret = -1; \ + goto label; \ + } \ + ret = 0; \ + } while (0); + +#define GET_QUOTA_KEY(_this, var, key, _ret) \ + do { \ + marker_conf_t *_priv = _this->private; \ + if (_priv->version > 0) \ + _ret = snprintf(var, QUOTA_KEY_MAX, "%s.%d", key, _priv->version); \ + else \ + _ret = snprintf(var, QUOTA_KEY_MAX, "%s", key); \ + } while (0) + +#define GET_CONTRI_KEY(_this, var, _gfid, _ret) \ + do { \ + char _tmp_var[QUOTA_KEY_MAX] = { \ + 0, \ + }; \ + if (_gfid != NULL) { \ + char _gfid_unparsed[40]; \ + gf_uuid_unparse(_gfid, _gfid_unparsed); \ + _ret = snprintf(_tmp_var, QUOTA_KEY_MAX, \ + QUOTA_XATTR_PREFIX ".%s.%s." CONTRIBUTION, \ + "quota", _gfid_unparsed); \ + } else { \ + _ret = snprintf(_tmp_var, QUOTA_KEY_MAX, \ + QUOTA_XATTR_PREFIX ".%s.." 
CONTRIBUTION, "quota"); \ + } \ + GET_QUOTA_KEY(_this, var, _tmp_var, _ret); \ + } while (0) + +#define GET_SIZE_KEY(_this, var, _ret) \ + { \ + GET_QUOTA_KEY(_this, var, QUOTA_SIZE_KEY, _ret); \ + } + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK(lock); \ + var++; \ + UNLOCK(lock); \ + } while (0) struct quota_inode_ctx { - int64_t size; - int8_t dirty; - gf_boolean_t updation_status; - gf_lock_t lock; - struct list_head contribution_head; + int64_t size; + int64_t file_count; + int64_t dir_count; + int8_t dirty; + gf_boolean_t create_status; + gf_boolean_t updation_status; + gf_boolean_t dirty_status; + gf_lock_t lock; + struct list_head contribution_head; }; typedef struct quota_inode_ctx quota_inode_ctx_t; -struct inode_contribution { - struct list_head contri_list; - int64_t contribution; - uuid_t gfid; - gf_lock_t lock; +struct quota_synctask { + xlator_t *this; + loc_t loc; + quota_meta_t contri; + gf_boolean_t is_static; + uint32_t ia_nlink; + call_stub_t *stub; }; -typedef struct inode_contribution inode_contribution_t; +typedef struct quota_synctask quota_synctask_t; -struct quota_local { - int64_t delta; - int64_t d_off; - int32_t err; - int32_t ref; - int64_t sum; - int64_t size; - int32_t hl_count; - int32_t dentry_child_count; - - fd_t *fd; - call_frame_t *frame; - gf_lock_t lock; - - loc_t loc; - loc_t parent_loc; - - quota_inode_ctx_t *ctx; - inode_contribution_t *contri; +struct inode_contribution { + struct list_head contri_list; + int64_t contribution; + int64_t file_count; + int64_t dir_count; + uuid_t gfid; + gf_lock_t lock; + GF_REF_DECL; }; -typedef struct quota_local quota_local_t; - -int32_t -mq_get_lock_on_parent (call_frame_t *, xlator_t *); - -int32_t -mq_req_xattr (xlator_t *, loc_t *, dict_t *); - -int32_t -init_quota_priv (xlator_t *); +typedef struct inode_contribution inode_contribution_t; int32_t -mq_xattr_state (xlator_t *, loc_t *, dict_t *, struct iatt); +mq_req_xattr(xlator_t *, loc_t *, dict_t *, char *, char *); 
int32_t -mq_set_inode_xattr (xlator_t *, loc_t *); +mq_xattr_state(xlator_t *, loc_t *, dict_t *, struct iatt *); int -mq_initiate_quota_txn (xlator_t *, loc_t *); - -int32_t -mq_dirty_inode_readdir (call_frame_t *, void *, xlator_t *, - int32_t, int32_t, fd_t *); +mq_initiate_quota_txn(xlator_t *, loc_t *, struct iatt *); -int32_t -mq_reduce_parent_size (xlator_t *, loc_t *, int64_t); +int +mq_initiate_quota_blocking_txn(xlator_t *, loc_t *, struct iatt *); -int32_t -mq_rename_update_newpath (xlator_t *, loc_t *); +int +mq_create_xattrs_txn(xlator_t *this, loc_t *loc, struct iatt *buf); int32_t -mq_inspect_file_xattr (xlator_t *this, loc_t *loc, dict_t *dict, struct iatt buf); +mq_reduce_parent_size_txn(xlator_t *, loc_t *, quota_meta_t *, uint32_t nlink, + call_stub_t *stub); int32_t -mq_forget (xlator_t *, quota_inode_ctx_t *); +mq_forget(xlator_t *, quota_inode_ctx_t *); #endif diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index 93b1518cb7e..1375ccc498c 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -1,1732 +1,2320 @@ -/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "libxlator.h" #include "marker.h" #include "marker-mem-types.h" #include "marker-quota.h" #include "marker-quota-helper.h" #include "marker-common.h" -#include "byte-order.h" +#include <glusterfs/byte-order.h> +#include <glusterfs/syncop.h> +#include <glusterfs/syscall.h> + +#include <fnmatch.h> #define _GF_UID_GID_CHANGED 1 +static char *mq_ext_xattrs[] = { + QUOTA_SIZE_KEY, + QUOTA_LIMIT_KEY, + QUOTA_LIMIT_OBJECTS_KEY, + NULL, +}; + void -fini (xlator_t *this); +fini(xlator_t *this); int32_t -marker_start_setxattr (call_frame_t *, xlator_t *); +marker_start_setxattr(call_frame_t *, xlator_t *); + +/* When client/quotad request for quota xattrs, + * replace the key-name by adding the version number + * in end of the key-name. + * In the cbk, result value of xattrs for original + * key-name. 
+ * Below function marker_key_replace_with_ver and + * marker_key_set_ver is used for setting/removing + * version for the key-name + */ +int +marker_key_replace_with_ver(xlator_t *this, dict_t *dict) +{ + int ret = -1; + int i = 0; + marker_conf_t *priv = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; + + priv = this->private; + + if (dict == NULL || priv->version <= 0) { + ret = 0; + goto out; + } + + for (i = 0; mq_ext_xattrs[i]; i++) { + if (dict_get(dict, mq_ext_xattrs[i])) { + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) + goto out; + + ret = dict_set(dict, key, dict_get(dict, mq_ext_xattrs[i])); + if (ret < 0) + goto out; + + dict_del(dict, mq_ext_xattrs[i]); + } + } + + ret = 0; + +out: + return ret; +} + +int +marker_key_set_ver(xlator_t *this, dict_t *dict) +{ + int ret = -1; + int i = -1; + marker_conf_t *priv = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; + + priv = this->private; + + if (dict == NULL || priv->version <= 0) { + ret = 0; + goto out; + } + + for (i = 0; mq_ext_xattrs[i]; i++) { + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) + goto out; + + if (dict_get(dict, key)) + dict_set(dict, mq_ext_xattrs[i], dict_get(dict, key)); + } + + ret = 0; +out: + return ret; +} marker_local_t * -marker_local_ref (marker_local_t *local) +marker_local_ref(marker_local_t *local) { - GF_VALIDATE_OR_GOTO ("marker", local, err); + GF_VALIDATE_OR_GOTO("marker", local, err); - LOCK (&local->lock); - { - local->ref++; - } - UNLOCK (&local->lock); + LOCK(&local->lock); + { + local->ref++; + } + UNLOCK(&local->lock); - return local; + return local; err: - return NULL; + return NULL; } int -marker_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path) +marker_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path) { - int ret = -1; + int ret = -1; - if (!loc) - return ret; + if (!loc) + return ret; - if (inode) { - loc->inode = inode_ref (inode); - uuid_copy (loc->gfid, loc->inode->gfid); + if (inode) { + 
loc->inode = inode_ref(inode); + if (gf_uuid_is_null(loc->gfid)) { + gf_uuid_copy(loc->gfid, loc->inode->gfid); } + } - if (parent) - loc->parent = inode_ref (parent); + if (parent) + loc->parent = inode_ref(parent); - loc->path = gf_strdup (path); + if (path) { + loc->path = gf_strdup(path); if (!loc->path) { - gf_log ("loc fill", GF_LOG_ERROR, "strdup failed"); - goto loc_wipe; + gf_log("loc fill", GF_LOG_ERROR, "strdup failed"); + goto loc_wipe; } - loc->name = strrchr (loc->path, '/'); + loc->name = strrchr(loc->path, '/'); if (loc->name) - loc->name++; - else - goto loc_wipe; + loc->name++; + } - ret = 0; + ret = 0; loc_wipe: - if (ret < 0) - loc_wipe (loc); + if (ret < 0) + loc_wipe(loc); - return ret; + return ret; } int -marker_inode_loc_fill (inode_t *inode, loc_t *loc) +_marker_inode_loc_fill(inode_t *inode, inode_t *parent, char *name, loc_t *loc) { - char *resolvedpath = NULL; - int ret = -1; - inode_t *parent = NULL; + char *resolvedpath = NULL; + int ret = -1; + gf_boolean_t free_parent = _gf_false; - if ((!inode) || (!loc)) - return ret; + if ((!inode) || (!loc)) + return ret; - parent = inode_parent (inode, NULL, NULL); + if (parent && name) + ret = inode_path(parent, name, &resolvedpath); + else + ret = inode_path(inode, NULL, &resolvedpath); + if (ret < 0) + goto err; - ret = inode_path (inode, NULL, &resolvedpath); - if (ret < 0) - goto err; + if (parent == NULL) { + parent = inode_parent(inode, NULL, NULL); + free_parent = _gf_true; + } - ret = marker_loc_fill (loc, inode, NULL, resolvedpath); - if (ret < 0) - goto err; + ret = marker_loc_fill(loc, inode, parent, resolvedpath); + if (ret < 0) + goto err; err: - if (parent) - inode_unref (parent); + if (free_parent) + inode_unref(parent); - if (resolvedpath) - GF_FREE (resolvedpath); + GF_FREE(resolvedpath); - return ret; + return ret; +} + +int +marker_inode_loc_fill(inode_t *inode, loc_t *loc) +{ + return _marker_inode_loc_fill(inode, NULL, NULL, loc); } int32_t -marker_trav_parent 
(marker_local_t *local) +marker_trav_parent(marker_local_t *local) { - int32_t ret = 0; - loc_t loc = {0, }; + int32_t ret = 0; + loc_t loc = { + 0, + }; + inode_t *parent = NULL; + int8_t need_unref = 0; - ret = marker_inode_loc_fill (local->loc.parent, &loc); + if (!local->loc.parent) { + parent = inode_parent(local->loc.inode, NULL, NULL); + if (parent) + need_unref = 1; + } else + parent = local->loc.parent; - if (ret < 0) { - ret = -1; - goto out; - } + ret = marker_inode_loc_fill(parent, &loc); + + if (ret < 0) { + ret = -1; + goto out; + } - loc_wipe (&local->loc); + loc_wipe(&local->loc); - local->loc = loc; + local->loc = loc; out: - return ret; + if (need_unref) + inode_unref(parent); + + return ret; } -int32_t -marker_error_handler (xlator_t *this) +void +marker_error_handler(xlator_t *this, marker_local_t *local, int32_t op_errno) { - marker_conf_t *priv = NULL; - - priv = (marker_conf_t *) this->private; + marker_conf_t *priv = (marker_conf_t *)this->private; + const char *path = local ? ((local->loc.path) ? local->loc.path + : uuid_utoa(local->loc.gfid)) + : "<nul>"; - unlink (priv->timestamp_file); - - return 0; + gf_log(this->name, GF_LOG_CRITICAL, + "Indexing gone corrupt at %s (reason: %s)." 
+ " Geo-replication slave content needs to be revalidated", + path, strerror(op_errno)); + sys_unlink(priv->timestamp_file); } int32_t -marker_local_unref (marker_local_t *local) +marker_local_unref(marker_local_t *local) { - int32_t var = 0; - - if (local == NULL) - return -1; - - LOCK (&local->lock); - { - var = --local->ref; - } - UNLOCK (&local->lock); + int32_t var = 0; - if (var != 0) - goto out; - - loc_wipe (&local->loc); - loc_wipe (&local->parent_loc); + if (local == NULL) + return -1; - if (local->oplocal) { - marker_local_unref (local->oplocal); - local->oplocal = NULL; - } - GF_FREE (local); + LOCK(&local->lock); + { + var = --local->ref; + } + UNLOCK(&local->lock); + + if (var != 0) + goto out; + + loc_wipe(&local->loc); + loc_wipe(&local->parent_loc); + if (local->xdata) + dict_unref(local->xdata); + + if (local->lk_frame) { + STACK_DESTROY(local->lk_frame->root); + local->lk_frame = NULL; + } + + if (local->oplocal) { + marker_local_unref(local->oplocal); + local->oplocal = NULL; + } + mem_put(local); out: - return 0; + return 0; } int32_t -stat_stampfile (xlator_t *this, marker_conf_t *priv, - struct volume_mark **status) +stat_stampfile(xlator_t *this, marker_conf_t *priv, struct volume_mark **status) { - struct stat buf = {0, }; - struct volume_mark *vol_mark = NULL; + struct stat buf = { + 0, + }; + struct volume_mark *vol_mark = NULL; - vol_mark = GF_CALLOC (sizeof (struct volume_mark), 1, - gf_marker_mt_volume_mark); + vol_mark = GF_CALLOC(sizeof(struct volume_mark), 1, + gf_marker_mt_volume_mark); - vol_mark->major = 1; - vol_mark->minor = 0; + vol_mark->major = 1; + vol_mark->minor = 0; - GF_ASSERT (sizeof (priv->volume_uuid_bin) == 16); - memcpy (vol_mark->uuid, priv->volume_uuid_bin, 16); + GF_ASSERT(sizeof(priv->volume_uuid_bin) == 16); + memcpy(vol_mark->uuid, priv->volume_uuid_bin, 16); - if (stat (priv->timestamp_file, &buf) != -1) { - vol_mark->retval = 0; - vol_mark->sec = htonl (buf.st_ctime); - vol_mark->usec = htonl (ST_CTIM_NSEC 
(&buf)/1000); - } else - vol_mark->retval = 1; + if (sys_stat(priv->timestamp_file, &buf) != -1) { + vol_mark->retval = 0; + vol_mark->sec = htonl(buf.st_mtime); + vol_mark->usec = htonl(ST_MTIM_NSEC(&buf) / 1000); + } else + vol_mark->retval = 1; - *status = vol_mark; + *status = vol_mark; - return 0; + return 0; } int32_t -marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this, - const char *name, struct volume_mark *vol_mark) +marker_getxattr_stampfile_cbk(call_frame_t *frame, xlator_t *this, + const char *name, struct volume_mark *vol_mark, + dict_t *xdata) { - int32_t ret = -1; - dict_t *dict = NULL; + int32_t ret = -1; + dict_t *dict = NULL; - if (vol_mark == NULL){ - STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL); + if (vol_mark == NULL) { + STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL); - goto out; - } + goto out; + } - dict = dict_new (); + dict = dict_new(); - ret = dict_set_bin (dict, (char *)name, vol_mark, - sizeof (struct volume_mark)); - if (ret) - gf_log (this->name, GF_LOG_WARNING, "failed to set key %s", - name); + ret = dict_set_bin(dict, (char *)name, vol_mark, + sizeof(struct volume_mark)); + if (ret) { + GF_FREE(vol_mark); + gf_log(this->name, GF_LOG_WARNING, "failed to set key %s", name); + } - STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict); + STACK_UNWIND_STRICT(getxattr, frame, 0, 0, dict, xdata); - dict_unref (dict); + if (dict) + dict_unref(dict); out: - return 0; + return 0; } -int32_t -call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name) +gf_boolean_t +call_from_special_client(call_frame_t *frame, xlator_t *this, const char *name) { - struct volume_mark *vol_mark = NULL; - marker_conf_t *priv = NULL; - gf_boolean_t ret = _gf_true; + struct volume_mark *vol_mark = NULL; + marker_conf_t *priv = NULL; + gf_boolean_t is_true = _gf_true; - priv = (marker_conf_t *)this->private; + priv = (marker_conf_t *)this->private; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == 
NULL || - strcmp (name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) { - ret = _gf_false; - goto out; - } + if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL || + strcmp(name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) { + is_true = _gf_false; + goto out; + } - stat_stampfile (this, priv, &vol_mark); + stat_stampfile(this, priv, &vol_mark); - marker_getxattr_stampfile_cbk (frame, this, name, vol_mark); + marker_getxattr_stampfile_cbk(frame, this, name, vol_mark, NULL); out: - return ret; + return is_true; } -int32_t -marker_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) +static gf_boolean_t +_is_quota_internal_xattr(dict_t *d, char *k, data_t *v, void *data) { - if (cookie) { - gf_log (this->name, GF_LOG_DEBUG, - "Filtering the quota extended attributes"); + int i = 0; + char **external_xattrs = data; - dict_foreach (dict, marker_filter_quota_xattr, NULL); - } - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict); - return 0; + for (i = 0; external_xattrs && external_xattrs[i]; i++) { + if (strcmp(k, external_xattrs[i]) == 0) + return _gf_false; + } + + if (fnmatch("trusted.glusterfs.quota*", k, 0) == 0) + return _gf_true; + + /* It would be nice if posix filters pgfid xattrs. 
But since marker + * also takes up responsibility to clean these up, adding the filtering + * here (Check 'quota_xattr_cleaner') + */ + if (fnmatch(PGFID_XATTR_KEY_PREFIX "*", k, 0) == 0) + return _gf_true; + + return _gf_false; } -int32_t -marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) +static void +marker_filter_internal_xattrs(xlator_t *this, dict_t *xattrs) { - gf_boolean_t ret = _gf_false; - marker_conf_t *priv = NULL; - unsigned long cookie = 0; + marker_conf_t *priv = NULL; + char **ext = NULL; - priv = this->private; + priv = this->private; + if (priv->feature_enabled & GF_QUOTA) + ext = mq_ext_xattrs; - if (priv == NULL || (priv->feature_enabled & GF_XTIME) == 0) - goto wind; + dict_foreach_match(xattrs, _is_quota_internal_xattr, ext, + dict_remove_foreach_fn, NULL); +} - gf_log (this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid); +static void +marker_filter_gsyncd_xattrs(call_frame_t *frame, xlator_t *this, dict_t *xattrs) +{ + marker_conf_t *priv = NULL; - ret = call_from_special_client (frame, this, name); -wind: - if (ret == _gf_false) { - if (name == NULL) { - /* Signifies that marker translator - * has to filter the quota's xattr's, - * this is to prevent afr from performing - * self healing on marker-quota xattrs' - */ - cookie = 1; - } - STACK_WIND_COOKIE (frame, marker_getxattr_cbk, (void *)cookie, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, - name); - } + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(frame); - return 0; + if (xattrs && frame->root->pid != GF_CLIENT_PID_GSYNCD) { + GF_REMOVE_INTERNAL_XATTR(GF_XATTR_XTIME_PATTERN, xattrs); + } + return; } +int32_t +marker_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + int32_t ret = -1; + if (op_ret < 0) + goto unwind; + + ret = marker_key_set_ver(this, dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if 
(cookie) { + gf_log(this->name, GF_LOG_DEBUG, + "Filtering the quota extended attributes"); + + /* If the getxattr is from a non special client, then do not + copy the quota related xattrs (except the quota limit key + i.e trusted.glusterfs.quota.limit-set which has been set by + glusterd on the directory on which quota limit is set.) for + directories. Let the healing of xattrs happen upon lookup. + NOTE: setting of trusted.glusterfs.quota.limit-set as of now + happens from glusterd. It should be moved to quotad. Also + trusted.glusterfs.quota.limit-set is set on directory which + is permanent till quota is removed on that directory or limit + is changed. So let that xattr be healed by other xlators + properly whenever directory healing is done. + */ + /* + * Except limit-set xattr, rest of the xattrs are maintained + * by quota xlator. Don't expose them to other xlators. + * This filter makes sure quota xattrs are not healed as part of + * metadata self-heal + */ + marker_filter_internal_xattrs(frame->this, dict); + } + + /* Filter gsyncd xtime xattr for non gsyncd clients */ + marker_filter_gsyncd_xattrs(frame, frame->this, dict); + +unwind: + MARKER_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; +} int32_t -marker_setxattr_done (call_frame_t *frame) -{ - marker_local_t *local = NULL; +marker_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + gf_boolean_t is_true = _gf_false; + marker_conf_t *priv = NULL; + unsigned long cookie = 0; + marker_local_t *local = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; + int32_t ret = -1; + int32_t i = 0; + + priv = this->private; + + if (name) { + for (i = 0; mq_ext_xattrs[i]; i++) { + if (strcmp(name, mq_ext_xattrs[i])) + continue; + + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) + goto out; + name = key; + break; + } + } - local = (marker_local_t *) frame->local; + frame->local = mem_get0(this->local_pool); + local = frame->local; + 
if (local == NULL) + goto out; - frame->local = NULL; + MARKER_INIT_LOCAL(frame, local); - STACK_DESTROY (frame->root); + if ((loc_copy(&local->loc, loc)) < 0) + goto out; - marker_local_unref (local); + gf_log(this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid); - return 0; + if (priv && priv->feature_enabled & GF_XTIME) + is_true = call_from_special_client(frame, this, name); + + if (is_true == _gf_false) { + if (name == NULL) { + /* Signifies that marker translator + * has to filter the quota's xattr's, + * this is to prevent afr from performing + * self healing on marker-quota xattrs' + */ + cookie = 1; + } + STACK_WIND_COOKIE(frame, marker_getxattr_cbk, (void *)cookie, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, + loc, name, xdata); + } + + return 0; +out: + MARKER_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL); + return 0; } -int -marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int32_t +marker_setxattr_done(call_frame_t *frame) { - int32_t ret = 0; - int32_t done = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; - local = (marker_local_t*) frame->local; + local = (marker_local_t *)frame->local; - if (op_ret == -1 && op_errno == ENOSPC) { - marker_error_handler (this); - done = 1; - goto out; - } + frame->local = NULL; - if (strcmp (local->loc.path, "/") == 0) { - done = 1; - goto out; - } + STACK_DESTROY(frame->root); - ret = marker_trav_parent (local); + marker_local_unref(local); - if (ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "Error occurred " - "while traversing to the parent, stopping marker"); + return 0; +} - done = 1; +int +marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t ret = 0; + int32_t done = 1; + marker_local_t *local = NULL; - goto out; + local = (marker_local_t *)frame->local; + + if (op_ret == -1 && op_errno == ENOSPC) { + 
marker_error_handler(this, local, op_errno); + goto out; + } + + if (local) { + if (local->loc.path && strcmp(local->loc.path, "/") == 0) { + goto out; + } + if (__is_root_gfid(local->loc.gfid)) { + goto out; } + } - marker_start_setxattr (frame, this); + ret = (local) ? marker_trav_parent(local) : -1; + if (ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, + "Error occurred " + "while traversing to the parent, stopping marker"); + goto out; + } + + marker_start_setxattr(frame, this); + done = 0; out: - if (done) { - marker_setxattr_done (frame); - } + if (done) { + marker_setxattr_done(frame); + } - return 0; + return 0; } int32_t -marker_start_setxattr (call_frame_t *frame, xlator_t *this) +marker_start_setxattr(call_frame_t *frame, xlator_t *this) { - int32_t ret = 0; - dict_t *dict = NULL; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = -1; + dict_t *dict = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - local = (marker_local_t*) frame->local; + local = (marker_local_t *)frame->local; - dict = dict_new (); + if (!local) + goto out; - ret = dict_set_static_bin (dict, priv->marker_xattr, - (void *)local->timebuf, 8); - if (ret) - gf_log (this->name, GF_LOG_WARNING, - "failed to set marker xattr (%s)", local->loc.path); + dict = dict_new(); - STACK_WIND (frame, marker_specific_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0); + if (!dict) + goto out; - dict_unref (dict); + if (local->loc.inode && gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, local->loc.inode->gfid); - return 0; + GF_UUID_ASSERT(local->loc.gfid); + + ret = dict_set_static_bin(dict, priv->marker_xattr, (void *)local->timebuf, + 8); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "failed to set marker xattr (%s)", + local->loc.path); + goto out; + } + + STACK_WIND(frame, marker_specific_setxattr_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0, NULL); + + ret = 0; +out: + if (dict) + dict_unref(dict); + + return ret; } void -marker_gettimeofday (marker_local_t *local) +marker_gettimeofday(marker_local_t *local) { - struct timeval tv = {0, }; + struct timeval tv = { + 0, + }; - gettimeofday (&tv, NULL); + gettimeofday(&tv, NULL); - local->timebuf [0] = htonl (tv.tv_sec); - local->timebuf [1] = htonl (tv.tv_usec); + local->timebuf[0] = htonl(tv.tv_sec); + local->timebuf[1] = htonl(tv.tv_usec); - return; + return; } int32_t -marker_create_frame (xlator_t *this, marker_local_t *local) +marker_create_frame(xlator_t *this, marker_local_t *local) { - call_frame_t *frame = NULL; + call_frame_t *frame = NULL; - frame = create_frame (this, this->ctx->pool); + frame = create_frame(this, this->ctx->pool); - frame->local = (void *) local; + if (!frame) + return -1; - marker_start_setxattr (frame, this); + frame->local = (void *)local; - return 0; + marker_start_setxattr(frame, this); + + return 0; } int32_t -marker_xtime_update_marks (xlator_t *this, marker_local_t *local) +marker_xtime_update_marks(xlator_t *this, marker_local_t *local) { - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); + marker_conf_t *priv = NULL; - if (local->pid == -1) - goto out; + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO(this->name, local, out); + + priv = this->private; + + if ((local->pid == GF_CLIENT_PID_GSYNCD && + !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE)) || + (local->pid == GF_CLIENT_PID_DEFRAG)) + goto out; - marker_gettimeofday (local); + marker_gettimeofday(local); - marker_local_ref (local); + marker_local_ref(local); - marker_create_frame (this, local); + marker_create_frame(this, local); out: - return 0; + return 0; } - int32_t -marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt 
*postparent) +marker_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "error occurred " - "while Creating a file %s", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "while creating directory %s", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; + priv = this->private; - STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, - buf, preparent, postparent); + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - if (priv->feature_enabled & GF_QUOTA) - mq_set_inode_xattr (this, &local->loc); + if (priv->feature_enabled & GF_QUOTA) + mq_create_xattrs_txn(this, &local->loc, NULL); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int -marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dict_t 
*params) +marker_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_mkdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, loc, mode, params); + STACK_WIND(frame, marker_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); - return 0; + return 0; err: - STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL); - return 0; -} + MARKER_STACK_UNWIND(mkdir, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + return 0; +} int32_t -marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) +marker_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "error occurred " - "while Creating a file %s", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "while creating file %s", + 
strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; + priv = this->private; - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - if (priv->feature_enabled & GF_QUOTA) - mq_set_inode_xattr (this, &local->loc); + if (priv->feature_enabled & GF_QUOTA) + mq_create_xattrs_txn(this, &local->loc, buf); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd, dict_t *params) +marker_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + 
MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, fd, - params); - return 0; + STACK_WIND(frame, marker_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; err: - STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, - NULL); + MARKER_STACK_UNWIND(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } - int32_t -marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +marker_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "error occurred " - "while write, %s", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "while write, %s", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc); + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); - if (priv->feature_enabled & 
GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_writev (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iovec *vector, - int32_t count, - off_t offset, - struct iobref *iobref) +marker_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, - iobref); - return 0; + STACK_WIND(frame, marker_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + return 0; err: - STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(writev, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) +marker_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - 
marker_conf_t *priv = NULL; - marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + call_stub_t *stub = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "error occurred " - "rmdir %s", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "rmdir %s", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; + priv = this->private; - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, - postparent); + if (op_ret == -1 || local == NULL) + goto out; - if (op_ret == -1 || local == NULL) - goto out; + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); + + if (priv->feature_enabled & GF_QUOTA) { + /* If a 'rm -rf' is performed by a client, rmdir can be faster + than marker background mq_reduce_parent_size_txn. + In this case, as part of rmdir parent child association + will be removed in the server protocol. + This can lead to mq_reduce_parent_size_txn failures. 
+ + So perform mq_reduce_parent_size_txn in foreground + and unwind to server once txn is complete + */ - priv = this->private; + stub = fop_rmdir_cbk_stub(frame, default_rmdir_cbk, op_ret, op_errno, + preparent, postparent, xdata); + mq_reduce_parent_size_txn(this, &local->loc, NULL, 1, stub); - if (priv->feature_enabled & GF_QUOTA) - mq_reduce_parent_size (this, &local->loc, -1); + if (stub) { + marker_local_unref(local); + return 0; + } + } - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); out: - marker_local_unref (local); + STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent, + xdata); - return 0; + marker_local_unref(local); + + return 0; } int32_t -marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) +marker_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, flags); - return 0; + STACK_WIND(frame, marker_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); + return 0; err: - STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t 
op_errno, struct iatt *preparent, - struct iatt *postparent) +marker_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, - "%s occurred in unlink", strerror (op_errno)); - } + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + uint32_t nlink = -1; + GF_UNUSED int32_t ret = 0; + call_stub_t *stub = NULL; - local = (marker_local_t *) frame->local; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred in unlink", + strerror(op_errno)); + } - frame->local = NULL; + local = (marker_local_t *)frame->local; - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, - postparent); + frame->local = NULL; + priv = this->private; - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); - if ((priv->feature_enabled & GF_QUOTA) && (local->ia_nlink == 1)) - mq_reduce_parent_size (this, &local->loc, -1); + if (priv->feature_enabled & GF_QUOTA) { + if (local->skip_txn) + goto out; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); -out: - marker_local_unref (local); + if (xdata) { + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink); + if (ret) { + gf_log(this->name, GF_LOG_TRACE, "dict get failed %s ", + strerror(-ret)); + } + } - return 0; -} + /* If a 'rm -rf' is performed by a client, unlink can be faster + than marker background mq_reduce_parent_size_txn. + In this case, as part of unlink parent child association + will be removed in the server protocol. + This can lead to mq_reduce_parent_size_txn failures. 
+ So perform mq_reduce_parent_size_txn in foreground + and unwind to server once txn is complete + */ -int32_t -marker_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) -{ - marker_local_t *local = NULL; + stub = fop_unlink_cbk_stub(frame, default_unlink_cbk, op_ret, op_errno, + preparent, postparent, xdata); + mq_reduce_parent_size_txn(this, &local->loc, NULL, nlink, stub); - local = frame->local; - if (op_ret < 0) { - goto err; + if (stub) { + marker_local_unref(local); + return 0; } + } - if (local == NULL) { - op_errno = EINVAL; - goto err; - } +out: + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); - local->ia_nlink = buf->ia_nlink; + marker_local_unref(local); - STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->loc); - return 0; -err: - frame->local = NULL; - STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL); - marker_local_unref (local); - return 0; + return 0; } - int32_t -marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +marker_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + gf_boolean_t dict_free = _gf_false; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto unlink_wind; + if (priv->feature_enabled == 0) + goto unlink_wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); + local->xflag = xflag; + if (xdata) + local->xdata = dict_ref(xdata); + MARKER_INIT_LOCAL(frame, local); - MARKER_INIT_LOCAL (frame, local); + ret = loc_copy(&local->loc, loc); - ret = loc_copy (&local->loc, loc); + if (ret == -1) + goto err; - if (ret == -1) - goto err; + if (xdata && dict_get(xdata, 
GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) { + local->skip_txn = 1; + goto unlink_wind; + } - STACK_WIND (frame, marker_unlink_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc); - return 0; + if (xdata == NULL) { + xdata = dict_new(); + dict_free = _gf_true; + } + + ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + if (ret < 0) + goto err; unlink_wind: - STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc); - return 0; + STACK_WIND(frame, marker_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + goto out; + err: - frame->local = NULL; - STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL); - marker_local_unref (local); - return 0; -} + MARKER_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, NULL); +out: + if (dict_free) + dict_unref(xdata); + return 0; +} int32_t -marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) +marker_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "linking a file ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "linking a file ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - if (op_ret 
== -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc); + if (priv->feature_enabled & GF_QUOTA) { + if (!local->skip_txn) + mq_create_xattrs_txn(this, &local->loc, buf); + } - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) +marker_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, newloc); + ret = loc_copy(&local->loc, newloc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; + + if (xdata && dict_get(xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) + local->skip_txn = 1; wind: - STACK_WIND (frame, marker_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc); - return 0; + STACK_WIND(frame, marker_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; err: - STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); + MARKER_STACK_UNWIND(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t 
op_ret, int32_t op_errno) +marker_rename_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL, *oplocal = NULL; - loc_t newloc = {0, }; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL, *oplocal = NULL; + loc_t newloc = { + 0, + }; + marker_conf_t *priv = NULL; - local = frame->local; - oplocal = local->oplocal; + local = frame->local; + oplocal = local->oplocal; - priv = this->private; + priv = this->private; - frame->local = NULL; + frame->local = NULL; - if (op_ret < 0) { - if (local->err == 0) { - local->err = op_errno; - } + if (op_ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)", + oplocal->parent_loc.path, + uuid_utoa(oplocal->parent_loc.inode->gfid), strerror(op_errno)); + } - gf_log (this->name, GF_LOG_WARNING, - "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)", - local->parent_loc.path, - uuid_utoa (local->parent_loc.inode->gfid), - strerror (op_errno)); - } + if (local->err != 0) + goto err; - if (local->stub != NULL) { - call_resume (local->stub); - local->stub = NULL; - } else if (local->err != 0) { - STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL, - NULL, NULL, NULL); - } + mq_reduce_parent_size_txn(this, &oplocal->loc, &oplocal->contribution, -1, + NULL); - mq_reduce_parent_size (this, &oplocal->loc, oplocal->contribution); + if (local->loc.inode != NULL) { + /* If destination file exits before rename, it would have + * been unlinked while renaming a file + */ + mq_reduce_parent_size_txn(this, &local->loc, NULL, local->ia_nlink, + NULL); + } + + newloc.inode = inode_ref(oplocal->loc.inode); + newloc.path = gf_strdup(local->loc.path); + newloc.name = strrchr(newloc.path, '/'); + if (newloc.name) + newloc.name++; + newloc.parent = inode_ref(local->loc.parent); + + mq_create_xattrs_txn(this, &newloc, &local->buf); + + loc_wipe(&newloc); + + if (priv->feature_enabled & GF_XTIME) { + if 
(!local->loc.inode) + local->loc.inode = inode_ref(oplocal->loc.inode); + // update marks on oldpath + gf_uuid_copy(local->loc.gfid, oplocal->loc.inode->gfid); + marker_xtime_update_marks(this, oplocal); + marker_xtime_update_marks(this, local); + } - if (local->loc.inode != NULL) { - mq_reduce_parent_size (this, &local->loc, local->contribution); - } +err: + marker_local_unref(local); + marker_local_unref(oplocal); - newloc.inode = inode_ref (oplocal->loc.inode); - newloc.path = gf_strdup (local->loc.path); - newloc.name = strrchr (newloc.path, '/'); - if (newloc.name) - newloc.name++; - newloc.parent = inode_ref (local->loc.parent); + return 0; +} + +void +marker_rename_release_oldp_lock(marker_local_t *local, xlator_t *this) +{ + marker_local_t *oplocal = NULL; + call_frame_t *lk_frame = NULL; + struct gf_flock lock = { + 0, + }; - mq_rename_update_newpath (this, &newloc); + oplocal = local->oplocal; + lk_frame = local->lk_frame; - loc_wipe (&newloc); + if (lk_frame == NULL) + goto err; - if (priv->feature_enabled & GF_XTIME) { - //update marks on oldpath - marker_xtime_update_marks (this, oplocal); - marker_xtime_update_marks (this, local); - } + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = 0; - marker_local_unref (local); - marker_local_unref (oplocal); - return 0; -} + STACK_WIND(lk_frame, marker_rename_done, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, this->name, + &oplocal->parent_loc, F_SETLKW, &lock, NULL); + return; + +err: + marker_local_unref(local); + marker_local_unref(oplocal); +} int32_t -marker_rename_release_newp_lock (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno) +marker_rename_unwind(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL, *oplocal = NULL; - struct gf_flock lock = {0, }; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + 
quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contri = NULL; + + local = frame->local; + oplocal = local->oplocal; + frame->local = NULL; - local = frame->local; + // Reset frame uid and gid if set. + if (cookie == (void *)_GF_UID_GID_CHANGED) + MARKER_RESET_UID_GID(frame, frame->root, local); + + if (op_ret < 0) + local->err = op_errno ? op_errno : EINVAL; + + if (local->stub != NULL) { + /* Remove contribution node from in-memory even if + * remove-xattr has failed as the rename is already performed + * if local->stub is set, which means rename was successful + */ + (void)mq_inode_ctx_get(oplocal->loc.inode, this, &ctx); + if (ctx) { + contri = mq_get_contribution_node(oplocal->loc.parent, ctx); + if (contri) { + QUOTA_FREE_CONTRIBUTION_NODE(ctx, contri); + GF_REF_PUT(contri); + } + } + + call_resume(local->stub); + local->stub = NULL; + local->err = 0; + } else if (local->err != 0) { + STACK_UNWIND_STRICT(rename, frame, -1, local->err, NULL, NULL, NULL, + NULL, NULL, NULL); + } else { + gf_log(this->name, GF_LOG_CRITICAL, + "continuation stub to unwind the call is absent, hence " + "call will be hung (call-stack id = %" PRIu64 ")", + frame->root->unique); + } + + /* If there are in-progress writes on old-path when during rename + * operation, update txn will update the wrong path if lock + * is released before rename unwind. 
+ * So release lock only after rename unwind + */ + marker_rename_release_oldp_lock(local, this); + + return 0; +} + +int32_t +marker_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + call_stub_t *stub = NULL; + int32_t ret = 0; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + loc_t newloc = { + 0, + }; + + local = (marker_local_t *)frame->local; + + if (local != NULL) { oplocal = local->oplocal; + } - if (op_ret < 0) { - if (local->err == 0) { - local->err = op_errno; - } + priv = this->private; - gf_log (this->name, GF_LOG_WARNING, - "inodelk (UNLOCK) failed on %s (gfid:%s) (%s)", - oplocal->parent_loc.path, - uuid_utoa (oplocal->parent_loc.inode->gfid), - strerror (op_errno)); + if (op_ret < 0) { + if (local != NULL) { + local->err = op_errno; } - if (local->next_lock_on == NULL) { - marker_rename_done (frame, NULL, this, 0, 0); - goto out; + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "renaming a file ", + strerror(op_errno)); + } + + if (priv->feature_enabled & GF_QUOTA) { + if ((op_ret < 0) || (local == NULL)) { + goto quota_err; + } + + local->ia_nlink = 0; + if (xdata) + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, + &local->ia_nlink); + + local->buf = *buf; + stub = fop_rename_cbk_stub(frame, default_rename_cbk, op_ret, op_errno, + buf, preoldparent, postoldparent, + prenewparent, postnewparent, xdata); + if (stub == NULL) { + local->err = ENOMEM; + goto quota_err; } - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; + local->stub = stub; - STACK_WIND (frame, - marker_rename_done, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &local->parent_loc, F_SETLKW, 
&lock); + GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, ret); + if (ret < 0) { + local->err = ENOMEM; + goto quota_err; + } -out: - return 0; -} + /* Removexattr requires uid and gid to be 0, + * reset them in the callback. + */ + MARKER_SET_UID_GID(frame, local, frame->root); + + newloc.inode = inode_ref(oplocal->loc.inode); + newloc.path = gf_strdup(local->loc.path); + newloc.name = strrchr(newloc.path, '/'); + if (newloc.name) + newloc.name++; + newloc.parent = inode_ref(local->loc.parent); + gf_uuid_copy(newloc.gfid, oplocal->loc.inode->gfid); + STACK_WIND_COOKIE( + frame, marker_rename_unwind, frame->cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, &newloc, contri_key, NULL); -int32_t -marker_rename_release_oldp_lock (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno) -{ - marker_local_t *local = NULL, *oplocal = NULL; - struct gf_flock lock = {0, }; + loc_wipe(&newloc); + } else { + frame->local = NULL; - local = frame->local; - oplocal = local->oplocal; + STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); - if ((op_ret < 0) && (op_errno != ENOATTR)) { - local->err = op_errno; + if ((op_ret < 0) || (local == NULL)) { + goto out; } - //Reset frame uid and gid if set. 
- if (cookie == (void *) _GF_UID_GID_CHANGED) - MARKER_RESET_UID_GID (frame, frame->root, local); - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - STACK_WIND (frame, - marker_rename_release_newp_lock, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &oplocal->parent_loc, F_SETLKW, &lock); - return 0; + if (priv->feature_enabled & GF_XTIME) { + // update marks on oldpath + if (!local->loc.inode) + local->loc.inode = inode_ref(oplocal->loc.inode); + gf_uuid_copy(local->loc.gfid, oplocal->loc.inode->gfid); + marker_xtime_update_marks(this, oplocal); + marker_xtime_update_marks(this, local); + } + } + +out: + if (!(priv->feature_enabled & GF_QUOTA)) { + marker_local_unref(local); + marker_local_unref(oplocal); + } + + return 0; + +quota_err: + marker_rename_unwind(frame, NULL, this, 0, 0, NULL); + return 0; } +int32_t +marker_do_rename(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + quota_meta_t contribution = { + 0, + }; + + local = frame->local; + oplocal = local->oplocal; + + // Reset frame uid and gid if set. + if (cookie == (void *)_GF_UID_GID_CHANGED) + MARKER_RESET_UID_GID(frame, frame->root, local); + + if ((op_ret < 0) && (op_errno != ENOATTR) && (op_errno != ENODATA)) { + local->err = op_errno ? op_errno : EINVAL; + gf_log(this->name, GF_LOG_WARNING, + "fetching contribution values from %s (gfid:%s) " + "failed (%s)", + oplocal->loc.path, uuid_utoa(oplocal->loc.inode->gfid), + strerror(op_errno)); + goto err; + } + + GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, keylen); + if (keylen < 0) { + local->err = errno ? 
errno : ENOMEM; + goto err; + } + quota_dict_get_meta(dict, contri_key, keylen, &contribution); + oplocal->contribution = contribution; + + STACK_WIND(frame, marker_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &oplocal->loc, &local->loc, + local->xdata); + + return 0; + +err: + marker_rename_unwind(frame, NULL, this, 0, 0, NULL); + return 0; +} int32_t -marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent) +marker_get_oldpath_contribution(call_frame_t *lk_frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + call_frame_t *frame = NULL; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int32_t ret = 0; + + local = lk_frame->local; + oplocal = local->oplocal; + frame = local->frame; + + if (op_ret < 0) { + local->err = op_errno ? op_errno : EINVAL; + gf_log(this->name, GF_LOG_WARNING, + "cannot hold inodelk on %s (gfid:%s) (%s)", oplocal->loc.path, + uuid_utoa(oplocal->loc.inode->gfid), strerror(op_errno)); + if (local->lk_frame) { + STACK_DESTROY(local->lk_frame->root); + local->lk_frame = NULL; + } + goto err; + } + + GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, ret); + if (ret < 0) { + local->err = errno ? errno : ENOMEM; + goto err; + } + + /* getxattr requires uid and gid to be 0, + * reset them in the callback. 
+ */ + MARKER_SET_UID_GID(frame, local, frame->root); + + if (gf_uuid_is_null(oplocal->loc.gfid)) + gf_uuid_copy(oplocal->loc.gfid, oplocal->loc.inode->gfid); + + GF_UUID_ASSERT(oplocal->loc.gfid); + + STACK_WIND_COOKIE(frame, marker_do_rename, frame->cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, &oplocal->loc, + contri_key, NULL); + + return 0; +err: + marker_rename_unwind(frame, NULL, this, 0, 0, NULL); + return 0; +} + +/* For a marker_rename FOP, following is the algorithm used for Quota + * accounting. The use-case considered is: + * 1. rename (src, dst) + * 2. both src and dst exist + * 3. there are parallel operations on src and dst (lets say through fds + * opened on them before rename was initiated). + * + * PS: We've not thought through whether this algo works in the presence of + * hardlinks to src and/or dst. + * + * Algorithm: + * ========== + * + * 1) set inodelk on src-parent + * As part of rename operation, parent can change for the file. + * We need to remove contribution (both on disk xattr and in-memory one) + * to src-parent (and its ancestors) and add the contribution to dst-parent + * (and its ancestors). While we are doing these operations, contribution of + * the file/directory shouldn't be changing as we want to be sure that + * a) what we subtract from src-parent is exactly what we add to dst-parent + * b) we should subtract from src-parent exactly what we contributed to + * src-parent + * So, We hold a lock on src-parent to block any parallel transcations on + * src-inode (since that's the one which survives rename). + * + * If there are any parallel transactions on dst-inode they keep succeeding + * till the association of dst-inode with dst-parent is broken because of an + * inode_rename after unwind of rename fop from marker. Only after unwind + * (and hence inode_rename), we delete and subtract the contribution of + * dst-inode to dst-parent. 
That way we are making sure we subtract exactly + * what dst-inode contributed to dst-parent. + * + * 2) lookup contribution to src-parent on src-inode. + * We need to save the contribution info for use at step-8. + * + * 3) wind rename + * Perform rename on disk + * + * 4) remove xattr on src-loc + * After rename, parent can change, so + * need to remove xattrs storing contribution to src-parent. + * + * 5) remove contribution node corresponding to src-parent from the in-memory + * list. + * After rename, contri gfid can change and we have + * also removed xattr from file. + * We need to remove in-memory contribution node to prevent updations to + * src-parent even after a successful rename + * + * 6) unwind rename + * This will ensure that rename is done in the server + * inode table. An inode_rename disassociates src-inode from src-parent and + * associates it with dst-parent. It also disassociates dst-inode from + * dst-parent. After inode_rename, inode_parent on src-inode will give + * dst-parent and inode_parent on dst-inode will return NULL (assuming + * dst-inode doesn't have any hardlinks). + * + * 7) release inodelk on src-parent + * Lock on src-parent should be released only after + * rename on disk, remove xattr and rename_unwind (and hence inode_rename) + * operations. If lock is released before inode_rename, a parallel + * transaction on src-inode can still update src-parent (as inode_parent on + * src-inode can still return src-parent). This would make the + * contribution from src-inode to src-parent stored in step-2 stale. + * + * 8) Initiate mq_reduce_parent_size_txn on src-parent to remove contribution + * of src-inode to src-parent. We use the contribution stored in step-2. + * Since, we had acquired the lock on src-parent all along step-2 through + * inode_rename, we can be sure that a parallel transaction wouldn't have + * added a delta to src-parent. + * + * 9) Initiate mq_reduce_parent_size_txn on dst-parent if dst-inode exists. 
+ * The size reduced from dst-parent and its ancestors is the + * size stored as contribution to dst-parent in dst-inode. + * If the destination file had existed, rename will unlink the + * destination file as part of its operation. + * We need to reduce the size on the dest parent similarly to + * unlink. Since, we are initiating reduce-parent-size transaction after + * inode_rename, we can be sure that a parallel transaction wouldn't add + * delta to dst-parent while we are reducing the contribution of dst-inode + * from its ancestors before rename. + * + * 10) create contribution xattr to dst-parent on src-inode. + */ +int32_t +marker_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; - marker_local_t *oplocal = NULL; - call_stub_t *stub = NULL; - int32_t ret = 0; - char contri_key [512] = {0, }; - loc_t newloc = {0, }; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + marker_conf_t *priv = NULL; + struct gf_flock lock = { + 0, + }; - local = (marker_local_t *) frame->local; + priv = this->private; - if (local != NULL) { - oplocal = local->oplocal; - } + if (priv->feature_enabled == 0) + goto rename_wind; - priv = this->private; + local = mem_get0(this->local_pool); - if (op_ret < 0) { - if (local != NULL) { - local->err = op_errno; - } + MARKER_INIT_LOCAL(frame, local); - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "renaming a file ", strerror (op_errno)); - } + oplocal = mem_get0(this->local_pool); - if (priv->feature_enabled & GF_QUOTA) { - if ((op_ret < 0) || (local == NULL)) { - goto quota_err; - } - - stub = fop_rename_cbk_stub (frame, default_rename_cbk, op_ret, - op_errno, buf, preoldparent, - postoldparent, prenewparent, - postnewparent); - if (stub == NULL) { - local->err = ENOMEM; - goto quota_err; - } - - local->stub = stub; - - GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); - if (ret 
< 0) { - local->err = ENOMEM; - goto quota_err; - } - - /* Removexattr requires uid and gid to be 0, - * reset them in the callback. - */ - MARKER_SET_UID_GID (frame, local, frame->root); - - newloc.inode = inode_ref (oplocal->loc.inode); - newloc.path = gf_strdup (local->loc.path); - newloc.name = strrchr (newloc.path, '/'); - if (newloc.name) - newloc.name++; - newloc.parent = inode_ref (local->loc.parent); - - STACK_WIND_COOKIE (frame, marker_rename_release_oldp_lock, - frame->cookie, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - &newloc, contri_key); - - loc_wipe (&newloc); - } else { - frame->local = NULL; + MARKER_INIT_LOCAL(frame, oplocal); - STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, - preoldparent, postoldparent, prenewparent, - postnewparent); + frame->local = local; - if ((op_ret < 0) || (local == NULL)) { - goto out; - } + local->oplocal = marker_local_ref(oplocal); - if (priv->feature_enabled & GF_XTIME) { - //update marks on oldpath - marker_xtime_update_marks (this, oplocal); - marker_xtime_update_marks (this, local); - } - } + ret = loc_copy(&local->loc, newloc); + if (ret < 0) + goto err; -out: - if (!(priv->feature_enabled & GF_QUOTA)) { - marker_local_unref (local); - marker_local_unref (oplocal); - } + ret = loc_copy(&oplocal->loc, oldloc); + if (ret < 0) + goto err; - return 0; + if (!(priv->feature_enabled & GF_QUOTA)) { + goto rename_wind; + } -quota_err: - marker_rename_release_oldp_lock (frame, NULL, this, 0, 0); - return 0; -} + ret = mq_inode_loc_fill(NULL, newloc->parent, &local->parent_loc); + if (ret < 0) + goto err; + ret = mq_inode_loc_fill(NULL, oldloc->parent, &oplocal->parent_loc); + if (ret < 0) + goto err; -int32_t -marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; -{ - marker_local_t *local = NULL, *oplocal = NULL; - char contri_key[512] = 
{0, }; - int32_t ret = 0; - int64_t *contribution = 0; + local->xdata = xdata ? dict_ref(xdata) : dict_new(); + ret = dict_set_int32(local->xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + if (ret < 0) + goto err; - local = frame->local; - oplocal = local->oplocal; + local->frame = frame; + local->lk_frame = create_frame(this, this->ctx->pool); + if (local->lk_frame == NULL) + goto err; - //Reset frame uid and gid if set. - if (cookie == (void *) _GF_UID_GID_CHANGED) - MARKER_RESET_UID_GID (frame, frame->root, local); - - if ((op_ret < 0) && (op_errno != ENOATTR)) { - local->err = op_errno; - gf_log (this->name, GF_LOG_WARNING, - "fetching contribution values from %s (gfid:%s) " - "failed (%s)", local->loc.path, - uuid_utoa (local->loc.inode->gfid), - strerror (op_errno)); - goto err; - } + local->lk_frame->root->uid = 0; + local->lk_frame->root->gid = 0; + local->lk_frame->local = local; + set_lk_owner_from_ptr(&local->lk_frame->root->lk_owner, + local->lk_frame->root); - if (local->loc.inode != NULL) { - GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); - if (ret < 0) { - local->err = errno; - goto err; - } - - if (dict_get_bin (dict, contri_key, - (void **) &contribution) == 0) { - local->contribution = ntoh64 (*contribution); - } - } + STACK_WIND(local->lk_frame, marker_get_oldpath_contribution, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->inodelk, this->name, + &oplocal->parent_loc, F_SETLKW, &lock, NULL); - STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, &oplocal->loc, - &local->loc); + return 0; - return 0; +rename_wind: + STACK_WIND(frame, marker_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; err: - marker_rename_release_oldp_lock (frame, NULL, this, 0, 0); - return 0; + MARKER_STACK_UNWIND(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL, + NULL); + marker_local_unref(oplocal); + + return 0; } +int32_t +marker_truncate_cbk(call_frame_t *frame, void 
*cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "truncating a file ", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) { + /* DHT Rebalance process, at the end of migration will + * first make the src file as a linkto file and then + * truncate the file. By doing a truncate after making the + * src file as linkto file, the contri which is already + * accounted is left over. + * So, we need to account for the linkto file when a truncate + * happens, thereby updating the contri properly. + * By passing NULL for postbuf, mq_prevalidate does not check + * for linkto file. + * Same happens with ftruncate as well. + */ + if (postbuf && IS_DHT_LINKFILE_MODE(postbuf)) + mq_initiate_quota_txn(this, &local->loc, NULL); + else + mq_initiate_quota_txn(this, &local->loc, postbuf); + } + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); + +out: + marker_local_unref(local); + + return 0; +} int32_t -marker_get_newpath_contribution (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *dict) +marker_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - marker_local_t *local = NULL, *oplocal = NULL; - char contri_key[512] = {0, }; - int32_t ret = 0; - int64_t *contribution = 0; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - local = frame->local; - oplocal = local->oplocal; + priv = this->private; - //Reset frame uid and gid if set. 
- if (cookie == (void *) _GF_UID_GID_CHANGED) - MARKER_RESET_UID_GID (frame, frame->root, local); - - if ((op_ret < 0) && (op_errno != ENOATTR)) { - local->err = op_errno; - gf_log (this->name, GF_LOG_WARNING, - "fetching contribution values from %s (gfid:%s) " - "failed (%s)", oplocal->loc.path, - uuid_utoa (oplocal->loc.inode->gfid), - strerror (op_errno)); - goto err; - } + if (priv->feature_enabled == 0) + goto wind; - GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); - if (ret < 0) { - local->err = errno; - goto err; - } + local = mem_get0(this->local_pool); - if (dict_get_bin (dict, contri_key, (void **) &contribution) == 0) - oplocal->contribution = ntoh64 (*contribution); - - if (local->loc.inode != NULL) { - GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); - if (ret < 0) { - local->err = errno; - goto err; - } - - /* getxattr requires uid and gid to be 0, - * reset them in the callback. - */ - MARKER_SET_UID_GID (frame, local, frame->root); - - STACK_WIND_COOKIE (frame, marker_do_rename, - frame->cookie, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - &local->loc, contri_key); - } else { - marker_do_rename (frame, NULL, this, 0, 0, NULL); - } + MARKER_INIT_LOCAL(frame, local); - return 0; + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; err: - marker_rename_release_oldp_lock (frame, NULL, this, 0, 0); - return 0; -} + MARKER_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} int32_t -marker_get_oldpath_contribution (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno) +marker_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - marker_local_t *local = NULL, *oplocal = NULL; - char contri_key[512] = {0, 
}; - int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - local = frame->local; - oplocal = local->oplocal; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "truncating a file ", + strerror(op_errno)); + } - if (op_ret < 0) { - local->err = op_errno; - gf_log (this->name, GF_LOG_WARNING, - "cannot hold inodelk on %s (gfid:%s) (%s)", - local->next_lock_on->path, - uuid_utoa (local->next_lock_on->inode->gfid), - strerror (op_errno)); - goto lock_err; - } + local = (marker_local_t *)frame->local; - GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); - if (ret < 0) { - local->err = errno; - goto quota_err; - } + frame->local = NULL; - /* getxattr requires uid and gid to be 0, - * reset them in the callback. - */ - MARKER_SET_UID_GID (frame, local, frame->root); + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - STACK_WIND_COOKIE (frame, marker_get_newpath_contribution, - frame->cookie, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - &oplocal->loc, contri_key); - return 0; + if (op_ret == -1 || local == NULL) + goto out; -quota_err: - marker_rename_release_oldp_lock (frame, NULL, this, 0, 0); - return 0; + priv = this->private; -lock_err: - if ((local->next_lock_on == NULL) - || (local->next_lock_on == &local->parent_loc)) { - local->next_lock_on = NULL; - marker_rename_release_oldp_lock (frame, NULL, this, 0, 0); - } else { - marker_rename_release_newp_lock (frame, NULL, this, 0, 0); - } + if (priv->feature_enabled & GF_QUOTA) { + if (postbuf && IS_DHT_LINKFILE_MODE(postbuf)) + mq_initiate_quota_txn(this, &local->loc, NULL); + else + mq_initiate_quota_txn(this, &local->loc, postbuf); + } - return 0; -} + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + return 0; +} int32_t -marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) 
+marker_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - marker_local_t *local = NULL, *oplocal = NULL; - loc_t *loc = NULL; - struct gf_flock lock = {0, }; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - local = frame->local; - oplocal = local->oplocal; + priv = this->private; - if (op_ret < 0) { - if (local->next_lock_on != &oplocal->parent_loc) { - loc = &oplocal->parent_loc; - } else { - loc = &local->parent_loc; - } - - local->err = op_errno; - gf_log (this->name, GF_LOG_WARNING, - "cannot hold inodelk on %s (gfid:%s) (%s)", - loc->path, uuid_utoa (loc->inode->gfid), - strerror (op_errno)); - goto err; - } + if (priv->feature_enabled == 0) + goto wind; - if (local->next_lock_on != NULL) { - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - - STACK_WIND (frame, - marker_get_oldpath_contribution, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, local->next_lock_on, - F_SETLKW, &lock); - } else { - marker_get_oldpath_contribution (frame, 0, this, 0, 0); - } + local = mem_get0(this->local_pool); - return 0; + MARKER_INIT_LOCAL(frame, local); + ret = marker_inode_loc_fill(fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; err: - marker_rename_done (frame, NULL, this, 0, 0); - return 0; -} + MARKER_STACK_UNWIND(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} int32_t -marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc) +marker_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_local_t *oplocal = NULL; - marker_conf_t *priv = NULL; - 
struct gf_flock lock = {0, }; - loc_t *lock_on = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - priv = this->private; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "creating symlinks ", + strerror(op_errno)); + } - if (priv->feature_enabled == 0) - goto rename_wind; + local = (marker_local_t *)frame->local; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + frame->local = NULL; + priv = this->private; - MARKER_INIT_LOCAL (frame, local); - - ALLOCATE_OR_GOTO (oplocal, marker_local_t, err); - - MARKER_INIT_LOCAL (frame, oplocal); + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } - frame->local = local; + STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - local->oplocal = marker_local_ref (oplocal); + if (op_ret == -1 || local == NULL) + goto out; - ret = loc_copy (&local->loc, newloc); - if (ret < 0) - goto err; + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - ret = loc_copy (&oplocal->loc, oldloc); - if (ret < 0) - goto err; + if (priv->feature_enabled & GF_QUOTA) { + mq_create_xattrs_txn(this, &local->loc, buf); + } - if (!(priv->feature_enabled & GF_QUOTA)) { - goto rename_wind; - } + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); - ret = mq_inode_loc_fill (NULL, newloc->parent, &local->parent_loc); - if (ret < 0) - goto err; + return 0; +} - ret = mq_inode_loc_fill (NULL, oldloc->parent, &oplocal->parent_loc); - if (ret < 0) - goto err; +int +marker_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t 
*local = NULL; + marker_conf_t *priv = NULL; - if ((newloc->inode != NULL) && (newloc->parent != oldloc->parent) - && (uuid_compare (newloc->parent->gfid, - oldloc->parent->gfid) < 0)) { - lock_on = &local->parent_loc; - local->next_lock_on = &oplocal->parent_loc; - } else { - lock_on = &oplocal->parent_loc; - if ((newloc->inode != NULL) && (newloc->parent - != oldloc->parent)) { - local->next_lock_on = &local->parent_loc; - } - } + priv = this->private; - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; + if (priv->feature_enabled == 0) + goto wind; - STACK_WIND (frame, - marker_rename_inodelk_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, lock_on, - F_SETLKW, &lock); + local = mem_get0(this->local_pool); - return 0; + MARKER_INIT_LOCAL(frame, local); -rename_wind: - STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc); + ret = loc_copy(&local->loc, loc); - return 0; + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata); + return 0; err: - STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL, NULL); + MARKER_STACK_UNWIND(symlink, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } - int32_t -marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +marker_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "truncating a 
file ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred with " + "mknod ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; + priv = this->private; - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf); + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - priv = this->private; + if (op_ret == -1 || local == NULL) + goto out; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc); + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG(local->mode))) { + mq_create_xattrs_txn(this, &local->loc, buf); + } + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } -int32_t -marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +int +marker_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, 
err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + local->mode = mode; + + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); - return 0; + STACK_WIND(frame, marker_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + return 0; err: - STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +marker_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "truncating a file ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "fallocating a file ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf); + STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc); + if (priv->feature_enabled & GF_QUOTA) + 
mq_initiate_quota_txn(this, &local->loc, postbuf); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); - return 0; + STACK_WIND(frame, marker_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; err: - STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) +marker_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - marker_conf_t *priv = NULL; - 
marker_local_t *local = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "creating symlinks ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred during discard", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - mq_set_inode_xattr (this, &local->loc); + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } -int -marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, dict_t *params) +int32_t +marker_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = marker_inode_loc_fill(fd->inode, 
&local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_symlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, linkpath, loc, params); - return 0; + STACK_WIND(frame, marker_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; err: - STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL); - return 0; -} + MARKER_STACK_UNWIND(discard, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} int32_t -marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) +marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "creating symlinks ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred during zerofill", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, - buf, preparent, postparent); + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG (local->mode))) { - mq_set_inode_xattr (this, &local->loc); - } + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks 
(this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } -int -marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, dict_t *parms) +int32_t +marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; - - priv = this->private; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (priv->feature_enabled == 0) - goto wind; + priv = this->private; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + if (priv->feature_enabled == 0) + goto wind; - MARKER_INIT_LOCAL (frame, local); + local = mem_get0(this->local_pool); - ret = loc_copy (&local->loc, loc); + MARKER_INIT_LOCAL(frame, local); - local->mode = mode; + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_mknod_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, parms); - return 0; + STACK_WIND(frame, marker_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; err: - STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL); - return 0; -} + MARKER_STACK_UNWIND(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} /* when a call from the special client is received on * key trusted.glusterfs.volume-mark with value "RESET" @@ -1735,761 +2323,1246 @@ err: * timestamp file. 
*/ int32_t -call_from_sp_client_to_reset_tmfile (call_frame_t *frame, - xlator_t *this, - dict_t *dict) +call_from_sp_client_to_reset_tmfile(call_frame_t *frame, xlator_t *this, + dict_t *dict) { - int32_t fd = 0; - int32_t op_ret = 0; - int32_t op_errno = 0; - data_t *data = NULL; - marker_conf_t *priv = NULL; + int32_t fd = 0; + int32_t op_ret = 0; + int32_t op_errno = 0; + data_t *data = NULL; + marker_conf_t *priv = NULL; + + if (frame == NULL || this == NULL || dict == NULL) + return -1; - if (frame == NULL || this == NULL || dict == NULL) - return -1; + priv = this->private; - priv = this->private; + data = dict_get(dict, "trusted.glusterfs.volume-mark"); + if (data == NULL) + return -1; - data = dict_get (dict, "trusted.glusterfs.volume-mark"); - if (data == NULL) - return -1; + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + op_ret = -1; + op_errno = EPERM; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - op_ret = -1; - op_errno = EPERM; + goto out; + } - goto out; + if (data->len == 0 || + (data->len == 5 && memcmp(data->data, "RESET", 5) == 0)) { + fd = open(priv->timestamp_file, O_WRONLY | O_TRUNC); + if (fd != -1) { + /* TODO check whether the O_TRUNC would update the + * timestamps on a zero length file on all machies. + */ + sys_close(fd); } - if (data->len == 0 || (data->len == 5 && - memcmp (data->data, "RESET", 5) == 0)) { - fd = open (priv->timestamp_file, O_WRONLY|O_TRUNC); - if (fd != -1) { - /* TODO check whether the O_TRUNC would update the - * timestamps on a zero length file on all machies. 
- */ - close (fd); - } - - if (fd != -1 || errno == ENOENT) { - op_ret = 0; - op_errno = 0; - } else { - op_ret = -1; - op_errno = errno; - } + if (fd != -1 || errno == ENOENT) { + op_ret = 0; + op_errno = 0; } else { - op_ret = -1; - op_errno = EINVAL; + op_ret = -1; + op_errno = errno; } + } else { + op_ret = -1; + op_errno = EINVAL; + } out: - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL); - return 0; + return 0; } - int32_t -marker_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +marker_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "creating symlinks ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred in " + "setxattr ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; +} + +int +remove_quota_keys(dict_t *dict, char *k, data_t *v, void *data) +{ + call_frame_t *frame = data; + marker_local_t *local = frame->local; + xlator_t *this = frame->this; + marker_conf_t *priv = NULL; + char 
ver_str[NAME_MAX] = { + 0, + }; + char *dot = NULL; + int ret = -1; + + priv = this->private; + + /* If quota is enabled immediately after disable. + * quota healing starts creating new xattrs + * before completing the cleanup operation. + * So we should check if the xattr is the new. + * Do not remove xattr if its xattr + * version is same as current version + */ + if ((priv->feature_enabled & GF_QUOTA) && priv->version > 0) { + snprintf(ver_str, sizeof(ver_str), ".%d", priv->version); + dot = strrchr(k, '.'); + if (dot && !strcmp(dot, ver_str)) + return 0; + } + + ret = syncop_removexattr(FIRST_CHILD(this), &local->loc, k, 0, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "%s: Failed to remove " + "extended attribute: %s", + local->loc.path, k); + return -1; + } + return 0; +} + +int +quota_xattr_cleaner_cbk(int ret, call_frame_t *frame, void *args) +{ + dict_t *xdata = args; + int op_ret = -1; + int op_errno = 0; + + op_ret = (ret < 0) ? -1 : 0; + op_errno = -ret; + + MARKER_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + return ret; +} + +int +quota_xattr_cleaner(void *args) +{ + struct synctask *task = NULL; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + marker_local_t *local = NULL; + dict_t *xdata = NULL; + int ret = -1; + + task = synctask_get(); + if (!task) + goto out; + + frame = task->frame; + this = frame->this; + local = frame->local; + + ret = syncop_listxattr(FIRST_CHILD(this), &local->loc, &xdata, NULL, NULL); + if (ret == -1) { + ret = -errno; + goto out; + } + + ret = dict_foreach_fnmatch(xdata, "trusted.glusterfs.quota.*", + remove_quota_keys, frame); + if (ret == -1) { + ret = -errno; + goto out; + } + ret = dict_foreach_fnmatch(xdata, PGFID_XATTR_KEY_PREFIX "*", + remove_quota_keys, frame); + if (ret == -1) { + ret = -errno; + goto out; + } + + ret = 0; +out: + if (xdata) + dict_unref(xdata); + + return ret; +} + +int +marker_do_xattr_cleanup(call_frame_t *frame, xlator_t *this, dict_t *xdata, + loc_t *loc) +{ + 
int ret = -1; + marker_local_t *local = NULL; + + local = mem_get0(this->local_pool); + if (!local) + goto out; + + MARKER_INIT_LOCAL(frame, local); + + loc_copy(&local->loc, loc); + ret = synctask_new(this->ctx->env, quota_xattr_cleaner, + quota_xattr_cleaner_cbk, frame, xdata); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to create synctask " + "for cleaning up quota extended attributes"); + goto out; + } + + ret = 0; +out: + if (ret) + MARKER_STACK_UNWIND(setxattr, frame, -1, ENOMEM, xdata); + + return ret; +} + +static gf_boolean_t +marker_xattr_cleanup_cmd(dict_t *dict) +{ + return (dict_get(dict, VIRTUAL_QUOTA_XATTR_CLEANUP_KEY) != NULL); } int32_t -marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags) +marker_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + int op_errno = ENOMEM; + + priv = this->private; + + if (marker_xattr_cleanup_cmd(dict)) { + if (frame->root->uid != 0 || frame->root->gid != 0) { + op_errno = EPERM; + ret = -1; + goto err; + } - priv = this->private; + /* The following function does the cleanup and then unwinds the + * corresponding call*/ + loc_path(loc, NULL); + marker_do_xattr_cleanup(frame, this, xdata, loc); + return 0; + } - if (priv->feature_enabled == 0) - goto wind; + ret = marker_key_replace_with_ver(this, dict); + if (ret < 0) + goto err; - ret = call_from_sp_client_to_reset_tmfile (frame, this, dict); - if (ret == 0) - return 0; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + ret = call_from_sp_client_to_reset_tmfile(frame, this, dict); + if (ret == 0) + return 0; - MARKER_INIT_LOCAL (frame, local); + local = mem_get0(this->local_pool); - ret = loc_copy (&local->loc, loc); + MARKER_INIT_LOCAL(frame, 
local); - if (ret == -1) - goto err; + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, flags); - return 0; + STACK_WIND(frame, marker_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; err: - STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM); + MARKER_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); - return 0; + return 0; } - int32_t -marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +marker_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "creating symlinks ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred in " + "fsetxattr", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags) +marker_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, 
dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ret = call_from_sp_client_to_reset_tmfile (frame, this, dict); - if (ret == 0) - return 0; + ret = call_from_sp_client_to_reset_tmfile(frame, this, dict); + if (ret == 0) + return 0; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags); - return 0; + STACK_WIND(frame, marker_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; err: - STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM); + MARKER_STACK_UNWIND(fsetxattr, frame, -1, ENOMEM, NULL); - return 0; + return 0; } - int32_t -marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost) +marker_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occurred while " - "creating symlinks ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred in " + "fsetattr ", + strerror(op_errno)); + } - local = (marker_local_t *) 
frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre, - statpost); + STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, statpre, statpost, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } - int32_t -marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid) +marker_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_fsetattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid); - return 0; + STACK_WIND(frame, marker_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; err: - STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_setattr_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost) +marker_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - if (op_ret == -1) { - gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG : - GF_LOG_ERROR), - "%s occurred during setattr of %s", - strerror (op_errno), - (local ? local->loc.path : "<nul>")); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred during setattr of %s", + strerror(op_errno), (local ? local->loc.path : "<nul>")); + } - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, - statpost); + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) +marker_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if 
(priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid); - return 0; + STACK_WIND(frame, marker_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; err: - STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +marker_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occurred while " - "creating symlinks ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "removing extended attribute", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - 
marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) -{ - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; - - priv = this->private; +marker_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int32_t ret = -1; + int32_t i = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; + + priv = this->private; + + if (name) { + for (i = 0; mq_ext_xattrs[i]; i++) { + if (strcmp(name, mq_ext_xattrs[i])) + continue; + + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) + goto err; + name = key; + break; + } + } - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name); - return 0; + STACK_WIND(frame, marker_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; err: - STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM); + MARKER_STACK_UNWIND(removexattr, frame, -1, ENOMEM, NULL); - return 0; + return 0; } +static gf_boolean_t +__has_quota_xattrs(dict_t *xattrs) +{ + if (dict_foreach_match(xattrs, _is_quota_internal_xattr, NULL, + dict_null_foreach_fn, NULL) > 0) + return _gf_true; + + return _gf_false; +} int32_t -marker_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) 
+marker_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, struct iatt *postparent) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + dict_t *xattrs = NULL; + quota_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + + priv = this->private; + local = (marker_local_t *)frame->local; + frame->local = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "lookup failed with %s", + strerror(op_errno)); + goto unwind; + } + + ret = marker_key_set_ver(this, dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (dict && __has_quota_xattrs(dict)) { + xattrs = dict_copy_with_ref(dict, NULL); + if (!xattrs) { + op_ret = -1; + op_errno = ENOMEM; + } else { + marker_filter_internal_xattrs(this, xattrs); + } + } else if (dict) { + xattrs = dict_ref(dict); + } + + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } + +unwind: + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xattrs, + postparent); + + if (op_ret == -1 || local == NULL) + goto out; + + /* copy the gfid from the stat structure instead of inode, + * since if the lookup is fresh lookup, then the inode + * would have not yet linked to the inode table which happens + * in protocol/server. 
+ */ + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); + + if (priv->feature_enabled & GF_QUOTA) { + mq_xattr_state(this, &local->loc, dict, buf); + } + +out: + marker_local_unref(local); + if (xattrs) + dict_unref(xattrs); + + return 0; +} + +int32_t +marker_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "lookup failed with %s", - strerror (op_errno)); - } + priv = this->private; - local = (marker_local_t *) frame->local; + xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); + if (!xattr_req) + goto err; - frame->local = NULL; + ret = marker_key_replace_with_ver(this, xattr_req); + if (ret < 0) + goto err; - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, - dict, postparent); + if (priv->feature_enabled == 0) + goto wind; - if (op_ret == -1 || local == NULL) - goto out; + local = mem_get0(this->local_pool); + if (local == NULL) + goto err; - priv = this->private; + MARKER_INIT_LOCAL(frame, local); - if (priv->feature_enabled & GF_QUOTA) { - mq_xattr_state (this, &local->loc, dict, *buf); - } + ret = loc_copy(&local->loc, loc); + if (ret == -1) + goto err; -out: - marker_local_unref (local); + if ((priv->feature_enabled & GF_QUOTA)) + mq_req_xattr(this, loc, xattr_req, NULL, NULL); - return 0; +wind: + STACK_WIND(frame, marker_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + + dict_unref(xattr_req); + + return 0; +err: + MARKER_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); + + if (xattr_req) + dict_unref(xattr_req); + + return 0; } -int32_t -marker_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) +int +marker_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, 
int op_errno, gf_dirent_t *entries, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + gf_dirent_t *entry = NULL; + quota_inode_ctx_t *ctx = NULL; + int ret = -1; - priv = this->private; + if ((op_ret <= 0) || (entries == NULL)) { + goto out; + } - if (priv->feature_enabled == 0) - goto wind; + list_for_each_entry(entry, &entries->list, list) + { + if (entry->inode == NULL) + continue; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + ret = marker_key_set_ver(this, entry->dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + break; + } - MARKER_INIT_LOCAL (frame, local); + ctx = mq_inode_ctx_new(entry->inode, this); + if (ctx == NULL) + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(entry->inode->gfid)); + } - ret = loc_copy (&local->loc, loc); - if (ret == -1) - goto err; +out: + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; +} - if ((priv->feature_enabled & GF_QUOTA) && xattr_req) - mq_req_xattr (this, loc, xattr_req); -wind: - STACK_WIND (frame, marker_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); - return 0; -err: - STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, NULL, NULL, NULL); +int +marker_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + loc_t loc = { + 0, + }; + int ret = -1; + char *resolvedpath = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret <= 0) + goto unwind; + + priv = this->private; + local = frame->local; + + if (!(priv->feature_enabled & GF_QUOTA) || (local == NULL)) { + goto unwind; + } + + list_for_each_entry(entry, &entries->list, list) + { + if ((strcmp(entry->d_name, ".") == 0) || + (strcmp(entry->d_name, "..") == 0) || entry->inode == NULL) + continue; + + loc.parent = 
inode_ref(local->loc.inode); + loc.inode = inode_ref(entry->inode); + ret = inode_path(loc.parent, entry->d_name, &resolvedpath); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get the " + "path for the entry %s", + entry->d_name); + loc_wipe(&loc); + continue; + } - return 0; + loc.path = resolvedpath; + resolvedpath = NULL; + + ctx = mq_inode_ctx_new(loc.inode, this); + if (ctx == NULL) + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(loc.inode->gfid)); + + mq_xattr_state(this, &loc, entry->dict, &entry->d_stat); + loc_wipe(&loc); + + ret = marker_key_set_ver(this, entry->dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + } + +unwind: + MARKER_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata); + + return 0; } -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; +int +marker_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + marker_conf_t *priv = NULL; + loc_t loc = { + 0, + }; + marker_local_t *local = NULL; + int ret = -1; + + priv = this->private; + + dict = dict ? 
dict_ref(dict) : dict_new(); + if (!dict) + goto unwind; + + ret = marker_key_replace_with_ver(this, dict); + if (ret < 0) + goto unwind; + + if (dict_get(dict, GET_ANCESTRY_DENTRY_KEY)) { + STACK_WIND(frame, marker_build_ancestry_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); + } else { + if (priv->feature_enabled & GF_QUOTA) { + local = mem_get0(this->local_pool); - if (!this) - return ret; + MARKER_INIT_LOCAL(frame, local); - ret = xlator_mem_acct_init (this, gf_marker_mt_end + 1); + loc.parent = local->loc.inode = inode_ref(fd->inode); - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; + mq_req_xattr(this, &loc, dict, NULL, NULL); } - return ret; + STACK_WIND(frame, marker_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); + } + + dict_unref(dict); + return 0; +unwind: + MARKER_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); + return 0; } +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_marker_mt_end + 1); + + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + " failed"); + return ret; + } + + return ret; +} int32_t -init_xtime_priv (xlator_t *this, dict_t *options) +init_xtime_priv(xlator_t *this, dict_t *options) { - data_t *data = NULL; - int32_t ret = -1; - marker_conf_t *priv = NULL; + int32_t ret = -1; + marker_conf_t *priv = NULL; + char *tmp_opt = NULL; - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO (this->name, options, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO(this->name, options, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); - priv = this->private; + priv = this->private; - if((data = dict_get (options, VOLUME_UUID)) != NULL) { - priv->volume_uuid = data->data; + ret = 
dict_get_str(options, "volume-uuid", &tmp_opt); - ret = uuid_parse (priv->volume_uuid, priv->volume_uuid_bin); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "invalid volume uuid %s", priv->volume_uuid); - goto out; - } + if (ret) { + priv->volume_uuid = NULL; + tmp_opt = ""; - ret = gf_asprintf (& (priv->marker_xattr), "%s.%s.%s", - MARKER_XATTR_PREFIX, priv->volume_uuid, - XTIME); + gf_log(this->name, GF_LOG_ERROR, + "please specify the volume-uuid" + "in the translator options"); - if (ret == -1){ - priv->marker_xattr = NULL; + return -1; + } + gf_asprintf(&priv->volume_uuid, "%s", tmp_opt); - gf_log (this->name, GF_LOG_ERROR, - "Failed to allocate memory"); - goto out; - } + ret = gf_uuid_parse(priv->volume_uuid, priv->volume_uuid_bin); - gf_log (this->name, GF_LOG_DEBUG, - "the volume-uuid = %s", priv->volume_uuid); - } else { - priv->volume_uuid = NULL; + if (ret == -1) { + gf_log(this->name, GF_LOG_ERROR, "invalid volume uuid %s", + priv->volume_uuid); + goto out; + } - gf_log (this->name, GF_LOG_ERROR, - "please specify the volume-uuid" - "in the translator options"); + ret = gf_asprintf(&(priv->marker_xattr), "%s.%s.%s", MARKER_XATTR_PREFIX, + priv->volume_uuid, XTIME); - return -1; - } + if (ret == -1) { + priv->marker_xattr = NULL; + goto out; + } - if ((data = dict_get (options, TIMESTAMP_FILE)) != NULL) { - priv->timestamp_file = data->data; + gf_log(this->name, GF_LOG_DEBUG, "volume-uuid = %s", priv->volume_uuid); - gf_log (this->name, GF_LOG_DEBUG, - "the timestamp-file is = %s", - priv->timestamp_file); + ret = dict_get_str(options, "timestamp-file", &tmp_opt); + if (ret) { + priv->timestamp_file = NULL; + tmp_opt = ""; - } else { - priv->timestamp_file = NULL; + gf_log(this->name, GF_LOG_ERROR, + "please specify the timestamp-file" + "in the translator options"); - gf_log (this->name, GF_LOG_ERROR, - "please specify the timestamp-file" - "in the translator options"); + goto out; + } - goto out; - } + ret = 
gf_asprintf(&priv->timestamp_file, "%s", tmp_opt); + if (ret == -1) { + priv->timestamp_file = NULL; + goto out; + } - ret = 0; + gf_log(this->name, GF_LOG_DEBUG, "the timestamp-file is = %s", + priv->timestamp_file); + + ret = 0; out: - return ret; + return ret; } void -marker_xtime_priv_cleanup (xlator_t *this) +marker_xtime_priv_cleanup(xlator_t *this) { - marker_conf_t *priv = NULL; + marker_conf_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", this, out); - priv = (marker_conf_t *) this->private; + priv = (marker_conf_t *)this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, priv, out); - if (priv->volume_uuid != NULL) - GF_FREE (priv->volume_uuid); + GF_FREE(priv->volume_uuid); - if (priv->timestamp_file != NULL) - GF_FREE (priv->timestamp_file); + GF_FREE(priv->timestamp_file); - if (priv->marker_xattr != NULL) - GF_FREE (priv->marker_xattr); + GF_FREE(priv->marker_xattr); out: - return; + return; } void -marker_priv_cleanup (xlator_t *this) +marker_priv_cleanup(xlator_t *this) { - marker_conf_t *priv = NULL; + marker_conf_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("marker", this, out); + + priv = (marker_conf_t *)this->private; - GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO(this->name, priv, out); - priv = (marker_conf_t *) this->private; + marker_xtime_priv_cleanup(this); - GF_VALIDATE_OR_GOTO (this->name, priv, out); + LOCK_DESTROY(&priv->lock); - marker_xtime_priv_cleanup (this); + GF_FREE(priv); - LOCK_DESTROY (&priv->lock); + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } - GF_FREE (priv); out: - return; + return; } int32_t -reconfigure (xlator_t *this, dict_t *options) -{ - int32_t ret = -1; - data_t *data = NULL; - gf_boolean_t flag = _gf_false; - marker_conf_t *priv = NULL; - - GF_ASSERT (this); - GF_ASSERT (this->private); - - priv = this->private; - - priv->feature_enabled = 0; - - GF_VALIDATE_OR_GOTO 
(this->name, options, out); - - data = dict_get (options, "quota"); - if (data) { - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag == _gf_true) { - ret = init_quota_priv (this); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to initialize quota private"); - } else { - priv->feature_enabled |= GF_QUOTA; - } - } - } +reconfigure(xlator_t *this, dict_t *options) +{ + int32_t ret = 0; + data_t *data = NULL; + gf_boolean_t flag = _gf_false; + marker_conf_t *priv = NULL; + int32_t version = 0; - data = dict_get (options, "xtime"); - if (data) { - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag == _gf_true) { - marker_xtime_priv_cleanup (this); - - ret = init_xtime_priv (this, options); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to initialize xtime private, " - "xtime updation will fail"); - } else { - priv->feature_enabled |= GF_XTIME; - } - } - } + GF_ASSERT(this); + GF_ASSERT(this->private); + + priv = this->private; + + priv->feature_enabled = 0; + + GF_VALIDATE_OR_GOTO(this->name, options, out); + + data = dict_get(options, "quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_QUOTA; + } + + data = dict_get(options, "inode-quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_INODE_QUOTA; + } + + data = dict_get(options, "quota-version"); + if (data) + ret = gf_string2int32(data->data, &version); + + if (priv->feature_enabled) { + if (version >= 0) + priv->version = version; + else + gf_log(this->name, GF_LOG_ERROR, + "Invalid quota " + "version %d", + priv->version); + } + + data = dict_get(options, "xtime"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) { + marker_xtime_priv_cleanup(this); + + ret = init_xtime_priv(this, options); + if (ret < 0) { + gf_log(this->name, 
GF_LOG_WARNING, + "failed to initialize xtime private, " + "xtime updation will fail"); + } else { + priv->feature_enabled |= GF_XTIME; + data = dict_get(options, "gsync-force-xtime"); + if (!data) + goto out; + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag) + priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; + } + } + } out: - return 0; + return ret; } - int32_t -init (xlator_t *this) -{ - dict_t *options = NULL; - data_t *data = NULL; - int32_t ret = 0; - gf_boolean_t flag = _gf_false; - marker_conf_t *priv = NULL; - - if (!this->children) { - gf_log (this->name, GF_LOG_ERROR, - "marker translator needs subvolume defined."); - return -1; - } +init(xlator_t *this) +{ + dict_t *options = NULL; + data_t *data = NULL; + int32_t ret = 0; + gf_boolean_t flag = _gf_false; + marker_conf_t *priv = NULL; - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Volume is dangling."); - return -1; - } + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "marker translator needs subvolume defined."); + return -1; + } - options = this->options; + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "Volume is dangling."); + return -1; + } - ALLOCATE_OR_GOTO (this->private, marker_conf_t, err); + options = this->options; - priv = this->private; + ALLOCATE_OR_GOTO(this->private, marker_conf_t, err); - priv->feature_enabled = 0; + priv = this->private; - LOCK_INIT (&priv->lock); + priv->feature_enabled = 0; + priv->version = 0; - data = dict_get (options, "quota"); - if (data) { - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag == _gf_true) { - ret = init_quota_priv (this); - if (ret < 0) - goto err; + LOCK_INIT(&priv->lock); - priv->feature_enabled |= GF_QUOTA; - } - } + data = dict_get(options, "quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_QUOTA; + } - data = dict_get (options, "xtime"); - if (data) { - ret = gf_string2boolean 
(data->data, &flag); - if (ret == 0 && flag == _gf_true) { - ret = init_xtime_priv (this, options); - if (ret < 0) - goto err; + data = dict_get(options, "inode-quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_INODE_QUOTA; + } - priv->feature_enabled |= GF_XTIME; - } - } + data = dict_get(options, "quota-version"); + if (data) + ret = gf_string2int32(data->data, &priv->version); - return 0; + if ((ret == 0) && priv->feature_enabled && priv->version < 0) { + gf_log(this->name, GF_LOG_ERROR, "Invalid quota version %d", + priv->version); + goto err; + } + + data = dict_get(options, "xtime"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) { + ret = init_xtime_priv(this, options); + if (ret < 0) + goto err; + + priv->feature_enabled |= GF_XTIME; + data = dict_get(options, "gsync-force-xtime"); + if (!data) + goto cont; + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag) + priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; + } + } + +cont: + this->local_pool = mem_pool_new(marker_local_t, 128); + if (!this->local_pool) { + gf_log(this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto err; + } + + return 0; err: - marker_priv_cleanup (this); + marker_priv_cleanup(this); - return -1; + return -1; } int32_t -marker_forget (xlator_t *this, inode_t *inode) +marker_forget(xlator_t *this, inode_t *inode) { - marker_inode_ctx_t *ctx = NULL; - uint64_t value = 0; + marker_inode_ctx_t *ctx = NULL; + uint64_t value = 0; - if (inode_ctx_del (inode, this, &value) != 0) - goto out; + if (inode_ctx_del(inode, this, &value) != 0) + goto out; - ctx = (marker_inode_ctx_t *)(unsigned long)value; - if (ctx == NULL) { - goto out; - } + ctx = (marker_inode_ctx_t *)(unsigned long)value; + if (ctx == NULL) { + goto out; + } - mq_forget (this, ctx->quota_ctx); + mq_forget(this, ctx->quota_ctx); - GF_FREE (ctx); + GF_FREE(ctx); 
out: - return 0; + return 0; } void -fini (xlator_t *this) +fini(xlator_t *this) { - marker_priv_cleanup (this); + marker_priv_cleanup(this); } struct xlator_fops fops = { - .lookup = marker_lookup, - .create = marker_create, - .mkdir = marker_mkdir, - .writev = marker_writev, - .truncate = marker_truncate, - .ftruncate = marker_ftruncate, - .symlink = marker_symlink, - .link = marker_link, - .unlink = marker_unlink, - .rmdir = marker_rmdir, - .rename = marker_rename, - .mknod = marker_mknod, - .setxattr = marker_setxattr, - .fsetxattr = marker_fsetxattr, - .setattr = marker_setattr, - .fsetattr = marker_fsetattr, - .removexattr = marker_removexattr, - .getxattr = marker_getxattr + .lookup = marker_lookup, + .create = marker_create, + .mkdir = marker_mkdir, + .writev = marker_writev, + .truncate = marker_truncate, + .ftruncate = marker_ftruncate, + .symlink = marker_symlink, + .link = marker_link, + .unlink = marker_unlink, + .rmdir = marker_rmdir, + .rename = marker_rename, + .mknod = marker_mknod, + .setxattr = marker_setxattr, + .fsetxattr = marker_fsetxattr, + .setattr = marker_setattr, + .fsetattr = marker_fsetattr, + .removexattr = marker_removexattr, + .getxattr = marker_getxattr, + .readdirp = marker_readdirp, + .fallocate = marker_fallocate, + .discard = marker_discard, + .zerofill = marker_zerofill, }; -struct xlator_cbks cbks = { - .forget = marker_forget -}; +struct xlator_cbks cbks = {.forget = marker_forget}; struct volume_options options[] = { - {.key = {"volume-uuid"}}, - {.key = {"timestamp-file"}}, - {.key = {"quota"}}, - {.key = {"xtime"}}, - {.key = {NULL}} + {.key = {"volume-uuid"}, .default_value = "{{ volume.id }}"}, + {.key = {"timestamp-file"}}, + { + .key = {"quota"}, + .op_version = {1}, + .flags = OPT_FLAG_NONE, + .tags = {}, + }, + { + .key = {"inode-quota"}, + .op_version = {1}, + .flags = OPT_FLAG_NONE, + .tags = {}, + }, + { + .key = {"xtime"}, + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE, + .tags = {}, + }, + { 
+ .key = {"gsync-force-xtime"}, + .op_version = {2}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE, + .tags = {}, + }, + { + .key = {"quota-version"}, + .flags = OPT_FLAG_NONE, + }, + {.key = {NULL}}}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "marker", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h index f5ed9df399e..4821094c14b 100644 --- a/xlators/features/marker/src/marker.h +++ b/xlators/features/marker/src/marker.h @@ -1,126 +1,147 @@ -/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - #ifndef _MARKER_H #define _MARKER_H -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include "marker-quota.h" -#include "xlator.h" -#include "defaults.h" -#include "uuid.h" -#include "call-stub.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/compat-uuid.h> +#include <glusterfs/call-stub.h> #define MARKER_XATTR_PREFIX "trusted.glusterfs" -#define XTIME "xtime" -#define VOLUME_MARK "volume-mark" -#define VOLUME_UUID "volume-uuid" -#define TIMESTAMP_FILE "timestamp-file" +#define XTIME "xtime" +#define VOLUME_MARK "volume-mark" +#define VOLUME_UUID "volume-uuid" +#define TIMESTAMP_FILE "timestamp-file" enum { - GF_QUOTA=1, - GF_XTIME=2 + GF_QUOTA = 1, + GF_XTIME = 2, + GF_XTIME_GSYNC_FORCE = 4, + GF_INODE_QUOTA = 8, }; /*initialize the local variable*/ -#define MARKER_INIT_LOCAL(_frame,_local) do { \ - _frame->local = _local; \ - _local->pid = _frame->root->pid; \ - memset (&_local->loc, 0, sizeof (loc_t)); \ - _local->ref = 1; \ - _local->uid = -1; \ - _local->gid = -1; \ - LOCK_INIT (&_local->lock); \ - _local->oplocal = NULL; \ - } while (0) +#define MARKER_INIT_LOCAL(_frame, _local) \ + do { \ + _frame->local = _local; \ + _local->pid = _frame->root->pid; \ + memset(&_local->loc, 0, sizeof(loc_t)); \ + _local->ref = 1; \ + _local->uid = -1; \ + _local->gid = -1; \ + LOCK_INIT(&_local->lock); \ + _local->oplocal = NULL; \ + } while (0) /* try alloc and if it fails, goto label */ -#define ALLOCATE_OR_GOTO(var, type, label) do { \ - var = GF_CALLOC (sizeof (type), 1, \ - gf_marker_mt_##type); \ - if (!var) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "out of memory :("); \ - goto label; \ - } \ - } while (0) - -#define _MARKER_SET_UID_GID(dest, src) \ - do { \ - if (src->uid != -1 && \ - src->gid != -1) { \ - dest->uid = src->uid; \ - dest->gid = src->gid; \ - } \ - } while (0) - -#define MARKER_SET_UID_GID(frame, dest, src) \ - do { \ - _MARKER_SET_UID_GID (dest, src); \ - frame->root->uid = 0; \ - 
frame->root->gid = 0; \ - frame->cookie = (void *) _GF_UID_GID_CHANGED; \ - } while (0) - -#define MARKER_RESET_UID_GID(frame, dest, src) \ - do { \ - _MARKER_SET_UID_GID (dest, src); \ - frame->cookie = NULL; \ - } while (0) - -struct marker_local{ - uint32_t timebuf[2]; - pid_t pid; - loc_t loc; - loc_t parent_loc; - loc_t *next_lock_on; - uid_t uid; - gid_t gid; - int32_t ref; - int32_t ia_nlink; - gf_lock_t lock; - mode_t mode; - int32_t err; - call_stub_t *stub; - int64_t contribution; - struct marker_local *oplocal; +#define ALLOCATE_OR_GOTO(var, type, label) \ + do { \ + var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \ + if (!var) { \ + gf_log(this->name, GF_LOG_ERROR, "out of memory :("); \ + goto label; \ + } \ + } while (0) + +#define _MARKER_SET_UID_GID(dest, src) \ + do { \ + if (src->uid != -1 && src->gid != -1) { \ + dest->uid = src->uid; \ + dest->gid = src->gid; \ + } \ + } while (0) + +#define MARKER_SET_UID_GID(frame, dest, src) \ + do { \ + _MARKER_SET_UID_GID(dest, src); \ + frame->root->uid = 0; \ + frame->root->gid = 0; \ + frame->cookie = (void *)_GF_UID_GID_CHANGED; \ + } while (0) + +#define MARKER_RESET_UID_GID(frame, dest, src) \ + do { \ + _MARKER_SET_UID_GID(dest, src); \ + frame->cookie = NULL; \ + } while (0) + +#define MARKER_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + quota_local_t *_local = NULL; \ + if (frame) { \ + _local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + if (_local) \ + marker_local_unref(_local); \ + } while (0) + +struct marker_local { + uint32_t timebuf[2]; + pid_t pid; + loc_t loc; + loc_t parent_loc; + uid_t uid; + gid_t gid; + int32_t ref; + uint32_t ia_nlink; + struct iatt buf; + gf_lock_t lock; + mode_t mode; + int32_t err; + call_stub_t *stub; + call_frame_t *lk_frame; + quota_meta_t contribution; + struct marker_local *oplocal; + + /* marker quota specific */ + int64_t delta; + int64_t d_off; + int64_t sum; + int64_t size; + int32_t hl_count; + int32_t dentry_child_count; + + fd_t *fd; + call_frame_t *frame; + + quota_inode_ctx_t *ctx; + inode_contribution_t *contri; + + int xflag; + dict_t *xdata; + gf_boolean_t skip_txn; }; typedef struct marker_local marker_local_t; +#define quota_local_t marker_local_t + struct marker_inode_ctx { - struct quota_inode_ctx *quota_ctx; + struct quota_inode_ctx *quota_ctx; }; typedef struct marker_inode_ctx marker_inode_ctx_t; -struct marker_conf{ - char feature_enabled; - char *size_key; - char *dirty_key; - char *volume_uuid; - uuid_t volume_uuid_bin; - char *timestamp_file; - char *marker_xattr; - uint64_t quota_lk_owner; - gf_lock_t lock; +struct marker_conf { + char feature_enabled; + char *size_key; + char *dirty_key; + char *volume_uuid; + uuid_t volume_uuid_bin; + char *timestamp_file; + char *marker_xattr; + uint64_t quota_lk_owner; + gf_lock_t lock; + int32_t version; }; typedef struct marker_conf marker_conf_t; diff --git a/xlators/features/marker/utils/Makefile.am b/xlators/features/marker/utils/Makefile.am deleted file mode 100644 index 556951d9fb7..00000000000 --- a/xlators/features/marker/utils/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = syncdaemon src - -CLEANFILES = diff --git a/xlators/features/marker/utils/src/Makefile.am b/xlators/features/marker/utils/src/Makefile.am deleted 
file mode 100644 index 73c99cb76d8..00000000000 --- a/xlators/features/marker/utils/src/Makefile.am +++ /dev/null @@ -1,22 +0,0 @@ -gsyncddir = $(libexecdir)/glusterfs - -gsyncd_PROGRAMS = gsyncd - -gsyncd_SOURCES = gsyncd.c procdiggy.c - -gsyncd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -gsyncd_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS) - -noinst_HEADERS = procdiggy.h - -AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src\ - -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\ - -DSBIN_DIR=\"$(sbindir)\" -DPYTHON=\"$(PYTHON)\" - - -CLEANFILES = - -$(top_builddir)/libglusterfs/src/libglusterfs.la: - $(MAKE) -C $(top_builddir)/libglusterfs/src/ all diff --git a/xlators/features/marker/utils/src/gsyncd.c b/xlators/features/marker/utils/src/gsyncd.c deleted file mode 100644 index d554e562b23..00000000000 --- a/xlators/features/marker/utils/src/gsyncd.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. 
-*/ - - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <string.h> - -#include "common-utils.h" -#include "run.h" -#include "procdiggy.h" - -#define _GLUSTERD_CALLED_ "_GLUSTERD_CALLED_" -#define _GSYNCD_DISPATCHED_ "_GSYNCD_DISPATCHED_" -#define GSYNCD_CONF "geo-replication/gsyncd.conf" -#define GSYNCD_PY "gsyncd.py" -#define RSYNC "rsync" - -int restricted = 0; - -static int -duplexpand (void **buf, size_t tsiz, size_t *len) -{ - size_t osiz = tsiz * *len; - - *buf = realloc (*buf, osiz << 1); - if (!buf) - return -1; - - memset ((char *)*buf + osiz, 0, osiz); - *len <<= 1; - - return 0; -} - -static int -str2argv (char *str, char ***argv) -{ - char *p = NULL; - int argc = 0; - size_t argv_len = 32; - int ret = 0; - - assert (str); - str = strdup (str); - if (!str) - return -1; - - *argv = calloc (argv_len, sizeof (**argv)); - if (!*argv) - goto error; - - while ((p = strtok (str, " "))) { - str = NULL; - - argc++; - if (argc == argv_len) { - ret = duplexpand ((void *)argv, - sizeof (**argv), - &argv_len); - if (ret == -1) - goto error; - } - (*argv)[argc - 1] = p; - } - - return argc; - - error: - fprintf (stderr, "out of memory\n"); - return -1; -} - -static int -invoke_gsyncd (int argc, char **argv) -{ - char config_file[PATH_MAX] = {0,}; - size_t gluster_workdir_len = 0; - runner_t runner = {0,}; - int i = 0; - int j = 0; - char *nargv[argc + 4]; - - if (restricted) { - /* in restricted mode we forcibly use the system-wide config */ - runinit (&runner); - runner_add_args (&runner, SBIN_DIR"/gluster", - "--log-file=-", "system::", "getwd", - NULL); - runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); - if (runner_start (&runner) == 0 && - fgets (config_file, PATH_MAX, - runner_chio (&runner, STDOUT_FILENO)) != NULL && - config_file[strlen (config_file) - 1] == '\n' && - runner_end (&runner) == 0) - gluster_workdir_len = strlen (config_file) - 1; - - if 
(gluster_workdir_len) { - if (gluster_workdir_len + 1 + strlen (GSYNCD_CONF) + 1 > - PATH_MAX) - goto error; - config_file[gluster_workdir_len] = '/'; - strcat (config_file, GSYNCD_CONF); - } else - goto error; - - if (setenv ("_GSYNCD_RESTRICTED_", "1", 1) == -1) - goto error; - } - - if (chdir ("/") == -1) - goto error; - - j = 0; - nargv[j++] = PYTHON; - nargv[j++] = GSYNCD_PREFIX"/python/syncdaemon/"GSYNCD_PY; - for (i = 1; i < argc; i++) - nargv[j++] = argv[i]; - if (config_file[0]) { - nargv[j++] = "-c"; - nargv[j++] = config_file; - } - nargv[j++] = NULL; - - execvp (PYTHON, nargv); - - fprintf (stderr, "exec of "PYTHON" failed\n"); - return 127; - - error: - fprintf (stderr, "gsyncd initializaion failed\n"); - return 1; -} - - -static int -find_gsyncd (pid_t pid, pid_t ppid, char *name, void *data) -{ - char buf[NAME_MAX * 2] = {0,}; - char *p = NULL; - int zeros = 0; - int ret = 0; - int fd = -1; - pid_t *pida = (pid_t *)data; - - if (ppid != pida[0]) - return 0; - - ret = gf_asprintf (&p, PROC"/%d/cmdline", pid); - if (ret == -1) { - fprintf (stderr, "out of memory\n"); - return -1; - } - - fd = open (p, O_RDONLY); - if (fd == -1) - return 0; - ret = read (fd, buf, sizeof (buf)); - close (fd); - if (ret == -1) - return 0; - for (zeros = 0, p = buf; zeros < 2 && p < buf + ret; p++) - zeros += !*p; - - ret = 0; - switch (zeros) { - case 2: - if ((strcmp (basename (buf), basename (PYTHON)) || - strcmp (basename (buf + strlen (buf) + 1), GSYNCD_PY)) == 0) { - ret = 1; - break; - } - /* fallthrough */ - case 1: - if (strcmp (basename (buf), GSYNCD_PY) == 0) - ret = 1; - } - - if (ret == 1) { - if (pida[1] != -1) { - fprintf (stderr, GSYNCD_PY" sibling is not unique"); - return -1; - } - pida[1] = pid; - } - - return 0; -} - -static int -invoke_rsync (int argc, char **argv) -{ - int i = 0; - char *p = NULL; - pid_t pid = -1; - pid_t ppid = -1; - pid_t pida[] = {-1, -1}; - char *name = NULL; - char buf[PATH_MAX + 1] = {0,}; - int ret = 0; - - assert (argv[argc] 
== NULL); - - if (argc < 2 || strcmp (argv[1], "--server") != 0) - goto error; - - for (i = 2; i < argc && argv[i][0] == '-'; i++); - - if (!(i == argc - 2 && strcmp (argv[i], ".") == 0 && argv[i + 1][0] == '/')) { - fprintf (stderr, "need an rsync invocation without protected args\n"); - goto error; - } - - /* look up sshd we are spawned from */ - for (pid = getpid () ;; pid = ppid) { - ppid = pidinfo (pid, &name); - if (ppid < 0) { - fprintf (stderr, "sshd ancestor not found\n"); - goto error; - } - if (strcmp (name, "sshd") == 0) - break; - } - /* look up "ssh-sibling" gsyncd */ - pida[0] = pid; - ret = prociter (find_gsyncd, pida); - if (ret == -1 || pida[1] == -1) { - fprintf (stderr, "gsyncd sibling not found\n"); - goto error; - } - /* check if rsync target matches gsyncd target */ - if (gf_asprintf (&p, PROC"/%d/cwd", pida[1]) == -1) { - fprintf (stderr, "out of memory\n"); - goto error; - } - ret = readlink (p, buf, sizeof (buf)); - if (ret == -1 || ret == sizeof (buf)) - goto error; - if (strcmp (argv[argc - 1], "/") == 0 /* root dir cannot be a target */ || - (strcmp (argv[argc - 1], p) /* match against gluster target */ && - strcmp (argv[argc - 1], buf) /* match against file target */) != 0) { - fprintf (stderr, "rsync target does not match "GEOREP" session\n"); - goto error; - } - - argv[0] = RSYNC; - - execvp (RSYNC, argv); - - fprintf (stderr, "exec of "RSYNC" failed\n"); - return 127; - - error: - fprintf (stderr, "disallowed "RSYNC" invocation\n"); - return 1; -} - - -struct invocable { - char *name; - int (*invoker) (int argc, char **argv); -}; - -struct invocable invocables[] = { - { "rsync", invoke_rsync }, - { "gsyncd", invoke_gsyncd }, - { NULL, NULL} -}; - -int -main (int argc, char **argv) -{ - char *evas = NULL; - struct invocable *i = NULL; - char *b = NULL; - char *sargv = NULL; - - evas = getenv (_GLUSTERD_CALLED_); - if (evas && strcmp (evas, "1") == 0) - /* OK, we know glusterd called us, no need to look for further config - * ... 
altough this conclusion should not inherit to our children - */ - unsetenv (_GLUSTERD_CALLED_); - else { - /* we regard all gsyncd invocations unsafe - * that do not come from glusterd and - * therefore restrict it - */ - restricted = 1; - - if (!getenv (_GSYNCD_DISPATCHED_)) { - evas = getenv ("SSH_ORIGINAL_COMMAND"); - if (evas) - sargv = evas; - else { - evas = getenv ("SHELL"); - if (evas && strcmp (basename (evas), "gsyncd") == 0 && - argc == 3 && strcmp (argv[1], "-c") == 0) - sargv = argv[2]; - } - } - - } - - if (!(sargv && restricted)) - return invoke_gsyncd (argc, argv); - - argc = str2argv (sargv, &argv); - if (argc == -1 || setenv (_GSYNCD_DISPATCHED_, "1", 1) == -1) { - fprintf (stderr, "internal error\n"); - return 1; - } - - b = basename (argv[0]); - for (i = invocables; i->name; i++) { - if (strcmp (b, i->name) == 0) - return i->invoker (argc, argv); - } - - fprintf (stderr, "invoking %s in restricted SSH session is not allowed\n", - b); - - return 1; -} diff --git a/xlators/features/marker/utils/src/procdiggy.c b/xlators/features/marker/utils/src/procdiggy.c deleted file mode 100644 index fc0f97999d6..00000000000 --- a/xlators/features/marker/utils/src/procdiggy.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. 
-*/ - - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <string.h> -#include <ctype.h> - -#include "common-utils.h" -#include "procdiggy.h" - -pid_t -pidinfo (pid_t pid, char **name) -{ - char buf[NAME_MAX * 2] = {0,}; - FILE *f = NULL; - char *p = NULL; - int ret = 0; - - ret = gf_asprintf (&p, PROC"/%d/status", pid); - if (ret == -1) - goto oom; - - f = fopen (p, "r"); - if (!f) - return -1; - - if (name) - *name = NULL; - for (;;) { - memset (buf, 0, sizeof (buf)); - if (fgets (buf, sizeof (buf), f) == NULL || - buf[strlen (buf) - 1] != '\n') { - pid = -1; - goto out; - } - buf[strlen (buf) -1] = '\0'; - - if (name && !*name) { - p = strtail (buf, "Name:"); - if (p) { - while (isspace (*++p)); - *name = gf_strdup (p); - if (!*name) - goto oom; - continue; - } - } - - p = strtail (buf, "PPid:"); - if (p) - break; - } - - while (isspace (*++p)); - ret = gf_string2int (p, &pid); - if (ret == -1) - pid = -1; - - out: - fclose (f); - return pid; - - oom: - fclose (f); - fprintf (stderr, "out of memory\n"); - return -2; -} - -int -prociter (int (*proch) (pid_t pid, pid_t ppid, char *name, void *data), - void *data) -{ - char *name = NULL; - DIR *d = NULL; - struct dirent *de = NULL; - pid_t pid = -1; - pid_t ppid = -1; - int ret = 0; - - d = opendir (PROC); - while (errno = 0, de = readdir (d)) { - if (gf_string2int (de->d_name, &pid) != -1 && pid >= 0) { - ppid = pidinfo (pid, &name); - switch (ppid) { - case -1: continue; - case -2: return -1; - } - ret = proch (pid, ppid, name, data); - if (ret) - return ret; - } - } - if (errno) { - fprintf (stderr, "failed to traverse "PROC" (%s)\n", - strerror (errno)); - return -1; - } - - return 0; -} diff --git a/xlators/features/marker/utils/src/procdiggy.h b/xlators/features/marker/utils/src/procdiggy.h deleted file mode 100644 index f4586de6c67..00000000000 --- a/xlators/features/marker/utils/src/procdiggy.h +++ /dev/null @@ -1,26 
+0,0 @@ -/* - Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - -#define PROC "/proc" - -pid_t pidinfo (pid_t pid, char **name); - -int prociter (int (*proch) (pid_t pid, pid_t ppid, char *name, void *data), - void *data); - diff --git a/xlators/features/marker/utils/syncdaemon/Makefile.am b/xlators/features/marker/utils/syncdaemon/Makefile.am deleted file mode 100644 index cc7cee102ea..00000000000 --- a/xlators/features/marker/utils/syncdaemon/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon - -syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \ - $(top_builddir)/contrib/ipaddr-py/ipaddr.py - -CLEANFILES = diff --git a/xlators/features/marker/utils/syncdaemon/README.md b/xlators/features/marker/utils/syncdaemon/README.md deleted file mode 100644 index d45006932d1..00000000000 --- a/xlators/features/marker/utils/syncdaemon/README.md +++ /dev/null @@ -1,81 +0,0 @@ -gsycnd, the Gluster Syncdaemon -============================== - -REQUIREMENTS ------------- - -_gsyncd_ is a program which can operate either in _master_ or in _slave_ mode. -Requirements are categorized according to this. 
- -* supported OS is GNU/Linux -* Python >= 2.5, or 2.4 with Ctypes (see below) (both) -* OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave) -* rsync (both) -* glusterfs with marker support (master); glusterfs (optional on slave) -* FUSE; for supported versions consult glusterfs - -INSTALLATION ------------- - -As of now, the supported way of operation is running from the source directory. - -If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/). - -CONFIGURATION -------------- - -gsyncd tunables are a subset of the long command-line options; for listing them, -type - - gsyncd.py --help - -and see the long options up to "--config-file". (The leading double dash should be omitted; -interim underscores and dashes are interchangeable.) The set of options bear some resemblance -to those of glusterfs and rsync. - -The config file format matches the following syntax: - - <option1>: <value1> - <option2>: <value2> - # comment - -By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd.conf_ -in the source tree. - -USAGE ------ - -gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally. -Assume we have a gluster volume _pop_ at localhost. We try to set up the following mirrors -for it with gysncd: - -1. _/data/mirror_ -2. local gluster volume _yow_ -3. _/data/far_mirror_ at example.com -4. gluster volume _moz_ at example.com - -The respective gsyncd invocations are (demoing some syntax sugaring): - -1. - - gsyncd.py gluster://localhost:pop file:///data/mirror - - or short form - - gsyncd.py :pop /data/mirror - -2. `gsyncd :pop :yow` -3. - - gsyncd.py :pop ssh://example.com:/data/far_mirror - - or short form - - gsyncd.py :pop example.com:/data/far_mirror - -4. 
`gsyncd.py :pop example.com::moz` - -gsyncd has to be available on both sides; it's location on the remote side has to be specified -via the "--remote-gsyncd" option (or "remote-gsyncd" config file parameter). (This option can also be -used for setting options on the remote side, although the suggested mode of operation is to -set parameters like log file / pid file in the configuration file.) diff --git a/xlators/features/marker/utils/syncdaemon/__codecheck.py b/xlators/features/marker/utils/syncdaemon/__codecheck.py deleted file mode 100644 index e3386afba8b..00000000000 --- a/xlators/features/marker/utils/syncdaemon/__codecheck.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import os.path -import sys -import tempfile -import shutil - -ipd = tempfile.mkdtemp(prefix = 'codecheck-aux') - -try: - # add a fake ipaddr module, we don't want to - # deal with the real one (just test our code) - f = open(os.path.join(ipd, 'ipaddr.py'), 'w') - f.write(""" -class IPAddress(object): - pass -class IPNetwork(list): - pass -""") - f.close() - sys.path.append(ipd) - - fl = os.listdir(os.path.dirname(sys.argv[0]) or '.') - fl.sort() - for f in fl: - if f[-3:] != '.py' or f[0] == '_': - continue - m = f[:-3] - sys.stdout.write('importing %s ...' 
% m) - __import__(m) - print(' OK.') - - def sys_argv_set(a): - sys.argv = sys.argv[:1] + a - - gsyncd = sys.modules['gsyncd'] - for a in [['--help'], ['--version'], ['--canonicalize-escape-url', '/foo']]: - print('>>> invoking program with args: %s' % ' '.join(a)) - pid = os.fork() - if not pid: - sys_argv_set(a) - gsyncd.main() - _, r = os.waitpid(pid, 0) - if r: - raise RuntimeError('invocation failed') -finally: - shutil.rmtree(ipd) diff --git a/xlators/features/marker/utils/syncdaemon/__init__.py b/xlators/features/marker/utils/syncdaemon/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 --- a/xlators/features/marker/utils/syncdaemon/__init__.py +++ /dev/null diff --git a/xlators/features/marker/utils/syncdaemon/configinterface.py b/xlators/features/marker/utils/syncdaemon/configinterface.py deleted file mode 100644 index e55bec519e9..00000000000 --- a/xlators/features/marker/utils/syncdaemon/configinterface.py +++ /dev/null @@ -1,224 +0,0 @@ -try: - import ConfigParser -except ImportError: - # py 3 - import configparser as ConfigParser -import re -from string import Template - -from syncdutils import escape, unescape, norm, update_file, GsyncdError - -SECT_ORD = '__section_order__' -SECT_META = '__meta__' -config_version = 2.0 - -re_type = type(re.compile('')) - - -class MultiDict(object): - """a virtual dict-like class which functions as the union of underlying dicts""" - - def __init__(self, *dd): - self.dicts = dd - - def __getitem__(self, key): - val = None - for d in self.dicts: - if d.get(key): - val = d[key] - if not val: - raise KeyError(key) - return val - - -class GConffile(object): - """A high-level interface to ConfigParser which flattens the two-tiered - config layout by implenting automatic section dispatch based on initial - parameters. - - Also ensure section ordering in terms of their time of addition -- a compat - hack for Python < 2.7. 
- """ - - def _normconfig(self): - """normalize config keys by s/-/_/g""" - for n, s in self.config._sections.items(): - if n.find('__') == 0: - continue - s2 = type(s)() - for k, v in s.items(): - if k.find('__') != 0: - k = norm(k) - s2[k] = v - self.config._sections[n] = s2 - - def __init__(self, path, peers, *dd): - """ - - .path: location of config file - - .config: underlying ConfigParser instance - - .peers: on behalf of whom we flatten .config - (master, or master-slave url pair) - - .auxdicts: template subtituents - """ - self.peers = peers - self.path = path - self.auxdicts = dd - self.config = ConfigParser.RawConfigParser() - self.config.read(path) - self._normconfig() - - def section(self, rx=False): - """get the section name of the section representing .peers in .config""" - peers = self.peers - if not peers: - peers = ['.', '.'] - rx = True - if rx: - st = 'peersrx' - else: - st = 'peers' - return ' '.join([st] + [escape(u) for u in peers]) - - @staticmethod - def parse_section(section): - """retrieve peers sequence encoded by section name - (as urls or regexen, depending on section type) - """ - sl = section.split() - st = sl.pop(0) - sl = [unescape(u) for u in sl] - if st == 'peersrx': - sl = [re.compile(u) for u in sl] - return sl - - def ord_sections(self): - """Return an ordered list of sections. - - Ordering happens based on the auxiliary - SECT_ORD section storing indices for each - section added through the config API. - - To not to go corrupt in case of manually - written config files, we take care to append - also those sections which are not registered - in SECT_ORD. - - Needed for python 2.{4,5,6} where ConfigParser - cannot yet order sections/options internally. 
- """ - so = {} - if self.config.has_section(SECT_ORD): - so = self.config._sections[SECT_ORD] - so2 = {} - for k, v in so.items(): - if k != '__name__': - so2[k] = int(v) - tv = 0 - if so2: - tv = max(so2.values()) + 1 - ss = [s for s in self.config.sections() if s.find('__') != 0] - for s in ss: - if s in so.keys(): - continue - so2[s] = tv - tv += 1 - def scmp(x, y): - return cmp(*(so2[s] for s in (x, y))) - ss.sort(scmp) - return ss - - def update_to(self, dct, allow_unresolved=False): - """update @dct from key/values of ours. - - key/values are collected from .config by filtering the regexp sections - according to match, and from .section. The values are treated as templates, - which are substituted from .auxdicts and (in case of regexp sections) - match groups. - """ - if not self.peers: - raise GsyncdError('no peers given, cannot select matching options') - def update_from_sect(sect, mud): - for k, v in self.config._sections[sect].items(): - if k == '__name__': - continue - if allow_unresolved: - dct[k] = Template(v).safe_substitute(mud) - else: - dct[k] = Template(v).substitute(mud) - for sect in self.ord_sections(): - sp = self.parse_section(sect) - if isinstance(sp[0], re_type) and len(sp) == len(self.peers): - match = True - mad = {} - for i in range(len(sp)): - m = sp[i].search(self.peers[i]) - if not m: - match = False - break - for j in range(len(m.groups())): - mad['match%d_%d' % (i+1, j+1)] = m.groups()[j] - if match: - update_from_sect(sect, MultiDict(dct, mad, *self.auxdicts)) - if self.config.has_section(self.section()): - update_from_sect(self.section(), MultiDict(dct, *self.auxdicts)) - - def get(self, opt=None): - """print the matching key/value pairs from .config, - or if @opt given, the value for @opt (according to the - logic described in .update_to) - """ - d = {} - self.update_to(d, allow_unresolved = True) - if opt: - opt = norm(opt) - v = d.get(opt) - if v: - print(v) - else: - for k, v in d.iteritems(): - if k == '__name__': - continue 
- print("%s: %s" % (k, v)) - - def write(self, trfn, opt, *a, **kw): - """update on-disk config transactionally - - @trfn is the transaction function - """ - def mergeconf(f): - self.config = ConfigParser.RawConfigParser() - self.config.readfp(f) - self._normconfig() - if not self.config.has_section(SECT_META): - self.config.add_section(SECT_META) - self.config.set(SECT_META, 'version', config_version) - return trfn(norm(opt), *a, **kw) - def updateconf(f): - self.config.write(f) - update_file(self.path, updateconf, mergeconf) - - def _set(self, opt, val, rx=False): - """set @opt to @val in .section""" - sect = self.section(rx) - if not self.config.has_section(sect): - self.config.add_section(sect) - # regarding SECT_ORD, cf. ord_sections - if not self.config.has_section(SECT_ORD): - self.config.add_section(SECT_ORD) - self.config.set(SECT_ORD, sect, len(self.config._sections[SECT_ORD])) - self.config.set(sect, opt, val) - return True - - def set(self, opt, *a, **kw): - """perform ._set transactionally""" - self.write(self._set, opt, *a, **kw) - - def _delete(self, opt, rx=False): - """delete @opt from .section""" - sect = self.section(rx) - if self.config.has_section(sect): - return self.config.remove_option(sect, opt) - - def delete(self, opt, *a, **kw): - """perform ._delete transactionally""" - self.write(self._delete, opt, *a, **kw) diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/xlators/features/marker/utils/syncdaemon/gconf.py deleted file mode 100644 index 146c72a1825..00000000000 --- a/xlators/features/marker/utils/syncdaemon/gconf.py +++ /dev/null @@ -1,20 +0,0 @@ -import os - -class GConf(object): - """singleton class to store globals - shared between gsyncd modules""" - - ssh_ctl_dir = None - ssh_ctl_args = None - cpid = None - pid_file_owned = False - log_exit = False - permanent_handles = [] - log_metadata = {} - - @classmethod - def setup_ssh_ctl(cls, ctld): - cls.ssh_ctl_dir = ctld - cls.ssh_ctl_args = ["-oControlMaster=auto", "-S", 
os.path.join(ctld, "gsycnd-ssh-%r@%h:%p")] - -gconf = GConf() diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py deleted file mode 100644 index f145cbc6d98..00000000000 --- a/xlators/features/marker/utils/syncdaemon/gsyncd.py +++ /dev/null @@ -1,369 +0,0 @@ -#!/usr/bin/env python - -import os -import os.path -import sys -import time -import logging -import signal -import optparse -import fcntl -import fnmatch -from optparse import OptionParser, SUPPRESS_HELP -from logging import Logger -from errno import EEXIST, ENOENT - -from ipaddr import IPAddress, IPNetwork - -from gconf import gconf -from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception -from syncdutils import GsyncdError, select -from configinterface import GConffile -import resource -from monitor import monitor - -class GLogger(Logger): - """Logger customizations for gsyncd. - - It implements a log format similar to that of glusterfs. - """ - - def makeRecord(self, name, level, *a): - rv = Logger.makeRecord(self, name, level, *a) - rv.nsecs = (rv.created - int(rv.created)) * 1000000 - fr = sys._getframe(4) - callee = fr.f_locals.get('self') - if callee: - ctx = str(type(callee)).split("'")[1].split('.')[-1] - else: - ctx = '<top>' - if not hasattr(rv, 'funcName'): - rv.funcName = fr.f_code.co_name - rv.lvlnam = logging.getLevelName(level)[0] - rv.ctx = ctx - return rv - - @classmethod - def setup(cls, **kw): - lbl = kw.get('label', "") - if lbl: - lbl = '(' + lbl + ')' - lprm = {'datefmt': "%Y-%m-%d %H:%M:%S", - 'format': "[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s" + lbl + ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s"} - lprm.update(kw) - lvl = kw.get('level', logging.INFO) - lprm['level'] = lvl - logging.root = cls("root", lvl) - logging.setLoggerClass(cls) - logging.getLogger().handlers = [] - logging.basicConfig(**lprm) - -def startup(**kw): - """set up logging, pidfile grabbing, daemonization""" - if 
getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn': - if not grabpidfile(): - sys.stderr.write("pidfile is taken, exiting.\n") - sys.exit(2) - gconf.pid_file_owned = True - - if kw.get('go_daemon') == 'should': - x, y = os.pipe() - gconf.cpid = os.fork() - if gconf.cpid: - os.close(x) - sys.exit() - os.close(y) - os.setsid() - dn = os.open(os.devnull, os.O_RDWR) - for f in (sys.stdin, sys.stdout, sys.stderr): - os.dup2(dn, f.fileno()) - if getattr(gconf, 'pid_file', None): - if not grabpidfile(gconf.pid_file + '.tmp'): - raise GsyncdError("cannot grab temporary pidfile") - os.rename(gconf.pid_file + '.tmp', gconf.pid_file) - # wait for parent to terminate - # so we can start up with - # no messing from the dirty - # ol' bustard - select((x,), (), ()) - os.close(x) - - lkw = {} - if gconf.log_level: - lkw['level'] = gconf.log_level - if kw.get('log_file'): - if kw['log_file'] in ('-', '/dev/stderr'): - lkw['stream'] = sys.stderr - elif kw['log_file'] == '/dev/stdout': - lkw['stream'] = sys.stdout - else: - lkw['filename'] = kw['log_file'] - - GLogger.setup(label=kw.get('label'), **lkw) - - lkw.update({'saved_label': kw.get('label')}) - gconf.log_metadata = lkw - gconf.log_exit = True - -def main(): - """main routine, signal/exception handling boilerplates""" - signal.signal(signal.SIGTERM, lambda *a: finalize(*a, **{'exval': 1})) - GLogger.setup() - excont = FreeObject(exval = 0) - try: - try: - main_i() - except: - log_raise_exception(excont) - finally: - finalize(exval = excont.exval) - -def main_i(): - """internal main routine - - parse command line, decide what action will be taken; - we can either: - - query/manipulate configuration - - format gsyncd urls using gsyncd's url parsing engine - - start service in following modes, in given stages: - - monitor: startup(), monitor() - - master: startup(), connect_remote(), connect(), service_loop() - - slave: startup(), connect(), service_loop() - """ - rconf = {'go_daemon': 'should'} - - def 
store_abs(opt, optstr, val, parser): - if val and val != '-': - val = os.path.abspath(val) - setattr(parser.values, opt.dest, val) - def store_local(opt, optstr, val, parser): - rconf[opt.dest] = val - def store_local_curry(val): - return lambda o, oo, vx, p: store_local(o, oo, val, p) - def store_local_obj(op, dmake): - return lambda o, oo, vx, p: store_local(o, oo, FreeObject(op=op, **dmake(vx)), p) - - op = OptionParser(usage="%prog [options...] <master> <slave>", version="%prog 0.0.1") - op.add_option('--gluster-command-dir', metavar='DIR', default='') - op.add_option('--gluster-log-file', metavar='LOGF', default=os.devnull, type=str, action='callback', callback=store_abs) - op.add_option('--gluster-log-level', metavar='LVL') - op.add_option('--gluster-params', metavar='PRMS', default='') - op.add_option('--gluster-cli-options', metavar='OPTS', default='--log-file=-') - op.add_option('--mountbroker', metavar='LABEL') - op.add_option('-p', '--pid-file', metavar='PIDF', type=str, action='callback', callback=store_abs) - op.add_option('-l', '--log-file', metavar='LOGF', type=str, action='callback', callback=store_abs) - op.add_option('--state-file', metavar='STATF', type=str, action='callback', callback=store_abs) - op.add_option('--ignore-deletes', default=False, action='store_true') - op.add_option('-L', '--log-level', metavar='LVL') - op.add_option('-r', '--remote-gsyncd', metavar='CMD', default=os.path.abspath(sys.argv[0])) - op.add_option('--volume-id', metavar='UUID') - op.add_option('--session-owner', metavar='ID') - op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh') - op.add_option('--rsync-command', metavar='CMD', default='rsync') - op.add_option('--rsync-extra', metavar='ARGS', default='-S', help=SUPPRESS_HELP) - op.add_option('--timeout', metavar='SEC', type=int, default=120) - op.add_option('--sync-jobs', metavar='N', type=int, default=3) - op.add_option('--turns', metavar='N', type=int, default=0, help=SUPPRESS_HELP) - 
op.add_option('--allow-network', metavar='IPS', default='') - - op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local) - # duh. need to specify dest or value will be mapped to None :S - op.add_option('--monitor', dest='monitor', action='callback', callback=store_local_curry(True)) - op.add_option('--feedback-fd', dest='feedback_fd', type=int, help=SUPPRESS_HELP, action='callback', callback=store_local) - op.add_option('--listen', dest='listen', help=SUPPRESS_HELP, action='callback', callback=store_local_curry(True)) - op.add_option('-N', '--no-daemon', dest="go_daemon", action='callback', callback=store_local_curry('dont')) - op.add_option('--debug', dest="go_daemon", action='callback', callback=lambda *a: (store_local_curry('dont')(*a), - setattr(a[-1].values, 'log_file', '-'), - setattr(a[-1].values, 'log_level', 'DEBUG'))), - - for a in ('check', 'get'): - op.add_option('--config-' + a, metavar='OPT', type=str, dest='config', action='callback', - callback=store_local_obj(a, lambda vx: {'opt': vx})) - op.add_option('--config-get-all', dest='config', action='callback', callback=store_local_obj('get', lambda vx: {'opt': None})) - for m in ('', '-rx', '-glob'): - # call this code 'Pythonic' eh? 
- # have to define a one-shot local function to be able to inject (a value depending on the) - # iteration variable into the inner lambda - def conf_mod_opt_regex_variant(rx): - op.add_option('--config-set' + m, metavar='OPT VAL', type=str, nargs=2, dest='config', action='callback', - callback=store_local_obj('set', lambda vx: {'opt': vx[0], 'val': vx[1], 'rx': rx})) - op.add_option('--config-del' + m, metavar='OPT', type=str, dest='config', action='callback', - callback=store_local_obj('del', lambda vx: {'opt': vx, 'rx': rx})) - conf_mod_opt_regex_variant(m and m[1:] or False) - - op.add_option('--normalize-url', dest='url_print', action='callback', callback=store_local_curry('normal')) - op.add_option('--canonicalize-url', dest='url_print', action='callback', callback=store_local_curry('canon')) - op.add_option('--canonicalize-escape-url', dest='url_print', action='callback', callback=store_local_curry('canon_esc')) - - tunables = [ norm(o.get_opt_string()[2:]) for o in op.option_list if o.callback in (store_abs, 'store_true', None) and o.get_opt_string() not in ('--version', '--help') ] - remote_tunables = [ 'listen', 'go_daemon', 'timeout', 'session_owner', 'config_file' ] - rq_remote_tunables = { 'listen': True } - - # precedence for sources of values: 1) commandline, 2) cfg file, 3) defaults - # -- for this to work out we need to tell apart defaults from explicitly set - # options... so churn out the defaults here and call the parser with virgin - # values container. 
- defaults = op.get_default_values() - opts, args = op.parse_args(values=optparse.Values()) - confdata = rconf.get('config') - if not (len(args) == 2 or \ - (len(args) == 1 and rconf.get('listen')) or \ - (len(args) <= 2 and confdata) or \ - rconf.get('url_print')): - sys.stderr.write("error: incorrect number of arguments\n\n") - sys.stderr.write(op.get_usage() + "\n") - sys.exit(1) - - restricted = os.getenv('_GSYNCD_RESTRICTED_') - - if restricted: - allopts = {} - allopts.update(opts.__dict__) - allopts.update(rconf) - bannedtuns = set(allopts.keys()) - set(remote_tunables) - if bannedtuns: - raise GsyncdError('following tunables cannot be set with restricted SSH invocaton: ' + \ - ', '.join(bannedtuns)) - for k, v in rq_remote_tunables.items(): - if not k in allopts or allopts[k] != v: - raise GsyncdError('tunable %s is not set to value %s required for restricted SSH invocaton' % \ - (k, v)) - - confrx = getattr(confdata, 'rx', None) - if confrx: - # peers are regexen, don't try to parse them - if confrx == 'glob': - args = [ '\A' + fnmatch.translate(a) for a in args ] - canon_peers = args - namedict = {} - else: - rscs = [resource.parse_url(u) for u in args] - dc = rconf.get('url_print') - if dc: - for r in rscs: - print(r.get_url(**{'normal': {}, - 'canon': {'canonical': True}, - 'canon_esc': {'canonical': True, 'escaped': True}}[dc])) - return - local = remote = None - if rscs: - local = rscs[0] - if len(rscs) > 1: - remote = rscs[1] - if not local.can_connect_to(remote): - raise GsyncdError("%s cannot work with %s" % (local.path, remote and remote.path)) - pa = ([], [], []) - urlprms = ({}, {'canonical': True}, {'canonical': True, 'escaped': True}) - for x in rscs: - for i in range(len(pa)): - pa[i].append(x.get_url(**urlprms[i])) - peers, canon_peers, canon_esc_peers = pa - # creating the namedict, a dict representing various ways of referring to / repreenting - # peers to be fillable in config templates - mods = (lambda x: x, lambda x: x[0].upper() + 
x[1:], lambda x: 'e' + x[0].upper() + x[1:]) - if remote: - rmap = { local: ('local', 'master'), remote: ('remote', 'slave') } - else: - rmap = { local: ('local', 'slave') } - namedict = {} - for i in range(len(rscs)): - x = rscs[i] - for name in rmap[x]: - for j in range(3): - namedict[mods[j](name)] = pa[j][i] - if x.scheme == 'gluster': - namedict[name + 'vol'] = x.volume - if not 'config_file' in rconf: - rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd.conf") - gcnf = GConffile(rconf['config_file'], canon_peers, defaults.__dict__, opts.__dict__, namedict) - - if confdata: - opt_ok = norm(confdata.opt) in tunables + [None] - if confdata.op == 'check': - if opt_ok: - sys.exit(0) - else: - sys.exit(1) - elif not opt_ok: - raise GsyncdError("not a valid option: " + confdata.opt) - if confdata.op == 'get': - gcnf.get(confdata.opt) - elif confdata.op == 'set': - gcnf.set(confdata.opt, confdata.val, confdata.rx) - elif confdata.op == 'del': - gcnf.delete(confdata.opt, confdata.rx) - return - - gconf.__dict__.update(defaults.__dict__) - gcnf.update_to(gconf.__dict__) - gconf.__dict__.update(opts.__dict__) - gconf.configinterface = gcnf - - if restricted and gconf.allow_network: - ssh_conn = os.getenv('SSH_CONNECTION') - if not ssh_conn: - #legacy env var - ssh_conn = os.getenv('SSH_CLIENT') - if ssh_conn: - allowed_networks = [ IPNetwork(a) for a in gconf.allow_network.split(',') ] - client_ip = IPAddress(ssh_conn.split()[0]) - allowed = False - for nw in allowed_networks: - if client_ip in nw: - allowed = True - break - if not allowed: - raise GsyncdError("client IP address is not allowed") - - ffd = rconf.get('feedback_fd') - if ffd: - fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) - - #normalize loglevel - lvl0 = gconf.log_level - if isinstance(lvl0, str): - lvl1 = lvl0.upper() - lvl2 = logging.getLevelName(lvl1) - # I have _never_ _ever_ seen such an utterly braindead - # error condition - if lvl2 == "Level " + lvl1: - raise 
GsyncdError('cannot recognize log level "%s"' % lvl0) - gconf.log_level = lvl2 - - go_daemon = rconf['go_daemon'] - be_monitor = rconf.get('monitor') - - if not be_monitor and isinstance(remote, resource.SSH) and \ - go_daemon == 'should': - go_daemon = 'postconn' - log_file = None - else: - log_file = gconf.log_file - if be_monitor: - label = 'monitor' - elif remote: - #master - label = '' - else: - label = 'slave' - startup(go_daemon=go_daemon, log_file=log_file, label=label) - - if be_monitor: - return monitor() - - logging.info("syncing: %s" % " -> ".join(peers)) - resource.Popen.init_errhandler() - if remote: - go_daemon = remote.connect_remote(go_daemon=go_daemon) - if go_daemon: - startup(go_daemon=go_daemon, log_file=gconf.log_file) - # complete remote connection in child - remote.connect_remote(go_daemon='done') - local.connect() - if ffd: - os.close(ffd) - local.service_loop(*[r for r in [remote] if r]) - - -if __name__ == "__main__": - main() diff --git a/xlators/features/marker/utils/syncdaemon/libcxattr.py b/xlators/features/marker/utils/syncdaemon/libcxattr.py deleted file mode 100644 index f0a9d22920a..00000000000 --- a/xlators/features/marker/utils/syncdaemon/libcxattr.py +++ /dev/null @@ -1,72 +0,0 @@ -import os -from ctypes import * -from ctypes.util import find_library - -class Xattr(object): - """singleton that wraps the extended attribues system - interface for python using ctypes - - Just implement it to the degree we need it, in particular - - we need just the l*xattr variants, ie. 
we never want symlinks to be - followed - - don't need size discovery for getxattr, as we always know the exact - sizes we expect - """ - - libc = CDLL(find_library("libc")) - - @classmethod - def geterrno(cls): - return c_int.in_dll(cls.libc, 'errno').value - - @classmethod - def raise_oserr(cls): - errn = cls.geterrno() - raise OSError(errn, os.strerror(errn)) - - @classmethod - def _query_xattr(cls, path, siz, syscall, *a): - if siz: - buf = create_string_buffer('\0' * siz) - else: - buf = None - ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz))) - if ret == -1: - cls.raise_oserr() - if siz: - return buf.raw[:ret] - else: - return ret - - @classmethod - def lgetxattr(cls, path, attr, siz=0): - return cls._query_xattr( path, siz, 'lgetxattr', attr) - - @classmethod - def llistxattr(cls, path, siz=0): - ret = cls._query_xattr(path, siz, 'llistxattr') - if isinstance(ret, str): - ret = ret.split('\0') - return ret - - @classmethod - def lsetxattr(cls, path, attr, val): - ret = cls.libc.lsetxattr(path, attr, val, len(val), 0) - if ret == -1: - cls.raise_oserr() - - @classmethod - def lremovexattr(cls, path, attr): - ret = cls.libc.lremovexattr(path, attr) - if ret == -1: - cls.raise_oserr() - - @classmethod - def llistxattr_buf(cls, path): - """listxattr variant with size discovery""" - size = cls.llistxattr(path) - if size == -1: - cls.raise_oserr() - if size == 0: - return [] - return cls.llistxattr(path, size) diff --git a/xlators/features/marker/utils/syncdaemon/master.py b/xlators/features/marker/utils/syncdaemon/master.py deleted file mode 100644 index 9e54dc4faf2..00000000000 --- a/xlators/features/marker/utils/syncdaemon/master.py +++ /dev/null @@ -1,518 +0,0 @@ -import os -import sys -import time -import stat -import signal -import logging -import errno -from errno import ENOENT, ENODATA -from threading import currentThread, Condition, Lock - -from gconf import gconf -from syncdutils import FreeObject, Thread, GsyncdError, boolify - -URXTIME = 
(-1, 0) - -class GMaster(object): - """class impementling master role""" - - KFGN = 0 - KNAT = 1 - - def get_sys_volinfo(self): - """query volume marks on fs root - - err out on multiple foreign masters - """ - fgn_vis, nat_vi = self.master.server.foreign_volume_infos(), \ - self.master.server.native_volume_info() - fgn_vi = None - if fgn_vis: - if len(fgn_vis) > 1: - raise GsyncdError("cannot work with multiple foreign masters") - fgn_vi = fgn_vis[0] - return fgn_vi, nat_vi - - @property - def uuid(self): - if self.volinfo: - return self.volinfo['uuid'] - - @property - def volmark(self): - if self.volinfo: - return self.volinfo['volume_mark'] - - @property - def inter_master(self): - """decide if we are an intermediate master - in a cascading setup - """ - return self.volinfo_state[self.KFGN] and True or False - - def xtime(self, path, *a, **opts): - """get amended xtime - - as of amending, we can create missing xtime, or - determine a valid value if what we get is expired - (as of the volume mark expiry); way of amendig - depends on @opts and on subject of query (master - or slave). - """ - if a: - rsc = a[0] - else: - rsc = self.master - if not 'create' in opts: - opts['create'] = (rsc == self.master and not self.inter_master) - if not 'default_xtime' in opts: - if rsc == self.master and self.inter_master: - opts['default_xtime'] = ENODATA - else: - opts['default_xtime'] = URXTIME - xt = rsc.server.xtime(path, self.uuid) - if isinstance(xt, int) and xt != ENODATA: - return xt - invalid_xtime = (xt == ENODATA or xt < self.volmark) - if invalid_xtime and opts['create']: - t = time.time() - sec = int(t) - nsec = int((t - sec) * 1000000) - xt = (sec, nsec) - rsc.server.set_xtime(path, self.uuid, xt) - if invalid_xtime: - xt = opts['default_xtime'] - return xt - - def __init__(self, master, slave): - self.master = master - self.slave = slave - self.jobtab = {} - self.syncer = Syncer(slave) - # crawls vs. 
turns: - # - self.crawls is simply the number of crawl() invocations on root - # - one turn is a maximal consecutive sequence of crawls so that each - # crawl in it detects a change to be synced - # - self.turns is the number of turns since start - # - self.total_turns is a limit so that if self.turns reaches it, then - # we exit (for diagnostic purposes) - # so, eg., if the master fs changes unceasingly, self.turns will remain 0. - self.crawls = 0 - self.turns = 0 - self.total_turns = int(gconf.turns) - self.lastreport = {'crawls': 0, 'turns': 0} - self.start = None - self.change_seen = None - # the authoritative (foreign, native) volinfo pair - # which lets us deduce what to do when we refetch - # the volinfos from system - uuid_preset = getattr(gconf, 'volume_id', None) - self.volinfo_state = (uuid_preset and {'uuid': uuid_preset}, None) - # the actual volinfo we make use of - self.volinfo = None - self.terminate = False - - def crawl_loop(self): - """start the keep-alive thread and iterate .crawl""" - timo = int(gconf.timeout or 0) - if timo > 0: - def keep_alive(): - while True: - gap = timo * 0.5 - # first grab a reference as self.volinfo - # can be changed in main thread - vi = self.volinfo - if vi: - # then have a private copy which we can mod - vi = vi.copy() - vi['timeout'] = int(time.time()) + timo - else: - # send keep-alives more frequently to - # avoid a delay in announcing our volume info - # to slave if it becomes established in the - # meantime - gap = min(10, gap) - self.slave.server.keep_alive(vi) - time.sleep(gap) - t = Thread(target=keep_alive) - t.start() - self.lastreport['time'] = time.time() - while not self.terminate: - self.crawl() - - def add_job(self, path, label, job, *a, **kw): - """insert @job function to job table at @path with @label""" - if self.jobtab.get(path) == None: - self.jobtab[path] = [] - self.jobtab[path].append((label, a, lambda : job(*a, **kw))) - - def add_failjob(self, path, label): - """invoke .add_job with a job 
that does nothing just fails""" - logging.debug('salvaged: ' + label) - self.add_job(path, label, lambda: False) - - def wait(self, path, *args): - """perform jobs registered for @path - - Reset jobtab entry for @path, - determine success as the conjuction of - success of all the jobs. In case of - success, call .sendmark on @path - """ - jobs = self.jobtab.pop(path, []) - succeed = True - for j in jobs: - ret = j[-1]() - if not ret: - succeed = False - if succeed: - self.sendmark(path, *args) - return succeed - - def sendmark(self, path, mark, adct=None): - """update slave side xtime for @path to master side xtime - - also can send a setattr payload (see Server.setattr). - """ - if adct: - self.slave.server.setattr(path, adct) - self.slave.server.set_xtime(path, self.uuid, mark) - - @staticmethod - def volinfo_state_machine(volinfo_state, volinfo_sys): - """compute new volinfo_state from old one and incoming - as of current system state, also indicating if there was a - change regarding which volume mark is the authoritative one - - @volinfo_state, @volinfo_sys are pairs of volume mark dicts - (foreign, native). - - Note this method is marked as static, ie. the computation is - pure, without reliance on any excess implicit state. State - transitions which are deemed as ambiguous or banned will raise - an exception. - - """ - # store the value below "boxed" to emulate proper closures - # (variables of the enclosing scope are available inner functions - # provided they are no reassigned; mutation is OK). 
- param = FreeObject(relax_mismatch = False, state_change = None, index=-1) - def select_vi(vi0, vi): - param.index += 1 - if vi and (not vi0 or vi0['uuid'] == vi['uuid']): - if not vi0 and not param.relax_mismatch: - param.state_change = param.index - # valid new value found; for the rest, we are graceful about - # uuid mismatch - param.relax_mismatch = True - return vi - if vi0 and vi and vi0['uuid'] != vi['uuid'] and not param.relax_mismatch: - # uuid mismatch for master candidate, bail out - raise GsyncdError("aborting on uuid change from %s to %s" % \ - (vi0['uuid'], vi['uuid'])) - # fall back to old - return vi0 - newstate = tuple(select_vi(*vip) for vip in zip(volinfo_state, volinfo_sys)) - srep = lambda vi: vi and vi['uuid'][0:8] - logging.debug('(%s, %s) << (%s, %s) -> (%s, %s)' % \ - tuple(srep(vi) for vi in volinfo_state + volinfo_sys + newstate)) - return newstate, param.state_change - - def crawl(self, path='.', xtl=None): - """crawling... - - Standing around - All the right people - Crawling - Tennis on Tuesday - The ladder is long - It is your nature - You've gotta suntan - Football on Sunday - Society boy - - Recursively walk the master side tree and check if updates are - needed due to xtime differences. One invocation of crawl checks - children of @path and do a recursive enter only on - those directory children where there is an update needed. - - Way of updates depend on file type: - - for symlinks, sync them directy and synchronously - - for regular children, register jobs for @path (cf. .add_job) to start - and wait on their rsync - - for directory children, register a job for @path which waits (.wait) - on jobs for the given child - (other kind of filesystem nodes are not considered) - - Those slave side children which do not exist on master are simply - purged (see Server.purge). 
- - Behavior is fault tolerant, synchronization is adaptive: if some action fails, - just go on relentlessly, adding a fail job (see .add_failjob) which will prevent - the .sendmark on @path, so when the next crawl will arrive to @path it will not - see it as up-to-date and will try to sync it again. While this semantics can be - supported by funky design principles (http://c2.com/cgi/wiki?LazinessImpatienceHubris), - the ultimate reason which excludes other possibilities is simply transience: we cannot - assert that the file systems (master / slave) underneath do not change and actions - taken upon some condition will not lose their context by the time they are performed. - """ - if path == '.': - if self.start: - self.crawls += 1 - logging.debug("... crawl #%d done, took %.6f seconds" % \ - (self.crawls, time.time() - self.start)) - time.sleep(1) - self.start = time.time() - should_display_info = self.start - self.lastreport['time'] >= 60 - if should_display_info: - logging.info("completed %d crawls, %d turns", - self.crawls - self.lastreport['crawls'], - self.turns - self.lastreport['turns']) - self.lastreport.update(crawls = self.crawls, - turns = self.turns, - time = self.start) - volinfo_sys = self.get_sys_volinfo() - self.volinfo_state, state_change = self.volinfo_state_machine(self.volinfo_state, - volinfo_sys) - if self.inter_master: - self.volinfo = volinfo_sys[self.KFGN] - else: - self.volinfo = volinfo_sys[self.KNAT] - if state_change == self.KFGN or (state_change == self.KNAT and not self.inter_master): - logging.info('new master is %s', self.uuid) - if self.volinfo: - logging.info("%s master with volume id %s ..." 
% \ - (self.inter_master and "intermediate" or "primary", - self.uuid)) - if state_change == self.KFGN: - gconf.configinterface.set('volume_id', self.uuid) - if self.volinfo: - if self.volinfo['retval']: - raise GsyncdError ("master is corrupt") - else: - if should_display_info or self.crawls == 0: - if self.inter_master: - logging.info("waiting for being synced from %s ..." % \ - self.volinfo_state[self.KFGN]['uuid']) - else: - logging.info("waiting for volume info ...") - return - logging.debug("entering " + path) - if not xtl: - xtl = self.xtime(path) - if isinstance(xtl, int): - self.add_failjob(path, 'no-local-node') - return - xtr0 = self.xtime(path, self.slave) - if isinstance(xtr0, int): - if xtr0 != ENOENT: - self.slave.server.purge(path) - try: - self.slave.server.mkdir(path) - except OSError: - self.add_failjob(path, 'no-remote-node') - return - xtr = URXTIME - else: - xtr = xtr0 - if xtr > xtl: - raise GsyncdError("timestamp corruption for " + path) - if xtl == xtr: - if path == '.' 
and self.change_seen: - self.turns += 1 - self.change_seen = False - if self.total_turns: - logging.info("finished turn #%s/%s" % \ - (self.turns, self.total_turns)) - if self.turns == self.total_turns: - logging.info("reached turn limit") - self.terminate = True - return - if path == '.': - self.change_seen = True - try: - dem = self.master.server.entries(path) - except OSError: - self.add_failjob(path, 'local-entries-fail') - return - try: - des = self.slave.server.entries(path) - except OSError: - self.slave.server.purge(path) - try: - self.slave.server.mkdir(path) - des = self.slave.server.entries(path) - except OSError: - self.add_failjob(path, 'remote-entries-fail') - return - dd = set(des) - set(dem) - if dd and not boolify(gconf.ignore_deletes): - self.slave.server.purge(path, dd) - chld = [] - for e in dem: - e = os.path.join(path, e) - xte = self.xtime(e) - if isinstance(xte, int): - logging.warn("irregular xtime for %s: %s" % (e, errno.errorcode[xte])) - elif xte > xtr: - chld.append((e, xte)) - def indulgently(e, fnc, blame=None): - if not blame: - blame = path - try: - return fnc(e) - except (IOError, OSError): - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - logging.warn("salvaged ENOENT for" + e) - self.add_failjob(blame, 'by-indulgently') - return False - else: - raise - for e, xte in chld: - st = indulgently(e, lambda e: os.lstat(e)) - if st == False: - continue - mo = st.st_mode - adct = {'own': (st.st_uid, st.st_gid)} - if stat.S_ISLNK(mo): - if indulgently(e, lambda e: self.slave.server.symlink(os.readlink(e), e)) == False: - continue - self.sendmark(e, xte, adct) - elif stat.S_ISREG(mo): - logging.debug("syncing %s ..." 
% e) - pb = self.syncer.add(e) - def regjob(e, xte, pb): - if pb.wait(): - logging.debug("synced " + e) - self.sendmark(e, xte) - return True - else: - logging.error("failed to sync " + e) - self.add_job(path, 'reg', regjob, e, xte, pb) - elif stat.S_ISDIR(mo): - adct['mode'] = mo - if indulgently(e, lambda e: (self.add_job(path, 'cwait', self.wait, e, xte, adct), - self.crawl(e, xte), - True)[-1], blame=e) == False: - continue - else: - # ignore fifos, sockets and special files - pass - if path == '.': - self.wait(path, xtl) - -class BoxClosedErr(Exception): - pass - -class PostBox(list): - """synchronized collection for storing things thought of as "requests" """ - - def __init__(self, *a): - list.__init__(self, *a) - # too bad Python stdlib does not have read/write locks... - # it would suffivce to grab the lock in .append as reader, in .close as writer - self.lever = Condition() - self.open = True - self.done = False - - def wait(self): - """wait on requests to be processed""" - self.lever.acquire() - if not self.done: - self.lever.wait() - self.lever.release() - return self.result - - def wakeup(self, data): - """wake up requestors with the result""" - self.result = data - self.lever.acquire() - self.done = True - self.lever.notifyAll() - self.lever.release() - - def append(self, e): - """post a request""" - self.lever.acquire() - if not self.open: - raise BoxClosedErr - list.append(self, e) - self.lever.release() - - def close(self): - """prohibit the posting of further requests""" - self.lever.acquire() - self.open = False - self.lever.release() - -class Syncer(object): - """a staged queue to relay rsync requests to rsync workers - - By "staged queue" its meant that when a consumer comes to the - queue, it takes _all_ entries, leaving the queue empty. - (I don't know if there is an official term for this pattern.) - - The queue uses a PostBox to accumulate incoming items. 
- When a consumer (rsync worker) comes, a new PostBox is - set up and the old one is passed on to the consumer. - - Instead of the simplistic scheme of having one big lock - which synchronizes both the addition of new items and - PostBox exchanges, use a separate lock to arbitrate consumers, - and rely on PostBox's synchronization mechanisms take - care about additions. - - There is a corner case racy situation, producers vs. consumers, - which is not handled by this scheme: namely, when the PostBox - exchange occurs in between being passed to the producer for posting - and the post placement. But that's what Postbox.close is for: - such a posting will find the PostBox closed, in which case - the producer can re-try posting against the actual PostBox of - the queue. - - To aid accumlation of items in the PostBoxen before grabbed - by an rsync worker, the worker goes to sleep a bit after - each completed syncjob. - """ - - def __init__(self, slave): - """spawn worker threads""" - self.slave = slave - self.lock = Lock() - self.pb = PostBox() - for i in range(int(gconf.sync_jobs)): - t = Thread(target=self.syncjob) - t.start() - - def syncjob(self): - """the life of a worker""" - while True: - pb = None - while True: - self.lock.acquire() - if self.pb: - pb, self.pb = self.pb, PostBox() - self.lock.release() - if pb: - break - time.sleep(0.5) - pb.close() - po = self.slave.rsync(pb) - if po.returncode == 0: - ret = True - elif po.returncode in (23, 24): - # partial transfer (cf. 
rsync(1)), that's normal - ret = False - else: - po.errfail() - pb.wakeup(ret) - - def add(self, e): - while True: - pb = self.pb - try: - pb.append(e) - return pb - except BoxClosedErr: - pass diff --git a/xlators/features/marker/utils/syncdaemon/monitor.py b/xlators/features/marker/utils/syncdaemon/monitor.py deleted file mode 100644 index 9536f3e2683..00000000000 --- a/xlators/features/marker/utils/syncdaemon/monitor.py +++ /dev/null @@ -1,123 +0,0 @@ -import os -import sys -import time -import signal -import logging -from gconf import gconf -from syncdutils import update_file, select, waitpid - -class Monitor(object): - """class which spawns and manages gsyncd workers""" - - def __init__(self): - self.state = None - - def set_state(self, state): - """set the state that can be used by external agents - like glusterd for status reporting""" - if state == self.state: - return - self.state = state - logging.info('new state: %s' % state) - if getattr(gconf, 'state_file', None): - update_file(gconf.state_file, lambda f: f.write(state + '\n')) - - def monitor(self): - """the monitor loop - - Basic logic is a blantantly simple blunt heuristics: - if spawned client survives 60 secs, it's considered OK. - This servers us pretty well as it's not vulneralbe to - any kind of irregular behavior of the child... - - ... well, except for one: if children is hung up on - waiting for some event, it can survive aeons, still - will be defunct. So we tweak the above logic to - expect the worker to send us a signal within 60 secs - (in the form of closing its end of a pipe). 
The worker - does this when it's done with the setup stage - ready to enter the service loop (note it's the setup - stage which is vulnerable to hangs -- the full - blown worker blows up on EPIPE if the net goes down, - due to the keep-alive thread) - """ - def sigcont_handler(*a): - """ - Re-init logging and send group kill signal - """ - md = gconf.log_metadata - logging.shutdown() - lcls = logging.getLoggerClass() - lcls.setup(label=md.get('saved_label'), **md) - pid = os.getpid() - os.kill(-pid, signal.SIGUSR1) - signal.signal(signal.SIGUSR1, lambda *a: ()) - signal.signal(signal.SIGCONT, sigcont_handler) - - argv = sys.argv[:] - for o in ('-N', '--no-daemon', '--monitor'): - while o in argv: - argv.remove(o) - argv.extend(('-N', '-p', '')) - argv.insert(0, os.path.basename(sys.executable)) - - self.set_state('starting...') - ret = 0 - def nwait(p, o=0): - p2, r = waitpid(p, o) - if not p2: - return - return r - def exit_signalled(s): - """ child teminated due to receipt of SIGUSR1 """ - return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1)) - def exit_status(s): - if os.WIFEXITED(s): - return os.WEXITSTATUS(s) - return 1 - conn_timeout = 60 - while ret in (0, 1): - logging.info('-' * conn_timeout) - logging.info('starting gsyncd worker') - pr, pw = os.pipe() - cpid = os.fork() - if cpid == 0: - os.close(pr) - os.execv(sys.executable, argv + ['--feedback-fd', str(pw)]) - os.close(pw) - t0 = time.time() - so = select((pr,), (), (), conn_timeout)[0] - os.close(pr) - if so: - ret = nwait(cpid, os.WNOHANG) - if ret != None: - logging.debug("worker died before establishing connection") - else: - logging.debug("worker seems to be connected (?? 
racy check)") - while time.time() < t0 + conn_timeout: - ret = nwait(cpid, os.WNOHANG) - if ret != None: - logging.debug("worker died in startup phase") - break - time.sleep(1) - else: - logging.debug("worker not confirmed in %d sec, aborting it" % \ - conn_timeout) - os.kill(cpid, signal.SIGKILL) - ret = nwait(cpid) - if ret == None: - self.set_state('OK') - ret = nwait(cpid) - if exit_signalled(ret): - ret = 0 - else: - ret = exit_status(ret) - if ret in (0,1): - self.set_state('faulty') - time.sleep(10) - self.set_state('inconsistent') - return ret - -def monitor(): - """oh yeah, actually Monitor is used as singleton, too""" - return Monitor().monitor() diff --git a/xlators/features/marker/utils/syncdaemon/repce.py b/xlators/features/marker/utils/syncdaemon/repce.py deleted file mode 100644 index 755fb61df48..00000000000 --- a/xlators/features/marker/utils/syncdaemon/repce.py +++ /dev/null @@ -1,225 +0,0 @@ -import os -import sys -import time -import logging -from threading import Condition -try: - import thread -except ImportError: - # py 3 - import _thread as thread -try: - from Queue import Queue -except ImportError: - # py 3 - from queue import Queue -try: - import cPickle as pickle -except ImportError: - # py 3 - import pickle - -from syncdutils import Thread, select - -pickle_proto = -1 -repce_version = 1.0 - -def ioparse(i, o): - if isinstance(i, int): - i = os.fdopen(i) - # rely on duck typing for recognizing - # streams as that works uniformly - # in py2 and py3 - if hasattr(o, 'fileno'): - o = o.fileno() - return (i, o) - -def send(out, *args): - """pickle args and write out wholly in one syscall - - ie. 
not use the ability of pickle to dump directly to - a stream, as that would potentially mess up messages - by interleaving them - """ - os.write(out, pickle.dumps(args, pickle_proto)) - -def recv(inf): - """load an object from input stream""" - return pickle.load(inf) - - -class RepceServer(object): - """RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce - - ... also our homebrewed RPC backend where the transport layer is - reduced to a pair of filehandles. - - This is the server component. - """ - - def __init__(self, obj, i, o, wnum=6): - """register a backend object .obj to which incoming messages - are dispatched, also incoming/outcoming streams - """ - self.obj = obj - self.inf, self.out = ioparse(i, o) - self.wnum = wnum - self.q = Queue() - - def service_loop(self): - """fire up worker threads, get messages and dispatch among them""" - for i in range(self.wnum): - t = Thread(target=self.worker) - t.start() - try: - while True: - self.q.put(recv(self.inf)) - except EOFError: - logging.info("terminating on reaching EOF.") - - def worker(self): - """life of a worker - - Get message, extract its id, method name and arguments - (kwargs not supported), call method on .obj. - Send back message id + return value. - If method call throws an exception, rescue it, and send - back the exception as result (with flag marking it as - exception). 
- """ - while True: - in_data = self.q.get(True) - rid = in_data[0] - rmeth = in_data[1] - exc = False - if rmeth == '__repce_version__': - res = repce_version - else: - try: - res = getattr(self.obj, rmeth)(*in_data[2:]) - except: - res = sys.exc_info()[1] - exc = True - logging.exception("call failed: ") - send(self.out, rid, exc, res) - - -class RepceJob(object): - """class representing message status we can use - for waiting on reply""" - - def __init__(self, cbk): - """ - - .rid: (process-wise) unique id - - .cbk: what we do upon receiving reply - """ - self.rid = (os.getpid(), thread.get_ident(), time.time()) - self.cbk = cbk - self.lever = Condition() - self.done = False - - def __repr__(self): - return ':'.join([str(x) for x in self.rid]) - - def wait(self): - self.lever.acquire() - if not self.done: - self.lever.wait() - self.lever.release() - return self.result - - def wakeup(self, data): - self.result = data - self.lever.acquire() - self.done = True - self.lever.notify() - self.lever.release() - - -class RepceClient(object): - """RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce - - ... also our homebrewed RPC backend where the transport layer is - reduced to a pair of filehandles. - - This is the client component. - """ - - def __init__(self, i, o): - self.inf, self.out = ioparse(i, o) - self.jtab = {} - t = Thread(target = self.listen) - t.start() - - def listen(self): - while True: - select((self.inf,), (), ()) - rid, exc, res = recv(self.inf) - rjob = self.jtab.pop(rid) - if rjob.cbk: - rjob.cbk(rjob, [exc, res]) - - def push(self, meth, *args, **kw): - """wrap arguments in a RepceJob, send them to server - and return the RepceJob - - @cbk to pass on RepceJob can be given as kwarg. - """ - cbk = kw.get('cbk') - if not cbk: - def cbk(rj, res): - if res[0]: - raise res[1] - rjob = RepceJob(cbk) - self.jtab[rjob.rid] = rjob - logging.debug("call %s %s%s ..." 
% (repr(rjob), meth, repr(args))) - send(self.out, rjob.rid, meth, *args) - return rjob - - def __call__(self, meth, *args): - """RePCe client is callabe, calling it implements a synchronous remote call - - We do a .push with a cbk which does a wakeup upon receiving anwser, then wait - on the RepceJob. - """ - rjob = self.push(meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)}) - exc, res = rjob.wait() - if exc: - logging.error('call %s (%s) failed on peer with %s' % (repr(rjob), meth, str(type(res).__name__))) - raise res - logging.debug("call %s %s -> %s" % (repr(rjob), meth, repr(res))) - return res - - class mprx(object): - """method proxy, standard trick to implement rubyesque method_missing - in Python - - A class is a closure factory, you know what I mean, or go read some SICP. - """ - def __init__(self, ins, meth): - self.ins = ins - self.meth = meth - - def __call__(self, *a): - return self.ins(self.meth, *a) - - def __getattr__(self, meth): - """this implements transparent method dispatch to remote object, - so that you don't need to call the RepceClient instance like - - rclient('how_old_are_you_if_born_in', 1979) - - but you can make it into an ordinary method call like - - rclient.how_old_are_you_if_born_in(1979) - """ - return self.mprx(self, meth) - - def __version__(self): - """used in handshake to verify compatibility""" - d = {'proto': self('__repce_version__')} - try: - d['object'] = self('version') - except AttributeError: - pass - return d diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/xlators/features/marker/utils/syncdaemon/resource.py deleted file mode 100644 index 3454c38234a..00000000000 --- a/xlators/features/marker/utils/syncdaemon/resource.py +++ /dev/null @@ -1,837 +0,0 @@ -import re -import os -import sys -import stat -import time -import errno -import struct -import socket -import logging -import tempfile -import threading -import subprocess -from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EISDIR - 
-from gconf import gconf -import repce -from repce import RepceServer, RepceClient -from master import GMaster -import syncdutils -from syncdutils import GsyncdError, select - -UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z') -HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I) -UserRX = re.compile("[\w!\#$%&'*+-\/=?^_`{|}~]+") - -def sup(x, *a, **kw): - """a rubyesque "super" for python ;) - - invoke caller method in parent class with given args. - """ - return getattr(super(type(x), x), sys._getframe(1).f_code.co_name)(*a, **kw) - -def desugar(ustr): - """transform sugared url strings to standard <scheme>://<urlbody> form - - parsing logic enforces the constraint that sugared forms should contatin - a ':' or a '/', which ensures that sugared urls do not conflict with - gluster volume names. - """ - m = re.match('([^:]*):(.*)', ustr) - if m: - if not m.groups()[0]: - return "gluster://localhost" + ustr - elif '@' in m.groups()[0] or re.search('[:/]', m.groups()[1]): - return "ssh://" + ustr - else: - return "gluster://" + ustr - else: - if ustr[0] != '/': - raise GsyncdError("cannot resolve sugared url '%s'" % ustr) - ap = os.path.normpath(ustr) - if ap.startswith('//'): - ap = ap[1:] - return "file://" + ap - -def gethostbyname(hnam): - """gethostbyname wrapper""" - try: - return socket.gethostbyname(hnam) - except socket.gaierror: - ex = sys.exc_info()[1] - raise GsyncdError("failed to resolve %s: %s" % \ - (hnam, ex.strerror)) - -def parse_url(ustr): - """instantiate an url object by scheme-to-class dispatch - - The url classes taken into consideration are the ones in - this module whose names are full-caps. 
- """ - m = UrlRX.match(ustr) - if not m: - ustr = desugar(ustr) - m = UrlRX.match(ustr) - if not m: - raise GsyncdError("malformed url") - sch, path = m.groups() - this = sys.modules[__name__] - if not hasattr(this, sch.upper()): - raise GsyncdError("unknown url scheme " + sch) - return getattr(this, sch.upper())(path) - - -class _MetaXattr(object): - """singleton class, a lazy wrapper around the - libcxattr module - - libcxattr (a heavy import due to ctypes) is - loaded only when when the single - instance is tried to be used. - - This reduces runtime for those invocations - which do not need filesystem manipulation - (eg. for config, url parsing) - """ - - def __getattr__(self, meth): - from libcxattr import Xattr as LXattr - xmeth = [ m for m in dir(LXattr) if m[0] != '_' ] - if not meth in xmeth: - return - for m in xmeth: - setattr(self, m, getattr(LXattr, m)) - return getattr(self, meth) - -Xattr = _MetaXattr() - - -class Popen(subprocess.Popen): - """customized subclass of subprocess.Popen with a ring - buffer for children error ouput""" - - @classmethod - def init_errhandler(cls): - """start the thread which handles children's error output""" - cls.errstore = {} - def tailer(): - while True: - for po in select([po.stderr for po in cls.errstore], [], []): - po.lock.acquire() - try: - la = cls.errstore.get(po) - if la == None: - continue - l = os.read(po.stderr.fileno(), 1024) - tots = len(l) - for lx in la: - tots += len(lx) - while tots > 1<<20 and la: - tots -= len(la.pop(0)) - finally: - po.lock.release() - t = syncdutils.Thread(target = tailer) - t.start() - cls.errhandler = t - - def __init__(self, args, *a, **kw): - """customizations for subprocess.Popen instantiation - - - 'close_fds' is taken to be the default - - if child's stderr is chosen to be managed, - register it with the error handler thread - """ - self.args = args - if 'close_fds' not in kw: - kw['close_fds'] = True - self.lock = threading.Lock() - try: - sup(self, args, *a, **kw) - 
except: - ex = sys.exc_info()[1] - if not isinstance(ex, OSError): - raise - raise GsyncdError("""execution of "%s" failed with %s (%s)""" % \ - (args[0], errno.errorcode[ex.errno], os.strerror(ex.errno))) - if kw['stderr'] == subprocess.PIPE: - assert(getattr(self, 'errhandler', None)) - self.errstore[self] = [] - - def errfail(self): - """fail nicely if child did not terminate with success""" - filling = None - if self.elines: - filling = ", saying:" - logging.error("""command "%s" returned with %d%s""" % \ - (" ".join(self.args), self.returncode, filling)) - for l in self.elines: - for ll in l.rstrip().split("\n"): - logging.error(self.args[0] + "> " + ll.rstrip()) - syncdutils.finalize(exval = 1) - - def terminate_geterr(self, fail_on_err = True): - """kill child, finalize stderr harvesting (unregister - from errhandler, set up .elines), fail on error if - asked for - """ - self.lock.acquire() - try: - elines = self.errstore.pop(self) - finally: - self.lock.release() - if self.poll() == None: - self.terminate() - if sp.poll() == None: - time.sleep(0.1) - sp.kill() - while True: - b = os.read(self.stderr.fileno(), 1024) - if b: - elines.append(b) - else: - break - self.stderr.close() - self.elines = elines - if fail_on_err and self.returncode != 0: - self.errfail() - - -class Server(object): - """singleton implemening those filesystem access primitives - which are needed for geo-replication functionality - - (Singleton in the sense it's a class which has only static - and classmethods and is used directly, without instantiation.) - """ - - GX_NSPACE = "trusted.glusterfs" - NTV_FMTSTR = "!" 
+ "B"*19 + "II" - FRGN_XTRA_FMT = "I" - FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT - - def _pathguard(f): - """decorator method that checks - the path argument of the decorated - functions to make sure it does not - point out of the managed tree - """ - - fc = getattr(f, 'func_code', None) - if not fc: - # python 3 - fc = f.__code__ - pi = list(fc.co_varnames).index('path') - def ff(*a): - path = a[pi] - ps = path.split('/') - if path[0] == '/' or '..' in ps: - raise ValueError('unsafe path') - return f(*a) - return ff - - @staticmethod - @_pathguard - def entries(path): - """directory entries in an array""" - # prevent symlinks being followed - if not stat.S_ISDIR(os.lstat(path).st_mode): - raise OSError(ENOTDIR, os.strerror(ENOTDIR)) - return os.listdir(path) - - @classmethod - @_pathguard - def purge(cls, path, entries=None): - """force-delete subtrees - - If @entries is not specified, delete - the whole subtree under @path (including - @path). - - Otherwise, @entries should be a - a sequence of children of @path, and - the effect is identical with a joint - @entries-less purge on them, ie. 
- - for e in entries: - cls.purge(os.path.join(path, e)) - """ - me_also = entries == None - if not entries: - try: - # if it's a symlink, prevent - # following it - try: - os.unlink(path) - return - except OSError: - ex = sys.exc_info()[1] - if ex.errno == EISDIR: - entries = os.listdir(path) - else: - raise - except OSError: - ex = sys.exc_info()[1] - if ex.errno in (ENOTDIR, ENOENT, ELOOP): - try: - os.unlink(path) - return - except OSError: - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - return - raise - else: - raise - for e in entries: - cls.purge(os.path.join(path, e)) - if me_also: - os.rmdir(path) - - @classmethod - @_pathguard - def _create(cls, path, ctor): - """path creation backend routine""" - try: - ctor(path) - except OSError: - ex = sys.exc_info()[1] - if ex.errno == EEXIST: - cls.purge(path) - return ctor(path) - raise - - @classmethod - @_pathguard - def mkdir(cls, path): - cls._create(path, os.mkdir) - - @classmethod - @_pathguard - def symlink(cls, lnk, path): - cls._create(path, lambda p: os.symlink(lnk, p)) - - @classmethod - @_pathguard - def xtime(cls, path, uuid): - """query xtime extended attribute - - Return xtime of @path for @uuid as a pair of integers. - "Normal" errors due to non-existent @path or extended attribute - are tolerated and errno is returned in such a case. - """ - - try: - return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), 8)) - except OSError: - ex = sys.exc_info()[1] - if ex.errno in (ENOENT, ENODATA, ENOTDIR): - return ex.errno - else: - raise - - @classmethod - @_pathguard - def set_xtime(cls, path, uuid, mark): - """set @mark as xtime for @uuid on @path""" - Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark)) - - @staticmethod - @_pathguard - def setattr(path, adct): - """set file attributes - - @adct is a dict, where 'own', 'mode' and 'times' - keys are looked for and values used to perform - chown, chmod or utimes on @path. 
- """ - own = adct.get('own') - if own: - os.lchown(path, *own) - mode = adct.get('mode') - if mode: - os.chmod(path, stat.S_IMODE(mode)) - times = adct.get('times') - if times: - os.utime(path, times) - - @staticmethod - def pid(): - return os.getpid() - - last_keep_alive = 0 - @classmethod - def keep_alive(cls, dct): - """process keepalive messages. - - Return keep-alive counter (number of received keep-alive - messages). - - Now the "keep-alive" message can also have a payload which is - used to set a foreign volume-mark on the underlying file system. - """ - if dct: - key = '.'.join([cls.GX_NSPACE, 'volume-mark', dct['uuid']]) - val = struct.pack(cls.FRGN_FMTSTR, - *(dct['version'] + - tuple(int(x,16) for x in re.findall('(?:[\da-f]){2}', dct['uuid'])) + - (dct['retval'],) + dct['volume_mark'][0:2] + (dct['timeout'],))) - Xattr.lsetxattr('.', key, val) - cls.last_keep_alive += 1 - return cls.last_keep_alive - - @staticmethod - def version(): - """version used in handshake""" - return 1.0 - - -class SlaveLocal(object): - """mix-in class to implement some factes of a slave server - - ("mix-in" is sort of like "abstract class", ie. it's not - instantiated just included in the ancesty DAG. I use "mix-in" - to indicate that it's not used as an abstract base class, - rather just taken in to implement additional functionality - on the basis of the assumed availability of certain interfaces.) 
- """ - - def can_connect_to(self, remote): - """determine our position in the connectibility matrix""" - return not remote - - def service_loop(self): - """start a RePCe server serving self's server - - stop servicing if a timeout is configured and got no - keep-alime in that inteval - """ - repce = RepceServer(self.server, sys.stdin, sys.stdout, int(gconf.sync_jobs)) - t = syncdutils.Thread(target=lambda: (repce.service_loop(), - syncdutils.finalize())) - t.start() - logging.info("slave listening") - if gconf.timeout and int(gconf.timeout) > 0: - while True: - lp = self.server.last_keep_alive - time.sleep(int(gconf.timeout)) - if lp == self.server.last_keep_alive: - logging.info("connection inactive for %d seconds, stopping" % int(gconf.timeout)) - break - else: - select((), (), ()) - -class SlaveRemote(object): - """mix-in class to implement an interface to a remote slave""" - - def connect_remote(self, rargs=[], **opts): - """connects to a remote slave - - Invoke an auxiliary utility (slave gsyncd, possibly wrapped) - which sets up the connection and set up a RePCe client to - communicate throuh its stdio. 
- """ - slave = opts.get('slave', self.url) - so = getattr(gconf, 'session_owner', None) - if so: - so_args = ['--session-owner', so] - else: - so_args = [] - po = Popen(rargs + gconf.remote_gsyncd.split() + so_args + \ - ['-N', '--listen', '--timeout', str(gconf.timeout), slave], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - gconf.transport = po - return self.start_fd_client(po.stdout, po.stdin, **opts) - - def start_fd_client(self, i, o, **opts): - """set up RePCe client, handshake with server - - It's cut out as a separate method to let - subclasses hook into client startup - """ - self.server = RepceClient(i, o) - rv = self.server.__version__() - exrv = {'proto': repce.repce_version, 'object': Server.version()} - da0 = (rv, exrv) - da1 = ({}, {}) - for i in range(2): - for k, v in da0[i].iteritems(): - da1[i][k] = int(v) - if da1[0] != da1[1]: - raise GsyncdError("RePCe major version mismatch: local %s, remote %s" % (exrv, rv)) - - def rsync(self, files, *args): - """invoke rsync""" - if not files: - raise GsyncdError("no files to sync") - logging.debug("files: " + ", ".join(files)) - argv = gconf.rsync_command.split() + gconf.rsync_extra.split() + ['-aR'] + files + list(args) - po = Popen(argv, stderr=subprocess.PIPE) - po.wait() - po.terminate_geterr(fail_on_err = False) - return po - - -class AbstractUrl(object): - """abstract base class for url scheme classes""" - - def __init__(self, path, pattern): - m = re.search(pattern, path) - if not m: - raise GsyncdError("malformed path") - self.path = path - return m.groups() - - @property - def scheme(self): - return type(self).__name__.lower() - - def canonical_path(self): - return self.path - - def get_url(self, canonical=False, escaped=False): - """format self's url in various styles""" - if canonical: - pa = self.canonical_path() - else: - pa = self.path - u = "://".join((self.scheme, pa)) - if escaped: - u = syncdutils.escape(u) - return u - - @property - def url(self): - return 
self.get_url() - - - ### Concrete resource classes ### - - -class FILE(AbstractUrl, SlaveLocal, SlaveRemote): - """scheme class for file:// urls - - can be used to represent a file slave server - on slave side, or interface to a remote file - file server on master side - """ - - class FILEServer(Server): - """included server flavor""" - pass - - server = FILEServer - - def __init__(self, path): - sup(self, path, '^/') - - def connect(self): - """inhibit the resource beyond""" - os.chdir(self.path) - - def rsync(self, files): - return sup(self, files, self.path) - - -class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): - """scheme class for gluster:// urls - - can be used to represent a gluster slave server - on slave side, or interface to a remote gluster - slave on master side, or to represent master - (slave-ish features come from the mixins, master - functionality is outsourced to GMaster from master) - """ - - class GLUSTERServer(Server): - "server enhancements for a glusterfs backend""" - - @classmethod - def _attr_unpack_dict(cls, xattr, extra_fields = ''): - """generic volume mark fetching/parsing backed""" - fmt_string = cls.NTV_FMTSTR + extra_fields - buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string)) - vm = struct.unpack(fmt_string, buf) - m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(['%02x' % x for x in vm[2:18]])) - uuid = '-'.join(m.groups()) - volinfo = { 'version': vm[0:2], - 'uuid' : uuid, - 'retval' : vm[18], - 'volume_mark': vm[19:21], - } - if extra_fields: - return volinfo, vm[-len(extra_fields):] - else: - return volinfo - - @classmethod - def foreign_volume_infos(cls): - """return list of valid (not expired) foreign volume marks""" - dict_list = [] - xattr_list = Xattr.llistxattr_buf('.') - for ele in xattr_list: - if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0: - d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT) - now = int(time.time()) - if x[0] > now: - logging.debug("volinfo[%s] expires: %d (%d sec 
later)" % \ - (d['uuid'], x[0], x[0] - now)) - dict_list.append(d) - else: - try: - Xattr.lremovexattr('.', ele) - except OSError: - pass - return dict_list - - @classmethod - def native_volume_info(cls): - """get the native volume mark of the underlying gluster volume""" - try: - return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE, 'volume-mark'])) - except OSError: - ex = sys.exc_info()[1] - if ex.errno != ENODATA: - raise - - server = GLUSTERServer - - def __init__(self, path): - self.host, self.volume = sup(self, path, '^(%s):(.+)' % HostRX.pattern) - - def canonical_path(self): - return ':'.join([gethostbyname(self.host), self.volume]) - - def can_connect_to(self, remote): - """determine our position in the connectibility matrix""" - return True - - class Mounter(object): - """Abstract base class for mounter backends""" - - def __init__(self, params): - self.params = params - - @classmethod - def get_glusterprog(cls): - return os.path.join(gconf.gluster_command_dir, cls.glusterprog) - - def umount_l(self, d): - """perform lazy umount""" - po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE) - po.wait() - return po - - @classmethod - def make_umount_argv(cls, d): - raise NotImplementedError - - def make_mount_argv(self, *a): - raise NotImplementedError - - def cleanup_mntpt(self): - pass - - def handle_mounter(self, po): - po.wait() - - def inhibit(self, *a): - """inhibit a gluster filesystem - - Mount glusterfs over a temporary mountpoint, - change into the mount, and lazy unmount the - filesystem. 
- """ - mounted = False - try: - po = Popen(self.make_mount_argv(*a), **self.mountkw) - self.handle_mounter(po) - po.terminate_geterr() - d = self.mntpt - mounted = True - logging.debug('auxiliary glusterfs mount in place') - os.chdir(d) - self.umount_l(d).terminate_geterr() - mounted = False - finally: - try: - if mounted: - self.umount_l(d).terminate_geterr(fail_on_err = False) - self.cleanup_mntpt() - except: - logging.warn('stale mount possibly left behind on ' + d) - logging.debug('auxiliary glusterfs mount prepared') - - class DirectMounter(Mounter): - """mounter backend which calls mount(8), umount(8) directly""" - - mountkw = {'stderr': subprocess.PIPE} - glusterprog = 'glusterfs' - - @staticmethod - def make_umount_argv(d): - return ['umount', '-l', d] - - def make_mount_argv(self): - self.mntpt = tempfile.mkdtemp(prefix = 'gsyncd-aux-mount-') - return [self.get_glusterprog()] + ['--' + p for p in self.params] + [self.mntpt] - - def cleanup_mntpt(self): - os.rmdir(self.mntpt) - - class MountbrokerMounter(Mounter): - """mounter backend using the mountbroker gluster service""" - - mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE} - glusterprog = 'gluster' - - @classmethod - def make_cli_argv(cls): - return [cls.get_glusterprog()] + gconf.gluster_cli_options.split() + ['system::'] - - @classmethod - def make_umount_argv(cls, d): - return cls.make_cli_argv() + ['umount', d, 'lazy'] - - def make_mount_argv(self, label): - return self.make_cli_argv() + \ - ['mount', label, 'user-map-root=' + syncdutils.getusername()] + self.params - - def handle_mounter(self, po): - self.mntpt = po.stdout.readline()[:-1] - po.stdout.close() - sup(self, po) - if po.returncode != 0: - # if cli terminated with error due to being - # refused by glusterd, what it put - # out on stdout is a diagnostic message - logging.error('glusterd answered: %s' % self.mntpt) - - def connect(self): - """inhibit the resource beyond - - Choose mounting backend (direct or mountbroker), 
- set up glusterfs parameters and perform the mount - with given backend - """ - - label = getattr(gconf, 'mountbroker', None) - if not label: - uid = os.geteuid() - if uid != 0: - label = syncdutils.getusername(uid) - mounter = label and self.MountbrokerMounter or self.DirectMounter - params = gconf.gluster_params.split() + \ - (gconf.gluster_log_level and ['log-level=' + gconf.gluster_log_level] or []) + \ - ['log-file=' + gconf.gluster_log_file, 'volfile-server=' + self.host, - 'volfile-id=' + self.volume, 'client-pid=-1'] - mounter(params).inhibit(*[l for l in [label] if l]) - - def connect_remote(self, *a, **kw): - sup(self, *a, **kw) - self.slavedir = "/proc/%d/cwd" % self.server.pid() - - def service_loop(self, *args): - """enter service loop - - - if slave given, instantiate GMaster and - pass control to that instance, which implements - master behavior - - else do that's what's inherited - """ - if args: - GMaster(self, args[0]).crawl_loop() - else: - sup(self, *args) - - def rsync(self, files): - return sup(self, files, self.slavedir) - - -class SSH(AbstractUrl, SlaveRemote): - """scheme class for ssh:// urls - - interface to remote slave on master side - implementing an ssh based proxy - """ - - def __init__(self, path): - self.remote_addr, inner_url = sup(self, path, - '^((?:%s@)?%s):(.+)' % tuple([ r.pattern for r in (UserRX, HostRX) ])) - self.inner_rsc = parse_url(inner_url) - - def canonical_path(self): - m = re.match('([^@]+)@(.+)', self.remote_addr) - if m: - u, h = m.groups() - else: - u, h = syncdutils.getusername(), self.remote_addr - remote_addr = '@'.join([u, gethostbyname(h)]) - return ':'.join([remote_addr, self.inner_rsc.get_url(canonical=True)]) - - def can_connect_to(self, remote): - """determine our position in the connectibility matrix""" - return False - - def start_fd_client(self, *a, **opts): - """customizations for client startup - - - be a no-op if we are to daemonize (client startup is deferred - to post-daemon stage) - - 
determine target url for rsync after consulting server - """ - if opts.get('deferred'): - return a - sup(self, *a) - ityp = type(self.inner_rsc) - if ityp == FILE: - slavepath = self.inner_rsc.path - elif ityp == GLUSTER: - slavepath = "/proc/%d/cwd" % self.server.pid() - else: - raise NotImplementedError - self.slaveurl = ':'.join([self.remote_addr, slavepath]) - - def connect_remote(self, go_daemon=None): - """connect to inner slave url through outer ssh url - - Wrap the connecting utility in ssh. - - Much care is put into daemonizing: in that case - ssh is started before daemonization, but - RePCe client is to be created after that (as ssh - interactive password auth would be defeated by - a daemonized ssh, while client should be present - only in the final process). In that case the action - is taken apart to two parts, this method is ivoked - once pre-daemon, once post-daemon. Use @go_daemon - to deiced what part to perform. - - [NB. ATM gluster product does not makes use of interactive - authentication.] - """ - if go_daemon == 'done': - return self.start_fd_client(*self.fd_pair) - gconf.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-')) - deferred = go_daemon == 'postconn' - ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr], slave=self.inner_rsc.url, deferred=deferred) - if deferred: - # send a message to peer so that we can wait for - # the answer from which we know connection is - # established and we can proceed with daemonization - # (doing that too early robs the ssh passwd prompt...) - # However, we'd better not start the RepceClient - # before daemonization (that's not preserved properly - # in daemon), we just do a an ad-hoc linear put/get. 
- i, o = ret - inf = os.fdopen(i) - repce.send(o, None, '__repce_version__') - select((inf,), (), ()) - repce.recv(inf) - # hack hack hack: store a global reference to the file - # to save it from getting GC'd which implies closing it - gconf.permanent_handles.append(inf) - self.fd_pair = (i, o) - return 'should' - - def rsync(self, files): - return sup(self, files, '-ze', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args), self.slaveurl) diff --git a/xlators/features/marker/utils/syncdaemon/syncdutils.py b/xlators/features/marker/utils/syncdaemon/syncdutils.py deleted file mode 100644 index 59defa711ed..00000000000 --- a/xlators/features/marker/utils/syncdaemon/syncdutils.py +++ /dev/null @@ -1,269 +0,0 @@ -import os -import sys -import pwd -import time -import fcntl -import shutil -import logging -from threading import Lock, Thread as baseThread -from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, EINTR -from signal import SIGTERM, SIGKILL -from time import sleep -import select as oselect -from os import waitpid as owaitpid -try: - from cPickle import PickleError -except ImportError: - # py 3 - from pickle import PickleError - -from gconf import gconf - -try: - # py 3 - from urllib import parse as urllib -except ImportError: - import urllib - -def escape(s): - """the chosen flavor of string escaping, used all over - to turn whatever data to creatable representation""" - return urllib.quote_plus(s) - -def unescape(s): - """inverse of .escape""" - return urllib.unquote_plus(s) - -def norm(s): - if s: - return s.replace('-', '_') - -def update_file(path, updater, merger = lambda f: True): - """update a file in a transaction-like manner""" - - fr = fw = None - try: - fd = os.open(path, os.O_CREAT|os.O_RDWR) - try: - fr = os.fdopen(fd, 'r+b') - except: - os.close(fd) - raise - fcntl.lockf(fr, fcntl.LOCK_EX) - if not merger(fr): - return - - tmpp = path + '.tmp.' 
+ str(os.getpid()) - fd = os.open(tmpp, os.O_CREAT|os.O_EXCL|os.O_WRONLY) - try: - fw = os.fdopen(fd, 'wb', 0) - except: - os.close(fd) - raise - updater(fw) - os.fsync(fd) - os.rename(tmpp, path) - finally: - for fx in (fr, fw): - if fx: - fx.close() - -def grabfile(fname, content=None): - """open @fname + contest for its fcntl lock - - @content: if given, set the file content to it - """ - # damn those messy open() mode codes - fd = os.open(fname, os.O_CREAT|os.O_RDWR) - f = os.fdopen(fd, 'r+b', 0) - try: - fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB) - except: - ex = sys.exc_info()[1] - f.close() - if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN): - # cannot grab, it's taken - return - raise - if content: - try: - f.truncate() - f.write(content) - except: - f.close() - raise - gconf.permanent_handles.append(f) - return f - -def grabpidfile(fname=None, setpid=True): - """.grabfile customization for pid files""" - if not fname: - fname = gconf.pid_file - content = None - if setpid: - content = str(os.getpid()) + '\n' - return grabfile(fname, content=content) - -final_lock = Lock() - -def finalize(*a, **kw): - """all those messy final steps we go trough upon termination - - Do away with pidfile, ssh control dir and logging. 
- """ - final_lock.acquire() - if getattr(gconf, 'pid_file', None): - rm_pidf = gconf.pid_file_owned - if gconf.cpid: - # exit path from parent branch of daemonization - rm_pidf = False - while True: - f = grabpidfile(setpid=False) - if not f: - # child has already taken over pidfile - break - if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid: - # child has terminated - rm_pidf = True - break; - time.sleep(0.1) - if rm_pidf: - try: - os.unlink(gconf.pid_file) - except: - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - pass - else: - raise - if gconf.ssh_ctl_dir and not gconf.cpid: - shutil.rmtree(gconf.ssh_ctl_dir) - if gconf.log_exit: - logging.info("exiting.") - sys.stdout.flush() - sys.stderr.flush() - os._exit(kw.get('exval', 0)) - -def log_raise_exception(excont): - """top-level exception handler - - Try to some fancy things to cover up we face with an error. - Translate some weird sounding but well understood exceptions - into human-friendly lingo - """ - is_filelog = False - for h in logging.getLogger().handlers: - fno = getattr(getattr(h, 'stream', None), 'fileno', None) - if fno and not os.isatty(fno()): - is_filelog = True - - exc = sys.exc_info()[1] - if isinstance(exc, SystemExit): - excont.exval = exc.code or 0 - raise - else: - logtag = None - if isinstance(exc, GsyncdError): - if is_filelog: - logging.error(exc.message) - sys.stderr.write('failure: ' + exc.message + "\n") - elif isinstance(exc, PickleError) or isinstance(exc, EOFError) or \ - ((isinstance(exc, OSError) or isinstance(exc, IOError)) and \ - exc.errno == EPIPE): - logging.error('connection to peer is broken') - if hasattr(gconf, 'transport'): - gconf.transport.wait() - gconf.transport.terminate_geterr() - elif isinstance(exc, OSError) and exc.errno == ENOTCONN: - logging.error('glusterfs session went down') - else: - logtag = "FAIL" - if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG): - logtag = "FULL EXCEPTION TRACE" - if logtag: - logging.exception(logtag + ": 
") - sys.stderr.write("failed with %s.\n" % type(exc).__name__) - excont.exval = 1 - sys.exit(excont.exval) - - -class FreeObject(object): - """wildcard class for which any attribute can be set""" - - def __init__(self, **kw): - for k,v in kw.items(): - setattr(self, k, v) - -class Thread(baseThread): - """thread class flavor for gsyncd - - - always a daemon thread - - force exit for whole program if thread - function coughs up an exception - """ - def __init__(self, *a, **kw): - tf = kw.get('target') - if tf: - def twrap(*aa): - excont = FreeObject(exval = 0) - try: - tf(*aa) - except: - try: - log_raise_exception(excont) - finally: - finalize(exval = excont.exval) - kw['target'] = twrap - baseThread.__init__(self, *a, **kw) - self.setDaemon(True) - -class GsyncdError(Exception): - pass - -def getusername(uid = None): - if uid == None: - uid = os.geteuid() - return pwd.getpwuid(uid).pw_name - -def boolify(s): - """ - Generic string to boolean converter - - return - - Quick return if string 's' is of type bool - - True if it's in true_list - - False if it's in false_list - - Warn if it's not present in either and return False - """ - true_list = ['true', 'yes', '1', 'on'] - false_list = ['false', 'no', '0', 'off'] - - if isinstance(s, bool): - return s - - rv = False - lstr = s.lower() - if lstr in true_list: - rv = True - elif not lstr in false_list: - logging.warn("Unknown string (%s) in string to boolean conversion defaulting to False\n" % (s)) - - return rv - -def eintr_wrap(func, exc, *a): - """ - wrapper around syscalls resilient to interrupt caused - by signals - """ - while True: - try: - return func(*a) - except exc, ex: - if not ex[0] == EINTR: - raise GsyncdError(ex[1]) - -def select(*a): - return eintr_wrap(oselect.select, oselect.error, *a) - -def waitpid (*a): - return eintr_wrap(owaitpid, OSError, *a) |
