diff options
Diffstat (limited to 'xlators/features/marker')
22 files changed, 6129 insertions, 2260 deletions
diff --git a/xlators/features/marker/Makefile.am b/xlators/features/marker/Makefile.am index a6ba2de16ae..a985f42a877 100644 --- a/xlators/features/marker/Makefile.am +++ b/xlators/features/marker/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = src @SYNCDAEMON_SUBDIR@ +SUBDIRS = src CLEANFILES = diff --git a/xlators/features/marker/src/Makefile.am b/xlators/features/marker/src/Makefile.am index 58b12b3f594..58056b36511 100644 --- a/xlators/features/marker/src/Makefile.am +++ b/xlators/features/marker/src/Makefile.am @@ -1,15 +1,24 @@ +if WITH_SERVER xlator_LTLIBRARIES = marker.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -marker_la_LDFLAGS = -module -avoidversion +marker_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c \ + marker-common.c -marker_la_SOURCES = marker.c marker_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = marker-mem-types.h marker.h $(top_builddir)/xlators/lib/src/libxlator.h +noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h \ + marker-quota-helper.h marker-common.h \ + $(top_builddir)/xlators/lib/src/libxlator.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/xlators/lib/src -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src $(GF_CFLAGS) -shared -nostartfiles +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c new file mode 100644 index 00000000000..9c9047005d6 --- /dev/null +++ b/xlators/features/marker/src/marker-common.c @@ -0,0 +1,57 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <fnmatch.h> +#include "marker-common.h" + +marker_inode_ctx_t * +marker_inode_ctx_new() +{ + marker_inode_ctx_t *ctx = NULL; + + ctx = GF_CALLOC(1, sizeof(marker_inode_ctx_t), + gf_marker_mt_marker_inode_ctx_t); + if (ctx == NULL) + goto out; + + ctx->quota_ctx = NULL; +out: + return ctx; +} + +int32_t +marker_force_inode_ctx_get(inode_t *inode, xlator_t *this, + marker_inode_ctx_t **ctx) +{ + int32_t ret = -1; + uint64_t ctx_int = 0; + + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &ctx_int); + if (ret == 0) + *ctx = (marker_inode_ctx_t *)(unsigned long)ctx_int; + else { + *ctx = marker_inode_ctx_new(); + if (*ctx == NULL) + goto unlock; + + ret = __inode_ctx_put(inode, this, (uint64_t)(unsigned long)*ctx); + if (ret == -1) { + GF_FREE(*ctx); + goto unlock; + } + ret = 0; + } + } +unlock: + UNLOCK(&inode->lock); + + return ret; +} diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h new file mode 100644 index 00000000000..7f8cffe7d35 --- /dev/null +++ b/xlators/features/marker/src/marker-common.h @@ -0,0 +1,19 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _MARKER_COMMON_H +#define _MARKER_COMMON_H + +#include <glusterfs/xlator.h> +#include "marker.h" + +int32_t +marker_force_inode_ctx_get(inode_t *, xlator_t *, marker_inode_ctx_t **); + +#endif diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h index 3936ef794fc..aedfdb4a1b7 100644 --- a/xlators/features/marker/src/marker-mem-types.h +++ b/xlators/features/marker/src/marker-mem-types.h @@ -1,32 +1,28 @@ /* - Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - #ifndef __MARKER_MEM_TYPES_H__ #define __MARKER_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_marker_mem_types_ { - gf_marker_mt_marker_local_t = gf_common_mt_end + 1, - gf_marker_mt_marker_conf_t, - gf_marker_mt_loc_t, - gf_marker_mt_volume_mark, - gf_marker_mt_end + /* Those are used by ALLOCATE_OR_GOTO macro */ + gf_marker_mt_marker_conf_t = gf_common_mt_end + 1, + gf_marker_mt_loc_t, + gf_marker_mt_volume_mark, + gf_marker_mt_int64_t, + gf_marker_mt_quota_inode_ctx_t, + gf_marker_mt_marker_inode_ctx_t, + gf_marker_mt_inode_contribution_t, + gf_marker_mt_quota_meta_t, + gf_marker_mt_quota_synctask_t, + gf_marker_mt_end }; #endif diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c new file mode 100644 index 00000000000..ecd85d67b2b --- /dev/null +++ b/xlators/features/marker/src/marker-quota-helper.c @@ -0,0 +1,380 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#include <glusterfs/locking.h> +#include "marker-quota.h" +#include "marker-common.h" +#include "marker-quota-helper.h" +#include "marker-mem-types.h" + +int +mq_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", inode, out); + GF_VALIDATE_OR_GOTO("marker", path, out); + /* Not checking for parent because while filling + * loc of root, parent will be NULL + */ + + if (inode) { + loc->inode = inode_ref(inode); + } + + if (parent) + loc->parent = inode_ref(parent); + + if (!gf_uuid_is_null(inode->gfid)) + gf_uuid_copy(loc->gfid, inode->gfid); + + loc->path = gf_strdup(path); + if (!loc->path) { + gf_log("loc fill", GF_LOG_ERROR, "strdup failed"); + goto out; + } + + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + else + goto out; + + ret = 0; + +out: + if (ret < 0) + loc_wipe(loc); + + return ret; +} + +int32_t +mq_inode_loc_fill(const char *parent_gfid, inode_t *inode, loc_t *loc) +{ + char *resolvedpath = NULL; + inode_t *parent = NULL; + quota_inode_ctx_t *ctx = NULL; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + if (inode == NULL) { + gf_log_callingfn("marker", GF_LOG_ERROR, + "loc fill failed, " + "inode is NULL"); + return ret; + } + + if (loc == NULL) + return ret; + + if ((inode) && __is_root_gfid(inode->gfid)) { + loc->parent = NULL; + goto ignore_parent; + } + + if (parent_gfid == NULL) + parent = inode_parent(inode, 0, NULL); + else + parent = inode_find(inode->table, (unsigned char *)parent_gfid); + + if (parent == NULL) { + gf_log("marker", GF_LOG_ERROR, "parent is NULL for %s", + uuid_utoa(inode->gfid)); + goto err; + } + +ignore_parent: + ret = inode_path(inode, NULL, &resolvedpath); + if (ret < 0) { + gf_log("marker", GF_LOG_ERROR, "failed to resolve path for %s", + uuid_utoa(inode->gfid)); + goto err; + } + + ret = mq_loc_fill(loc, inode, parent, resolvedpath); + if (ret < 0) + goto err; + + ret = 
mq_inode_ctx_get(inode, this, &ctx); + if (ret < 0 || ctx == NULL) + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + ret = -1; + goto err; + } + ret = 0; + +err: + if (parent) + inode_unref(parent); + + GF_FREE(resolvedpath); + + return ret; +} + +quota_inode_ctx_t * +mq_alloc_inode_ctx() +{ + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + + QUOTA_ALLOC(ctx, quota_inode_ctx_t, ret); + if (ret == -1) + goto out; + + ctx->size = 0; + ctx->dirty = 0; + ctx->updation_status = _gf_false; + LOCK_INIT(&ctx->lock); + INIT_LIST_HEAD(&ctx->contribution_head); +out: + return ctx; +} + +static void +mq_contri_fini(inode_contribution_t *contri) +{ + LOCK_DESTROY(&contri->lock); + GF_FREE(contri); +} + +inode_contribution_t * +mq_contri_init(inode_t *inode) +{ + inode_contribution_t *contri = NULL; + int32_t ret = 0; + + QUOTA_ALLOC(contri, inode_contribution_t, ret); + if (ret == -1) + goto out; + + GF_REF_INIT(contri, mq_contri_fini); + + contri->contribution = 0; + contri->file_count = 0; + contri->dir_count = 0; + gf_uuid_copy(contri->gfid, inode->gfid); + + LOCK_INIT(&contri->lock); + INIT_LIST_HEAD(&contri->contri_list); + +out: + return contri; +} + +inode_contribution_t * +mq_get_contribution_node(inode_t *inode, quota_inode_ctx_t *ctx) +{ + inode_contribution_t *contri = NULL; + inode_contribution_t *temp = NULL; + + if (!inode || !ctx) + goto out; + + LOCK(&ctx->lock); + { + if (list_empty(&ctx->contribution_head)) + goto unlock; + + list_for_each_entry(temp, &ctx->contribution_head, contri_list) + { + if (gf_uuid_compare(temp->gfid, inode->gfid) == 0) { + contri = temp; + GF_REF_GET(contri); + break; + } + } + } +unlock: + UNLOCK(&ctx->lock); + +out: + return contri; +} + +inode_contribution_t * +__mq_add_new_contribution_node(xlator_t *this, quota_inode_ctx_t *ctx, + loc_t *loc) +{ + inode_contribution_t *contribution = NULL; + + if 
(!loc->parent) { + if (!gf_uuid_is_null(loc->pargfid)) + loc->parent = inode_find(loc->inode->table, loc->pargfid); + + if (!loc->parent) + loc->parent = inode_parent(loc->inode, loc->pargfid, loc->name); + if (!loc->parent) + goto out; + } + + list_for_each_entry(contribution, &ctx->contribution_head, contri_list) + { + if (loc->parent && + gf_uuid_compare(contribution->gfid, loc->parent->gfid) == 0) { + goto out; + } + } + + contribution = mq_contri_init(loc->parent); + if (contribution == NULL) + goto out; + + list_add_tail(&contribution->contri_list, &ctx->contribution_head); + +out: + return contribution; +} + +inode_contribution_t * +mq_add_new_contribution_node(xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) +{ + inode_contribution_t *contribution = NULL; + + if ((ctx == NULL) || (loc == NULL)) + return NULL; + + if (((loc->path) && (strcmp(loc->path, "/") == 0)) || + (!loc->path && gf_uuid_is_null(loc->pargfid))) + return NULL; + + LOCK(&ctx->lock); + { + contribution = __mq_add_new_contribution_node(this, ctx, loc); + if (contribution) + GF_REF_GET(contribution); + } + UNLOCK(&ctx->lock); + + return contribution; +} + +int32_t +mq_dict_set_contribution(xlator_t *this, dict_t *dict, loc_t *loc, uuid_t gfid, + char *contri_key) +{ + int32_t ret = -1; + char key[QUOTA_KEY_MAX] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", dict, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + + if (gfid && !gf_uuid_is_null(gfid)) { + GET_CONTRI_KEY(this, key, gfid, ret); + } else if (loc->parent) { + GET_CONTRI_KEY(this, key, loc->parent->gfid, ret); + } else { + /* nameless lookup, fetch contributions to all parents */ + GET_CONTRI_KEY(this, key, NULL, ret); + } + + if (ret < 0) + goto out; + + ret = dict_set_int64(dict, key, 0); + if (ret < 0) + goto out; + + if (contri_key) + if (snprintf(contri_key, QUOTA_KEY_MAX, "%s", key) >= QUOTA_KEY_MAX) { + ret = -1; + goto out; + } + +out: + if (ret < 0) + gf_log_callingfn(this ? 
this->name : "Marker", GF_LOG_ERROR, + "dict set failed"); + + return ret; +} + +int32_t +mq_inode_ctx_get(inode_t *inode, xlator_t *this, quota_inode_ctx_t **ctx) +{ + int32_t ret = -1; + uint64_t ctx_int = 0; + marker_inode_ctx_t *mark_ctx = NULL; + + GF_VALIDATE_OR_GOTO("marker", inode, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + + ret = inode_ctx_get(inode, this, &ctx_int); + if (ret < 0) { + ret = -1; + *ctx = NULL; + goto out; + } + + mark_ctx = (marker_inode_ctx_t *)(unsigned long)ctx_int; + if (mark_ctx->quota_ctx == NULL) { + ret = -1; + goto out; + } + + *ctx = mark_ctx->quota_ctx; + + ret = 0; + +out: + return ret; +} + +quota_inode_ctx_t * +__mq_inode_ctx_new(inode_t *inode, xlator_t *this) +{ + int32_t ret = -1; + quota_inode_ctx_t *quota_ctx = NULL; + marker_inode_ctx_t *mark_ctx = NULL; + + ret = marker_force_inode_ctx_get(inode, this, &mark_ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "marker_force_inode_ctx_get() failed"); + goto out; + } + + LOCK(&inode->lock); + { + if (mark_ctx->quota_ctx == NULL) { + quota_ctx = mq_alloc_inode_ctx(); + if (quota_ctx == NULL) { + ret = -1; + goto unlock; + } + mark_ctx->quota_ctx = quota_ctx; + } else { + quota_ctx = mark_ctx->quota_ctx; + } + + ret = 0; + } +unlock: + UNLOCK(&inode->lock); +out: + return quota_ctx; +} + +quota_inode_ctx_t * +mq_inode_ctx_new(inode_t *inode, xlator_t *this) +{ + return __mq_inode_ctx_new(inode, this); +} diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h new file mode 100644 index 00000000000..d4091dd2180 --- /dev/null +++ b/xlators/features/marker/src/marker-quota-helper.h @@ -0,0 +1,66 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _MARKER_QUOTA_HELPER_H +#define _MARKER_QUOTA_HELPER_H + +#include "marker.h" + +#define QUOTA_FREE_CONTRIBUTION_NODE(ctx, _contribution) \ + do { \ + LOCK(&ctx->lock); \ + { \ + list_del_init(&_contribution->contri_list); \ + GF_REF_PUT(_contribution); \ + } \ + UNLOCK(&ctx->lock); \ + } while (0) + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK(lock); \ + var++; \ + UNLOCK(lock); \ + } while (0) + +#define QUOTA_SAFE_DECREMENT(lock, var, value) \ + do { \ + LOCK(lock); \ + { \ + value = --var; \ + } \ + UNLOCK(lock); \ + } while (0) + +inode_contribution_t * +mq_add_new_contribution_node(xlator_t *, quota_inode_ctx_t *, loc_t *); + +int32_t +mq_dict_set_contribution(xlator_t *, dict_t *, loc_t *, uuid_t, char *); + +quota_inode_ctx_t * +mq_inode_ctx_new(inode_t *, xlator_t *); + +int32_t +mq_inode_ctx_get(inode_t *, xlator_t *, quota_inode_ctx_t **); + +int32_t +mq_delete_contribution_node(dict_t *, char *, inode_contribution_t *); + +int32_t +mq_inode_loc_fill(const char *, inode_t *, loc_t *); + +inode_contribution_t * +mq_contri_init(inode_t *inode); + +inode_contribution_t * +mq_get_contribution_node(inode_t *, quota_inode_ctx_t *); + +#endif diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c new file mode 100644 index 00000000000..3de2ea1c92c --- /dev/null +++ b/xlators/features/marker/src/marker-quota.c @@ -0,0 +1,2297 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include "libxlator.h" +#include <glusterfs/common-utils.h> +#include <glusterfs/byte-order.h> +#include "marker-quota.h" +#include "marker-quota-helper.h" +#include <glusterfs/syncop.h> +#include <glusterfs/quota-common-utils.h> + +int +mq_loc_copy(loc_t *dst, loc_t *src) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO("marker", dst, out); + GF_VALIDATE_OR_GOTO("marker", src, out); + + if (src->inode == NULL || + ((src->parent == NULL) && (gf_uuid_is_null(src->pargfid)) && + !__is_root_gfid(src->inode->gfid))) { + gf_log("marker", GF_LOG_WARNING, "src loc is not valid"); + goto out; + } + + ret = loc_copy(dst, src); +out: + return ret; +} + +static void +mq_set_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag, + gf_boolean_t status) +{ + LOCK(&ctx->lock); + { + *flag = status; + } + UNLOCK(&ctx->lock); +} + +static void +mq_test_and_set_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag, + gf_boolean_t *status) +{ + gf_boolean_t temp = _gf_false; + + LOCK(&ctx->lock); + { + temp = *status; + *status = *flag; + *flag = temp; + } + UNLOCK(&ctx->lock); +} + +static void +mq_get_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag, + gf_boolean_t *status) +{ + LOCK(&ctx->lock); + { + *status = *flag; + } + UNLOCK(&ctx->lock); +} + +int32_t +mq_get_ctx_updation_status(quota_inode_ctx_t *ctx, gf_boolean_t *status) +{ + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", status, out); + + mq_get_ctx_status(ctx, &ctx->updation_status, status); + return 0; +out: + return -1; +} + +int32_t +mq_set_ctx_updation_status(quota_inode_ctx_t *ctx, gf_boolean_t status) +{ + GF_VALIDATE_OR_GOTO("marker", 
ctx, out); + + mq_set_ctx_status(ctx, &ctx->updation_status, status); + return 0; +out: + return -1; +} + +int32_t +mq_test_and_set_ctx_updation_status(quota_inode_ctx_t *ctx, + gf_boolean_t *status) +{ + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", status, out); + + mq_test_and_set_ctx_status(ctx, &ctx->updation_status, status); + return 0; +out: + return -1; +} + +int32_t +mq_set_ctx_create_status(quota_inode_ctx_t *ctx, gf_boolean_t status) +{ + GF_VALIDATE_OR_GOTO("marker", ctx, out); + + mq_set_ctx_status(ctx, &ctx->create_status, status); + return 0; +out: + return -1; +} + +int32_t +mq_test_and_set_ctx_create_status(quota_inode_ctx_t *ctx, gf_boolean_t *status) +{ + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", status, out); + + mq_test_and_set_ctx_status(ctx, &ctx->create_status, status); + return 0; +out: + return -1; +} + +static void +mq_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t status) +{ + GF_VALIDATE_OR_GOTO("marker", ctx, out); + + mq_set_ctx_status(ctx, &ctx->dirty_status, status); +out: + return; +} + +int +mq_build_ancestry(xlator_t *this, loc_t *loc) +{ + int32_t ret = -1; + fd_t *fd = NULL; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + dict_t *xdata = NULL; + inode_t *tmp_parent = NULL; + inode_t *tmp_inode = NULL; + inode_t *linked_inode = NULL; + quota_inode_ctx_t *ctx = NULL; + + INIT_LIST_HEAD(&entries.list); + + xdata = dict_new(); + if (xdata == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -ENOMEM; + goto out; + } + + ret = dict_set_int8(xdata, GET_ANCESTRY_DENTRY_KEY, 1); + if (ret < 0) + goto out; + + fd = fd_anonymous(loc->inode); + if (fd == NULL) { + gf_log(this->name, GF_LOG_ERROR, "fd creation failed"); + ret = -ENOMEM; + goto out; + } + + fd_bind(fd); + + ret = syncop_readdirp(this, fd, 131072, 0, &entries, xdata, NULL); + if (ret < 0) { + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "readdirp failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + if (list_empty(&entries.list)) { + ret = -1; + goto out; + } + + list_for_each_entry(entry, &entries.list, list) + { + if (__is_root_gfid(entry->inode->gfid)) { + /* The list contains a sub-list for each possible path + * to the target inode. Each sub-list starts with the + * root entry of the tree and is followed by the child + * entries for a particular path to the target entry. + * The root entry is an implied sub-list delimiter, + * as it denotes we have started processing a new path. + * Reset the parent pointer and continue + */ + + tmp_parent = NULL; + } else { + linked_inode = inode_link(entry->inode, tmp_parent, entry->d_name, + &entry->d_stat); + if (linked_inode) { + tmp_inode = entry->inode; + entry->inode = linked_inode; + inode_unref(tmp_inode); + } else { + gf_log(this->name, GF_LOG_ERROR, "inode link failed"); + ret = -EINVAL; + goto out; + } + } + + ctx = mq_inode_ctx_new(entry->inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(entry->inode->gfid)); + ret = -ENOMEM; + goto out; + } + + /* For non-directory, posix_get_ancestry_non_directory returns + * all hard-links that are represented by nodes adjacent to + * each other in the dentry-list. + * (Unlike the directory case where adjacent nodes either have + * a parent/child relationship or belong to different paths). + */ + if (entry->inode->ia_type == IA_IFDIR) + tmp_parent = entry->inode; + } + + if (loc->parent) + inode_unref(loc->parent); + + loc->parent = inode_parent(loc->inode, 0, NULL); + if (loc->parent == NULL) { + ret = -1; + goto out; + } + + ret = 0; + +out: + gf_dirent_free(&entries); + + if (fd) + fd_unref(fd); + + if (xdata) + dict_unref(xdata); + + return ret; +} + +/* This function should be used only in inspect_directory and inspect_file + * function to heal quota xattrs. 
+ * Inode quota feature is introduced in 3.7. + * If gluster setup is upgraded from 3.6 to 3.7, there can be a + * getxattr and setxattr spikes with quota heal as inode quota is missing. + * So this wrapper function is to avoid xattrs spikes during upgrade. + * This function returns success even is inode-quota xattrs are missing and + * hence no healing performed. + */ +static int32_t +_quota_dict_get_meta(xlator_t *this, dict_t *dict, char *key, const int keylen, + quota_meta_t *meta, ia_type_t ia_type, + gf_boolean_t add_delta) +{ + int32_t ret = 0; + marker_conf_t *priv = NULL; + + priv = this->private; + + ret = quota_dict_get_inode_meta(dict, key, keylen, meta); + if (ret == -2 && (priv->feature_enabled & GF_INODE_QUOTA) == 0) { + /* quota_dict_get_inode_meta returns -2 if + * inode quota xattrs are not present. + * if inode quota self heal is turned off, + * then we should skip healing inode quotas + */ + + gf_log(this->name, GF_LOG_DEBUG, + "inode quota disabled. " + "inode quota self heal will not be performed"); + ret = 0; + if (add_delta) { + if (ia_type == IA_IFDIR) + meta->dir_count = 1; + else + meta->file_count = 1; + } + } + + return ret; +} + +int32_t +quota_dict_set_size_meta(xlator_t *this, dict_t *dict, const quota_meta_t *meta) +{ + int32_t ret = -ENOMEM; + quota_meta_t *value = NULL; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + + value = GF_MALLOC(2 * sizeof(quota_meta_t), gf_common_quota_meta_t); + if (value == NULL) { + goto out; + } + value[0].size = hton64(meta->size); + value[0].file_count = hton64(meta->file_count); + value[0].dir_count = hton64(meta->dir_count); + + value[1].size = 0; + value[1].file_count = 0; + value[1].dir_count = hton64(1); + + GET_SIZE_KEY(this, size_key, ret); + if (ret < 0) + goto out; + ret = dict_set_bin(dict, size_key, value, (sizeof(quota_meta_t) * 2)); + if (ret < 0) { + gf_log_callingfn("quota", GF_LOG_ERROR, "dict set failed"); + GF_FREE(value); + } +out: + return ret; +} + +void 
+mq_compute_delta(quota_meta_t *delta, const quota_meta_t *op1, + const quota_meta_t *op2) +{ + delta->size = op1->size - op2->size; + delta->file_count = op1->file_count - op2->file_count; + delta->dir_count = op1->dir_count - op2->dir_count; +} + +void +mq_add_meta(quota_meta_t *dst, const quota_meta_t *src) +{ + dst->size += src->size; + dst->file_count += src->file_count; + dst->dir_count += src->dir_count; +} + +void +mq_sub_meta(quota_meta_t *dst, const quota_meta_t *src) +{ + if (src == NULL) { + dst->size = -dst->size; + dst->file_count = -dst->file_count; + dst->dir_count = -dst->dir_count; + } else { + dst->size = src->size - dst->size; + dst->file_count = src->file_count - dst->file_count; + dst->dir_count = src->dir_count - dst->dir_count; + } +} + +int32_t +mq_are_xattrs_set(xlator_t *this, loc_t *loc, gf_boolean_t *contri_set, + gf_boolean_t *size_set) +{ + int32_t ret = -1; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + quota_meta_t meta = { + 0, + }; + struct iatt stbuf = { + 0, + }; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + dict = dict_new(); + if (dict == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } + + ret = mq_req_xattr(this, loc, dict, contri_key, size_key); + if (ret < 0) + goto out; + + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "lookup failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + if (rsp_dict == NULL) + goto out; + + *contri_set = _gf_true; + *size_set = _gf_true; + if (loc->inode->ia_type == IA_IFDIR) { + ret = quota_dict_get_inode_meta(rsp_dict, size_key, strlen(size_key), + &meta); + if (ret < 0 || meta.dir_count == 0) + *size_set = _gf_false; + } + + if (!loc_is_root(loc)) { + ret = quota_dict_get_inode_meta(rsp_dict, contri_key, + strlen(contri_key), &meta); + if (ret < 0) + *contri_set = _gf_false; + } + + ret = 0; +out: + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + return ret; +} + +int32_t +mq_create_size_xattrs(xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) +{ + int32_t ret = -1; + quota_meta_t size = { + 0, + }; + dict_t *dict = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + if (loc->inode->ia_type != IA_IFDIR) { + ret = 0; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = quota_dict_set_size_meta(this, dict, &size); + if (ret < 0) + goto out; + + ret = syncop_xattrop(FIRST_CHILD(this), loc, + GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, NULL, + NULL); + + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + +out: + if (dict) + dict_unref(dict); + + return ret; +} + +int32_t +mq_lock(xlator_t *this, loc_t *loc, short l_type) +{ + struct gf_flock lock = { + 0, + }; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + gf_log(this->name, GF_LOG_DEBUG, "set lock type %d on %s", l_type, + loc->path); + + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = l_type; + lock.l_whence = SEEK_SET; + + ret = syncop_inodelk(FIRST_CHILD(this), this->name, loc, F_SETLKW, &lock, + NULL, NULL); + if (ret < 0) + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "inodelk failed " + "for %s: %s", + loc->path, strerror(-ret)); + +out: + + return ret; +} + +int32_t +mq_get_dirty(xlator_t *this, loc_t *loc, int32_t *dirty) +{ + int32_t ret = -1; + int8_t value = 0; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + struct iatt stbuf = { + 0, + }; + + dict = dict_new(); + if (dict == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } + + ret = dict_set_int64(dict, QUOTA_DIRTY_KEY, 0); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "dict set failed"); + goto out; + } + + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "lookup failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + ret = dict_get_int8(rsp_dict, QUOTA_DIRTY_KEY, &value); + if (ret < 0) + goto out; + + *dirty = value; + +out: + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + return ret; +} + +int32_t +mq_get_set_dirty(xlator_t *this, loc_t *loc, int32_t dirty, int32_t *prev_dirty) +{ + int32_t ret = -1; + int8_t value = 0; + quota_inode_ctx_t *ctx = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", prev_dirty, out); + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get inode ctx for " + "%s", + loc->path); + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, dirty); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed"); + goto out; + } + + ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_GET_AND_SET, dict, + NULL, NULL, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + *prev_dirty = 0; + if (rsp_dict) { + ret = dict_get_int8(rsp_dict, QUOTA_DIRTY_KEY, &value); + if (ret == 0) + *prev_dirty = value; + } + + LOCK(&ctx->lock); + { + ctx->dirty = dirty; + } + UNLOCK(&ctx->lock); + ret = 0; +out: + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + return ret; +} + +int32_t +mq_mark_dirty(xlator_t *this, loc_t *loc, int32_t dirty) +{ + int32_t ret = -1; + dict_t *dict = NULL; + quota_inode_ctx_t *ctx = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get inode ctx for " + "%s", + loc->path); + ret = 0; + goto out; + } + + dict = dict_new(); + if (!dict) { + ret = -1; + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } + + ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, dirty); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed"); + goto out; + } + + ret = syncop_setxattr(FIRST_CHILD(this), loc, dict, 0, NULL, NULL); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "setxattr dirty = %d " + "failed for %s: %s", + dirty, loc->path, strerror(-ret)); + goto out; + } + + LOCK(&ctx->lock); + { + ctx->dirty = dirty; + } + UNLOCK(&ctx->lock); + +out: + if (dict) + dict_unref(dict); + + return ret; +} + +int32_t +_mq_get_metadata(xlator_t *this, loc_t *loc, quota_meta_t *contri, + quota_meta_t *size, uuid_t contri_gfid) +{ + int32_t ret = -1; + quota_meta_t meta = { + 0, + }; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + struct iatt stbuf = { + 0, + }; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + if (size == NULL && contri == NULL) + goto out; + + dict = dict_new(); + if (dict == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } + + if (size && loc->inode->ia_type == IA_IFDIR) { + GET_SIZE_KEY(this, size_key, keylen); + if (keylen < 0) + goto out; + ret = dict_set_int64(dict, size_key, 0); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed."); + goto out; + } + } + + if (contri && !loc_is_root(loc)) { + ret = mq_dict_set_contribution(this, dict, loc, contri_gfid, + contri_key); + if (ret < 0) + goto out; + } + + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "lookup failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + if (size) { + if (loc->inode->ia_type == IA_IFDIR) { + ret = quota_dict_get_meta(rsp_dict, size_key, keylen, &meta); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_get failed."); + goto out; + } + + size->size = meta.size; + size->file_count = meta.file_count; + size->dir_count = meta.dir_count; + } else { + size->size = stbuf.ia_blocks * 512; + size->file_count = 1; + size->dir_count = 0; + } + } + + if (contri && !loc_is_root(loc)) { + ret = quota_dict_get_meta(rsp_dict, contri_key, strlen(contri_key), + &meta); + if (ret < 0) { + contri->size = 0; + contri->file_count = 0; + contri->dir_count = 0; + } else { + contri->size = meta.size; + contri->file_count = meta.file_count; + contri->dir_count = meta.dir_count; + } + } + + ret = 0; + +out: + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + return ret; +} + +int32_t +mq_get_metadata(xlator_t *this, loc_t *loc, quota_meta_t *contri, + quota_meta_t *size, quota_inode_ctx_t *ctx, + inode_contribution_t *contribution) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", contribution, out); + + if (size == NULL && contri == NULL) { + ret = 0; + goto out; + } + + ret = _mq_get_metadata(this, loc, contri, size, contribution->gfid); + if (ret < 0) + goto out; + + if (size) { + LOCK(&ctx->lock); + { + ctx->size = size->size; + ctx->file_count = size->file_count; + ctx->dir_count = size->dir_count; + } + UNLOCK(&ctx->lock); + } + + if (contri) { + LOCK(&contribution->lock); + { + contribution->contribution = contri->size; + contribution->file_count = contri->file_count; + contribution->dir_count = contri->dir_count; + } + UNLOCK(&contribution->lock); + } + +out: + return ret; +} + +int32_t +mq_get_delta(xlator_t *this, loc_t *loc, 
quota_meta_t *delta, + quota_inode_ctx_t *ctx, inode_contribution_t *contribution) +{ + int32_t ret = -1; + quota_meta_t size = { + 0, + }; + quota_meta_t contri = { + 0, + }; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", contribution, out); + + ret = mq_get_metadata(this, loc, &contri, &size, ctx, contribution); + if (ret < 0) + goto out; + + mq_compute_delta(delta, &size, &contri); + +out: + return ret; +} + +int32_t +mq_remove_contri(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx, + inode_contribution_t *contri, quota_meta_t *delta, + uint32_t nlink) +{ + int32_t ret = -1; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + + if (nlink == 1) { + /*File was a last link and has been deleted */ + ret = 0; + goto done; + } + + GET_CONTRI_KEY(this, contri_key, contri->gfid, ret); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "get contri_key " + "failed for %s", + uuid_utoa(contri->gfid)); + goto out; + } + + ret = syncop_removexattr(FIRST_CHILD(this), loc, contri_key, 0, NULL); + if (ret < 0) { + if (-ret == ENOENT || -ret == ESTALE || -ret == ENODATA || + -ret == ENOATTR) { + /* Remove contri in done when unlink operation is + * performed, so return success on ENOENT/ESTSLE + * rename operation removes xattr earlier, + * so return success on ENODATA + */ + ret = 0; + } else { + gf_log_callingfn(this->name, GF_LOG_ERROR, + "removexattr %s failed for %s: %s", contri_key, + loc->path, strerror(-ret)); + goto out; + } + } + +done: + LOCK(&contri->lock); + { + contri->contribution += delta->size; + contri->file_count += delta->file_count; + contri->dir_count += delta->dir_count; + } + UNLOCK(&contri->lock); + + ret = 0; + +out: + QUOTA_FREE_CONTRIBUTION_NODE(ctx, contri); + + return ret; +} + +int32_t +mq_update_contri(xlator_t *this, loc_t *loc, inode_contribution_t *contri, + quota_meta_t *delta) +{ + int32_t ret = -1; + char 
contri_key[QUOTA_KEY_MAX] = { + 0, + }; + dict_t *dict = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", delta, out); + GF_VALIDATE_OR_GOTO("marker", contri, out); + + if (quota_meta_is_null(delta)) { + ret = 0; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + GET_CONTRI_KEY(this, contri_key, contri->gfid, ret); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "get contri_key " + "failed for %s", + uuid_utoa(contri->gfid)); + goto out; + } + + ret = quota_dict_set_meta(dict, contri_key, delta, loc->inode->ia_type); + if (ret < 0) + goto out; + + ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_ADD_ARRAY64, dict, + NULL, NULL, NULL); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + LOCK(&contri->lock); + { + contri->contribution += delta->size; + contri->file_count += delta->file_count; + contri->dir_count += delta->dir_count; + } + UNLOCK(&contri->lock); + +out: + if (dict) + dict_unref(dict); + + return ret; +} + +int32_t +mq_update_size(xlator_t *this, loc_t *loc, quota_meta_t *delta) +{ + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + dict_t *dict = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", delta, out); + + if (quota_meta_is_null(delta)) { + ret = 0; + goto out; + } + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get inode ctx for " + "%s", + loc->path); + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = quota_dict_set_size_meta(this, dict, delta); + if (ret < 0) + 
goto out; + + ret = syncop_xattrop(FIRST_CHILD(this), loc, + GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, NULL, + NULL); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + LOCK(&ctx->lock); + { + ctx->size += delta->size; + ctx->file_count += delta->file_count; + if (ctx->dir_count == 0) + ctx->dir_count += delta->dir_count + 1; + else + ctx->dir_count += delta->dir_count; + } + UNLOCK(&ctx->lock); + +out: + if (dict) + dict_unref(dict); + + return ret; +} + +int +mq_synctask_cleanup(int ret, call_frame_t *frame, void *opaque) +{ + quota_synctask_t *args = NULL; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc_wipe(&args->loc); + + if (args->stub) + call_resume(args->stub); + + if (!args->is_static) + GF_FREE(args); + + return 0; +} + +int +mq_synctask1(xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, loc_t *loc, + quota_meta_t *contri, uint32_t nlink, call_stub_t *stub) +{ + int32_t ret = -1; + quota_synctask_t *args = NULL; + quota_synctask_t static_args = { + 0, + }; + + if (spawn) { + QUOTA_ALLOC_OR_GOTO(args, quota_synctask_t, ret, out); + args->is_static = _gf_false; + } else { + args = &static_args; + args->is_static = _gf_true; + } + + args->this = this; + args->stub = stub; + loc_copy(&args->loc, loc); + args->ia_nlink = nlink; + + if (contri) { + args->contri = *contri; + } else { + args->contri.size = -1; + args->contri.file_count = -1; + args->contri.dir_count = -1; + } + + if (spawn) { + ret = synctask_new1(this->ctx->env, 1024 * 16, task, + mq_synctask_cleanup, NULL, args); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to spawn " + "new synctask"); + mq_synctask_cleanup(ret, NULL, args); + } + } else { + ret = task(args); + mq_synctask_cleanup(ret, NULL, args); + } + +out: + return ret; +} + +int +mq_synctask(xlator_t *this, synctask_fn_t task, gf_boolean_t 
spawn, loc_t *loc) +{ + return mq_synctask1(this, task, spawn, loc, NULL, -1, NULL); +} + +int32_t +mq_prevalidate_txn(xlator_t *this, loc_t *origin_loc, loc_t *loc, + quota_inode_ctx_t **ctx, struct iatt *buf) +{ + int32_t ret = -1; + quota_inode_ctx_t *ctxtmp = NULL; + + if (buf) { + if (buf->ia_type == IA_IFREG && IS_DHT_LINKFILE_MODE(buf)) + goto out; + + if (buf->ia_type != IA_IFREG && buf->ia_type != IA_IFLNK && + buf->ia_type != IA_IFDIR) + goto out; + } + + if (origin_loc == NULL || origin_loc->inode == NULL || + gf_uuid_is_null(origin_loc->inode->gfid)) + goto out; + + loc_copy(loc, origin_loc); + + if (gf_uuid_is_null(loc->gfid)) + gf_uuid_copy(loc->gfid, loc->inode->gfid); + + if (!loc_is_root(loc) && loc->parent == NULL) + loc->parent = inode_parent(loc->inode, 0, NULL); + + ret = mq_inode_ctx_get(loc->inode, this, &ctxtmp); + if (ret < 0) { + gf_log_callingfn(this->name, GF_LOG_WARNING, + "inode ctx for " + "is NULL for %s", + loc->path); + goto out; + } + if (ctx) + *ctx = ctxtmp; + + ret = 0; +out: + return ret; +} + +int +mq_create_xattrs_task(void *opaque) +{ + int32_t ret = -1; + gf_boolean_t locked = _gf_false; + gf_boolean_t contri_set = _gf_false; + gf_boolean_t size_set = _gf_false; + gf_boolean_t need_txn = _gf_false; + quota_synctask_t *args = NULL; + quota_inode_ctx_t *ctx = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + gf_boolean_t status = _gf_false; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + this = args->this; + THIS = this; + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to" + "get inode ctx, aborting quota create txn"); + goto out; + } + + if (loc->inode->ia_type == IA_IFDIR) { + /* lock not required for files */ + ret = mq_lock(this, loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + } + + ret = mq_are_xattrs_set(this, loc, &contri_set, &size_set); + if (ret < 0 || (contri_set && size_set)) + goto out; + + 
mq_set_ctx_create_status(ctx, _gf_false); + status = _gf_true; + + if (loc->inode->ia_type == IA_IFDIR && size_set == _gf_false) { + ret = mq_create_size_xattrs(this, ctx, loc); + if (ret < 0) + goto out; + } + + need_txn = _gf_true; +out: + if (locked) + ret = mq_lock(this, loc, F_UNLCK); + + if (status == _gf_false) + mq_set_ctx_create_status(ctx, _gf_false); + + if (need_txn) + ret = mq_initiate_quota_blocking_txn(this, loc, NULL); + + return ret; +} + +static int +_mq_create_xattrs_txn(xlator_t *this, loc_t *origin_loc, struct iatt *buf, + gf_boolean_t spawn) +{ + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + gf_boolean_t status = _gf_true; + loc_t loc = { + 0, + }; + inode_contribution_t *contribution = NULL; + + ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf); + if (ret < 0) + goto out; + + ret = mq_test_and_set_ctx_create_status(ctx, &status); + if (ret < 0 || status == _gf_true) + goto out; + + if (!loc_is_root(&loc) && loc.parent) { + contribution = mq_add_new_contribution_node(this, ctx, &loc); + if (contribution == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "cannot add a new contribution node " + "(%s)", + uuid_utoa(loc.gfid)); + ret = -1; + goto out; + } else { + GF_REF_PUT(contribution); + } + } + + ret = mq_synctask(this, mq_create_xattrs_task, spawn, &loc); +out: + if (ret < 0 && status == _gf_false) + mq_set_ctx_create_status(ctx, _gf_false); + + loc_wipe(&loc); + return ret; +} + +int +mq_create_xattrs_txn(xlator_t *this, loc_t *loc, struct iatt *buf) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + ret = _mq_create_xattrs_txn(this, loc, buf, _gf_true); +out: + return ret; +} + +int32_t +mq_reduce_parent_size_task(void *opaque) +{ + int32_t ret = -1; + int32_t prev_dirty = 0; + quota_inode_ctx_t *ctx = NULL; + quota_inode_ctx_t *parent_ctx = NULL; + inode_contribution_t *contribution = NULL; + quota_meta_t delta = { + 0, + }; + quota_meta_t contri = { + 
0, + }; + loc_t parent_loc = { + 0, + }; + gf_boolean_t locked = _gf_false; + gf_boolean_t dirty = _gf_false; + quota_synctask_t *args = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + gf_boolean_t remove_xattr = _gf_true; + uint32_t nlink = 0; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + contri = args->contri; + nlink = args->ia_nlink; + this = args->this; + THIS = this; + + ret = mq_inode_loc_fill(NULL, loc->parent, &parent_loc); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "parent_loc fill failed for " + "child inode %s: ", + uuid_utoa(loc->inode->gfid)); + goto out; + } + + ret = mq_lock(this, &parent_loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + + if (contri.size >= 0) { + /* contri parameter is supplied only for rename operation. + * remove xattr is alreday performed, we need to skip + * removexattr for rename operation + */ + remove_xattr = _gf_false; + delta.size = contri.size; + delta.file_count = contri.file_count; + delta.dir_count = contri.dir_count; + } else { + remove_xattr = _gf_true; + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log_callingfn(this->name, GF_LOG_WARNING, + "ctx for" + " the node %s is NULL", + loc->path); + goto out; + } + + contribution = mq_get_contribution_node(loc->parent, ctx); + if (contribution == NULL) { + ret = -1; + gf_log(this->name, GF_LOG_DEBUG, + "contribution for the node %s is NULL", loc->path); + goto out; + } + + LOCK(&contribution->lock); + { + delta.size = contribution->contribution; + delta.file_count = contribution->file_count; + delta.dir_count = contribution->dir_count; + } + UNLOCK(&contribution->lock); + } + + ret = mq_get_set_dirty(this, &parent_loc, 1, &prev_dirty); + if (ret < 0) + goto out; + dirty = _gf_true; + + mq_sub_meta(&delta, NULL); + + if (remove_xattr) { + ret = mq_remove_contri(this, loc, ctx, contribution, &delta, nlink); + if (ret < 0) + goto out; + } + + if (quota_meta_is_null(&delta)) + 
goto out; + + ret = mq_update_size(this, &parent_loc, &delta); + if (ret < 0) + goto out; + +out: + if (dirty) { + if (ret < 0 || prev_dirty) { + /* On failure clear dirty status flag. + * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory. + * Do the same if dir was dirty before + * the txn + */ + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); + } else { + ret = mq_mark_dirty(this, &parent_loc, 0); + } + } + + if (locked) + ret = mq_lock(this, &parent_loc, F_UNLCK); + + if (ret >= 0) + ret = mq_initiate_quota_blocking_txn(this, &parent_loc, NULL); + + loc_wipe(&parent_loc); + + if (contribution) + GF_REF_PUT(contribution); + + return ret; +} + +int32_t +mq_reduce_parent_size_txn(xlator_t *this, loc_t *origin_loc, + quota_meta_t *contri, uint32_t nlink, + call_stub_t *stub) +{ + int32_t ret = -1; + loc_t loc = { + 0, + }; + gf_boolean_t resume_stub = _gf_true; + + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", origin_loc, out); + + ret = mq_prevalidate_txn(this, origin_loc, &loc, NULL, NULL); + if (ret < 0) + goto out; + + if (loc_is_root(&loc)) { + ret = 0; + goto out; + } + + resume_stub = _gf_false; + ret = mq_synctask1(this, mq_reduce_parent_size_task, _gf_true, &loc, contri, + nlink, stub); +out: + loc_wipe(&loc); + + if (resume_stub && stub) + call_resume(stub); + + if (ret) + gf_log_callingfn(this ? 
this->name : "Marker", GF_LOG_ERROR, + "mq_reduce_parent_size_txn failed"); + + return ret; +} + +int +mq_initiate_quota_task(void *opaque) +{ + int32_t ret = -1; + int32_t prev_dirty = 0; + loc_t child_loc = { + 0, + }; + loc_t parent_loc = { + 0, + }; + gf_boolean_t locked = _gf_false; + gf_boolean_t dirty = _gf_false; + gf_boolean_t status = _gf_false; + quota_meta_t delta = { + 0, + }; + quota_synctask_t *args = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + inode_contribution_t *contri = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_inode_ctx_t *parent_ctx = NULL; + inode_t *tmp_parent = NULL; + + GF_VALIDATE_OR_GOTO("marker", opaque, out); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + this = args->this; + + GF_VALIDATE_OR_GOTO("marker", this, out); + THIS = this; + + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = mq_loc_copy(&child_loc, loc); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "loc copy failed"); + goto out; + } + + while (!__is_root_gfid(child_loc.gfid)) { + ret = mq_inode_ctx_get(child_loc.inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "inode ctx get failed for %s, " + "aborting update txn", + child_loc.path); + goto out; + } + + /* To improve performance, abort current transaction + * if one is already in progress for same inode + */ + if (status == _gf_true) { + /* status will already set before txn start, + * so it should not be set in first + * loop iteration + */ + ret = mq_test_and_set_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) + goto out; + } + + if (child_loc.parent == NULL) { + ret = mq_build_ancestry(this, &child_loc); + if (ret < 0 || child_loc.parent == NULL) { + /* If application performs parallel remove + * operations on same set of files/directories + * then we may get ENOENT/ESTALE + */ + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG + : GF_LOG_ERROR, + "build ancestry failed for inode %s", + uuid_utoa(child_loc.inode->gfid)); + ret = -1; + goto out; + } + } + + ret = mq_inode_loc_fill(NULL, child_loc.parent, &parent_loc); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "parent_loc fill " + "failed for child inode %s: ", + uuid_utoa(child_loc.inode->gfid)); + goto out; + } + + ret = mq_lock(this, &parent_loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + + mq_set_ctx_updation_status(ctx, _gf_false); + status = _gf_true; + + /* Contribution node can be NULL in below scenarios and + create if needed: + + Scenario 1) + In this case create a new contribution node + Suppose hard link for a file f1 present in a directory d1 is + created in the directory d2 (as f2). Now, since d2's + contribution is not there in f1's inode ctx, d2's + contribution xattr won't be created and will create problems + for quota operations. + + Don't create contribution if parent has been changed after + taking a lock, this can happen when rename is performed + and writes is still in-progress for the same file + + Scenario 2) + When a rename operation is performed, contribution node + for olp path will be removed. + + Create contribution node only if oldparent is same as + newparent. 
+ Consider below example + 1) rename FOP invoked on file 'x' + 2) write is still in progress for file 'x' + 3) rename takes a lock on old-parent + 4) write-update txn blocked on old-parent to acquire lock + 5) in rename_cbk, contri xattrs are removed and contribution + is deleted and lock is released + 6) now write-update txn gets the lock and updates the + wrong parent as it was holding lock on old parent + so validate parent once the lock is acquired + + For more information on this problem, please see + doc for marker_rename in file marker.c + */ + contri = mq_get_contribution_node(child_loc.parent, ctx); + if (contri == NULL) { + tmp_parent = inode_parent(child_loc.inode, 0, NULL); + if (tmp_parent == NULL) { + /* This can happen if application performs + * parallel remove operations on same set + * of files/directories + */ + gf_log(this->name, GF_LOG_WARNING, + "parent is " + "NULL for inode %s", + uuid_utoa(child_loc.inode->gfid)); + ret = -1; + goto out; + } + if (gf_uuid_compare(tmp_parent->gfid, parent_loc.gfid)) { + /* abort txn if parent has changed */ + ret = 0; + goto out; + } + + inode_unref(tmp_parent); + tmp_parent = NULL; + + contri = mq_add_new_contribution_node(this, ctx, &child_loc); + if (contri == NULL) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to " + "create contribution node for %s, " + "abort update txn", + child_loc.path); + ret = -1; + goto out; + } + } + + ret = mq_get_delta(this, &child_loc, &delta, ctx, contri); + if (ret < 0) + goto out; + + if (quota_meta_is_null(&delta)) + goto out; + + ret = mq_get_set_dirty(this, &parent_loc, 1, &prev_dirty); + if (ret < 0) + goto out; + dirty = _gf_true; + + ret = mq_update_contri(this, &child_loc, contri, &delta); + if (ret < 0) + goto out; + + ret = mq_update_size(this, &parent_loc, &delta); + if (ret < 0) { + gf_log(this->name, GF_LOG_DEBUG, + "rollback " + "contri updation"); + mq_sub_meta(&delta, NULL); + mq_update_contri(this, &child_loc, contri, &delta); + goto out; + } + + if 
(prev_dirty == 0) { + ret = mq_mark_dirty(this, &parent_loc, 0); + } else { + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); + } + dirty = _gf_false; + prev_dirty = 0; + + ret = mq_lock(this, &parent_loc, F_UNLCK); + locked = _gf_false; + + if (__is_root_gfid(parent_loc.gfid)) + break; + + /* Repeate above steps upwards till the root */ + loc_wipe(&child_loc); + ret = mq_loc_copy(&child_loc, &parent_loc); + if (ret < 0) + goto out; + + loc_wipe(&parent_loc); + GF_REF_PUT(contri); + contri = NULL; + } + +out: + if ((dirty) && (ret < 0)) { + /* On failure clear dirty status flag. + * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory. + * Do the same if the dir was dirty before + * txn + */ + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); + } + + if (locked) + ret = mq_lock(this, &parent_loc, F_UNLCK); + + if (ctx && status == _gf_false) + mq_set_ctx_updation_status(ctx, _gf_false); + + loc_wipe(&child_loc); + loc_wipe(&parent_loc); + + if (tmp_parent) + inode_unref(tmp_parent); + + if (contri) + GF_REF_PUT(contri); + + return 0; +} + +int +_mq_initiate_quota_txn(xlator_t *this, loc_t *origin_loc, struct iatt *buf, + gf_boolean_t spawn) +{ + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + gf_boolean_t status = _gf_true; + loc_t loc = { + 0, + }; + + ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf); + if (ret < 0) + goto out; + + if (loc_is_root(&loc)) { + ret = 0; + goto out; + } + + ret = mq_test_and_set_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) + goto out; + + ret = mq_synctask(this, mq_initiate_quota_task, spawn, &loc); + +out: + if (ret < 0 && status == _gf_false) + mq_set_ctx_updation_status(ctx, _gf_false); + + loc_wipe(&loc); + return ret; +} + +int +mq_initiate_quota_txn(xlator_t *this, loc_t *loc, struct 
iatt *buf) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + ret = _mq_initiate_quota_txn(this, loc, buf, _gf_true); +out: + return ret; +} + +int +mq_initiate_quota_blocking_txn(xlator_t *this, loc_t *loc, struct iatt *buf) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + ret = _mq_initiate_quota_txn(this, loc, buf, _gf_false); +out: + return ret; +} + +int +mq_update_dirty_inode_task(void *opaque) +{ + int32_t ret = -1; + fd_t *fd = NULL; + off_t offset = 0; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + gf_boolean_t locked = _gf_false; + gf_boolean_t updated = _gf_false; + int32_t dirty = 0; + quota_meta_t contri = { + 0, + }; + quota_meta_t size = { + 0, + }; + quota_meta_t contri_sum = { + 0, + }; + quota_meta_t delta = { + 0, + }; + quota_synctask_t *args = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + quota_inode_ctx_t *ctx = NULL; + dict_t *xdata = NULL; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + this = args->this; + THIS = this; + INIT_LIST_HEAD(&entries.list); + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) + goto out; + + GET_CONTRI_KEY(this, contri_key, loc->gfid, keylen); + if (keylen < 0) { + ret = keylen; + goto out; + } + + xdata = dict_new(); + if (xdata == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = dict_set_int64(xdata, contri_key, 0); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed"); + goto out; + } + + ret = mq_lock(this, loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + + ret = mq_get_dirty(this, loc, &dirty); + if (ret < 0 || dirty == 0) { + ret = 0; + goto out; + } + + fd = 
fd_create(loc->inode, 0); + if (!fd) { + gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); + ret = -1; + goto out; + } + + ret = syncop_opendir(this, loc, fd, NULL, NULL); + if (ret < 0) { + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "opendir failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + fd_bind(fd); + while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, xdata, + NULL)) != 0) { + if (ret < 0) { + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG + : GF_LOG_ERROR, + "readdirp failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + if (list_empty(&entries.list)) + break; + + list_for_each_entry(entry, &entries.list, list) + { + offset = entry->d_off; + + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + + memset(&contri, 0, sizeof(contri)); + quota_dict_get_meta(entry->dict, contri_key, keylen, &contri); + if (quota_meta_is_null(&contri)) + continue; + + mq_add_meta(&contri_sum, &contri); + } + + gf_dirent_free(&entries); + } + /* Inculde for self */ + contri_sum.dir_count++; + + ret = _mq_get_metadata(this, loc, NULL, &size, 0); + if (ret < 0) + goto out; + + mq_compute_delta(&delta, &contri_sum, &size); + + if (quota_meta_is_null(&delta)) + goto out; + + gf_log(this->name, GF_LOG_INFO, + "calculated size = %" PRId64 ", original size = %" PRIu64 + ", diff = %" PRIu64 ", path = %s ", + contri_sum.size, size.size, delta.size, loc->path); + + gf_log(this->name, GF_LOG_INFO, + "calculated f_count = %" PRId64 ", original f_count = %" PRIu64 + ", diff = %" PRIu64 ", path = %s ", + contri_sum.file_count, size.file_count, delta.file_count, loc->path); + + gf_log(this->name, GF_LOG_INFO, + "calculated d_count = %" PRId64 ", original d_count = %" PRIu64 + ", diff = %" PRIu64 ", path = %s ", + contri_sum.dir_count, size.dir_count, delta.dir_count, loc->path); + + ret = mq_update_size(this, loc, &delta); + if (ret < 
0) + goto out; + + updated = _gf_true; + +out: + gf_dirent_free(&entries); + + if (fd) + fd_unref(fd); + + if (xdata) + dict_unref(xdata); + + if (ret < 0) { + /* On failure clear dirty status flag. + * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory + */ + if (ctx) + mq_set_ctx_dirty_status(ctx, _gf_false); + } else if (dirty) { + mq_mark_dirty(this, loc, 0); + } + + if (locked) + mq_lock(this, loc, F_UNLCK); + + if (updated) + mq_initiate_quota_blocking_txn(this, loc, NULL); + + return ret; +} + +int32_t +mq_update_dirty_inode_txn(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx) +{ + int32_t ret = -1; + gf_boolean_t status = _gf_true; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, &status); + if (status == _gf_true) + goto out; + + ret = mq_synctask(this, mq_update_dirty_inode_task, _gf_true, loc); +out: + if (ret < 0 && status == _gf_false) + mq_set_ctx_dirty_status(ctx, _gf_false); + + return ret; +} + +int32_t +mq_inspect_directory_xattr(xlator_t *this, quota_inode_ctx_t *ctx, + inode_contribution_t *contribution, loc_t *loc, + dict_t *dict) +{ + int32_t ret = -1; + int8_t dirty = -1; + quota_meta_t size = { + 0, + }; + quota_meta_t contri = { + 0, + }; + quota_meta_t delta = { + 0, + }; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + gf_boolean_t status = _gf_false; + + ret = dict_get_int8(dict, QUOTA_DIRTY_KEY, &dirty); + if (ret < 0) { + /* dirty is set only on the first file write operation + * so ignore this error + */ + ret = 0; + dirty = 0; + } + + GET_SIZE_KEY(this, size_key, keylen); + if (keylen < 0) { + ret = -1; + goto out; + } + ret = _quota_dict_get_meta(this, dict, size_key, keylen, &size, IA_IFDIR, + _gf_false); + if (ret < 0) + goto create_xattr; + + if (!contribution) + goto create_xattr; + + if (!loc_is_root(loc)) { + 
GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen); + if (keylen < 0) { + ret = -1; + goto out; + } + ret = _quota_dict_get_meta(this, dict, contri_key, keylen, &contri, + IA_IFDIR, _gf_false); + if (ret < 0) + goto create_xattr; + + LOCK(&contribution->lock); + { + contribution->contribution = contri.size; + contribution->file_count = contri.file_count; + contribution->dir_count = contri.dir_count; + } + UNLOCK(&contribution->lock); + } + + LOCK(&ctx->lock); + { + ctx->size = size.size; + ctx->file_count = size.file_count; + ctx->dir_count = size.dir_count; + ctx->dirty = dirty; + } + UNLOCK(&ctx->lock); + + ret = mq_get_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) { + /* If the update txn is in progress abort inspection */ + ret = 0; + goto out; + } + + mq_compute_delta(&delta, &size, &contri); + + if (dirty) { + ret = mq_update_dirty_inode_txn(this, loc, ctx); + goto out; + } + + if (!loc_is_root(loc) && !quota_meta_is_null(&delta)) + mq_initiate_quota_txn(this, loc, NULL); + + ret = 0; + goto out; + +create_xattr: + if (ret < 0) + ret = mq_create_xattrs_txn(this, loc, NULL); + +out: + return ret; +} + +int32_t +mq_inspect_file_xattr(xlator_t *this, quota_inode_ctx_t *ctx, + inode_contribution_t *contribution, loc_t *loc, + dict_t *dict, struct iatt *buf) +{ + int32_t ret = -1; + quota_meta_t size = { + 0, + }; + quota_meta_t contri = { + 0, + }; + quota_meta_t delta = { + 0, + }; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + gf_boolean_t status = _gf_false; + + if (!buf || !contribution || !ctx) + goto out; + + LOCK(&ctx->lock); + { + ctx->size = 512 * buf->ia_blocks; + ctx->file_count = 1; + ctx->dir_count = 0; + + size.size = ctx->size; + size.file_count = ctx->file_count; + size.dir_count = ctx->dir_count; + } + UNLOCK(&ctx->lock); + + GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen); + if (keylen < 0) { + ret = -1; + goto out; + } + + ret = _quota_dict_get_meta(this, dict, contri_key, 
keylen, &contri, + IA_IFREG, _gf_true); + if (ret < 0) { + ret = mq_create_xattrs_txn(this, loc, NULL); + } else { + LOCK(&contribution->lock); + { + contribution->contribution = contri.size; + contribution->file_count = contri.file_count; + contribution->dir_count = contri.dir_count; + } + UNLOCK(&contribution->lock); + + ret = mq_get_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) { + /* If the update txn is in progress abort inspection */ + ret = 0; + goto out; + } + + mq_compute_delta(&delta, &size, &contri); + if (!quota_meta_is_null(&delta)) + mq_initiate_quota_txn(this, loc, NULL); + } + /* TODO: revist this code when fixing hardlinks */ + +out: + return ret; +} + +int32_t +mq_xattr_state(xlator_t *this, loc_t *origin_loc, dict_t *dict, + struct iatt *buf) +{ + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + loc_t loc = { + 0, + }; + inode_contribution_t *contribution = NULL; + + ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf); + if (ret < 0 || loc.parent == NULL) + goto out; + + if (!loc_is_root(&loc)) { + contribution = mq_add_new_contribution_node(this, ctx, &loc); + if (contribution == NULL) { + if (!gf_uuid_is_null(loc.inode->gfid)) + gf_log(this->name, GF_LOG_WARNING, + "cannot add a new contribution node " + "(%s)", + uuid_utoa(loc.gfid)); + ret = -1; + goto out; + } + if (buf->ia_type == IA_IFDIR) + mq_inspect_directory_xattr(this, ctx, contribution, &loc, dict); + else + mq_inspect_file_xattr(this, ctx, contribution, &loc, dict, buf); + } else { + mq_inspect_directory_xattr(this, ctx, 0, &loc, dict); + } + +out: + loc_wipe(&loc); + + if (contribution) + GF_REF_PUT(contribution); + + return ret; +} + +int32_t +mq_req_xattr(xlator_t *this, loc_t *loc, dict_t *dict, char *contri_key, + char *size_key) +{ + int32_t ret = -1; + char key[QUOTA_KEY_MAX] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", dict, out); + + if 
(!loc_is_root(loc)) { + ret = mq_dict_set_contribution(this, dict, loc, NULL, contri_key); + if (ret < 0) + goto out; + } + + GET_SIZE_KEY(this, key, ret); + if (ret < 0) + goto out; + if (size_key) + if (snprintf(size_key, QUOTA_KEY_MAX, "%s", key) >= QUOTA_KEY_MAX) { + ret = -1; + goto out; + } + + ret = dict_set_uint64(dict, key, 0); + if (ret < 0) + goto out; + + ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, 0); + +out: + if (ret < 0) + gf_log_callingfn(this ? this->name : "Marker", GF_LOG_ERROR, + "dict set failed"); + return ret; +} + +int32_t +mq_forget(xlator_t *this, quota_inode_ctx_t *ctx) +{ + inode_contribution_t *contri = NULL; + inode_contribution_t *next = NULL; + + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + + list_for_each_entry_safe(contri, next, &ctx->contribution_head, contri_list) + { + list_del_init(&contri->contri_list); + GF_REF_PUT(contri); + } + + LOCK_DESTROY(&ctx->lock); + GF_FREE(ctx); +out: + return 0; +} diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h new file mode 100644 index 00000000000..4bbf6878b22 --- /dev/null +++ b/xlators/features/marker/src/marker-quota.h @@ -0,0 +1,140 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _MARKER_QUOTA_H +#define _MARKER_QUOTA_H + +#include <glusterfs/xlator.h> +#include "marker-mem-types.h" +#include <glusterfs/refcount.h> +#include <glusterfs/quota-common-utils.h> +#include <glusterfs/call-stub.h> + +#define QUOTA_XATTR_PREFIX "trusted.glusterfs" +#define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty" + +#define CONTRIBUTION "contri" +#define QUOTA_KEY_MAX 512 +#define READDIR_BUF 4096 + +#define QUOTA_ALLOC(var, type, ret) \ + do { \ + ret = 0; \ + var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \ + if (!var) { \ + ret = -1; \ + } \ + } while (0); + +#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label) \ + do { \ + var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \ + if (!var) { \ + gf_log("", GF_LOG_ERROR, "out of memory"); \ + ret = -1; \ + goto label; \ + } \ + ret = 0; \ + } while (0); + +#define GET_QUOTA_KEY(_this, var, key, _ret) \ + do { \ + marker_conf_t *_priv = _this->private; \ + if (_priv->version > 0) \ + _ret = snprintf(var, QUOTA_KEY_MAX, "%s.%d", key, _priv->version); \ + else \ + _ret = snprintf(var, QUOTA_KEY_MAX, "%s", key); \ + } while (0) + +#define GET_CONTRI_KEY(_this, var, _gfid, _ret) \ + do { \ + char _tmp_var[QUOTA_KEY_MAX] = { \ + 0, \ + }; \ + if (_gfid != NULL) { \ + char _gfid_unparsed[40]; \ + gf_uuid_unparse(_gfid, _gfid_unparsed); \ + _ret = snprintf(_tmp_var, QUOTA_KEY_MAX, \ + QUOTA_XATTR_PREFIX ".%s.%s." CONTRIBUTION, \ + "quota", _gfid_unparsed); \ + } else { \ + _ret = snprintf(_tmp_var, QUOTA_KEY_MAX, \ + QUOTA_XATTR_PREFIX ".%s.." 
CONTRIBUTION, "quota"); \ + } \ + GET_QUOTA_KEY(_this, var, _tmp_var, _ret); \ + } while (0) + +#define GET_SIZE_KEY(_this, var, _ret) \ + { \ + GET_QUOTA_KEY(_this, var, QUOTA_SIZE_KEY, _ret); \ + } + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK(lock); \ + var++; \ + UNLOCK(lock); \ + } while (0) + +struct quota_inode_ctx { + int64_t size; + int64_t file_count; + int64_t dir_count; + int8_t dirty; + gf_boolean_t create_status; + gf_boolean_t updation_status; + gf_boolean_t dirty_status; + gf_lock_t lock; + struct list_head contribution_head; +}; +typedef struct quota_inode_ctx quota_inode_ctx_t; + +struct quota_synctask { + xlator_t *this; + loc_t loc; + quota_meta_t contri; + gf_boolean_t is_static; + uint32_t ia_nlink; + call_stub_t *stub; +}; +typedef struct quota_synctask quota_synctask_t; + +struct inode_contribution { + struct list_head contri_list; + int64_t contribution; + int64_t file_count; + int64_t dir_count; + uuid_t gfid; + gf_lock_t lock; + GF_REF_DECL; +}; +typedef struct inode_contribution inode_contribution_t; + +int32_t +mq_req_xattr(xlator_t *, loc_t *, dict_t *, char *, char *); + +int32_t +mq_xattr_state(xlator_t *, loc_t *, dict_t *, struct iatt *); + +int +mq_initiate_quota_txn(xlator_t *, loc_t *, struct iatt *); + +int +mq_initiate_quota_blocking_txn(xlator_t *, loc_t *, struct iatt *); + +int +mq_create_xattrs_txn(xlator_t *this, loc_t *loc, struct iatt *buf); + +int32_t +mq_reduce_parent_size_txn(xlator_t *, loc_t *, quota_meta_t *, uint32_t nlink, + call_stub_t *stub); + +int32_t +mq_forget(xlator_t *, quota_inode_ctx_t *); +#endif diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index 7111c7cd3cf..1375ccc498c 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -1,970 +1,2319 @@ -/*Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. 
- - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "libxlator.h" #include "marker.h" #include "marker-mem-types.h" +#include "marker-quota.h" +#include "marker-quota-helper.h" +#include "marker-common.h" +#include <glusterfs/byte-order.h> +#include <glusterfs/syncop.h> +#include <glusterfs/syscall.h> + +#include <fnmatch.h> + +#define _GF_UID_GID_CHANGED 1 + +static char *mq_ext_xattrs[] = { + QUOTA_SIZE_KEY, + QUOTA_LIMIT_KEY, + QUOTA_LIMIT_OBJECTS_KEY, + NULL, +}; void -fini (xlator_t *this); +fini(xlator_t *this); int32_t -marker_start_setxattr (call_frame_t *, xlator_t *); - +marker_start_setxattr(call_frame_t *, xlator_t *); + +/* When client/quotad request for quota xattrs, + * replace the key-name by adding the version number + * in end of the key-name. + * In the cbk, result value of xattrs for original + * key-name. 
+ * Below function marker_key_replace_with_ver and + * marker_key_set_ver is used for setting/removing + * version for the key-name + */ int -marker_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path) +marker_key_replace_with_ver(xlator_t *this, dict_t *dict) { - int ret = -1; + int ret = -1; + int i = 0; + marker_conf_t *priv = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; - if (!loc) - return ret; + priv = this->private; - if (inode) { - loc->inode = inode_ref (inode); - loc->ino = inode->ino; - } + if (dict == NULL || priv->version <= 0) { + ret = 0; + goto out; + } - if (parent) - loc->parent = inode_ref (parent); + for (i = 0; mq_ext_xattrs[i]; i++) { + if (dict_get(dict, mq_ext_xattrs[i])) { + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) + goto out; - loc->path = gf_strdup (path); - if (!loc->path) { - gf_log ("loc fill", GF_LOG_ERROR, "strdup failed"); - goto loc_wipe; + ret = dict_set(dict, key, dict_get(dict, mq_ext_xattrs[i])); + if (ret < 0) + goto out; + + dict_del(dict, mq_ext_xattrs[i]); } + } - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; - else - goto loc_wipe; + ret = 0; + +out: + return ret; +} + +int +marker_key_set_ver(xlator_t *this, dict_t *dict) +{ + int ret = -1; + int i = -1; + marker_conf_t *priv = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; + priv = this->private; + + if (dict == NULL || priv->version <= 0) { ret = 0; -loc_wipe: + goto out; + } + + for (i = 0; mq_ext_xattrs[i]; i++) { + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); if (ret < 0) - loc_wipe (loc); + goto out; - return ret; + if (dict_get(dict, key)) + dict_set(dict, mq_ext_xattrs[i], dict_get(dict, key)); + } + + ret = 0; +out: + return ret; +} + +marker_local_t * +marker_local_ref(marker_local_t *local) +{ + GF_VALIDATE_OR_GOTO("marker", local, err); + + LOCK(&local->lock); + { + local->ref++; + } + UNLOCK(&local->lock); + + return local; +err: + return NULL; } int -marker_inode_loc_fill (inode_t *inode, loc_t 
*loc) +marker_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path) { - char *resolvedpath = NULL; - inode_t *parent = NULL; - int ret = -1; + int ret = -1; - if ((!inode) || (!loc)) - return ret; + if (!loc) + return ret; - if ((inode) && (inode->ino == 1)) { - loc->parent = NULL; - goto ignore_parent; + if (inode) { + loc->inode = inode_ref(inode); + if (gf_uuid_is_null(loc->gfid)) { + gf_uuid_copy(loc->gfid, loc->inode->gfid); } + } - parent = inode_parent (inode, 0, NULL); - if (!parent) { - goto err; + if (parent) + loc->parent = inode_ref(parent); + + if (path) { + loc->path = gf_strdup(path); + if (!loc->path) { + gf_log("loc fill", GF_LOG_ERROR, "strdup failed"); + goto loc_wipe; } -ignore_parent: - ret = inode_path (inode, NULL, &resolvedpath); - if (ret < 0) - goto err; + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + } - ret = marker_loc_fill (loc, inode, parent, resolvedpath); - if (ret < 0) - goto err; + ret = 0; +loc_wipe: + if (ret < 0) + loc_wipe(loc); -err: - if (parent) - inode_unref (parent); + return ret; +} - if (resolvedpath) - GF_FREE (resolvedpath); +int +_marker_inode_loc_fill(inode_t *inode, inode_t *parent, char *name, loc_t *loc) +{ + char *resolvedpath = NULL; + int ret = -1; + gf_boolean_t free_parent = _gf_false; + if ((!inode) || (!loc)) return ret; + + if (parent && name) + ret = inode_path(parent, name, &resolvedpath); + else + ret = inode_path(inode, NULL, &resolvedpath); + if (ret < 0) + goto err; + + if (parent == NULL) { + parent = inode_parent(inode, NULL, NULL); + free_parent = _gf_true; + } + + ret = marker_loc_fill(loc, inode, parent, resolvedpath); + if (ret < 0) + goto err; + +err: + if (free_parent) + inode_unref(parent); + + GF_FREE(resolvedpath); + + return ret; +} + +int +marker_inode_loc_fill(inode_t *inode, loc_t *loc) +{ + return _marker_inode_loc_fill(inode, NULL, NULL, loc); } int32_t -marker_trav_parent (marker_local_t *local) +marker_trav_parent(marker_local_t *local) { - 
int32_t ret = 0; - loc_t loc = {0, }; + int32_t ret = 0; + loc_t loc = { + 0, + }; + inode_t *parent = NULL; + int8_t need_unref = 0; + + if (!local->loc.parent) { + parent = inode_parent(local->loc.inode, NULL, NULL); + if (parent) + need_unref = 1; + } else + parent = local->loc.parent; - ret = marker_inode_loc_fill (local->loc.parent, &loc); + ret = marker_inode_loc_fill(parent, &loc); - if (ret == -1) - goto out; + if (ret < 0) { + ret = -1; + goto out; + } - loc_wipe (&local->loc); + loc_wipe(&local->loc); - local->loc = loc; + local->loc = loc; out: - return ret; + if (need_unref) + inode_unref(parent); + + return ret; +} + +void +marker_error_handler(xlator_t *this, marker_local_t *local, int32_t op_errno) +{ + marker_conf_t *priv = (marker_conf_t *)this->private; + const char *path = local ? ((local->loc.path) ? local->loc.path + : uuid_utoa(local->loc.gfid)) + : "<nul>"; + + gf_log(this->name, GF_LOG_CRITICAL, + "Indexing gone corrupt at %s (reason: %s)." + " Geo-replication slave content needs to be revalidated", + path, strerror(op_errno)); + sys_unlink(priv->timestamp_file); } int32_t -marker_error_handler (xlator_t *this) +marker_local_unref(marker_local_t *local) { - marker_conf_t *priv = NULL; + int32_t var = 0; - priv = (marker_conf_t *) this->private; + if (local == NULL) + return -1; - unlink (priv->timestamp_file); + LOCK(&local->lock); + { + var = --local->ref; + } + UNLOCK(&local->lock); - return 0; -} + if (var != 0) + goto out; -int32_t -marker_free_local (marker_local_t *local) -{ - loc_wipe (&local->loc); + loc_wipe(&local->loc); + loc_wipe(&local->parent_loc); + if (local->xdata) + dict_unref(local->xdata); - if (local->oplocal) { - loc_wipe (&local->oplocal->loc); - GF_FREE (local->oplocal); - } - GF_FREE (local); + if (local->lk_frame) { + STACK_DESTROY(local->lk_frame->root); + local->lk_frame = NULL; + } - return 0; + if (local->oplocal) { + marker_local_unref(local->oplocal); + local->oplocal = NULL; + } + mem_put(local); +out: + 
return 0; } int32_t -stat_stampfile (xlator_t *this, marker_conf_t *priv, struct volume_mark **status) +stat_stampfile(xlator_t *this, marker_conf_t *priv, struct volume_mark **status) { - struct stat buf; - struct volume_mark *vol_mark; + struct stat buf = { + 0, + }; + struct volume_mark *vol_mark = NULL; - vol_mark = GF_CALLOC (sizeof (struct volume_mark), 1, - gf_marker_mt_volume_mark); + vol_mark = GF_CALLOC(sizeof(struct volume_mark), 1, + gf_marker_mt_volume_mark); - vol_mark->major = 1; - vol_mark->minor = 0; + vol_mark->major = 1; + vol_mark->minor = 0; - GF_ASSERT (sizeof (priv->volume_uuid_bin) == 16); - memcpy (vol_mark->uuid, priv->volume_uuid_bin, 16); + GF_ASSERT(sizeof(priv->volume_uuid_bin) == 16); + memcpy(vol_mark->uuid, priv->volume_uuid_bin, 16); - if (stat (priv->timestamp_file, &buf) != -1) { - vol_mark->retval = 0; - vol_mark->sec = htonl (buf.st_ctime); - vol_mark->usec = htonl (ST_CTIM_NSEC (&buf)/1000); - } else - vol_mark->retval = 0; + if (sys_stat(priv->timestamp_file, &buf) != -1) { + vol_mark->retval = 0; + vol_mark->sec = htonl(buf.st_mtime); + vol_mark->usec = htonl(ST_MTIM_NSEC(&buf) / 1000); + } else + vol_mark->retval = 1; - *status = vol_mark; + *status = vol_mark; - return 0; + return 0; } int32_t -marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this, - const char *name, struct volume_mark *vol_mark) +marker_getxattr_stampfile_cbk(call_frame_t *frame, xlator_t *this, + const char *name, struct volume_mark *vol_mark, + dict_t *xdata) { - int32_t ret; - dict_t *dict = NULL; + int32_t ret = -1; + dict_t *dict = NULL; - if (vol_mark == NULL){ - STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL); + if (vol_mark == NULL) { + STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL); - goto out; - } + goto out; + } - dict = dict_new (); + dict = dict_new(); - ret = dict_set_bin (dict, (char *)name, vol_mark, - sizeof (struct volume_mark)); + ret = dict_set_bin(dict, (char *)name, vol_mark, + sizeof(struct 
volume_mark)); + if (ret) { + GF_FREE(vol_mark); + gf_log(this->name, GF_LOG_WARNING, "failed to set key %s", name); + } - STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict); + STACK_UNWIND_STRICT(getxattr, frame, 0, 0, dict, xdata); - dict_unref (dict); + if (dict) + dict_unref(dict); out: - return 0; + return 0; } -int32_t -call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name) +gf_boolean_t +call_from_special_client(call_frame_t *frame, xlator_t *this, const char *name) { - struct volume_mark *vol_mark = NULL; - marker_conf_t *priv = NULL; - gf_boolean_t ret = _gf_true; + struct volume_mark *vol_mark = NULL; + marker_conf_t *priv = NULL; + gf_boolean_t is_true = _gf_true; - priv = (marker_conf_t *)this->private; + priv = (marker_conf_t *)this->private; - if (frame->root->pid != -1 || name == NULL || - strcmp (name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) { - ret = _gf_false; - goto out; - } + if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL || + strcmp(name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) { + is_true = _gf_false; + goto out; + } - stat_stampfile (this, priv, &vol_mark); + stat_stampfile(this, priv, &vol_mark); - marker_getxattr_stampfile_cbk (frame, this, name, vol_mark); + marker_getxattr_stampfile_cbk(frame, this, name, vol_mark, NULL); out: - return ret; + return is_true; } -int32_t -marker_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) +static gf_boolean_t +_is_quota_internal_xattr(dict_t *d, char *k, data_t *v, void *data) { - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict); - return 0; + int i = 0; + char **external_xattrs = data; + + for (i = 0; external_xattrs && external_xattrs[i]; i++) { + if (strcmp(k, external_xattrs[i]) == 0) + return _gf_false; + } + + if (fnmatch("trusted.glusterfs.quota*", k, 0) == 0) + return _gf_true; + + /* It would be nice if posix filters pgfid xattrs. 
But since marker + * also takes up responsibility to clean these up, adding the filtering + * here (Check 'quota_xattr_cleaner') + */ + if (fnmatch(PGFID_XATTR_KEY_PREFIX "*", k, 0) == 0) + return _gf_true; + + return _gf_false; } -int32_t -marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) +static void +marker_filter_internal_xattrs(xlator_t *this, dict_t *xattrs) { - gf_boolean_t ret; + marker_conf_t *priv = NULL; + char **ext = NULL; - gf_log (this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid); + priv = this->private; + if (priv->feature_enabled & GF_QUOTA) + ext = mq_ext_xattrs; - ret = call_from_special_client (frame, this, name); + dict_foreach_match(xattrs, _is_quota_internal_xattr, ext, + dict_remove_foreach_fn, NULL); +} - if (ret == _gf_false) - STACK_WIND (frame, marker_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name); +static void +marker_filter_gsyncd_xattrs(call_frame_t *frame, xlator_t *this, dict_t *xattrs) +{ + marker_conf_t *priv = NULL; - return 0; + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(frame); + + if (xattrs && frame->root->pid != GF_CLIENT_PID_GSYNCD) { + GF_REMOVE_INTERNAL_XATTR(GF_XATTR_XTIME_PATTERN, xattrs); + } + return; } +int32_t +marker_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + int32_t ret = -1; + if (op_ret < 0) + goto unwind; + + ret = marker_key_set_ver(this, dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (cookie) { + gf_log(this->name, GF_LOG_DEBUG, + "Filtering the quota extended attributes"); + + /* If the getxattr is from a non special client, then do not + copy the quota related xattrs (except the quota limit key + i.e trusted.glusterfs.quota.limit-set which has been set by + glusterd on the directory on which quota limit is set.) for + directories. Let the healing of xattrs happen upon lookup. 
+ NOTE: setting of trusted.glusterfs.quota.limit-set as of now + happens from glusterd. It should be moved to quotad. Also + trusted.glusterfs.quota.limit-set is set on directory which + is permanent till quota is removed on that directory or limit + is changed. So let that xattr be healed by other xlators + properly whenever directory healing is done. + */ + /* + * Except limit-set xattr, rest of the xattrs are maintained + * by quota xlator. Don't expose them to other xlators. + * This filter makes sure quota xattrs are not healed as part of + * metadata self-heal + */ + marker_filter_internal_xattrs(frame->this, dict); + } + + /* Filter gsyncd xtime xattr for non gsyncd clients */ + marker_filter_gsyncd_xattrs(frame, frame->this, dict); + +unwind: + MARKER_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; +} int32_t -marker_setxattr_done (call_frame_t *frame) +marker_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - marker_local_t *local = NULL; + gf_boolean_t is_true = _gf_false; + marker_conf_t *priv = NULL; + unsigned long cookie = 0; + marker_local_t *local = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; + int32_t ret = -1; + int32_t i = 0; + + priv = this->private; + + if (name) { + for (i = 0; mq_ext_xattrs[i]; i++) { + if (strcmp(name, mq_ext_xattrs[i])) + continue; + + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) + goto out; + name = key; + break; + } + } - local = (marker_local_t *) frame->local; + frame->local = mem_get0(this->local_pool); + local = frame->local; + if (local == NULL) + goto out; - frame->local = NULL; + MARKER_INIT_LOCAL(frame, local); - STACK_DESTROY (frame->root); + if ((loc_copy(&local->loc, loc)) < 0) + goto out; - marker_free_local (local); + gf_log(this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid); - return 0; + if (priv && priv->feature_enabled & GF_XTIME) + is_true = call_from_special_client(frame, this, name); + + if (is_true 
== _gf_false) { + if (name == NULL) { + /* Signifies that marker translator + * has to filter the quota's xattr's, + * this is to prevent afr from performing + * self healing on marker-quota xattrs' + */ + cookie = 1; + } + STACK_WIND_COOKIE(frame, marker_getxattr_cbk, (void *)cookie, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, + loc, name, xdata); + } + + return 0; +out: + MARKER_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL); + return 0; } -int -marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int32_t +marker_setxattr_done(call_frame_t *frame) { - int32_t ret = 0; - int32_t done = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; - local = (marker_local_t*) frame->local; + local = (marker_local_t *)frame->local; - if (op_ret == -1 && op_errno == ENOSPC) { - marker_error_handler (this); - done = 1; - goto out; - } + frame->local = NULL; - if (strcmp (local->loc.path, "/") == 0) { - done = 1; - goto out; - } + STACK_DESTROY(frame->root); - ret = marker_trav_parent (local); + marker_local_unref(local); - if (ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "Error occured " - "while traversing to the parent, stopping marker"); + return 0; +} - done = 1; +int +marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t ret = 0; + int32_t done = 1; + marker_local_t *local = NULL; - goto out; + local = (marker_local_t *)frame->local; + + if (op_ret == -1 && op_errno == ENOSPC) { + marker_error_handler(this, local, op_errno); + goto out; + } + + if (local) { + if (local->loc.path && strcmp(local->loc.path, "/") == 0) { + goto out; + } + if (__is_root_gfid(local->loc.gfid)) { + goto out; } + } - marker_start_setxattr (frame, this); + ret = (local) ? 
marker_trav_parent(local) : -1; + if (ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, + "Error occurred " + "while traversing to the parent, stopping marker"); + goto out; + } + + marker_start_setxattr(frame, this); + done = 0; out: - if (done) { - marker_setxattr_done (frame); - } + if (done) { + marker_setxattr_done(frame); + } - return 0; + return 0; } int32_t -marker_start_setxattr (call_frame_t *frame, xlator_t *this) +marker_start_setxattr(call_frame_t *frame, xlator_t *this) { - int32_t ret = 0; - dict_t *dict = NULL; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = -1; + dict_t *dict = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - local = (marker_local_t*) frame->local; + local = (marker_local_t *)frame->local; - dict = dict_new (); + if (!local) + goto out; - ret = dict_set_static_bin (dict, priv->marker_xattr, - (void *)local->timebuf, 8); + dict = dict_new(); - gf_log (this->name, GF_LOG_DEBUG, "path = %s", local->loc.path); + if (!dict) + goto out; - STACK_WIND (frame, marker_specific_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0); + if (local->loc.inode && gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, local->loc.inode->gfid); - dict_unref (dict); + GF_UUID_ASSERT(local->loc.gfid); - return 0; + ret = dict_set_static_bin(dict, priv->marker_xattr, (void *)local->timebuf, + 8); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "failed to set marker xattr (%s)", + local->loc.path); + goto out; + } + + STACK_WIND(frame, marker_specific_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0, NULL); + + ret = 0; +out: + if (dict) + dict_unref(dict); + + return ret; } void -marker_gettimeofday (marker_local_t *local) +marker_gettimeofday(marker_local_t *local) { - struct timeval tv; + struct timeval tv = { + 0, + }; - gettimeofday (&tv, NULL); + 
gettimeofday(&tv, NULL); - local->timebuf [0] = htonl (tv.tv_sec); - local->timebuf [1] = htonl (tv.tv_usec); + local->timebuf[0] = htonl(tv.tv_sec); + local->timebuf[1] = htonl(tv.tv_usec); - return; + return; } int32_t -marker_create_frame (xlator_t *this, marker_local_t *local) +marker_create_frame(xlator_t *this, marker_local_t *local) { - call_frame_t *frame = NULL; + call_frame_t *frame = NULL; - frame = create_frame (this, this->ctx->pool); + frame = create_frame(this, this->ctx->pool); - frame->local = (void *) local; + if (!frame) + return -1; - marker_start_setxattr (frame, this); + frame->local = (void *)local; - return 0; + marker_start_setxattr(frame, this); + + return 0; } int32_t -update_marks (xlator_t *this, marker_local_t *local, int32_t ret) +marker_xtime_update_marks(xlator_t *this, marker_local_t *local) { - if (ret == -1 || local->pid < 0) - marker_free_local (local); - else { - marker_gettimeofday (local); + marker_conf_t *priv = NULL; - marker_create_frame (this, local); - } + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO(this->name, local, out); - return 0; + priv = this->private; + + if ((local->pid == GF_CLIENT_PID_GSYNCD && + !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE)) || + (local->pid == GF_CLIENT_PID_DEFRAG)) + goto out; + + marker_gettimeofday(local); + + marker_local_ref(local); + + marker_create_frame(this, local); +out: + return 0; } int32_t -marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) +marker_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "error occurred " - "while Creating a file %s", strerror 
(op_errno)); - ret = -1; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "while creating directory %s", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + priv = this->private; + + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; } + } - local = (marker_local_t *) frame->local; + STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - frame->local = NULL; + if (op_ret == -1 || local == NULL) + goto out; - STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, - buf, preparent, postparent); + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - update_marks (this, local, ret); + if (priv->feature_enabled & GF_QUOTA) + mq_create_xattrs_txn(this, &local->loc, NULL); - return 0; + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); + +out: + marker_local_unref(local); + + return 0; } int -marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dict_t *params) +marker_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = loc_copy (&local->loc, loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_mkdir_cbk, FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->mkdir, loc, mode, params); + ret = loc_copy(&local->loc, loc); - return 0; + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + + return 0; err: - STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL); - return 0; + MARKER_STACK_UNWIND(mkdir, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + + return 0; } int32_t -marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) -{ - int32_t ret = 0; - marker_local_t *local = NULL; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "error occurred " - "while Creating a file %s", strerror (op_errno)); - ret = -1; +marker_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "while creating file %s", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + priv = this->private; + + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; } + } - local = (marker_local_t *) frame->local; + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); - frame->local = NULL; + if (op_ret == -1 || local == NULL) + goto out; - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); 
+ if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - update_marks (this, local, ret); + if (priv->feature_enabled & GF_QUOTA) + mq_create_xattrs_txn(this, &local->loc, buf); - return 0; + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); + +out: + marker_local_unref(local); + + return 0; } int32_t -marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd, dict_t *params) +marker_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = loc_copy (&local->loc, loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, fd, - params); - return 0; + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; err: - STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + MARKER_STACK_UNWIND(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } int32_t -marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +marker_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_conf_t 
*priv = NULL; + marker_local_t *local = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "error occurred " - "while write, %s", strerror (op_errno)); - ret = -1; - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "while write, %s", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - update_marks (this, local, ret); + if (op_ret == -1 || local == NULL) + goto out; - return 0; + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); + +out: + marker_local_unref(local); + + return 0; } int32_t -marker_writev (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iovec *vector, - int32_t count, - off_t offset, - struct iobref *iobref) +marker_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = marker_inode_loc_fill (fd->inode, &local->loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, - iobref); - return 0; + ret = marker_inode_loc_fill(fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_writev_cbk, 
FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + return 0; err: - STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(writev, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; +} + +int32_t +marker_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + call_stub_t *stub = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "rmdir %s", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + priv = this->private; + + if (op_ret == -1 || local == NULL) + goto out; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); + + if (priv->feature_enabled & GF_QUOTA) { + /* If a 'rm -rf' is performed by a client, rmdir can be faster + than marker background mq_reduce_parent_size_txn. + In this case, as part of rmdir parent child association + will be removed in the server protocol. + This can lead to mq_reduce_parent_size_txn failures. 
+ + So perform mq_reduce_parent_size_txn in foreground + and unwind to server once txn is complete + */ + + stub = fop_rmdir_cbk_stub(frame, default_rmdir_cbk, op_ret, op_errno, + preparent, postparent, xdata); + mq_reduce_parent_size_txn(this, &local->loc, NULL, 1, stub); + + if (stub) { + marker_local_unref(local); + return 0; + } + } + +out: + STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent, + xdata); + + marker_local_unref(local); + + return 0; } int32_t -marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) +marker_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0(this->local_pool); + + MARKER_INIT_LOCAL(frame, local); + + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); + return 0; +err: + MARKER_STACK_UNWIND(rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "error occurred " - "rmdir %s", strerror (op_errno)); - ret = -1; + return 0; +} + +int32_t +marker_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + uint32_t nlink = -1; + GF_UNUSED int32_t ret = 0; + call_stub_t *stub = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred in unlink", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + priv = this->private; + + if (op_ret == -1 || local == 
NULL) + goto out; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); + + if (priv->feature_enabled & GF_QUOTA) { + if (local->skip_txn) + goto out; + + if (xdata) { + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink); + if (ret) { + gf_log(this->name, GF_LOG_TRACE, "dict get failed %s ", + strerror(-ret)); + } } - local = (marker_local_t *) frame->local; + /* If a 'rm -rf' is performed by a client, unlink can be faster + than marker background mq_reduce_parent_size_txn. + In this case, as part of unlink parent child association + will be removed in the server protocol. + This can lead to mq_reduce_parent_size_txn failures. - frame->local = NULL; + So perform mq_reduce_parent_size_txn in foreground + and unwind to server once txn is complete + */ - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, - postparent); + stub = fop_unlink_cbk_stub(frame, default_unlink_cbk, op_ret, op_errno, + preparent, postparent, xdata); + mq_reduce_parent_size_txn(this, &local->loc, NULL, nlink, stub); - update_marks (this, local, ret); + if (stub) { + marker_local_unref(local); + return 0; + } + } - return 0; +out: + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + + marker_local_unref(local); + + return 0; } int32_t -marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) +marker_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + gf_boolean_t dict_free = _gf_false; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto unlink_wind; - ret = loc_copy (&local->loc, loc); + local = mem_get0(this->local_pool); + local->xflag = xflag; + if (xdata) + local->xdata = dict_ref(xdata); + MARKER_INIT_LOCAL(frame, 
local); - if (ret == -1) - goto err; + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; + + if (xdata && dict_get(xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) { + local->skip_txn = 1; + goto unlink_wind; + } + + if (xdata == NULL) { + xdata = dict_new(); + dict_free = _gf_true; + } + + ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + if (ret < 0) + goto err; + +unlink_wind: + STACK_WIND(frame, marker_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + goto out; - STACK_WIND (frame, marker_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, flags); - return 0; err: - STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; +out: + if (dict_free) + dict_unref(xdata); + return 0; } int32_t -marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) +marker_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "%s occurred in unlink", strerror (op_errno)); + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "linking a file ", + strerror(op_errno)); + } - ret = -1; - } + local = (marker_local_t *)frame->local; - local = (marker_local_t *) frame->local; + frame->local = NULL; - frame->local = NULL; + STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, - postparent); + if (op_ret == -1 || local == NULL) + goto out; - update_marks (this, local, ret); + priv = 
this->private; - return 0; + if (priv->feature_enabled & GF_QUOTA) { + if (!local->skip_txn) + mq_create_xattrs_txn(this, &local->loc, buf); + } + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; } int32_t -marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +marker_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = loc_copy (&local->loc, loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc); - return 0; + ret = loc_copy(&local->loc, newloc); + + if (ret == -1) + goto err; + + if (xdata && dict_get(xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) + local->skip_txn = 1; +wind: + STACK_WIND(frame, marker_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; err: - STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } int32_t -marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) +marker_rename_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL, *oplocal = NULL; + loc_t newloc = { + 0, + }; + marker_conf_t *priv = NULL; + + local = frame->local; + oplocal = local->oplocal; + + priv = this->private; + + frame->local = NULL; + + 
if (op_ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)", + oplocal->parent_loc.path, + uuid_utoa(oplocal->parent_loc.inode->gfid), strerror(op_errno)); + } + + if (local->err != 0) + goto err; + + mq_reduce_parent_size_txn(this, &oplocal->loc, &oplocal->contribution, -1, + NULL); + + if (local->loc.inode != NULL) { + /* If destination file exists before rename, it would have + * been unlinked while renaming a file + */ + mq_reduce_parent_size_txn(this, &local->loc, NULL, local->ia_nlink, + NULL); + } + + newloc.inode = inode_ref(oplocal->loc.inode); + newloc.path = gf_strdup(local->loc.path); + newloc.name = strrchr(newloc.path, '/'); + if (newloc.name) + newloc.name++; + newloc.parent = inode_ref(local->loc.parent); + + mq_create_xattrs_txn(this, &newloc, &local->buf); + + loc_wipe(&newloc); + + if (priv->feature_enabled & GF_XTIME) { + if (!local->loc.inode) + local->loc.inode = inode_ref(oplocal->loc.inode); + // update marks on oldpath + gf_uuid_copy(local->loc.gfid, oplocal->loc.inode->gfid); + marker_xtime_update_marks(this, oplocal); + marker_xtime_update_marks(this, local); + } + +err: + marker_local_unref(local); + marker_local_unref(oplocal); + + return 0; +} + +void +marker_rename_release_oldp_lock(marker_local_t *local, xlator_t *this) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + call_frame_t *lk_frame = NULL; + struct gf_flock lock = { + 0, + }; + + oplocal = local->oplocal; + lk_frame = local->lk_frame; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "linking a file ", strerror (op_errno)); - ret = -1; + if (lk_frame == NULL) + goto err; + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = 0; + + STACK_WIND(lk_frame, marker_rename_done, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, this->name, + &oplocal->parent_loc, F_SETLKW, &lock, NULL); + + return; + +err: + 
marker_local_unref(local); + marker_local_unref(oplocal); +} + +int32_t +marker_rename_unwind(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contri = NULL; + + local = frame->local; + oplocal = local->oplocal; + frame->local = NULL; + + // Reset frame uid and gid if set. + if (cookie == (void *)_GF_UID_GID_CHANGED) + MARKER_RESET_UID_GID(frame, frame->root, local); + + if (op_ret < 0) + local->err = op_errno ? op_errno : EINVAL; + + if (local->stub != NULL) { + /* Remove contribution node from in-memory even if + * remove-xattr has failed as the rename is already performed + * if local->stub is set, which means rename was successful + */ + (void)mq_inode_ctx_get(oplocal->loc.inode, this, &ctx); + if (ctx) { + contri = mq_get_contribution_node(oplocal->loc.parent, ctx); + if (contri) { + QUOTA_FREE_CONTRIBUTION_NODE(ctx, contri); + GF_REF_PUT(contri); + } } - local = (marker_local_t *) frame->local; + call_resume(local->stub); + local->stub = NULL; + local->err = 0; + } else if (local->err != 0) { + STACK_UNWIND_STRICT(rename, frame, -1, local->err, NULL, NULL, NULL, + NULL, NULL, NULL); + } else { + gf_log(this->name, GF_LOG_CRITICAL, + "continuation stub to unwind the call is absent, hence " + "call will be hung (call-stack id = %" PRIu64 ")", + frame->root->unique); + } + + /* If there are in-progress writes on old-path during rename + * operation, update txn will update the wrong path if lock + * is released before rename unwind. 
+ * So release lock only after rename unwind + */ + marker_rename_release_oldp_lock(local, this); + + return 0; +} +int32_t +marker_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + call_stub_t *stub = NULL; + int32_t ret = 0; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + loc_t newloc = { + 0, + }; + + local = (marker_local_t *)frame->local; + + if (local != NULL) { + oplocal = local->oplocal; + } + + priv = this->private; + + if (op_ret < 0) { + if (local != NULL) { + local->err = op_errno; + } + + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "renaming a file ", + strerror(op_errno)); + } + + if (priv->feature_enabled & GF_QUOTA) { + if ((op_ret < 0) || (local == NULL)) { + goto quota_err; + } + + local->ia_nlink = 0; + if (xdata) + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, + &local->ia_nlink); + + local->buf = *buf; + stub = fop_rename_cbk_stub(frame, default_rename_cbk, op_ret, op_errno, + buf, preoldparent, postoldparent, + prenewparent, postnewparent, xdata); + if (stub == NULL) { + local->err = ENOMEM; + goto quota_err; + } + + local->stub = stub; + + GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, ret); + if (ret < 0) { + local->err = ENOMEM; + goto quota_err; + } + + /* Removexattr requires uid and gid to be 0, + * reset them in the callback. 
+ */ + MARKER_SET_UID_GID(frame, local, frame->root); + + newloc.inode = inode_ref(oplocal->loc.inode); + newloc.path = gf_strdup(local->loc.path); + newloc.name = strrchr(newloc.path, '/'); + if (newloc.name) + newloc.name++; + newloc.parent = inode_ref(local->loc.parent); + gf_uuid_copy(newloc.gfid, oplocal->loc.inode->gfid); + + STACK_WIND_COOKIE( + frame, marker_rename_unwind, frame->cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, &newloc, contri_key, NULL); + + loc_wipe(&newloc); + } else { frame->local = NULL; - STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); - update_marks (this, local, ret); + if ((op_ret < 0) || (local == NULL)) { + goto out; + } - return 0; + if (priv->feature_enabled & GF_XTIME) { + // update marks on oldpath + if (!local->loc.inode) + local->loc.inode = inode_ref(oplocal->loc.inode); + gf_uuid_copy(local->loc.gfid, oplocal->loc.inode->gfid); + marker_xtime_update_marks(this, oplocal); + marker_xtime_update_marks(this, local); + } + } + +out: + if (!(priv->feature_enabled & GF_QUOTA)) { + marker_local_unref(local); + marker_local_unref(oplocal); + } + + return 0; + +quota_err: + marker_rename_unwind(frame, NULL, this, 0, 0, NULL); + return 0; } int32_t -marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) +marker_do_rename(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + quota_meta_t contribution = { + 0, + }; + + local = frame->local; + oplocal = local->oplocal; + + // Reset frame uid and gid if set. 
+ if (cookie == (void *)_GF_UID_GID_CHANGED) + MARKER_RESET_UID_GID(frame, frame->root, local); + + if ((op_ret < 0) && (op_errno != ENOATTR) && (op_errno != ENODATA)) { + local->err = op_errno ? op_errno : EINVAL; + gf_log(this->name, GF_LOG_WARNING, + "fetching contribution values from %s (gfid:%s) " + "failed (%s)", + oplocal->loc.path, uuid_utoa(oplocal->loc.inode->gfid), + strerror(op_errno)); + goto err; + } + + GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, keylen); + if (keylen < 0) { + local->err = errno ? errno : ENOMEM; + goto err; + } + quota_dict_get_meta(dict, contri_key, keylen, &contribution); + oplocal->contribution = contribution; + + STACK_WIND(frame, marker_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &oplocal->loc, &local->loc, + local->xdata); + + return 0; - ALLOCATE_OR_GOTO (local, marker_local_t, err); +err: + marker_rename_unwind(frame, NULL, this, 0, 0, NULL); + return 0; +} - MARKER_INIT_LOCAL (frame, local); +int32_t +marker_get_oldpath_contribution(call_frame_t *lk_frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + call_frame_t *frame = NULL; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int32_t ret = 0; + + local = lk_frame->local; + oplocal = local->oplocal; + frame = local->frame; + + if (op_ret < 0) { + local->err = op_errno ? op_errno : EINVAL; + gf_log(this->name, GF_LOG_WARNING, + "cannot hold inodelk on %s (gfid:%s) (%s)", oplocal->loc.path, + uuid_utoa(oplocal->loc.inode->gfid), strerror(op_errno)); + if (local->lk_frame) { + STACK_DESTROY(local->lk_frame->root); + local->lk_frame = NULL; + } + goto err; + } - ret = loc_copy (&local->loc, newloc); + GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, ret); + if (ret < 0) { + local->err = errno ? 
errno : ENOMEM; + goto err; + } - if (ret == -1) - goto err; + /* getxattr requires uid and gid to be 0, + * reset them in the callback. + */ + MARKER_SET_UID_GID(frame, local, frame->root); - STACK_WIND (frame, marker_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc); - return 0; -err: - STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); + if (gf_uuid_is_null(oplocal->loc.gfid)) + gf_uuid_copy(oplocal->loc.gfid, oplocal->loc.inode->gfid); - return 0; + GF_UUID_ASSERT(oplocal->loc.gfid); + + STACK_WIND_COOKIE(frame, marker_do_rename, frame->cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, &oplocal->loc, + contri_key, NULL); + + return 0; +err: + marker_rename_unwind(frame, NULL, this, 0, 0, NULL); + return 0; } +/* For a marker_rename FOP, following is the algorithm used for Quota + * accounting. The use-case considered is: + * 1. rename (src, dst) + * 2. both src and dst exist + * 3. there are parallel operations on src and dst (let's say through fds + * opened on them before rename was initiated). + * + * PS: We've not thought through whether this algo works in the presence of + * hardlinks to src and/or dst. + * + * Algorithm: + * ========== + * + * 1) set inodelk on src-parent + * As part of rename operation, parent can change for the file. + * We need to remove contribution (both on disk xattr and in-memory one) + * to src-parent (and its ancestors) and add the contribution to dst-parent + * (and its ancestors). While we are doing these operations, contribution of + * the file/directory shouldn't be changing as we want to be sure that + * a) what we subtract from src-parent is exactly what we add to dst-parent + * b) we should subtract from src-parent exactly what we contributed to + * src-parent + * So, we hold a lock on src-parent to block any parallel transactions on + * src-inode (since that's the one which survives rename). 
+ * + * If there are any parallel transactions on dst-inode they keep succeeding + * till the association of dst-inode with dst-parent is broken because of an + * inode_rename after unwind of rename fop from marker. Only after unwind + * (and hence inode_rename), we delete and subtract the contribution of + * dst-inode to dst-parent. That way we are making sure we subtract exactly + * what dst-inode contributed to dst-parent. + * + * 2) lookup contribution to src-parent on src-inode. + * We need to save the contribution info for use at step-8. + * + * 3) wind rename + * Perform rename on disk + * + * 4) remove xattr on src-loc + * After rename, parent can change, so + * need to remove xattrs storing contribution to src-parent. + * + * 5) remove contribution node corresponding to src-parent from the in-memory + * list. + * After rename, contri gfid can change and we have + * also removed xattr from file. + * We need to remove in-memory contribution node to prevent updates to + * src-parent even after a successful rename + * + * 6) unwind rename + * This will ensure that rename is done in the server + * inode table. An inode_rename disassociates src-inode from src-parent and + * associates it with dst-parent. It also disassociates dst-inode from + * dst-parent. After inode_rename, inode_parent on src-inode will give + * dst-parent and inode_parent on dst-inode will return NULL (assuming + * dst-inode doesn't have any hardlinks). + * + * 7) release inodelk on src-parent + * Lock on src-parent should be released only after + * rename on disk, remove xattr and rename_unwind (and hence inode_rename) + * operations. If lock is released before inode_rename, a parallel + * transaction on src-inode can still update src-parent (as inode_parent on + * src-inode can still return src-parent). This would make the + * contribution from src-inode to src-parent stored in step-2 stale. 
+ * + * 8) Initiate mq_reduce_parent_size_txn on src-parent to remove contribution + * of src-inode to src-parent. We use the contribution stored in step-2. + * Since, we had acquired the lock on src-parent all along step-2 through + * inode_rename, we can be sure that a parallel transaction wouldn't have + * added a delta to src-parent. + * + * 9) Initiate mq_reduce_parent_size_txn on dst-parent if dst-inode exists. + * The size reduced from dst-parent and its ancestors is the + * size stored as contribution to dst-parent in dst-inode. + * If the destination file had existed, rename will unlink the + * destination file as part of its operation. + * We need to reduce the size on the dest parent similarly to + * unlink. Since, we are initiating reduce-parent-size transaction after + * inode_rename, we can be sure that a parallel transaction wouldn't add + * delta to dst-parent while we are reducing the contribution of dst-inode + * from its ancestors before rename. + * + * 10) create contribution xattr to dst-parent on src-inode. 
+ */ int32_t -marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent) -{ - int32_t ret = 0; - marker_local_t *local = NULL; - marker_local_t *oplocal = NULL; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "renaming a file ", strerror (op_errno)); - ret = -1; - } +marker_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + marker_conf_t *priv = NULL; + struct gf_flock lock = { + 0, + }; - local = (marker_local_t *) frame->local; + priv = this->private; - frame->local = NULL; + if (priv->feature_enabled == 0) + goto rename_wind; - STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent, - postoldparent, prenewparent, postnewparent); + local = mem_get0(this->local_pool); - oplocal = local->oplocal; - local->oplocal = NULL; + MARKER_INIT_LOCAL(frame, local); - //update marks on oldpath - update_marks (this, oplocal, ret); - update_marks (this, local, ret); + oplocal = mem_get0(this->local_pool); - return 0; + MARKER_INIT_LOCAL(frame, oplocal); + + frame->local = local; + + local->oplocal = marker_local_ref(oplocal); + + ret = loc_copy(&local->loc, newloc); + if (ret < 0) + goto err; + + ret = loc_copy(&oplocal->loc, oldloc); + if (ret < 0) + goto err; + + if (!(priv->feature_enabled & GF_QUOTA)) { + goto rename_wind; + } + + ret = mq_inode_loc_fill(NULL, newloc->parent, &local->parent_loc); + if (ret < 0) + goto err; + + ret = mq_inode_loc_fill(NULL, oldloc->parent, &oplocal->parent_loc); + if (ret < 0) + goto err; + + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + + local->xdata = xdata ? 
dict_ref(xdata) : dict_new(); + ret = dict_set_int32(local->xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + if (ret < 0) + goto err; + + local->frame = frame; + local->lk_frame = create_frame(this, this->ctx->pool); + if (local->lk_frame == NULL) + goto err; + + local->lk_frame->root->uid = 0; + local->lk_frame->root->gid = 0; + local->lk_frame->local = local; + set_lk_owner_from_ptr(&local->lk_frame->root->lk_owner, + local->lk_frame->root); + + STACK_WIND(local->lk_frame, marker_get_oldpath_contribution, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->inodelk, this->name, + &oplocal->parent_loc, F_SETLKW, &lock, NULL); + + return 0; + +rename_wind: + STACK_WIND(frame, marker_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + + return 0; +err: + MARKER_STACK_UNWIND(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL, + NULL); + marker_local_unref(oplocal); + + return 0; } int32_t -marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc) +marker_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_local_t *oplocal = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "truncating a file ", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) { + /* DHT Rebalance process, at the end of migration will + * first make the src file as a linkto file and then + * truncate the file. By doing a truncate after making the + * src file as linkto file, the contri which is already + * accounted is left over. 
+ * So, we need to account for the linkto file when a truncate + * happens, thereby updating the contri properly. + * By passing NULL for postbuf, mq_prevalidate does not check + * for linkto file. + * Same happens with ftruncate as well. + */ + if (postbuf && IS_DHT_LINKFILE_MODE(postbuf)) + mq_initiate_quota_txn(this, &local->loc, NULL); + else + mq_initiate_quota_txn(this, &local->loc, postbuf); + } - ALLOCATE_OR_GOTO (local, marker_local_t, err); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); - MARKER_INIT_LOCAL (frame, local); +out: + marker_local_unref(local); - ALLOCATE_OR_GOTO (oplocal, marker_local_t, err); + return 0; +} - MARKER_INIT_LOCAL (frame, oplocal); +int32_t +marker_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - frame->local = local; + priv = this->private; - local->oplocal = oplocal; + if (priv->feature_enabled == 0) + goto wind; - ret = loc_copy (&local->loc, newloc); - if (ret == -1) - goto err; + local = mem_get0(this->local_pool); - ret = loc_copy (&oplocal->loc, oldloc); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc); - return 0; + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; err: - STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL, NULL); + MARKER_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } int32_t -marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +marker_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t 
op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "truncating a file ", strerror (op_errno)); - ret = -1; - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "truncating a file ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf); + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - update_marks (this, local, ret); + if (op_ret == -1 || local == NULL) + goto out; - return 0; + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) { + if (postbuf && IS_DHT_LINKFILE_MODE(postbuf)) + mq_initiate_quota_txn(this, &local->loc, NULL); + else + mq_initiate_quota_txn(this, &local->loc, postbuf); + } + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; } int32_t -marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +marker_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = loc_copy (&local->loc, loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); - return 0; + ret = 
marker_inode_loc_fill(fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; err: - STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } int32_t -marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +marker_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "truncating a file ", strerror (op_errno)); - ret = -1; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "creating symlinks ", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + priv = this->private; + + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; } + } - local = (marker_local_t *) frame->local; + STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - frame->local = NULL; + if (op_ret == -1 || local == NULL) + goto out; - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf); + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - update_marks (this, local, ret); + if (priv->feature_enabled & 
GF_QUOTA) { + mq_create_xattrs_txn(this, &local->loc, buf); + } - return 0; + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; } -int32_t -marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +int +marker_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = marker_inode_loc_fill (fd->inode, &local->loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); - return 0; + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata); + return 0; err: - STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(symlink, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } int32_t -marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) -{ - int32_t ret = 0; - marker_local_t *local = NULL; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "creating symlinks ", strerror (op_errno)); - ret = -1; +marker_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + 
marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred with " + "mknod ", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + priv = this->private; + + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; } + } - local = (marker_local_t *) frame->local; + STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - frame->local = NULL; + if (op_ret == -1 || local == NULL) + goto out; - STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - update_marks (this, local, ret); + if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG(local->mode))) { + mq_create_xattrs_txn(this, &local->loc, buf); + } - return 0; + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; } int -marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, dict_t *params) +marker_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = loc_copy (&local->loc, loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_symlink_cbk, FIRST_CHILD(this), 
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, params); - return 0; + ret = loc_copy(&local->loc, loc); + + local->mode = mode; + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + return 0; err: - STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL); - return 0; + MARKER_STACK_UNWIND(mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + + return 0; } int32_t -marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) +marker_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "creating symlinks ", strerror (op_errno)); - ret = -1; - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "fallocating a file ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, - buf, preparent, postparent); + STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - update_marks (this, local, ret); + if (op_ret == -1 || local == NULL) + goto out; - return 0; + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; } -int -marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - 
dev_t rdev, dict_t *parms) +int32_t +marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = loc_copy (&local->loc, loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_mknod_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, parms); - return 0; + ret = marker_inode_loc_fill(fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; err: - STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL); - return 0; + MARKER_STACK_UNWIND(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + +int32_t +marker_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred during discard", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; +} + +int32_t +marker_discard(call_frame_t 
*frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0(this->local_pool); + + MARKER_INIT_LOCAL(frame, local); + + ret = marker_inode_loc_fill(fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; +err: + MARKER_STACK_UNWIND(discard, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + +int32_t +marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred during zerofill", + strerror(op_errno)); + } + + local = (marker_local_t *)frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; +} + +int32_t +marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0(this->local_pool); + + MARKER_INIT_LOCAL(frame, local); + + ret = marker_inode_loc_fill(fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_zerofill_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; +err: + MARKER_STACK_UNWIND(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; } /* when a call from the special client is received on @@ -974,464 +2323,1246 @@ err: * timestamp file. */ int32_t -call_from_sp_client_to_reset_tmfile (call_frame_t *frame, - xlator_t *this, - dict_t *dict) +call_from_sp_client_to_reset_tmfile(call_frame_t *frame, xlator_t *this, + dict_t *dict) { - int32_t ret = 0; - int32_t op_ret = 0; - int32_t op_errno = 0; - data_t *data = NULL; - marker_conf_t *priv = NULL; - char cmd_str[8192] = {0,}; + int32_t fd = 0; + int32_t op_ret = 0; + int32_t op_errno = 0; + data_t *data = NULL; + marker_conf_t *priv = NULL; - if (frame == NULL || this == NULL || dict == NULL) - return -1; + if (frame == NULL || this == NULL || dict == NULL) + return -1; - priv = this->private; + priv = this->private; - data = dict_get (dict, "trusted.glusterfs.volume-mark"); - if (data == NULL) - return -1; + data = dict_get(dict, "trusted.glusterfs.volume-mark"); + if (data == NULL) + return -1; - if (frame->root->pid != -1) { - op_ret = -1; - op_errno = EPERM; + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + op_ret = -1; + op_errno = EPERM; + + goto out; + } + + if (data->len == 0 || + (data->len == 5 && memcmp(data->data, "RESET", 5) == 0)) { + fd = open(priv->timestamp_file, O_WRONLY | O_TRUNC); + if (fd != -1) { + /* TODO check whether the O_TRUNC would update the + * timestamps on a zero length file on all machies. 
+ */ + sys_close(fd); + } - goto out; + if (fd != -1 || errno == ENOENT) { + op_ret = 0; + op_errno = 0; + } else { + op_ret = -1; + op_errno = errno; } + } else { + op_ret = -1; + op_errno = EINVAL; + } +out: + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL); - if (data->len == 0 || (data->len == 5 && - memcmp (data->data, "RESET", 5) == 0)) { + return 0; +} - snprintf (cmd_str, 8192,"touch %s", priv->timestamp_file); - ret = system (cmd_str); +int32_t +marker_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (-1 == ret) { - gf_log (this->name, GF_LOG_ERROR, - "Could not touch TimeStamp file of marker"); - op_ret = -1; - op_errno = errno; - goto out; - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred in " + "setxattr ", + strerror(op_errno)); + } - } else { - op_ret = -1; - op_errno = EINVAL; - } + local = (marker_local_t *)frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno); + marker_local_unref(local); - return 0; + return 0; } -int32_t -marker_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int +remove_quota_keys(dict_t *dict, char *k, data_t *v, void *data) { - int32_t ret = 0; - marker_local_t *local = NULL; + call_frame_t *frame = data; + marker_local_t *local = frame->local; + xlator_t *this = frame->this; + marker_conf_t *priv = NULL; + char ver_str[NAME_MAX] = { + 0, + }; + char *dot = NULL; + int ret = -1; + + priv = this->private; + + /* If quota is enabled immediately after disable. 
+ * quota healing starts creating new xattrs + * before completing the cleanup operation. + * So we should check if the xattr is the new. + * Do not remove xattr if its xattr + * version is same as current version + */ + if ((priv->feature_enabled & GF_QUOTA) && priv->version > 0) { + snprintf(ver_str, sizeof(ver_str), ".%d", priv->version); + dot = strrchr(k, '.'); + if (dot && !strcmp(dot, ver_str)) + return 0; + } + + ret = syncop_removexattr(FIRST_CHILD(this), &local->loc, k, 0, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "%s: Failed to remove " + "extended attribute: %s", + local->loc.path, k); + return -1; + } + return 0; +} - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "creating symlinks ", strerror (op_errno)); - ret = -1; - } +int +quota_xattr_cleaner_cbk(int ret, call_frame_t *frame, void *args) +{ + dict_t *xdata = args; + int op_ret = -1; + int op_errno = 0; - local = (marker_local_t *) frame->local; + op_ret = (ret < 0) ? -1 : 0; + op_errno = -ret; - frame->local = NULL; + MARKER_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + return ret; +} - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno); +int +quota_xattr_cleaner(void *args) +{ + struct synctask *task = NULL; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + marker_local_t *local = NULL; + dict_t *xdata = NULL; + int ret = -1; + + task = synctask_get(); + if (!task) + goto out; + + frame = task->frame; + this = frame->this; + local = frame->local; + + ret = syncop_listxattr(FIRST_CHILD(this), &local->loc, &xdata, NULL, NULL); + if (ret == -1) { + ret = -errno; + goto out; + } + + ret = dict_foreach_fnmatch(xdata, "trusted.glusterfs.quota.*", + remove_quota_keys, frame); + if (ret == -1) { + ret = -errno; + goto out; + } + ret = dict_foreach_fnmatch(xdata, PGFID_XATTR_KEY_PREFIX "*", + remove_quota_keys, frame); + if (ret == -1) { + ret = -errno; + goto out; + } + + ret = 0; +out: + if (xdata) + dict_unref(xdata); - update_marks 
(this, local, ret); + return ret; +} - return 0; +int +marker_do_xattr_cleanup(call_frame_t *frame, xlator_t *this, dict_t *xdata, + loc_t *loc) +{ + int ret = -1; + marker_local_t *local = NULL; + + local = mem_get0(this->local_pool); + if (!local) + goto out; + + MARKER_INIT_LOCAL(frame, local); + + loc_copy(&local->loc, loc); + ret = synctask_new(this->ctx->env, quota_xattr_cleaner, + quota_xattr_cleaner_cbk, frame, xdata); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to create synctask " + "for cleaning up quota extended attributes"); + goto out; + } + + ret = 0; +out: + if (ret) + MARKER_STACK_UNWIND(setxattr, frame, -1, ENOMEM, xdata); + + return ret; +} + +static gf_boolean_t +marker_xattr_cleanup_cmd(dict_t *dict) +{ + return (dict_get(dict, VIRTUAL_QUOTA_XATTR_CLEANUP_KEY) != NULL); } int32_t -marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags) +marker_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + int op_errno = ENOMEM; + + priv = this->private; + + if (marker_xattr_cleanup_cmd(dict)) { + if (frame->root->uid != 0 || frame->root->gid != 0) { + op_errno = EPERM; + ret = -1; + goto err; + } + + /* The following function does the cleanup and then unwinds the + * corresponding call*/ + loc_path(loc, NULL); + marker_do_xattr_cleanup(frame, this, xdata, loc); + return 0; + } - ret = call_from_sp_client_to_reset_tmfile (frame, this, dict); - if (ret == 0) - return 0; + ret = marker_key_replace_with_ver(this, dict); + if (ret < 0) + goto err; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + if (priv->feature_enabled == 0) + goto wind; - MARKER_INIT_LOCAL (frame, local); + ret = call_from_sp_client_to_reset_tmfile(frame, this, dict); + if (ret == 0) + return 0; - ret = loc_copy (&local->loc, loc); + local = 
mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, flags); - return 0; + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; err: - STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM); + MARKER_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); - return 0; + return 0; } int32_t -marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +marker_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "creating symlinks ", strerror (op_errno)); - ret = -1; - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred in " + "fsetxattr", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); - update_marks (this, local, ret); + if (op_ret == -1 || local == NULL) + goto out; - return 0; + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; } int32_t -marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags) +marker_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + 
marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ret = call_from_sp_client_to_reset_tmfile (frame, this, dict); - if (ret == 0) - return 0; + priv = this->private; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + if (priv->feature_enabled == 0) + goto wind; - MARKER_INIT_LOCAL (frame, local); + ret = call_from_sp_client_to_reset_tmfile(frame, this, dict); + if (ret == 0) + return 0; - ret = marker_inode_loc_fill (fd->inode, &local->loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags); - return 0; + ret = marker_inode_loc_fill(fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; err: - STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM); + MARKER_STACK_UNWIND(fsetxattr, frame, -1, ENOMEM, NULL); - return 0; + return 0; } int32_t -marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost) +marker_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "creating symlinks ", strerror (op_errno)); - ret = -1; - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred in " + "fsetattr ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre, - statpost); + 
STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, statpre, statpost, + xdata); - update_marks (this, local, ret); + if (op_ret == -1 || local == NULL) + goto out; - return 0; + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; } int32_t -marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid) +marker_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = marker_inode_loc_fill (fd->inode, &local->loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_fsetattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid); - return 0; + ret = marker_inode_loc_fill(fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; err: - STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } int32_t -marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost) +marker_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret 
== -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "creating symlinks ", strerror (op_errno)); - ret = -1; - } + local = (marker_local_t *)frame->local; - local = (marker_local_t *) frame->local; + frame->local = NULL; - frame->local = NULL; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred during setattr of %s", + strerror(op_errno), (local ? local->loc.path : "<nul>")); + } - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, - statpost); + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); - update_marks (this, local, ret); + if (op_ret == -1 || local == NULL) + goto out; - return 0; + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; } int32_t -marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) +marker_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - ALLOCATE_OR_GOTO (local, marker_local_t, err); + priv = this->private; - MARKER_INIT_LOCAL (frame, local); + if (priv->feature_enabled == 0) + goto wind; - ret = loc_copy (&local->loc, loc); + local = mem_get0(this->local_pool); - if (ret == -1) - goto err; + MARKER_INIT_LOCAL(frame, local); - STACK_WIND (frame, marker_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid); - return 0; + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; err: - STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL); + MARKER_STACK_UNWIND(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } 
int32_t -marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +marker_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s occured while " - "creating symlinks ", strerror (op_errno)); - ret = -1; - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "removing extended attribute", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata); - update_marks (this, local, ret); + if (op_ret == -1 || local == NULL) + goto out; - return 0; + priv = this->private; + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); +out: + marker_local_unref(local); + + return 0; } int32_t -marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) +marker_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; + int32_t ret = -1; + int32_t i = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; + + priv = this->private; + + if (name) { + for (i = 0; mq_ext_xattrs[i]; i++) { + if (strcmp(name, mq_ext_xattrs[i])) + continue; + + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) + goto err; + name = key; + break; + } + } - ALLOCATE_OR_GOTO (local, marker_local_t, err); + if (priv->feature_enabled == 0) + goto wind; - MARKER_INIT_LOCAL (frame, local); + local = mem_get0(this->local_pool); - ret = loc_copy (&local->loc, loc); 
+ MARKER_INIT_LOCAL(frame, local); - if (ret == -1) - goto err; + ret = loc_copy(&local->loc, loc); - STACK_WIND (frame, marker_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name); - return 0; + if (ret == -1) + goto err; +wind: + STACK_WIND(frame, marker_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; err: - STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM); + MARKER_STACK_UNWIND(removexattr, frame, -1, ENOMEM, NULL); - return 0; + return 0; +} + +static gf_boolean_t +__has_quota_xattrs(dict_t *xattrs) +{ + if (dict_foreach_match(xattrs, _is_quota_internal_xattr, NULL, + dict_null_foreach_fn, NULL) > 0) + return _gf_true; + + return _gf_false; } int32_t -mem_acct_init (xlator_t *this) +marker_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, struct iatt *postparent) { - int ret = -1; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + dict_t *xattrs = NULL; + quota_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + + priv = this->private; + local = (marker_local_t *)frame->local; + frame->local = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "lookup failed with %s", + strerror(op_errno)); + goto unwind; + } + + ret = marker_key_set_ver(this, dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (dict && __has_quota_xattrs(dict)) { + xattrs = dict_copy_with_ref(dict, NULL); + if (!xattrs) { + op_ret = -1; + op_errno = ENOMEM; + } else { + marker_filter_internal_xattrs(this, xattrs); + } + } else if (dict) { + xattrs = dict_ref(dict); + } + + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } - if 
(!this) - return ret; +unwind: + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xattrs, + postparent); - ret = xlator_mem_acct_init (this, gf_marker_mt_end + 1); + if (op_ret == -1 || local == NULL) + goto out; - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } + /* copy the gfid from the stat structure instead of inode, + * since if the lookup is fresh lookup, then the inode + * would have not yet linked to the inode table which happens + * in protocol/server. + */ + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - return ret; + if (priv->feature_enabled & GF_QUOTA) { + mq_xattr_state(this, &local->loc, dict, buf); + } + +out: + marker_local_unref(local); + if (xattrs) + dict_unref(xattrs); + + return 0; } int32_t -init (xlator_t *this) +marker_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) { - dict_t *options = NULL; - data_t *data = NULL; - int32_t ret = 0; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + xattr_req = xattr_req ? 
dict_ref(xattr_req) : dict_new(); + if (!xattr_req) + goto err; + + ret = marker_key_replace_with_ver(this, xattr_req); + if (ret < 0) + goto err; + + if (priv->feature_enabled == 0) + goto wind; - if (!this->children) { - gf_log (this->name, GF_LOG_ERROR, - "marker translator needs subvolume defined."); - return -1; + local = mem_get0(this->local_pool); + if (local == NULL) + goto err; + + MARKER_INIT_LOCAL(frame, local); + + ret = loc_copy(&local->loc, loc); + if (ret == -1) + goto err; + + if ((priv->feature_enabled & GF_QUOTA)) + mq_req_xattr(this, loc, xattr_req, NULL, NULL); + +wind: + STACK_WIND(frame, marker_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + + dict_unref(xattr_req); + + return 0; +err: + MARKER_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); + + if (xattr_req) + dict_unref(xattr_req); + + return 0; +} + +int +marker_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + quota_inode_ctx_t *ctx = NULL; + int ret = -1; + + if ((op_ret <= 0) || (entries == NULL)) { + goto out; + } + + list_for_each_entry(entry, &entries->list, list) + { + if (entry->inode == NULL) + continue; + + ret = marker_key_set_ver(this, entry->dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + break; } - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Volume is dangling."); - return -1; + ctx = mq_inode_ctx_new(entry->inode, this); + if (ctx == NULL) + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(entry->inode->gfid)); + } + +out: + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; +} + +int +marker_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + marker_conf_t *priv = NULL; + marker_local_t 
*local = NULL; + loc_t loc = { + 0, + }; + int ret = -1; + char *resolvedpath = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret <= 0) + goto unwind; + + priv = this->private; + local = frame->local; + + if (!(priv->feature_enabled & GF_QUOTA) || (local == NULL)) { + goto unwind; + } + + list_for_each_entry(entry, &entries->list, list) + { + if ((strcmp(entry->d_name, ".") == 0) || + (strcmp(entry->d_name, "..") == 0) || entry->inode == NULL) + continue; + + loc.parent = inode_ref(local->loc.inode); + loc.inode = inode_ref(entry->inode); + ret = inode_path(loc.parent, entry->d_name, &resolvedpath); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get the " + "path for the entry %s", + entry->d_name); + loc_wipe(&loc); + continue; } - options = this->options; + loc.path = resolvedpath; + resolvedpath = NULL; - ALLOCATE_OR_GOTO (this->private, marker_conf_t, err); + ctx = mq_inode_ctx_new(loc.inode, this); + if (ctx == NULL) + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(loc.inode->gfid)); - priv = this->private; + mq_xattr_state(this, &loc, entry->dict, &entry->d_stat); + loc_wipe(&loc); - if( (data = dict_get (options, VOLUME_UUID)) != NULL) { - priv->volume_uuid = data->data; + ret = marker_key_set_ver(this, entry->dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + } - ret = uuid_parse (priv->volume_uuid, priv->volume_uuid_bin); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "invalid volume uuid %s", priv->volume_uuid); - goto err; - } +unwind: + MARKER_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata); - ret = gf_asprintf (& (priv->marker_xattr), "%s.%s.%s", - MARKER_XATTR_PREFIX, priv->volume_uuid, XTIME); + return 0; +} - if (ret == -1){ - priv->marker_xattr = NULL; +int +marker_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + marker_conf_t *priv = NULL; + loc_t loc = { + 0, + }; + 
marker_local_t *local = NULL; + int ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "Failed to allocate memory"); - goto err; - } + priv = this->private; - gf_log (this->name, GF_LOG_DEBUG, - "the volume-uuid = %s", priv->volume_uuid); - } else { - priv->volume_uuid = NULL; + dict = dict ? dict_ref(dict) : dict_new(); + if (!dict) + goto unwind; + + ret = marker_key_replace_with_ver(this, dict); + if (ret < 0) + goto unwind; - gf_log (this->name, GF_LOG_ERROR, - "please specify the volume-uuid" - "in the translator options"); + if (dict_get(dict, GET_ANCESTRY_DENTRY_KEY)) { + STACK_WIND(frame, marker_build_ancestry_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); + } else { + if (priv->feature_enabled & GF_QUOTA) { + local = mem_get0(this->local_pool); - return -1; + MARKER_INIT_LOCAL(frame, local); + + loc.parent = local->loc.inode = inode_ref(fd->inode); + + mq_req_xattr(this, &loc, dict, NULL, NULL); } - if ((data = dict_get (options, TIMESTAMP_FILE)) != NULL) { - priv->timestamp_file = data->data; + STACK_WIND(frame, marker_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); + } - gf_log (this->name, GF_LOG_DEBUG, - "the timestamp-file is = %s", - priv->timestamp_file); + dict_unref(dict); + return 0; +unwind: + MARKER_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); + return 0; +} - } else { - priv->timestamp_file = NULL; +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "please specify the timestamp-file" - "in the translator options"); + if (!this) + return ret; - goto err; - } + ret = xlator_mem_acct_init(this, gf_marker_mt_end + 1); - return 0; -err: - fini (this); + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + " failed"); + return ret; + } + + return ret; +} + +int32_t +init_xtime_priv(xlator_t *this, dict_t *options) +{ + int32_t ret = -1; + marker_conf_t *priv = NULL; + char *tmp_opt = 
NULL; + + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO(this->name, options, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + priv = this->private; + + ret = dict_get_str(options, "volume-uuid", &tmp_opt); + + if (ret) { + priv->volume_uuid = NULL; + tmp_opt = ""; + + gf_log(this->name, GF_LOG_ERROR, + "please specify the volume-uuid" + "in the translator options"); return -1; + } + gf_asprintf(&priv->volume_uuid, "%s", tmp_opt); + + ret = gf_uuid_parse(priv->volume_uuid, priv->volume_uuid_bin); + + if (ret == -1) { + gf_log(this->name, GF_LOG_ERROR, "invalid volume uuid %s", + priv->volume_uuid); + goto out; + } + + ret = gf_asprintf(&(priv->marker_xattr), "%s.%s.%s", MARKER_XATTR_PREFIX, + priv->volume_uuid, XTIME); + + if (ret == -1) { + priv->marker_xattr = NULL; + goto out; + } + + gf_log(this->name, GF_LOG_DEBUG, "volume-uuid = %s", priv->volume_uuid); + + ret = dict_get_str(options, "timestamp-file", &tmp_opt); + if (ret) { + priv->timestamp_file = NULL; + tmp_opt = ""; + + gf_log(this->name, GF_LOG_ERROR, + "please specify the timestamp-file" + "in the translator options"); + + goto out; + } + + ret = gf_asprintf(&priv->timestamp_file, "%s", tmp_opt); + if (ret == -1) { + priv->timestamp_file = NULL; + goto out; + } + + gf_log(this->name, GF_LOG_DEBUG, "the timestamp-file is = %s", + priv->timestamp_file); + + ret = 0; +out: + return ret; } void -fini (xlator_t *this) +marker_xtime_priv_cleanup(xlator_t *this) { - marker_conf_t *priv = NULL; + marker_conf_t *priv = NULL; - priv = (marker_conf_t *) this->private; + GF_VALIDATE_OR_GOTO("marker", this, out); - if (priv == NULL) - goto out; + priv = (marker_conf_t *)this->private; + + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + GF_FREE(priv->volume_uuid); + + GF_FREE(priv->timestamp_file); + + GF_FREE(priv->marker_xattr); +out: + return; +} + +void +marker_priv_cleanup(xlator_t *this) +{ + marker_conf_t *priv = NULL; - if (priv->volume_uuid != NULL) - GF_FREE 
(priv->volume_uuid); + GF_VALIDATE_OR_GOTO("marker", this, out); - if (priv->timestamp_file != NULL) - GF_FREE (priv->timestamp_file); + priv = (marker_conf_t *)this->private; - if (priv->marker_xattr != NULL) - GF_FREE (priv->marker_xattr); + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + marker_xtime_priv_cleanup(this); + + LOCK_DESTROY(&priv->lock); + + GF_FREE(priv); + + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } - GF_FREE (priv); out: - return ; + return; +} + +int32_t +reconfigure(xlator_t *this, dict_t *options) +{ + int32_t ret = 0; + data_t *data = NULL; + gf_boolean_t flag = _gf_false; + marker_conf_t *priv = NULL; + int32_t version = 0; + + GF_ASSERT(this); + GF_ASSERT(this->private); + + priv = this->private; + + priv->feature_enabled = 0; + + GF_VALIDATE_OR_GOTO(this->name, options, out); + + data = dict_get(options, "quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_QUOTA; + } + + data = dict_get(options, "inode-quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_INODE_QUOTA; + } + + data = dict_get(options, "quota-version"); + if (data) + ret = gf_string2int32(data->data, &version); + + if (priv->feature_enabled) { + if (version >= 0) + priv->version = version; + else + gf_log(this->name, GF_LOG_ERROR, + "Invalid quota " + "version %d", + priv->version); + } + + data = dict_get(options, "xtime"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) { + marker_xtime_priv_cleanup(this); + + ret = init_xtime_priv(this, options); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "failed to initialize xtime private, " + "xtime updation will fail"); + } else { + priv->feature_enabled |= GF_XTIME; + data = dict_get(options, "gsync-force-xtime"); + if (!data) + goto out; + ret = 
gf_string2boolean(data->data, &flag); + if (ret == 0 && flag) + priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; + } + } + } +out: + return ret; +} + +int32_t +init(xlator_t *this) +{ + dict_t *options = NULL; + data_t *data = NULL; + int32_t ret = 0; + gf_boolean_t flag = _gf_false; + marker_conf_t *priv = NULL; + + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "marker translator needs subvolume defined."); + return -1; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "Volume is dangling."); + return -1; + } + + options = this->options; + + ALLOCATE_OR_GOTO(this->private, marker_conf_t, err); + + priv = this->private; + + priv->feature_enabled = 0; + priv->version = 0; + + LOCK_INIT(&priv->lock); + + data = dict_get(options, "quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_QUOTA; + } + + data = dict_get(options, "inode-quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_INODE_QUOTA; + } + + data = dict_get(options, "quota-version"); + if (data) + ret = gf_string2int32(data->data, &priv->version); + + if ((ret == 0) && priv->feature_enabled && priv->version < 0) { + gf_log(this->name, GF_LOG_ERROR, "Invalid quota version %d", + priv->version); + goto err; + } + + data = dict_get(options, "xtime"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) { + ret = init_xtime_priv(this, options); + if (ret < 0) + goto err; + + priv->feature_enabled |= GF_XTIME; + data = dict_get(options, "gsync-force-xtime"); + if (!data) + goto cont; + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag) + priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; + } + } + +cont: + this->local_pool = mem_pool_new(marker_local_t, 128); + if (!this->local_pool) { + gf_log(this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + 
goto err; + } + + return 0; +err: + marker_priv_cleanup(this); + + return -1; +} + +int32_t +marker_forget(xlator_t *this, inode_t *inode) +{ + marker_inode_ctx_t *ctx = NULL; + uint64_t value = 0; + + if (inode_ctx_del(inode, this, &value) != 0) + goto out; + + ctx = (marker_inode_ctx_t *)(unsigned long)value; + if (ctx == NULL) { + goto out; + } + + mq_forget(this, ctx->quota_ctx); + + GF_FREE(ctx); +out: + return 0; +} + +void +fini(xlator_t *this) +{ + marker_priv_cleanup(this); } struct xlator_fops fops = { - .create = marker_create, - .unlink = marker_unlink, - .link = marker_link, - .mkdir = marker_mkdir, - .rmdir = marker_rmdir, - .writev = marker_writev, - .rename = marker_rename, - .truncate = marker_truncate, - .ftruncate = marker_ftruncate, - .symlink = marker_symlink, - .mknod = marker_mknod, - .setxattr = marker_setxattr, - .fsetxattr = marker_fsetxattr, - .setattr = marker_setattr, - .fsetattr = marker_fsetattr, - .removexattr = marker_removexattr, - .getxattr = marker_getxattr + .lookup = marker_lookup, + .create = marker_create, + .mkdir = marker_mkdir, + .writev = marker_writev, + .truncate = marker_truncate, + .ftruncate = marker_ftruncate, + .symlink = marker_symlink, + .link = marker_link, + .unlink = marker_unlink, + .rmdir = marker_rmdir, + .rename = marker_rename, + .mknod = marker_mknod, + .setxattr = marker_setxattr, + .fsetxattr = marker_fsetxattr, + .setattr = marker_setattr, + .fsetattr = marker_fsetattr, + .removexattr = marker_removexattr, + .getxattr = marker_getxattr, + .readdirp = marker_readdirp, + .fallocate = marker_fallocate, + .discard = marker_discard, + .zerofill = marker_zerofill, }; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks = {.forget = marker_forget}; struct volume_options options[] = { - {.key = {"volume-uuid"}}, - {.key = {"timestamp-file"}}, - {.key = {NULL}} + {.key = {"volume-uuid"}, .default_value = "{{ volume.id }}"}, + {.key = {"timestamp-file"}}, + { + .key = {"quota"}, + .op_version = {1}, + 
.flags = OPT_FLAG_NONE, + .tags = {}, + }, + { + .key = {"inode-quota"}, + .op_version = {1}, + .flags = OPT_FLAG_NONE, + .tags = {}, + }, + { + .key = {"xtime"}, + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE, + .tags = {}, + }, + { + .key = {"gsync-force-xtime"}, + .op_version = {2}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE, + .tags = {}, + }, + { + .key = {"quota-version"}, + .flags = OPT_FLAG_NONE, + }, + {.key = {NULL}}}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "marker", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h index eeb64a16a87..4821094c14b 100644 --- a/xlators/features/marker/src/marker.h +++ b/xlators/features/marker/src/marker.h @@ -1,68 +1,148 @@ -/*Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
-#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _MARKER_H +#define _MARKER_H -#include "xlator.h" -#include "defaults.h" -#include "uuid.h" +#include "marker-quota.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/compat-uuid.h> +#include <glusterfs/call-stub.h> #define MARKER_XATTR_PREFIX "trusted.glusterfs" -#define XTIME "xtime" -#define VOLUME_MARK "volume-mark" -#define VOLUME_UUID "volume-uuid" -#define TIMESTAMP_FILE "timestamp-file" +#define XTIME "xtime" +#define VOLUME_MARK "volume-mark" +#define VOLUME_UUID "volume-uuid" +#define TIMESTAMP_FILE "timestamp-file" + +enum { + GF_QUOTA = 1, + GF_XTIME = 2, + GF_XTIME_GSYNC_FORCE = 4, + GF_INODE_QUOTA = 8, +}; /*initialize the local variable*/ -#define MARKER_INIT_LOCAL(_frame,_local) do { \ - _frame->local = _local; \ - _local->pid = _frame->root->pid; \ - memset (&_local->loc, 0, sizeof (loc_t)); \ - _local->oplocal = NULL; \ - } while (0) +#define MARKER_INIT_LOCAL(_frame, _local) \ + do { \ + _frame->local = _local; \ + _local->pid = _frame->root->pid; \ + memset(&_local->loc, 0, sizeof(loc_t)); \ + _local->ref = 1; \ + _local->uid = -1; \ + _local->gid = -1; \ + LOCK_INIT(&_local->lock); \ + _local->oplocal = NULL; \ + } while (0) /* try alloc and if it fails, goto label */ -#define ALLOCATE_OR_GOTO(var, type, label) do { \ - var = GF_CALLOC (sizeof (type), 1, \ - gf_marker_mt_##type); \ - if (!var) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "out of memory :("); \ - goto label; \ - } \ - } while (0) - -struct marker_local{ - uint32_t timebuf[2]; - pid_t pid; - loc_t loc; - - struct marker_local *oplocal; +#define ALLOCATE_OR_GOTO(var, type, label) \ + do { \ + var = 
GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \ + if (!var) { \ + gf_log(this->name, GF_LOG_ERROR, "out of memory :("); \ + goto label; \ + } \ + } while (0) + +#define _MARKER_SET_UID_GID(dest, src) \ + do { \ + if (src->uid != -1 && src->gid != -1) { \ + dest->uid = src->uid; \ + dest->gid = src->gid; \ + } \ + } while (0) + +#define MARKER_SET_UID_GID(frame, dest, src) \ + do { \ + _MARKER_SET_UID_GID(dest, src); \ + frame->root->uid = 0; \ + frame->root->gid = 0; \ + frame->cookie = (void *)_GF_UID_GID_CHANGED; \ + } while (0) + +#define MARKER_RESET_UID_GID(frame, dest, src) \ + do { \ + _MARKER_SET_UID_GID(dest, src); \ + frame->cookie = NULL; \ + } while (0) + +#define MARKER_STACK_UNWIND(fop, frame, params...) \ + do { \ + quota_local_t *_local = NULL; \ + if (frame) { \ + _local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + if (_local) \ + marker_local_unref(_local); \ + } while (0) + +struct marker_local { + uint32_t timebuf[2]; + pid_t pid; + loc_t loc; + loc_t parent_loc; + uid_t uid; + gid_t gid; + int32_t ref; + uint32_t ia_nlink; + struct iatt buf; + gf_lock_t lock; + mode_t mode; + int32_t err; + call_stub_t *stub; + call_frame_t *lk_frame; + quota_meta_t contribution; + struct marker_local *oplocal; + + /* marker quota specific */ + int64_t delta; + int64_t d_off; + int64_t sum; + int64_t size; + int32_t hl_count; + int32_t dentry_child_count; + + fd_t *fd; + call_frame_t *frame; + + quota_inode_ctx_t *ctx; + inode_contribution_t *contri; + + int xflag; + dict_t *xdata; + gf_boolean_t skip_txn; }; typedef struct marker_local marker_local_t; -struct marker_conf{ - char *volume_uuid; - uuid_t volume_uuid_bin; - char *timestamp_file; - char *marker_xattr; +#define quota_local_t marker_local_t + +struct marker_inode_ctx { + struct quota_inode_ctx *quota_ctx; +}; +typedef struct marker_inode_ctx marker_inode_ctx_t; + +struct marker_conf { + char feature_enabled; + char *size_key; + char *dirty_key; + 
char *volume_uuid; + uuid_t volume_uuid_bin; + char *timestamp_file; + char *marker_xattr; + uint64_t quota_lk_owner; + gf_lock_t lock; + int32_t version; }; typedef struct marker_conf marker_conf_t; + +#endif diff --git a/xlators/features/marker/utils/Makefile.am b/xlators/features/marker/utils/Makefile.am deleted file mode 100644 index 8aefea4011b..00000000000 --- a/xlators/features/marker/utils/Makefile.am +++ /dev/null @@ -1,7 +0,0 @@ -SUBDIRS = syncdaemon - -gsyncddir = $(libexecdir) - -gsyncd_SCRIPTS = gsyncd - -CLEANFILES = diff --git a/xlators/features/marker/utils/gsyncd.in b/xlators/features/marker/utils/gsyncd.in deleted file mode 100755 index 9bbf8041f36..00000000000 --- a/xlators/features/marker/utils/gsyncd.in +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -prefix="@prefix@" -exec_prefix="@exec_prefix@" -libexecdir=`eval echo "@libexecdir@"` - -PYTHONPATH="$libexecdir"/python exec @PYTHON@ -c "from syncdaemon import gsyncd; gsyncd.main()" -c @sysconfdir@/glusterfs/gsyncd.conf "$@" diff --git a/xlators/features/marker/utils/syncdaemon/Makefile.am b/xlators/features/marker/utils/syncdaemon/Makefile.am deleted file mode 100644 index fc1b42e7f3f..00000000000 --- a/xlators/features/marker/utils/syncdaemon/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -syncdaemondir = $(libexecdir)/python/syncdaemon - -syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py resource.py configinterface.py - -CLEANFILES = diff --git a/xlators/features/marker/utils/syncdaemon/README.md b/xlators/features/marker/utils/syncdaemon/README.md deleted file mode 100644 index d45006932d1..00000000000 --- a/xlators/features/marker/utils/syncdaemon/README.md +++ /dev/null @@ -1,81 +0,0 @@ -gsycnd, the Gluster Syncdaemon -============================== - -REQUIREMENTS ------------- - -_gsyncd_ is a program which can operate either in _master_ or in _slave_ mode. -Requirements are categorized according to this. 
- -* supported OS is GNU/Linux -* Python >= 2.5, or 2.4 with Ctypes (see below) (both) -* OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave) -* rsync (both) -* glusterfs with marker support (master); glusterfs (optional on slave) -* FUSE; for supported versions consult glusterfs - -INSTALLATION ------------- - -As of now, the supported way of operation is running from the source directory. - -If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/). - -CONFIGURATION -------------- - -gsyncd tunables are a subset of the long command-line options; for listing them, -type - - gsyncd.py --help - -and see the long options up to "--config-file". (The leading double dash should be omitted; -interim underscores and dashes are interchangeable.) The set of options bear some resemblance -to those of glusterfs and rsync. - -The config file format matches the following syntax: - - <option1>: <value1> - <option2>: <value2> - # comment - -By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd.conf_ -in the source tree. - -USAGE ------ - -gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally. -Assume we have a gluster volume _pop_ at localhost. We try to set up the following mirrors -for it with gysncd: - -1. _/data/mirror_ -2. local gluster volume _yow_ -3. _/data/far_mirror_ at example.com -4. gluster volume _moz_ at example.com - -The respective gsyncd invocations are (demoing some syntax sugaring): - -1. - - gsyncd.py gluster://localhost:pop file:///data/mirror - - or short form - - gsyncd.py :pop /data/mirror - -2. `gsyncd :pop :yow` -3. - - gsyncd.py :pop ssh://example.com:/data/far_mirror - - or short form - - gsyncd.py :pop example.com:/data/far_mirror - -4. 
`gsyncd.py :pop example.com::moz` - -gsyncd has to be available on both sides; it's location on the remote side has to be specified -via the "--remote-gsyncd" option (or "remote-gsyncd" config file parameter). (This option can also be -used for setting options on the remote side, although the suggested mode of operation is to -set parameters like log file / pid file in the configuration file.) diff --git a/xlators/features/marker/utils/syncdaemon/__init__.py b/xlators/features/marker/utils/syncdaemon/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 --- a/xlators/features/marker/utils/syncdaemon/__init__.py +++ /dev/null diff --git a/xlators/features/marker/utils/syncdaemon/configinterface.py b/xlators/features/marker/utils/syncdaemon/configinterface.py deleted file mode 100644 index 25a2a526818..00000000000 --- a/xlators/features/marker/utils/syncdaemon/configinterface.py +++ /dev/null @@ -1,59 +0,0 @@ -try: - import ConfigParser -except ImportError: - # py 3 - import configparser as ConfigParser - - -DEF_SECT = 'global' - -class GConffile(object): - - def __init__(self, path, peers): - if peers: - self.section = 'peers ' + ' '.join(peers) - else: - self.section = DEF_SECT - self.path = path - self.config = ConfigParser.RawConfigParser() - self.config.read(path) - - def update_to(self, dct): - for sect in set([DEF_SECT, self.section]): - if self.config.has_section(sect): - for k, v in self.config._sections[sect].items(): - if k == '__name__': - continue - k = k.replace('-', '_') - dct[k] = v - - def get(self, opt=None): - d = {} - self.update_to(d) - if opt: - d = {opt: d.get(opt, "")} - for k, v in d.iteritems(): - if k == '__name__': - continue - print("%s: %s" % (k, v)) - - def write(self): - f = None - try: - f = open(self.path, 'wb') - self.config.write(f) - finally: - if f: - f.close() - - def set(self, opt, val): - if not self.config.has_section(self.section): - self.config.add_section(self.section) - self.config.set(self.section, opt, 
val) - self.write() - - def delete(self, opt): - if not self.config.has_section(self.section): - return - if self.config.remove_option(self.section, opt): - self.write() diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/xlators/features/marker/utils/syncdaemon/gconf.py deleted file mode 100644 index cec5be0789b..00000000000 --- a/xlators/features/marker/utils/syncdaemon/gconf.py +++ /dev/null @@ -1,14 +0,0 @@ -import os - -class GConf(object): - ssh_ctl_dir = None - ssh_ctl_args = None - cpid = None - permanent_handles = [] - - @classmethod - def setup_ssh_ctl(cls, ctld): - cls.ssh_ctl_dir = ctld - cls.ssh_ctl_args = ["-oControlMaster=auto", "-S", os.path.join(ctld, "gsycnd-ssh-%r@%h:%p")] - -gconf = GConf() diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py deleted file mode 100644 index b8b92056b54..00000000000 --- a/xlators/features/marker/utils/syncdaemon/gsyncd.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python - -import os -import os.path -import sys -import time -import logging -import signal -import select -import shutil -import optparse -import fcntl -from optparse import OptionParser, SUPPRESS_HELP -from logging import Logger -from errno import EEXIST, ENOENT, EACCES, EAGAIN - -from gconf import gconf -from configinterface import GConffile -import resource - -class GLogger(Logger): - - def makeRecord(self, name, level, *a): - rv = Logger.makeRecord(self, name, level, *a) - rv.nsecs = (rv.created - int(rv.created)) * 1000000 - fr = sys._getframe(4) - callee = fr.f_locals.get('self') - if callee: - ctx = str(type(callee)).split("'")[1].split('.')[-1] - else: - ctx = '<top>' - if not hasattr(rv, 'funcName'): - rv.funcName = fr.f_code.co_name - rv.lvlnam = logging.getLevelName(level)[0] - rv.ctx = ctx - return rv - - @classmethod - def setup(cls, **kw): - if kw.get('slave'): - sls = "(slave)" - else: - sls = "" - lprm = {'datefmt': "%Y-%m-%d %H:%M:%S", - 'format': 
"[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s" + sls + ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s"} - lprm.update(kw) - lvl = kw.get('level', logging.INFO) - if isinstance(lvl, str): - lvl = logging.getLevelName(lvl) - lprm['level'] = lvl - logging.root = cls("root", lvl) - logging.setLoggerClass(cls) - logging.getLogger().handlers = [] - logging.basicConfig(**lprm) - - -def grabfile(fname, content=None): - # damn those messy open() mode codes - fd = os.open(fname, os.O_CREAT|os.O_RDWR) - f = os.fdopen(fd, 'r+b', 0) - try: - fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB) - except: - ex = sys.exc_info()[1] - f.close() - if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN): - # cannot grab, it's taken - return - raise - if content: - try: - f.truncate() - f.write(content) - except: - f.close() - raise - gconf.permanent_handles.append(f) - return f - -def grabpidfile(fname=None, setpid=True): - if not fname: - fname = gconf.pid_file - content = None - if setpid: - content = str(os.getpid()) + '\n' - return grabfile(fname, content=content) - -def startup(**kw): - if getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn': - if not grabpidfile(): - sys.stderr.write("pidfile is taken, exiting.\n") - exit(2) - - if kw.get('go_daemon') == 'should': - x, y = os.pipe() - gconf.cpid = os.fork() - if gconf.cpid: - os.close(x) - sys.exit() - os.close(y) - os.setsid() - dn = os.open(os.devnull, os.O_RDWR) - for f in (sys.stdin, sys.stdout, sys.stderr): - os.dup2(dn, f.fileno()) - if getattr(gconf, 'pid_file', None): - if not grabpidfile(gconf.pid_file + '.tmp'): - raise RuntimeError("cannot grap temporary pidfile") - os.rename(gconf.pid_file + '.tmp', gconf.pid_file) - # wait for parent to terminate - # so we can start up with - # no messing from the dirty - # ol' bustard - select.select((x,), (), ()) - os.close(x) - - lkw = {'level': gconf.log_level} - if kw.get('log_file'): - lkw['filename'] = kw['log_file'] - GLogger.setup(slave=kw.get('slave'), 
**lkw) - -def finalize(*a): - if getattr(gconf, 'pid_file', None): - rm_pidf = True - if gconf.cpid: - # exit path from parent branch of daemonization - rm_pidf = False - while True: - f = grabpidfile(setpid=False) - if not f: - # child has already taken over pidfile - break - if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid: - # child has terminated - rm_pidf = True - break; - time.sleep(0.1) - if rm_pidf: - try: - os.unlink(gconf.pid_file) - except: - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - pass - else: - raise - if gconf.ssh_ctl_dir and not gconf.cpid: - shutil.rmtree(gconf.ssh_ctl_dir) - -def main(): - # ??? "finally" clause does not take effect with SIGTERM... - # but this handler neither does - # signal.signal(signal.SIGTERM, finalize) - GLogger.setup() - try: - try: - main_i() - except: - exc = sys.exc_info()[0] - if exc != SystemExit: - logging.exception("FAIL: ") - sys.stderr.write("failed with %s.\n" % exc.__name__) - sys.exit(1) - finally: - finalize() - # force exit in non-main thread too - os._exit(1) - -def main_i(): - rconf = {'go_daemon': 'should'} - - def store_abs(opt, optstr, val, parser): - setattr(parser.values, opt.dest, os.path.abspath(val)) - def store_local(opt, optstr, val, parser): - rconf[opt.dest] = val - def store_local_curry(val): - return lambda o, oo, vx, p: store_local(o, oo, val, p) - - op = OptionParser(usage="%prog [options...] 
<master> <slave>", version="%prog 0.0.1") - op.add_option('--gluster-command', metavar='CMD', default='glusterfs') - op.add_option('--gluster-log-file', metavar='LOGF', default=os.devnull, type=str, action='callback', callback=store_abs) - op.add_option('--gluster-log-level', metavar='LVL') - op.add_option('-p', '--pid-file', metavar='PIDF', type=str, action='callback', callback=store_abs) - op.add_option('-l', '--log-file', metavar='LOGF', type=str, action='callback', callback=store_abs) - op.add_option('-L', '--log-level', metavar='LVL') - op.add_option('-r', '--remote-gsyncd', metavar='CMD', default='/usr/libexec/gsyncd') - op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh') - op.add_option('--rsync-command', metavar='CMD', default='rsync') - op.add_option('--rsync-extra', metavar='ARGS', default='-sS', help=SUPPRESS_HELP) - op.add_option('--timeout', metavar='SEC', type=int, default=120) - op.add_option('--sync-jobs', metavar='N', type=int, default=3) - op.add_option('--turns', metavar='N', type=int, default=0, help=SUPPRESS_HELP) - - op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local) - # duh. 
need to specify dest or value will be mapped to None :S - op.add_option('--listen', dest='listen', help=SUPPRESS_HELP, action='callback', callback=store_local_curry(True)) - op.add_option('-N', '--no-daemon', dest="go_daemon", action='callback', callback=store_local_curry('dont')) - op.add_option('--debug', dest="go_daemon", action='callback', callback=lambda *a: (store_local_curry('dont')(*a), - a[-1].values.__dict__.get('log_level') or \ - a[-1].values.__dict__.update(log_level='DEBUG'))) - op.add_option('--config-get', metavar='OPT', type=str, dest='config', action='callback', callback=store_local) - op.add_option('--config-get-all', dest='config', action='callback', callback=store_local_curry(True)) - op.add_option('--config-set', metavar='OPT VAL', type=str, nargs=2, dest='config', action='callback', callback=store_local) - op.add_option('--config-del', metavar='OPT', type=str, dest='config', action='callback', callback=lambda o, oo, vx, p: - store_local(o, oo, (vx, False), p)) - - # precedence for sources of values: 1) commandline, 2) cfg file, 3) defaults - # -- for this to work out we need to tell apart defaults from explicitly set - # options... so churn out the defaults here and call the parser with virgin - # values container. 
- defaults = op.get_default_values() - opts, args = op.parse_args(values=optparse.Values()) - if not (len(args) == 2 or (len(args) == 1 and rconf.get('listen')) or (len(args) <= 2 and rconf.get('config'))): - sys.stderr.write("error: incorrect number of arguments\n\n") - sys.stderr.write(op.get_usage() + "\n") - sys.exit(1) - - local = remote = None - if args: - local = resource.parse_url(args[0]) - if len(args) > 1: - remote = resource.parse_url(args[1]) - if not local.can_connect_to(remote): - raise RuntimeError("%s cannot work with %s" % (local.path, remote and remote.path)) - pa = ([], []) - canon = [False, True] - for x in (local, remote): - if x: - for i in range(2): - pa[i].append(x.get_url(canonical=canon[i])) - peers, canon_peers = pa - if not 'config_file' in rconf: - rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd.conf") - confp = os.path.dirname(sys.argv[0]) + "conf/" - try: - st = os.lstat (confp) - except OSError: - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - os.mkdir(confp) - else: - raise - gcnf = GConffile(rconf['config_file'], canon_peers) - - confdata = rconf.get('config') - if confdata: - if isinstance(confdata, tuple): - if confdata[1]: - gcnf.set(*confdata) - else: - gcnf.delete(confdata[0]) - else: - if confdata == True: - confdata = None - gcnf.get(confdata) - return - - gconf.__dict__.update(defaults.__dict__) - gcnf.update_to(gconf.__dict__) - gconf.__dict__.update(opts.__dict__) - - go_daemon = rconf['go_daemon'] - - if isinstance(remote, resource.SSH) and go_daemon == 'should': - go_daemon = 'postconn' - log_file = None - else: - log_file = gconf.log_file - startup(go_daemon=go_daemon, log_file=log_file, slave=(not remote)) - - logging.info("syncing: %s" % " -> ".join(peers)) - if remote: - go_daemon = remote.connect_remote(go_daemon=go_daemon) - if go_daemon: - startup(go_daemon=go_daemon, log_file=gconf.log_file) - # complete remote connection in child - remote.connect_remote(go_daemon='done') - 
local.connect() - local.service_loop(*[r for r in [remote] if r]) - - logging.info("exiting.") - - -if __name__ == "__main__": - main() diff --git a/xlators/features/marker/utils/syncdaemon/master.py b/xlators/features/marker/utils/syncdaemon/master.py deleted file mode 100644 index 1abd0325268..00000000000 --- a/xlators/features/marker/utils/syncdaemon/master.py +++ /dev/null @@ -1,246 +0,0 @@ -import os -import sys -import time -import stat -import signal -import logging -import errno -from errno import ENOENT, ENODATA -from threading import Thread, currentThread, Condition, Lock - -from gconf import gconf - -URXTIME = (-1, 0) - -class GMaster(object): - - def get_volinfo(self): - self.volume_info = self.master.server.volume_info() - if self.volume_info['retval']: - raise RuntimeError("master is corrupt") - return self.volume_info - - @property - def uuid(self): - if not getattr(self, '_uuid', None): - self._uuid = self.volume_info['uuid'] - return self._uuid - - @property - def volmark(self): - return self.volume_info['volume_mark'] - - def xtime(self, path, *a, **opts): - if a: - rsc = a[0] - else: - rsc = self.master - if not 'create' in opts: - opts['create'] = rsc == self.master - xt = rsc.server.xtime(path, self.uuid) - if (isinstance(xt, int) or xt < self.volmark) and opts['create']: - t = time.time() - sec = int(t) - nsec = int((t - sec) * 1000000) - xt = (sec, nsec) - rsc.server.set_xtime(path, self.uuid, xt) - if xt == ENODATA: - xt = URXTIME - return xt - - def __init__(self, master, slave): - self.master = master - self.slave = slave - self.get_volinfo() - self.jobtab = {} - self.syncer = Syncer(slave) - self.total_turns = int(gconf.turns) - self.turns = 0 - self.start = None - self.change_seen = None - logging.info('master started on ' + self.uuid) - while True: - self.crawl() - - def add_job(self, path, label, job, *a, **kw): - if self.jobtab.get(path) == None: - self.jobtab[path] = [] - self.jobtab[path].append((label, a, lambda : job(*a, **kw))) - 
- def wait(self, path, *args): - jobs = self.jobtab.pop(path, []) - succeed = True - for j in jobs: - ret = j[-1]() - if not ret: - succeed = False - if succeed: - self.sendmark(path, *args) - return succeed - - def sendmark(self, path, mark, adct=None): - if adct: - self.slave.server.setattr(path, adct) - self.slave.server.set_xtime(path, self.uuid, mark) - - def crawl(self, path='.', xtl=None): - if path == '.': - if self.start: - logging.info("crawl took %.6f" % (time.time() - self.start)) - time.sleep(1) - self.start = time.time() - logging.info("crawling...") - self.get_volinfo() - if self.volume_info['uuid'] != self.uuid: - raise RuntimeError("master uuid mismatch") - logging.debug("entering " + path) - if not xtl: - xtl = self.xtime(path) - xtr0 = self.xtime(path, self.slave) - if isinstance(xtr0, int): - xtr = URXTIME - else: - xtr = xtr0 - if xtr0 == ENOENT: - self.slave.server.mkdir(path) - else: - if xtr > xtl: - raise RuntimeError("timestamp corruption for " + path) - if xtl == xtr: - if path == '.' 
and self.total_turns and self.change_seen: - self.turns += 1 - self.change_seen = False - logging.info("finished turn #%s/%s" % (self.turns, self.total_turns)) - if self.turns == self.total_turns: - logging.info("reached turn limit, terminating.") - os.kill(os.getpid(), signal.SIGTERM) - return - if path == '.': - self.change_seen = True - dem, des = ( x.server.entries(path) for x in (self.master, self.slave) ) - dd = set(des) - set(dem) - if dd: - self.slave.server.purge(path, dd) - chld = [] - for e in dem: - e = os.path.join(path, e) - xte = self.xtime(e) - if isinstance(xte, int): - logging.warn("irregular xtime for %s: %s" % (e, errno.errorcode[xte])) - elif xte > xtr: - chld.append((e, xte)) - def indulgently(e, fnc, blame=None): - if not blame: - blame = path - try: - return fnc(e) - except (IOError, OSError): - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - logging.warn("salvaged ENOENT for" + e) - self.add_job(blame, 'salvage', lambda: False) - return False - else: - raise - for e, xte in chld: - st = indulgently(e, lambda e: os.lstat(e)) - if st == False: - continue - mo = st.st_mode - adct = {'own': (st.st_uid, st.st_gid)} - if stat.S_ISLNK(mo): - if indulgently(e, lambda e: self.slave.server.symlink(os.readlink(e), e)) == False: - continue - self.sendmark(e, xte, adct) - elif stat.S_ISREG(mo): - logging.debug("syncing %s ..." 
% e) - pb = self.syncer.add(e) - def regjob(e, xte, pb): - if pb.wait(): - logging.debug("synced " + e) - self.sendmark(e, xte) - return True - else: - logging.error("failed to sync " + e) - self.add_job(path, 'reg', regjob, e, xte, pb) - elif stat.S_ISDIR(mo): - adct['mode'] = mo - if indulgently(e, lambda e: (self.add_job(path, 'cwait', self.wait, e, xte, adct), - self.crawl(e, xte), - True)[-1], blame=e) == False: - continue - else: - # ignore fifos, sockets and special files - pass - if path == '.': - self.wait(path, xtl) - -class BoxClosedErr(Exception): - pass - -class PostBox(list): - - def __init__(self, *a): - list.__init__(self, *a) - self.lever = Condition() - self.open = True - self.done = False - - def wait(self): - self.lever.acquire() - if not self.done: - self.lever.wait() - self.lever.release() - return self.result - - def wakeup(self, data): - self.result = data - self.lever.acquire() - self.done = True - self.lever.notifyAll() - self.lever.release() - - def append(self, e): - self.lever.acquire() - if not self.open: - raise BoxClosedErr - list.append(self, e) - self.lever.release() - - def close(self): - self.lever.acquire() - self.open = False - self.lever.release() - -class Syncer(object): - - def __init__(self, slave): - self.slave = slave - self.lock = Lock() - self.pb = PostBox() - for i in range(int(gconf.sync_jobs)): - t = Thread(target=self.syncjob) - t.setDaemon = True - t.start() - - def syncjob(self): - while True: - pb = None - while True: - self.lock.acquire() - if self.pb: - pb, self.pb = self.pb, PostBox() - self.lock.release() - if pb: - break - time.sleep(0.5) - pb.close() - pb.wakeup(self.slave.rsync(pb)) - - def add(self, e): - while True: - try: - self.pb.append(e) - return self.pb - except BoxClosedErr: - pass diff --git a/xlators/features/marker/utils/syncdaemon/repce.py b/xlators/features/marker/utils/syncdaemon/repce.py deleted file mode 100644 index 1b8d0203cff..00000000000 --- 
a/xlators/features/marker/utils/syncdaemon/repce.py +++ /dev/null @@ -1,162 +0,0 @@ -import os -import sys -import select -import time -import logging -from threading import Thread, Condition -try: - import thread -except ImportError: - # py 3 - import _thread as thread -try: - from Queue import Queue -except ImportError: - # py 3 - from queue import Queue -try: - import cPickle as pickle -except ImportError: - # py 3 - import pickle - -pickle_proto = -1 -repce_version = 1.0 - -def ioparse(i, o): - if isinstance(i, int): - i = os.fdopen(i) - # rely on duck typing for recognizing - # streams as that works uniformly - # in py2 and py3 - if hasattr(o, 'fileno'): - o = o.fileno() - return (i, o) - -def send(out, *args): - os.write(out, pickle.dumps(args, pickle_proto)) - -def recv(inf): - return pickle.load(inf) - - -class RepceServer(object): - - def __init__(self, obj, i, o, wnum=6): - self.obj = obj - self.inf, self.out = ioparse(i, o) - self.wnum = wnum - self.q = Queue() - - def service_loop(self): - for i in range(self.wnum): - t = Thread(target=self.worker) - t.setDaemon(True) - t.start() - try: - while True: - self.q.put(recv(self.inf)) - except EOFError: - logging.info("terminating on reaching EOF.") - - def worker(self): - while True: - in_data = self.q.get(True) - rid = in_data[0] - rmeth = in_data[1] - exc = False - if rmeth == '__repce_version__': - res = repce_version - else: - try: - res = getattr(self.obj, rmeth)(*in_data[2:]) - except: - res = sys.exc_info()[1] - exc = True - logging.exception("call failed: ") - send(self.out, rid, exc, res) - - -class RepceJob(object): - - def __init__(self, cbk): - self.rid = (os.getpid(), thread.get_ident(), time.time()) - self.cbk = cbk - self.lever = Condition() - self.done = False - - def __repr__(self): - return ':'.join([str(x) for x in self.rid]) - - def wait(self): - self.lever.acquire() - if not self.done: - self.lever.wait() - self.lever.release() - return self.result - - def wakeup(self, data): - 
self.result = data - self.lever.acquire() - self.done = True - self.lever.notify() - self.lever.release() - - -class RepceClient(object): - - def __init__(self, i, o): - self.inf, self.out = ioparse(i, o) - self.jtab = {} - t = Thread(target = self.listen) - t.setDaemon(True) - t.start() - - def listen(self): - while True: - select.select((self.inf,), (), ()) - rid, exc, res = recv(self.inf) - rjob = self.jtab.pop(rid) - if rjob.cbk: - rjob.cbk(rjob, [exc, res]) - - def push(self, meth, *args, **kw): - cbk = kw.get('cbk') - if not cbk: - def cbk(rj, res): - if res[0]: - raise res[1] - rjob = RepceJob(cbk) - self.jtab[rjob.rid] = rjob - logging.debug("call %s %s%s ..." % (repr(rjob), meth, repr(args))) - send(self.out, rjob.rid, meth, *args) - return rjob - - def __call__(self, meth, *args): - rjob = self.push(meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)}) - exc, res = rjob.wait() - if exc: - logging.error('call %s (%s) failed on peer with %s' % (repr(rjob), meth, str(type(res).__name__))) - raise res - logging.debug("call %s %s -> %s" % (repr(rjob), meth, repr(res))) - return res - - class mprx(object): - - def __init__(self, ins, meth): - self.ins = ins - self.meth = meth - - def __call__(self, *a): - return self.ins(self.meth, *a) - - def __getattr__(self, meth): - return self.mprx(self, meth) - - def __version__(self): - d = {'proto': self('__repce_version__')} - try: - d['object'] = self('version') - except AttributeError: - pass - return d diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/xlators/features/marker/utils/syncdaemon/resource.py deleted file mode 100644 index efd1360758f..00000000000 --- a/xlators/features/marker/utils/syncdaemon/resource.py +++ /dev/null @@ -1,441 +0,0 @@ -import re -import os -import sys -import stat -import time -import errno -import struct -import select -import socket -import logging -import tempfile -import threading -from ctypes import * -from ctypes.util import find_library -from errno import 
EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP - -from gconf import gconf -import repce -from repce import RepceServer, RepceClient -from master import GMaster - -UrlRX = re.compile('\A(\w+)://(.*)') -HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I) -UserRX = re.compile("[\w!\#$%&'*+-\/=?^_`{|}~]+") - -def sup(x, *a, **kw): - return getattr(super(type(x), x), sys._getframe(1).f_code.co_name)(*a, **kw) - -def desugar(ustr): - m = re.match('([^:]*):(.*)', ustr) - if m: - if not m.groups()[0]: - return "gluster://localhost" + ustr - elif '@' in m.groups()[0] or re.search('[:/]', m.groups()[1]): - return "ssh://" + ustr - else: - return "gluster://#{str}" - else: - return "file://" + os.path.abspath(ustr) - -def parse_url(ustr): - m = UrlRX.match(ustr) - if not m: - ustr = desugar(ustr) - m = UrlRX.match(ustr) - if not m: - raise RuntimeError("malformed url") - sch, path = m.groups() - this = sys.modules[__name__] - if not hasattr(this, sch.upper()): - raise RuntimeError("unknown url scheme " + sch) - return getattr(this, sch.upper())(path) - - -class Xattr(object): - - libc = CDLL(find_library("libc")) - - @classmethod - def geterrno(cls): - return c_int.in_dll(cls.libc, 'errno').value - - @classmethod - def raise_oserr(cls): - errn = cls.geterrno() - raise OSError(errn, os.strerror(errn)) - - @classmethod - def lgetxattr(cls, path, attr, siz=0): - if siz: - buf = create_string_buffer('\0' * siz) - else: - buf = None - ret = cls.libc.lgetxattr(path, attr, buf, siz) - if ret == -1: - cls.raise_oserr() - if siz: - return buf.raw[:ret] - else: - return ret - - @classmethod - def lsetxattr(cls, path, attr, val): - ret = cls.libc.lsetxattr(path, attr, val, len(val), 0) - if ret == -1: - cls.raise_oserr() - - -class Server(object): - - GX_NSPACE = "trusted.glusterfs" - - @staticmethod - def entries(path): - try: - return os.listdir(path) - except OSError: - ex = sys.exc_info()[1] - if ex.errno == ENOTDIR: - return [] - else: - raise - - @classmethod - def purge(cls, 
path, entries=None): - me_also = entries == None - if not entries: - try: - entries = os.listdir(path) - except OSError: - ex = sys.exc_info()[1] - if ex.errno in (ENOTDIR, ENOENT, ELOOP): - try: - os.unlink(path) - return - except OSError: - ex = sys.exc_info()[1] - if ex.errno != ENOENT: - raise - else: - raise - for e in entries: - cls.purge(os.path.join(path, e)) - if me_also: - try: - os.rmdir(path) - except OSError: - ex = sys.exc_info()[1] - if ex.errno == ENOTDIR: - try: - os.unlink(path) - return - except OSError: - ex = sys.exc_info()[1] - if ex.errno != ENOENT: - raise - elif ex.errno == ENOENT: - logging.debug ("Trying to delete a file which is not present") - else: - raise - - @classmethod - def _create(cls, path, ctor): - try: - ctor(path) - except OSError: - ex = sys.exc_info()[1] - if ex.errno == EEXIST: - cls.purge(path) - return ctor(path) - raise - - @classmethod - def mkdir(cls, path): - cls._create(path, os.mkdir) - - @classmethod - def symlink(cls, lnk, path): - cls._create(path, lambda p: os.symlink(lnk, p)) - - @classmethod - def xtime(cls, path, uuid): - try: - return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), 8)) - except OSError: - ex = sys.exc_info()[1] - if ex.errno in (ENOENT, ENODATA): - return ex.errno - else: - raise - - @classmethod - def set_xtime(cls, path, uuid, mark): - try: - Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark)) - except OSError: - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - logging.error ("File for which the setxattr to be done is not present") - - @staticmethod - def setattr(path, adct): - own = adct.get('own') - if own: - os.lchown(path, *own) - mode = adct.get('mode') - if mode: - os.chmod(path, stat.S_IMODE(mode)) - times = adct.get('times') - if times: - os.utime(path, times) - - @staticmethod - def pid(): - return os.getpid() - - lastping = 0 - @classmethod - def ping(cls): - cls.lastping += 1 - return cls.lastping - - 
@staticmethod - def version(): - return 1.0 - - -class SlaveLocal(object): - - def can_connect_to(self, remote): - return not remote - - def service_loop(self): - repce = RepceServer(self.server, sys.stdin, sys.stdout, int(gconf.sync_jobs)) - t = threading.Thread(target=repce.service_loop) - t.setDaemon(True) - t.start() - logging.info("slave listening") - if gconf.timeout and int(gconf.timeout) > 0: - while True: - lp = self.server.lastping - time.sleep(int(gconf.timeout)) - if lp == self.server.lastping: - logging.info("connection inactive for %d seconds, stopping" % int(gconf.timeout)) - break - else: - select.select((), (), ()) - -class SlaveRemote(object): - - def connect_remote(self, rargs=[], **opts): - slave = opts.get('slave', self.url) - ix, ox = os.pipe() - iy, oy = os.pipe() - pid = os.fork() - if not pid: - os.close(ox) - os.dup2(ix, sys.stdin.fileno()) - os.close(iy) - os.dup2(oy, sys.stdout.fileno()) - argv = rargs + gconf.remote_gsyncd.split() + ['-N', '--listen', '--timeout', str(gconf.timeout), slave] - os.execvp(argv[0], argv) - os.close(ix) - os.close(oy) - return self.start_fd_client(iy, ox, **opts) - - def start_fd_client(self, i, o, **opts): - self.server = RepceClient(i, o) - rv = self.server.__version__() - exrv = {'proto': repce.repce_version, 'object': Server.version()} - da0 = (rv, exrv) - da1 = ({}, {}) - for i in range(2): - for k, v in da0[i].iteritems(): - da1[i][k] = int(v) - if da1[0] != da1[1]: - raise RuntimeError("RePCe major version mismatch: local %s, remote %s" % (exrv, rv)) - if gconf.timeout and int(gconf.timeout) > 0: - def pinger(): - while True: - self.server.ping() - time.sleep(int(gconf.timeout) * 0.5) - t = threading.Thread(target=pinger) - t.setDaemon(True) - t.start() - - def rsync(self, files, *args): - if not files: - raise RuntimeError("no files to sync") - logging.debug("files: " + ", ".join(files)) - argv = gconf.rsync_command.split() + gconf.rsync_extra.split() + ['-aR'] + files + list(args) - return 
os.spawnvp(os.P_WAIT, argv[0], argv) == 0 - - -class AbstractUrl(object): - - def __init__(self, path, pattern): - m = re.search(pattern, path) - if not m: - raise RuntimeError("malformed path") - self.path = path - return m.groups() - - def scheme(self): - return type(self).__name__.lower() - - def canonical_path(self): - return self.path - - def get_url(self, canonical=False): - if canonical: - pa = self.canonical_path() - else: - pa = self.path - return "://".join((self.scheme(), pa)) - - @property - def url(self): - return self.get_url() - - - ### Concrete resource classes ### - - -class FILE(AbstractUrl, SlaveLocal, SlaveRemote): - - class FILEServer(Server): - pass - - server = FILEServer - - def __init__(self, path): - sup(self, path, '^/') - - def connect(self): - os.chdir(self.path) - - def rsync(self, files): - return sup(self, files, self.path) - - -class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): - - class GLUSTERServer(Server): - - @classmethod - def volume_info(cls): - vm = struct.unpack('!' 
+ 'B'*19 + 'II', - Xattr.lgetxattr('.', '.'.join([cls.GX_NSPACE, 'volume-mark']), 27)) - m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(['%02x' % x for x in vm[2:18]])) - uuid = '-'.join(m.groups()) - return { 'version': vm[0:2], - 'uuid' : uuid, - 'retval' : vm[18], - 'volume_mark': vm[-2:] } - - server = GLUSTERServer - - def __init__(self, path): - self.host, self.volume = sup(self, path, '^(%s):(.+)' % HostRX.pattern) - - def canonical_path(self): - return ':'.join([socket.gethostbyname(self.host), self.volume]) - - def can_connect_to(self, remote): - return True - - def connect(self): - d = tempfile.mkdtemp() - try: - argv = [gconf.gluster_command] + \ - (gconf.gluster_log_level and ['-L', gConf.gluster_log_level] or []) + \ - ['-l', gconf.gluster_log_file, '-s', self.host, - '--volfile-id', self.volume, '--client-pid=-1', d] - if os.spawnvp(os.P_WAIT, argv[0], argv): - raise RuntimeError("command failed: " + " ".join(argv)) - logging.debug('auxiliary glusterfs mount in place') - os.chdir(d) - argv = ['umount', '-l', d] - if os.spawnvp(os.P_WAIT, argv[0], argv): - raise RuntimeError("command failed: " + " ".join(argv)) - finally: - try: - os.rmdir(d) - except: - logging.warn('stale mount possibly left behind on ' + d) - logging.debug('auxiliary glusterfs mount prepared') - - def connect_remote(self, *a, **kw): - sup(self, *a, **kw) - self.slavedir = "/proc/%d/cwd" % self.server.pid() - - def service_loop(self, *args): - if args: - GMaster(self, args[0]).crawl() - else: - sup(self, *args) - - def rsync(self, files): - return sup(self, files, self.slavedir) - - -class SSH(AbstractUrl, SlaveRemote): - - def __init__(self, path): - self.remote_addr, inner_url = sup(self, path, - '^((?:%s@)?%s):(.+)' % tuple([ r.pattern for r in (UserRX, HostRX) ])) - self.inner_rsc = parse_url(inner_url) - - def canonical_path(self): - m = re.match('([^@]+)@(.+)', self.remote_addr) - if m: - u, h = m.groups() - else: - u, h = os.getlogin(), self.remote_addr - remote_addr 
= '@'.join([u, socket.gethostbyname(h)]) - return ':'.join([remote_addr, self.inner_rsc.get_url(canonical=True)]) - - def can_connect_to(self, remote): - return False - - def start_fd_client(self, *a, **opts): - if opts.get('deferred'): - return a - sup(self, *a) - ityp = type(self.inner_rsc) - if ityp == FILE: - slavepath = self.inner_rsc.path - elif ityp == GLUSTER: - slavepath = "/proc/%d/cwd" % self.server.pid() - else: - raise NotImplementedError - self.slaveurl = ':'.join([self.remote_addr, slavepath]) - - def connect_remote(self, go_daemon=None): - if go_daemon == 'done': - return self.start_fd_client(*self.fd_pair) - gconf.setup_ssh_ctl(tempfile.mkdtemp()) - deferred = go_daemon == 'postconn' - ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr], slave=self.inner_rsc.url, deferred=deferred) - if deferred: - # send a message to peer so that we can wait for - # the answer from which we know connection is - # established and we can proceed with daemonization - # (doing that too early robs the ssh passwd prompt...) - # However, we'd better not start the RepceClient - # before daemonization (that's not preserved properly - # in daemon), we just do a an ad-hoc linear put/get. - i, o = ret - inf = os.fdopen(i) - repce.send(o, None, '__repce_version__') - select.select((inf,), (), ()) - repce.recv(inf) - # hack hack hack: store a global reference to the file - # to save it from getting GC'd which implies closing it - gconf.permanent_handles.append(inf) - self.fd_pair = (i, o) - return 'should' - - def rsync(self, files): - return sup(self, files, '-ze', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args), self.slaveurl) |
