summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/afr/src/afr-common.c12
-rw-r--r--xlators/features/bit-rot/src/Makefile.am19
-rw-r--r--xlators/features/bit-rot/src/bit-rot-mem-types.h24
-rw-r--r--xlators/features/bit-rot/src/bit-rot.c89
-rw-r--r--xlators/features/bit-rot/src/bit-rot.h33
-rw-r--r--xlators/features/bit-rot/src/bitd/Makefile.am20
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c1351
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h126
-rw-r--r--xlators/features/bit-rot/src/stub/Makefile.am4
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h10
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am2
-rw-r--r--xlators/features/changelog/lib/src/changelog.h116
12 files changed, 1506 insertions, 300 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 0af46993a34..6c06fd9b7b5 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4164,18 +4164,6 @@ out:
return;
}
-int
-xlator_subvolume_count (xlator_t *this)
-{
- int i = 0;
- xlator_list_t *list = NULL;
-
- for (list = this->children; list; list = list->next)
- i++;
- return i;
-}
-
-
void
afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
{
diff --git a/xlators/features/bit-rot/src/Makefile.am b/xlators/features/bit-rot/src/Makefile.am
index 1f59a71ebea..b5e4a7d62a0 100644
--- a/xlators/features/bit-rot/src/Makefile.am
+++ b/xlators/features/bit-rot/src/Makefile.am
@@ -1,18 +1 @@
-
-SUBDIRS = stub
-
-xlator_LTLIBRARIES = bit-rot.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-
-bit_rot_la_LDFLAGS = -module -avoid-version
-
-bit_rot_la_SOURCES = bit-rot.c
-bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = bit-rot.h bit-rot-mem-types.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
+SUBDIRS = stub bitd
diff --git a/xlators/features/bit-rot/src/bit-rot-mem-types.h b/xlators/features/bit-rot/src/bit-rot-mem-types.h
deleted file mode 100644
index 19c2aca0f8a..00000000000
--- a/xlators/features/bit-rot/src/bit-rot-mem-types.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _BR_MEM_TYPES_H
-#define _BR_MEM_TYPES_H
-
-#include "mem-types.h"
-
-enum br_mem_types {
- gf_br_mt_br_private_t = gf_common_mt_end + 1,
- gf_br_mt_br_local_t,
- gf_br_mt_br_inode_t,
- gf_br_mt_br_fd_t,
- gf_br_mt_end
-};
-
-#endif
diff --git a/xlators/features/bit-rot/src/bit-rot.c b/xlators/features/bit-rot/src/bit-rot.c
deleted file mode 100644
index 0ba8b80825b..00000000000
--- a/xlators/features/bit-rot/src/bit-rot.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <ctype.h>
-#include <sys/uio.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-
-#include "bit-rot.h"
-#include "bit-rot-mem-types.h"
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int32_t ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_br_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
- " init failed");
- return ret;
- }
-
- return ret;
-}
-
-int32_t
-init (xlator_t *this)
-{
- br_private_t *priv = NULL;
- int32_t ret = -1;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: no children");
- goto out;
- }
-
- priv = GF_CALLOC (1, sizeof (*priv), gf_br_mt_br_private_t);
- if (!priv)
- goto out;
-
- this->private = priv;
-
- ret = 0;
-
-out:
- gf_log (this->name, GF_LOG_DEBUG, "bit-rot xlator loaded");
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- br_private_t *priv = this->private;
-
- if (!priv)
- return;
- this->private = NULL;
- GF_FREE (priv);
-
- return;
-}
-
-struct xlator_fops fops;
-
-struct xlator_cbks cbks;
-
-struct volume_options options[] = {
- { .key = {NULL} },
-};
diff --git a/xlators/features/bit-rot/src/bit-rot.h b/xlators/features/bit-rot/src/bit-rot.h
deleted file mode 100644
index b275c0e9535..00000000000
--- a/xlators/features/bit-rot/src/bit-rot.h
+++ /dev/null
@@ -1,33 +0,0 @@
- /*
- Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef __BIT_ROT_H__
-#define __BIT_ROT_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "bit-rot-mem-types.h"
-#include "syncop.h"
-
-struct br_private {
- xlator_t *xl;
- gf_lock_t lock;
-};
-
-typedef struct br_private br_private_t;
-
-#endif /* __BIR_ROT_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am
new file mode 100644
index 00000000000..d94a70dc97f
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/Makefile.am
@@ -0,0 +1,20 @@
+xlator_LTLIBRARIES = bit-rot.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+bit_rot_la_LDFLAGS = -module -avoid-version
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src/ \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(CONTRIBDIR)/timer-wheel \
+ -I$(top_srcdir)/xlators/features/bit-rot/src/stub
+
+bit_rot_la_SOURCES = bit-rot.c
+bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la
+
+noinst_HEADERS = bit-rot.h
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
new file mode 100644
index 00000000000..6234dd83864
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -0,0 +1,1351 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <ctype.h>
+#include <sys/uio.h>
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "compat-errno.h"
+
+#include "bit-rot.h"
+#include <pthread.h>
+
+/**
+ * Return the position of @child inside the private children array.
+ *
+ * Yields -1 when @this, its private data or @child is missing, or
+ * when no slot references @child.
+ */
+static int
+br_find_child_index (xlator_t *this, xlator_t *child)
+{
+        br_private_t *conf  = NULL;
+        int           slot  = 0;
+        int           found = -1;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, child, out);
+
+        conf = this->private;
+
+        while (slot < conf->child_count) {
+                if (conf->children[slot].xl == child) {
+                        found = slot;
+                        break;
+                }
+                slot++;
+        }
+
+out:
+        return found;
+}
+
+/**
+ * Tear down every child slot (destroy its timer pool and unlink its
+ * list node), then release the children array and clear the pointer.
+ */
+static void
+br_free_children (xlator_t *this)
+{
+        br_private_t *conf = this->private;
+        br_child_t   *slot = NULL;
+        int32_t       idx  = 0;
+
+        for (idx = 0; idx < conf->child_count; idx++) {
+                slot = &conf->children[idx];
+
+                mem_pool_destroy (slot->timer_pool);
+                list_del_init (&slot->list);
+        }
+
+        GF_FREE (conf->children);
+        conf->children = NULL;
+}
+
+/**
+ * Find the child whose recorded brick path equals @brick_path.
+ *
+ * The children array is scanned with priv->lock held. Returns the
+ * matching slot, or NULL when an argument is missing or nothing
+ * matches. Note that the pointer is returned after the lock has been
+ * released.
+ */
+br_child_t *
+br_get_child_from_brick_path (xlator_t *this, char *brick_path)
+{
+        br_private_t *conf  = NULL;
+        br_child_t   *match = NULL;
+        int           idx   = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, brick_path, out);
+
+        conf = this->private;
+
+        pthread_mutex_lock (&conf->lock);
+        {
+                for (idx = 0; idx < conf->child_count; idx++) {
+                        if (strcmp (conf->children[idx].brick_path,
+                                    brick_path) != 0)
+                                continue;
+
+                        match = &conf->children[idx];
+                        break;
+                }
+        }
+        pthread_mutex_unlock (&conf->lock);
+
+out:
+        return match;
+}
+
+/**
+ * Brick "init" hook installed by br_fill_brick_spec(). There is no
+ * dedicated per-brick structure yet, so the brick spec itself is
+ * handed back as the opaque per-brick data. Wrap it in our own
+ * structure here when one becomes necessary.
+ */
+void *
+br_brick_init (void *xl, struct gf_brick_spec *brick)
+{
+        void *cookie = brick;
+
+        return cookie;
+}
+
+/**
+ * Brick "fini" hook installed by br_fill_brick_spec(). Since
+ * br_brick_init() allocates nothing, there is nothing to release;
+ * add the cleanup here once br_brick_init() starts allocating.
+ */
+void
+br_brick_fini (void *xl, char *brick, void *data)
+{
+        /* intentionally empty */
+}
+
+/**
+ * Build the signature record that gets sent to the stub for @object.
+ *
+ * @sign:     raw digest bytes
+ * @hashlen:  number of bytes in @sign
+ * @hashtype: signature type tag stored in the record
+ * @object:   object being signed; supplies the signed version
+ *
+ * Returns a zero-filled, GF_CALLOC'd record sized with
+ * signature_size (hashlen + 1) which the caller must GF_FREE, or
+ * NULL when the allocation fails.
+ *
+ * TODO: Signature can contain null terminators which causes bitrot
+ * stub to store truncated hash as it depends on string length of
+ * the hash.
+ *
+ * FIX: Send the string length as part of the signature struct and
+ * change stub to handle this change.
+ */
+static inline br_isignature_t *
+br_prepare_signature (const unsigned char *sign,
+                      unsigned long hashlen,
+                      int8_t hashtype, br_object_t *object)
+{
+        br_isignature_t *signature = NULL;
+
+        /* TODO: use mem-pool */
+        signature = GF_CALLOC (1, signature_size (hashlen + 1),
+                               gf_br_stub_mt_signature_t);
+        if (!signature)
+                return NULL;
+
+        signature->signedversion = object->signedversion;
+        signature->signaturetype = hashtype;
+        memcpy (signature->signature, (char *)sign, hashlen);
+
+        /*
+         * The terminator goes right after the copied digest. Index
+         * hashlen is the last of the (hashlen + 1) bytes requested
+         * above; hashlen + 1 would be one byte past them.
+         */
+        signature->signature[hashlen] = '\0';
+
+        return signature;
+}
+
+/**
+ * Look up the gfid carried by @object on its child subvolume.
+ *
+ * A cached inode is reused when inode_find() yields one, otherwise a
+ * new inode is allocated for the lookup. On success @iatt is filled
+ * by the lookup and *@linked_inode receives the result of
+ * inode_link() (which may be NULL).
+ *
+ * Returns -EINVAL for missing arguments, -ENOMEM when no inode could
+ * be obtained, otherwise the return value of syncop_lookup().
+ */
+static inline int32_t
+br_object_lookup (xlator_t *this, br_object_t *object,
+                  struct iatt *iatt, inode_t **linked_inode)
+{
+        int   ret = -EINVAL;
+        loc_t loc = {0, };
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, object, out);
+
+        /* prefer an inode already present in the table */
+        loc.inode = inode_find (object->child->table, object->gfid);
+        if (!loc.inode)
+                loc.inode = inode_new (object->child->table);
+
+        if (!loc.inode) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        uuid_copy (loc.gfid, object->gfid);
+
+        ret = syncop_lookup (object->child->xl, &loc, NULL, iatt, NULL, NULL);
+        if (ret < 0)
+                goto out;
+
+        /*
+         * The file might have been deleted by the application
+         * after getting the event, but before doing a lookup.
+         * So use linked_inode after inode_link is done.
+         */
+        *linked_inode = inode_link (loc.inode, NULL, NULL, iatt);
+        if (*linked_inode)
+                inode_lookup (*linked_inode);
+
+out:
+        loc_wipe (&loc);
+        return ret;
+}
+
+/**
+ * Open @inode read-only on the object's child subvolume and hand the
+ * bound fd back through @openfd.
+ *
+ * How to let brick know that open is being done by bitd because
+ * syncop framework does not allow passing xdata -- may be use
+ * frame->root->pid itself.
+ *
+ * Returns -1 for missing arguments, -EINVAL when no fd could be
+ * created, otherwise the return value of syncop_open(). *@openfd is
+ * written only when the open succeeds.
+ */
+static inline int32_t
+br_object_open (xlator_t *this,
+                br_object_t *object, inode_t *inode, fd_t **openfd)
+{
+        int32_t  ret    = -1;
+        fd_t    *newfd  = NULL;
+        loc_t    target = {0, };
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, object, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        newfd = fd_create (inode, 0);
+        if (!newfd) {
+                ret = -EINVAL;
+                gf_log (this->name, GF_LOG_ERROR, "failed to create fd for the "
+                        "inode %s", uuid_utoa (inode->gfid));
+                goto out;
+        }
+
+        target.inode = inode_ref (inode);
+        uuid_copy (target.gfid, inode->gfid);
+
+        ret = syncop_open (object->child->xl, &target, O_RDONLY, newfd);
+        if (ret == 0) {
+                fd_bind (newfd);
+                *openfd = newfd;
+        } else {
+                /* open failed: drop the fd, nothing is handed back */
+                fd_unref (newfd);
+        }
+
+        loc_wipe (&target);
+
+out:
+        return ret;
+}
+
+/**
+ * Read up to @size bytes of the object open on @fd, starting at
+ * @offset, and feed whatever was read into the running SHA256
+ * context @sha256. No buffer is handed back to the caller; the
+ * vector and iobref obtained from the read are released here.
+ *
+ * Returns the (positive) value reported by syncop_readv() when data
+ * was read, 0 when the read returned nothing, and -1 on failure or
+ * when an argument is missing.
+ */
+static int32_t
+br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child,
+                               off_t offset, size_t size, SHA256_CTX *sha256)
+{
+        int32_t        ret    = -1;
+        struct iovec  *iovec  = NULL;
+        struct iobref *iobref = NULL;
+        int            count  = 0;
+        int            i      = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, child, out);
+
+        ret = syncop_readv (child->xl, fd,
+                            size, offset, 0, &iovec, &count, &iobref);
+
+        if (ret < 0) {
+                /*
+                 * The failure reason travels in the (negative) return
+                 * value, as with every other syncop call in this file;
+                 * errno is not what describes it.
+                 */
+                gf_log (this->name, GF_LOG_ERROR, "readv on %s failed (%s)",
+                        uuid_utoa (fd->inode->gfid), strerror (-ret));
+                ret = -1;
+                goto out;
+        }
+
+        if (ret == 0)
+                goto out;
+
+        for (i = 0; i < count; i++) {
+                SHA256_Update (sha256,
+                               (const unsigned char *) (iovec[i].iov_base),
+                               iovec[i].iov_len);
+        }
+
+ out:
+        if (iovec)
+                GF_FREE (iovec);
+
+        if (iobref)
+                iobref_unref (iobref);
+
+        return ret;
+}
+
+/**
+ * Compute the SHA256 digest of the object open on @fd into @md.
+ *
+ * The object is consumed in 128K reads until a read returns 0. @md
+ * is written only when that end is reached without a read failure.
+ *
+ * Returns 0 on success, a negative value when a read fails, and -1
+ * when an argument is missing.
+ */
+int32_t
+br_object_checksum (unsigned char *md,
+                    br_object_t *object, fd_t *fd, struct iatt *iatt)
+{
+        int32_t     nread  = -1;
+        off_t       cursor = 0;
+        size_t      chunk  = 128 * 1024; /* 128K block size */
+        xlator_t   *this   = NULL;
+        SHA256_CTX  ctx;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", object, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", iatt, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", fd, out);
+
+        this = object->this;
+
+        SHA256_Init (&ctx);
+
+        do {
+                nread = br_object_read_block_and_sign (this, fd, object->child,
+                                                       cursor, chunk, &ctx);
+                if (nread < 0) {
+                        gf_log (this->name, GF_LOG_ERROR, "reading block with "
+                                "offset %lu of object %s failed", cursor,
+                                uuid_utoa (fd->inode->gfid));
+                        break;
+                }
+
+                /* advancing by zero on the final pass is harmless */
+                cursor += nread;
+        } while (nread > 0);
+
+        if (nread == 0)
+                SHA256_Final (md, &ctx);
+
+ out:
+        return nread;
+}
+
+/**
+ * Checksum the object open on @fd and store the resulting signature
+ * on it through fsetxattr (key GLUSTERFS_SET_OBJECT_SIGNATURE).
+ *
+ * Returns 0 when the signature was set. Every failure yields a
+ * non-zero value: -1 for missing arguments or when the digest buffer
+ * cannot be allocated, the checksum/fsetxattr return value when those
+ * steps fail, and -ENOMEM when the signature record or the dict
+ * cannot be allocated.
+ */
+static inline int32_t
+br_object_read_sign (inode_t *linked_inode, fd_t *fd, br_object_t *object,
+                     struct iatt *iatt)
+{
+        int32_t          ret   = -1;
+        xlator_t        *this  = NULL;
+        dict_t          *xattr = NULL;
+        unsigned char   *md    = NULL;
+        br_isignature_t *sign  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", object, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", linked_inode, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", fd, out);
+
+        this = object->this;
+
+        md = GF_CALLOC (SHA256_DIGEST_LENGTH, sizeof (*md), gf_common_mt_char);
+        if (!md) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate memory "
+                        "for saving hash of the object %s",
+                        uuid_utoa (fd->inode->gfid));
+                goto out;
+        }
+
+        ret = br_object_checksum (md, object, fd, iatt);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "calculating checksum for "
+                        "the object %s failed", uuid_utoa (linked_inode->gfid));
+                goto free_signature;
+        }
+
+        sign = br_prepare_signature (md, SHA256_DIGEST_LENGTH,
+                                     BR_SIGNATURE_TYPE_SHA256, object);
+        if (!sign) {
+                /* ret is 0 from the checksum step: do not report success */
+                ret = -ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the signature "
+                        "for the object %s", uuid_utoa (fd->inode->gfid));
+                goto free_signature;
+        }
+
+        xattr = dict_for_key_value
+                (GLUSTERFS_SET_OBJECT_SIGNATURE,
+                 (void *)sign, signature_size (SHA256_DIGEST_LENGTH));
+
+        if (!xattr) {
+                /* same here: ret still holds the checksum's 0 */
+                ret = -ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR, "dict allocation for signing"
+                        " failed for the object %s",
+                        uuid_utoa (fd->inode->gfid));
+                goto free_isign;
+        }
+
+        ret = syncop_fsetxattr (object->child->xl, fd, xattr, 0);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "fsetxattr of signature to "
+                        "the object %s failed", uuid_utoa (fd->inode->gfid));
+                goto unref_dict;
+        }
+
+        ret = 0;
+
+ unref_dict:
+        dict_unref (xattr);
+ free_isign:
+        GF_FREE (sign);
+ free_signature:
+        GF_FREE (md);
+ out:
+        return ret;
+}
+
+/**
+ * Tell whether @op_errno is a "soft" signing error, i.e. ENOENT or
+ * ESTALE. Callers in this file use the answer to pick the log level
+ * and to decide whether a signing failure is worth reporting.
+ *
+ * Returns non-zero for a soft error, 0 otherwise.
+ */
+static inline int br_object_sign_softerror (int32_t op_errno)
+{
+        /* compare against ESTALE; an assignment here is always true */
+        return ((op_errno == ENOENT) || (op_errno == ESTALE));
+}
+
+/**
+ * Log the failure of operation @op on the object identified by @gfid.
+ * Errors classified as soft by br_object_sign_softerror() are logged
+ * at DEBUG level, everything else at ERROR level.
+ */
+void
+br_log_object (xlator_t *this, char *op, uuid_t gfid, int32_t op_errno)
+{
+        int level = GF_LOG_ERROR;
+
+        if (br_object_sign_softerror (op_errno))
+                level = GF_LOG_DEBUG;
+
+        gf_log (this->name, level,
+                "%s() failed on object %s [reason: %s]",
+                op, uuid_utoa (gfid), strerror (op_errno));
+}
+
+/**
+ * Same as br_log_object(), but the object is named by @path rather
+ * than by gfid. Soft errors go to DEBUG, the rest to ERROR.
+ */
+void
+br_log_object_path (xlator_t *this, char *op,
+                    const char *path, int32_t op_errno)
+{
+        int level = GF_LOG_ERROR;
+
+        if (br_object_sign_softerror (op_errno))
+                level = GF_LOG_DEBUG;
+
+        gf_log (this->name, level,
+                "%s() failed on object %s [reason: %s]",
+                op, path, strerror (op_errno));
+}
+
+/**
+ * Sign one object: look it up by gfid, open it, then checksum it and
+ * store the signature. This routine runs full throttle. There needs
+ * to be some form of priority scheduling and/or read burstness to
+ * avoid starving (or kicking) client I/O's.
+ *
+ * Returns 0 on success; otherwise the non-zero value reported by the
+ * step that failed (-1 when @object is missing).
+ */
+static inline int32_t br_sign_object (br_object_t *object)
+{
+        int32_t      ret     = -1;
+        xlator_t    *this    = NULL;
+        inode_t     *inode   = NULL;
+        fd_t        *objfd   = NULL;
+        struct iatt  stbuf   = {0, };
+        pid_t        bitdpid = GF_CLIENT_PID_BITD;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", object, out);
+
+        this = object->this;
+
+        /**
+         * FIXME: This is required as signing an object is restricted to
+         * clients with special frame->root->pid. Change the way client
+         * pid is set.
+         */
+        syncopctx_setfspid (&bitdpid);
+
+        ret = br_object_lookup (this, object, &stbuf, &inode);
+        if (ret != 0) {
+                br_log_object (this, "lookup", object->gfid, -ret);
+                goto out;
+        }
+
+        ret = br_object_open (this, object, inode, &objfd);
+        if (objfd == NULL) {
+                br_log_object (this, "open", object->gfid, -ret);
+                goto drop_inode;
+        }
+
+        /**
+         * An open file descriptor on the object is now held. From
+         * here on file operation errors are always logged as errors.
+         */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "Signing object [%s]", uuid_utoa (inode->gfid));
+
+        ret = br_object_read_sign (inode, objfd, object, &stbuf);
+        if (ret != 0)
+                gf_log (this->name, GF_LOG_ERROR, "reading and signing of the "
+                        "object %s failed", uuid_utoa (inode->gfid));
+
+        fd_unref (objfd);
+ drop_inode:
+        inode_unref (inode);
+ out:
+        return ret;
+}
+
+/**
+ * Detach and return the first object of the signing queue, waiting
+ * on priv->object_cond for as long as the queue is empty. The wait
+ * uses priv->lock, so the caller is expected to hold that lock.
+ */
+static inline br_object_t *__br_pick_object (br_private_t *priv)
+{
+        br_object_t *head = NULL;
+
+        for (;;) {
+                if (!list_empty (&priv->obj_queue->objects))
+                        break;
+
+                pthread_cond_wait (&priv->object_cond, &priv->lock);
+        }
+
+        head = list_first_entry
+                (&priv->obj_queue->objects, br_object_t, list);
+        list_del_init (&head->list);
+
+        return head;
+}
+
+/**
+ * Thread body that triggers signing. It loops forever: pick the next
+ * queued object under priv->lock, sign it, report failures that are
+ * not soft errors, and free the object.
+ *
+ * @arg is the bit-rot xlator. THIS is set to it explicitly for this
+ * thread.
+ */
+void *
+br_process_object (void *arg)
+{
+        xlator_t     *this = arg;
+        br_private_t *conf = this->private;
+        br_object_t  *job  = NULL;
+        int32_t       rc   = -1;
+
+        THIS = this;
+
+        while (1) {
+                pthread_mutex_lock (&conf->lock);
+                {
+                        job = __br_pick_object (conf);
+                }
+                pthread_mutex_unlock (&conf->lock);
+
+                rc = br_sign_object (job);
+                if ((rc != 0) && !br_object_sign_softerror (-rc))
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "SIGNING FAILURE [%s]",
+                                uuid_utoa (job->gfid));
+
+                GF_FREE (job);
+        }
+
+        return NULL;
+}
+
+/**
+ * This function gets kicked in once the object is expired from the
+ * timer wheel. This actually adds the object received via notification
+ * from the changelog to the queue from where the objects gets picked
+ * up for signing.
+ *
+ * This routine can be made lightweight by introducing an alternate
+ * timer-wheel API that dispatches _all_ expired objects in one-shot
+ * rather than an object at-a-time. This routine can then just simply
+ * be a call to list_splice_tail().
+ *
+ * NOTE: use call_time to instrument signing time in br_sign_object().
+ */
+void
+br_add_object_to_queue (struct gf_tw_timer_list *timer,
+                        void *data, unsigned long call_time)
+{
+        br_object_t  *expired = data;
+        br_private_t *conf    = expired->this->private;
+
+        /* append under the lock and wake every waiting signer */
+        pthread_mutex_lock (&conf->lock);
+        {
+                list_add_tail (&expired->list, &conf->obj_queue->objects);
+                pthread_cond_broadcast (&conf->object_cond);
+        }
+        pthread_mutex_unlock (&conf->lock);
+
+        /* the timer has fired; give its memory back */
+        mem_put (timer);
+}
+
+/**
+ * Allocate a signing object for the release event @ev seen on
+ * @child, recording the xlator, the child, the gfid and the version
+ * carried by the event. Returns NULL when the allocation fails.
+ */
+static inline br_object_t *
+br_initialize_object (xlator_t *this, br_child_t *child, changelog_event_t *ev)
+{
+        br_object_t *fresh = NULL;
+
+        fresh = GF_CALLOC (1, sizeof (*fresh), gf_br_mt_br_object_t);
+        if (!fresh)
+                return NULL;
+
+        INIT_LIST_HEAD (&fresh->list);
+
+        fresh->this  = this;
+        fresh->child = child;
+        uuid_copy (fresh->gfid, ev->u.releasebr.gfid);
+
+        /* NOTE: it's BE, but no worry */
+        fresh->signedversion = ev->u.releasebr.version;
+
+        return fresh;
+}
+
+/**
+ * Take a timer from the child's timer pool, point it at @object with
+ * the configured expiry time and br_add_object_to_queue() as its
+ * handler, and add it to the timer wheel. Returns the timer, or NULL
+ * when the pool could not supply one.
+ */
+static inline struct gf_tw_timer_list *
+br_initialize_timer (xlator_t *this, br_object_t *object, br_child_t *child,
+                     changelog_event_t *ev)
+{
+        br_private_t            *conf  = this->private;
+        struct gf_tw_timer_list *entry = NULL;
+
+        entry = mem_get0 (child->timer_pool);
+        if (!entry)
+                return NULL;
+
+        INIT_LIST_HEAD (&entry->entry);
+
+        entry->data     = object;
+        entry->expires  = conf->expiry_time;
+        entry->function = br_add_object_to_queue;
+
+        gf_tw_add_timer (conf->timer_wheel, entry);
+
+        return entry;
+}
+
+/**
+ * This callback function registered with the changelog is executed
+ * whenever a notification from the changelog is received. It creates
+ * an object for the gfid on which the notification has come and adds
+ * it to the timer-wheel with the configured expiry time.
+ *
+ * @xl:    the bit-rot xlator
+ * @brick: brick path the event originated from
+ * @data:  per-brick data (unused here)
+ * @ev:    the release event
+ *
+ * Events whose flags equal O_RDONLY are ignored. Failures are logged
+ * and the event is dropped.
+ *
+ * TODO: use mem-pool for allocations and maybe allocate timer and
+ * object as a single alloc and bifurcate their respective pointers.
+ */
+void
+br_brick_callback (void *xl, char *brick,
+                   void *data, changelog_event_t *ev)
+{
+        uuid_t                   gfid   = {0,};
+        xlator_t                *this   = NULL;
+        br_object_t             *object = NULL;
+        br_child_t              *child  = NULL;
+        int32_t                  flags  = 0;
+        struct gf_tw_timer_list *timer  = NULL;
+
+        this = xl;
+
+        /* validate 'this' first: the later checks dereference it */
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, ev, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        GF_ASSERT (ev->ev_type == CHANGELOG_OP_TYPE_BR_RELEASE);
+        GF_ASSERT (!uuid_is_null (ev->u.releasebr.gfid));
+
+        uuid_copy (gfid, ev->u.releasebr.gfid);
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "RELEASE EVENT [GFID %s]", uuid_utoa (gfid));
+
+        flags = (int32_t)ntohl (ev->u.releasebr.flags);
+        if (flags == O_RDONLY) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Read only fd [GFID: %s], ignoring signing..",
+                        uuid_utoa (gfid));
+                goto out;
+        }
+
+        child = br_get_child_from_brick_path (this, brick);
+        if (!child) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the subvolume "
+                        "for the brick %s", brick);
+                goto out;
+        }
+
+        object = br_initialize_object (this, child, ev);
+        if (!object) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate "
+                        "object memory [GFID: %s]", uuid_utoa (gfid));
+                goto out;
+        }
+
+        /* on success the timer refers to the object via timer->data */
+        timer = br_initialize_timer (this, object, child, ev);
+        if (!timer) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate "
+                        "object expiry timer [GFID: %s]", uuid_utoa (gfid));
+                goto free_object;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG, "->callback: brick [%s], type [%d]\n",
+                brick, ev->ev_type);
+
+        return;
+
+ free_object:
+        GF_FREE (object);
+out:
+        return;
+}
+
+/**
+ * Populate @brick for changelog registration: a private copy of
+ * @path, the BR_RELEASE event filter, and this file's init, fini and
+ * event callbacks. The connect/disconnect hooks are left unset.
+ */
+void
+br_fill_brick_spec (struct gf_brick_spec *brick, char *path)
+{
+        /* event hooks */
+        brick->init         = br_brick_init;
+        brick->fini         = br_brick_fini;
+        brick->callback     = br_brick_callback;
+        brick->connected    = NULL;
+        brick->disconnected = NULL;
+
+        /* what to listen for, and where */
+        brick->filter     = CHANGELOG_OP_TYPE_BR_RELEASE;
+        brick->brick_path = gf_strdup (path);
+}
+
+/**
+ * Compare the time recorded for @child with @tv. Returns _gf_true
+ * only when both the seconds and the microseconds match.
+ */
+static inline gf_boolean_t
+br_time_equal (br_child_t *child, struct timeval *tv)
+{
+        if (child->tv.tv_sec != tv->tv_sec)
+                return _gf_false;
+
+        if (child->tv.tv_usec != tv->tv_usec)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/**
+ * Decide from the signature info in @xattr whether the crawler should
+ * trigger signing for an object.
+ *
+ * Signing is requested only when the signature is flagged stale and
+ * its timestamp differs from the time recorded for @child. Returns
+ * _gf_false when an argument is missing or the signature info cannot
+ * be read from the dict.
+ */
+static inline gf_boolean_t
+br_check_object_need_sign (xlator_t *this, dict_t *xattr, br_child_t *child)
+{
+        int32_t              ret    = -1;
+        gf_boolean_t         wanted = _gf_false;
+        struct timeval       stamp  = {0,};
+        br_isignature_out_t *sign   = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, xattr, out);
+        GF_VALIDATE_OR_GOTO (this->name, child, out);
+
+        ret = dict_get_ptr (xattr, GLUSTERFS_GET_OBJECT_SIGNATURE,
+                            (void **)&sign);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to get object signature info");
+                goto out;
+        }
+
+        stamp.tv_sec  = ntohl (sign->time[0]);
+        stamp.tv_usec = ntohl (sign->time[1]);
+
+        /* a signature that is not stale needs no work */
+        if (!sign->stale)
+                goto out;
+
+        /* a timestamp equal to the child's recorded time is left alone */
+        if (br_time_equal (child, &stamp))
+                goto out;
+
+        wanted = _gf_true;
+
+out:
+        return wanted;
+}
+
+/**
+ * Trigger signing of an object by opening it O_RDWR on the child
+ * subvolume and closing it again right away. Failures are logged and
+ * otherwise ignored.
+ */
+static inline void
+br_trigger_sign (xlator_t *this, br_child_t *child, inode_t *linked_inode,
+                 loc_t *loc)
+{
+        fd_t    *trigger = NULL;
+        int32_t  rc      = -1;
+
+        trigger = fd_create (linked_inode, 0);
+        if (!trigger) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to create fd [GFID %s]",
+                        uuid_utoa (linked_inode->gfid));
+                return;
+        }
+
+        rc = syncop_open (child->xl, loc, O_RDWR, trigger);
+        if (rc) {
+                br_log_object (this, "open", linked_inode->gfid, -rc);
+                fd_unref (trigger);
+                return;
+        }
+
+        fd_bind (trigger);
+        syncop_close (trigger);
+}
+
+/**
+ * Fill @loc for the directory entry @entry found under @parent.
+ *
+ * An inode already known for the entry is reused, otherwise a new
+ * one is allocated. The parent reference, parent gfid, path and name
+ * are then set up.
+ *
+ * Returns 1 when @loc is ready for a lookup, 0 when the entry is
+ * already known not to be a regular file (nothing to do), and a
+ * negative value on failure. Whatever was stored in @loc is left for
+ * the caller to release with loc_wipe().
+ */
+int32_t
+br_prepare_loc (xlator_t *this, br_child_t *child, loc_t *parent,
+                gf_dirent_t *entry, loc_t *loc)
+{
+        int32_t  ret   = -1;
+        inode_t *inode = NULL;
+
+        inode = inode_grep (child->table, parent->inode, entry->d_name);
+        if (!inode) {
+                loc->inode = inode_new (child->table);
+                if (!loc->inode) {
+                        /* nothing to look up with; ret is still -1 */
+                        gf_log (this->name, GF_LOG_ERROR, "failed to "
+                                "allocate inode for %s", entry->d_name);
+                        goto out;
+                }
+        } else {
+                loc->inode = inode;
+                if (loc->inode->ia_type != IA_IFREG) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s is not a regular "
+                                "file", entry->d_name);
+                        ret = 0;
+                        goto out;
+                }
+        }
+
+        loc->parent = inode_ref (parent->inode);
+        uuid_copy (loc->pargfid, parent->inode->gfid);
+
+        ret = inode_path (parent->inode, entry->d_name, (char **)&loc->path);
+        if (ret < 0 || !loc->path) {
+                gf_log (this->name, GF_LOG_ERROR, "inode_path on %s "
+                        "(parent: %s) failed", entry->d_name,
+                        uuid_utoa (parent->inode->gfid));
+                goto out;
+        }
+
+        /* name is the last path component */
+        loc->name = strrchr (loc->path, '/');
+        if (loc->name)
+                loc->name++;
+
+        ret = 1;
+
+out:
+        return ret;
+}
+
+/**
+ * Oneshot crawler
+ * ---------------
+ * This is a catchup mechanism. Objects that remained unsigned from the
+ * last run for whatever reason (node crashes, reboots, etc..) become
+ * candidates for signing. This allows the signature to "catch up" with
+ * the current state of the object. Triggering signing is easy: perform
+ * an open() followed by a close() thereby resulting in call boomerang.
+ * (though not back to itself :))
+ *
+ * Invoked for each directory entry visited by the tree walk:
+ * @subvol is the subvolume being walked, @entry the directory entry,
+ * @parent the loc of its parent directory and @data the br_child_t.
+ *
+ * Returns 0 when the entry was handled or skipped, non-zero when
+ * preparing the loc, the lookup or the signature query failed.
+ */
+int
+bitd_oneshot_crawl (xlator_t *subvol,
+                    gf_dirent_t *entry, loc_t *parent, void *data)
+{
+        int           op_errno     = 0;
+        br_child_t   *child        = NULL;
+        xlator_t     *this         = NULL;
+        loc_t         loc          = {0, };
+        struct iatt   iatt         = {0, };
+        struct iatt   parent_buf   = {0, };
+        dict_t       *xattr        = NULL;
+        int32_t       ret          = -1;
+        inode_t      *linked_inode = NULL;
+        gf_boolean_t  need_signing = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", data, out);
+
+        child = data;
+        this = child->this;
+
+        /*
+         * 0 means "known non-regular file, skip"; a negative value
+         * means the loc could not be prepared. Neither may proceed to
+         * the lookup below.
+         */
+        ret = br_prepare_loc (this, child, parent, entry, &loc);
+        if (ret <= 0)
+                goto out;
+
+        ret = syncop_lookup (child->xl, &loc, NULL, &iatt, NULL, &parent_buf);
+        if (ret) {
+                br_log_object_path (this, "lookup", loc.path, -ret);
+                goto out;
+        }
+
+        linked_inode = inode_link (loc.inode, parent->inode, loc.name, &iatt);
+        if (!linked_inode) {
+                /* everything below dereferences the linked inode */
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to link inode for %s, skipping..", loc.path);
+                ret = 0;
+                goto out;
+        }
+        inode_lookup (linked_inode);
+
+        if (iatt.ia_type != IA_IFREG) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%s is not a regular file, skipping..", entry->d_name);
+                ret = 0;
+                goto unref_inode;
+        }
+
+        /**
+         * As of now, 2 cases are possible and handled.
+         * 1) GlusterFS is upgraded from a previous version which does not
+         *    have any idea about bit-rot and have data in the filesystem.
+         *    In this case syncop_getxattr fails with ENODATA and the object
+         *    is signed. (In real, when crawler sends lookup, bit-rot-stub
+         *    creates the xattrs before returning lookup reply)
+         * 2) Bit-rot was not enabled or BitD was down for some reason,
+         *    during which some files were created, but since BitD was
+         *    down, were not signed.
+         * If the file was just created and was being written some data when
+         * the down BitD came up, then bit-rot stub should be intelligent to
+         * identify this case (by comparing the ongoing version or by checking
+         * if there are any fds present for that inode) and handle properly.
+         */
+
+        ret = syncop_getxattr (child->xl, &loc, &xattr,
+                               GLUSTERFS_GET_OBJECT_SIGNATURE, NULL);
+        if (ret < 0) {
+                op_errno = -ret;
+                br_log_object (this, "getxattr", linked_inode->gfid, op_errno);
+
+                if (op_errno == ENODATA)
+                        need_signing = _gf_true;
+                if (op_errno == EINVAL)
+                        gf_log (this->name, GF_LOG_WARNING, "Partial version "
+                                "xattr presence detected, ignoring [GFID: %s]",
+                                uuid_utoa (linked_inode->gfid));
+        } else {
+                need_signing = br_check_object_need_sign (this, xattr, child);
+        }
+
+        if (!need_signing)
+                goto unref_dict;
+
+        gf_log (this->name, GF_LOG_INFO,
+                "Triggering signing for %s [GFID: %s | Brick: %s]",
+                loc.path, uuid_utoa (linked_inode->gfid), child->brick_path);
+        br_trigger_sign (this, child, linked_inode, &loc);
+
+        ret = 0;
+
+ unref_dict:
+        if (xattr)
+                dict_unref (xattr);
+ unref_inode:
+        inode_unref (linked_inode);
+ out:
+        loc_wipe (&loc);
+
+        return ret;
+}
+
+#define BR_CRAWL_THROTTLE_COUNT 50
+#define BR_CRAWL_THROTTLE_ZZZ 5
+
+/**
+ * Thread body of the one-shot crawler for a single child (@arg).
+ * Walks the child's tree from the root inode, calling
+ * bitd_oneshot_crawl() per entry with the throttle settings defined
+ * by BR_CRAWL_THROTTLE_COUNT and BR_CRAWL_THROTTLE_ZZZ. The result
+ * of the walk is ignored. Always returns NULL.
+ */
+void *
+br_oneshot_signer (void *arg)
+{
+        br_child_t *child = arg;
+        xlator_t   *this  = child->this;
+        loc_t       root  = {0,};
+
+        THIS = this;
+
+        gf_log (this->name, GF_LOG_INFO, "Crawling brick [%s], scanning "
+                "for unsigned objects", child->brick_path);
+
+        root.inode = child->table->root;
+        (void) syncop_ftw_throttle (child->xl, &root,
+                                    GF_CLIENT_PID_BITD, child,
+                                    bitd_oneshot_crawl,
+                                    BR_CRAWL_THROTTLE_COUNT,
+                                    BR_CRAWL_THROTTLE_ZZZ);
+
+        gf_log (this->name, GF_LOG_INFO,
+                "Completed crawling brick [%s]", child->brick_path);
+
+        return NULL;
+}
+
+/**
+ * At this point a thread is spawned to crawl the filesystem (in
+ * tortoise pace) to sign objects that were not signed in previous run(s).
+ * Such objects are identified by examining their dirtiness and timestamp.
+ *
+ * pick object:
+ *     signature_is_stale() && (object_timestamp() <= stub_init_time())
+ *
+ * Also, we register to the changelog library to subscribe for event
+ * notifications.
+ *
+ * Returns 0 once registration succeeded (even if the crawler thread
+ * could not be spawned), -1 when the brick spec could not be set up
+ * or registration failed.
+ */
+static inline int32_t
+br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
+{
+        int32_t               ret   = 0;
+        struct gf_brick_spec *brick = NULL;
+
+        brick = GF_CALLOC (1, sizeof (struct gf_brick_spec),
+                           gf_common_mt_gf_brick_spec_t);
+        if (!brick)
+                goto error_return;
+
+        br_fill_brick_spec (brick, stub->export);
+        if (!brick->brick_path) {
+                /* the path copy made while filling the spec failed */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to copy brick path for changelog registration");
+                goto dealloc;
+        }
+
+        ret = gf_changelog_register_generic
+                (brick, 1, 1, this->ctx->cmd_args.log_file, -1, this);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Register to changelog failed"
+                        " [Reason: %s]", strerror (errno));
+                goto dealloc;
+        }
+
+        child->threadrunning = 0;
+        ret = gf_thread_create (&child->thread, NULL, br_oneshot_signer, child);
+        if (ret)
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to spawn FS crawler thread");
+        else
+                child->threadrunning = 1;
+
+        /* it's OK to continue, "old" objects would be signed when modified */
+        return 0;
+
+ dealloc:
+        /*
+         * The spec owns the strdup'd path, so release it together
+         * with the spec. NOTE(review): assumes a failed registration
+         * keeps no reference to the spec -- the spec itself was
+         * already being freed on this path.
+         */
+        if (brick->brick_path)
+                GF_FREE (brick->brick_path);
+        GF_FREE (brick);
+ error_return:
+        return -1;
+}
+
+/**
+ * This routine fetches various attributes associated with a child which
+ * is basically a subvolume. Attributes include brick path and the stub
+ * birth time. This is done by performing a lookup on the root followed
+ * by getxattr() on a virtual key. With those recorded in @child, the
+ * signer is enacted for the child.
+ *
+ * Returns the result of br_enact_signer() on success, -1 on any
+ * failure before that point.
+ */
+static inline int32_t
+br_brick_connect (xlator_t *this, br_child_t *child)
+{
+        int32_t         ret      = -1;
+        loc_t           loc      = {0, };
+        struct iatt     buf      = {0, };
+        struct iatt     parent   = {0, };
+        br_stub_init_t *stub     = NULL;
+        dict_t         *xattr    = NULL;
+        int             op_errno = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, child, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        loc.inode = inode_ref (child->table->root);
+        uuid_copy (loc.gfid, loc.inode->gfid);
+        loc.path = gf_strdup ("/");
+        if (!loc.path) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to allocate root path");
+                goto wipeloc;
+        }
+
+        ret = syncop_lookup (child->xl, &loc, NULL, &buf, NULL, &parent);
+        if (ret) {
+                op_errno = -ret;
+                ret = -1;
+                gf_log (this->name, GF_LOG_ERROR, "lookup on root failed "
+                        "[Reason: %s]", strerror (op_errno));
+                goto wipeloc;
+        }
+
+        ret = syncop_getxattr (child->xl, &loc, &xattr,
+                               GLUSTERFS_GET_BR_STUB_INIT_TIME, NULL);
+        if (ret) {
+                op_errno = -ret;
+                ret = -1;
+                gf_log (this->name, GF_LOG_ERROR, "failed to get stub info "
+                        "[Reason: %s]", strerror (op_errno));
+                goto wipeloc;
+        }
+
+        ret = dict_get_ptr (xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+                            (void **)&stub);
+        if (ret) {
+                /* report -1 like the other failure paths above */
+                ret = -1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to extract stub information");
+                goto free_dict;
+        }
+
+        /*
+         * NOTE(review): the copy is bounded only by the length of
+         * stub->export; confirm child->brick_path is large enough.
+         */
+        memcpy (child->brick_path, stub->export, strlen (stub->export) + 1);
+
+        /* timebuf holds {seconds, microseconds}, both in network order */
+        child->tv.tv_sec = ntohl (stub->timebuf[0]);
+        child->tv.tv_usec = ntohl (stub->timebuf[1]);
+
+        ret = br_enact_signer (this, child, stub);
+
+ free_dict:
+        dict_unref (xattr);
+ wipeloc:
+        loc_wipe (&loc);
+ out:
+        return ret;
+}
+
+/**
+ * Thread body that services CHILD_UP notifications.
+ *
+ * notify() queues every child for which CHILD_UP was received on
+ * priv->bricks and signals priv->cond. This thread waits for the queue to
+ * become non-empty, takes the child at the head of the queue and, if the
+ * child is still up, connects to it (see br_brick_connect()) to fetch the
+ * brick information such as the brick path.
+ *
+ * A child is always removed from the queue once it has been picked up,
+ * whether or not the connect succeeded. Leaving a failed (or no longer
+ * up) child at the head of the queue would make this loop spin on the same
+ * entry while holding priv->lock and would starve every child queued
+ * behind it. A failed child is queued again by the next CHILD_UP.
+ *
+ * @arg: the bit-rot xlator (xlator_t *)
+ *
+ * The loop never terminates; the trailing return only satisfies the
+ * thread function signature.
+ */
+void *
+br_handle_events (void *arg)
+{
+        xlator_t     *this  = NULL;
+        br_private_t *priv  = NULL;
+        br_child_t   *child = NULL;
+        int32_t       ret   = -1;
+
+        this = arg;
+        priv = this->private;
+
+        /*
+         * Since, this is the topmost xlator, THIS has to be set by bit-rot
+         * xlator itself (STACK_WIND wont help in this case). Also it has
+         * to be done for each thread that gets spawned. Otherwise, a new
+         * thread will get global_xlator's pointer when it does "THIS".
+         */
+        THIS = this;
+
+        while (1) {
+                pthread_mutex_lock (&priv->lock);
+                {
+                        while (list_empty (&priv->bricks)) {
+                                pthread_cond_wait (&priv->cond,
+                                                   &priv->lock);
+                        }
+
+                        child = list_entry (priv->bricks.next, br_child_t,
+                                            list);
+
+                        /* dequeue unconditionally, see the note above */
+                        list_del_init (&child->list);
+
+                        if (child->child_up) {
+                                ret = br_brick_connect (this, child);
+                                if (ret != 0)
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "failed to connect to the "
+                                                "child (subvolume: %s)",
+                                                child->xl->name);
+                        }
+                }
+                pthread_mutex_unlock (&priv->lock);
+        }
+
+        return NULL;
+}
+
+/**
+ * Set up memory accounting for this xlator, sized to cover every memory
+ * type up to gf_br_stub_mt_end.
+ *
+ * Returns -1 when @this is NULL, otherwise whatever
+ * xlator_mem_acct_init() returned (0 on success).
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int32_t status = -1;
+
+        if (this == NULL)
+                return status;
+
+        status = xlator_mem_acct_init (this, gf_br_stub_mt_end + 1);
+        if (status != 0)
+                gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+                        " init failed");
+
+        return status;
+}
+
+/**
+ * Event handler of the bit-rot xlator.
+ *
+ * @this:  bit-rot xlator
+ * @event: GF_EVENT_* identifier
+ * @data:  subvolume (xlator_t *) the event originated from
+ *
+ * CHILD_UP:       marks the child up, makes sure it has an inode table and
+ *                 queues it on priv->bricks for br_handle_events().
+ * CHILD_DOWN:     marks the child down.
+ * CHILD_MODIFIED: only validates the subvolume.
+ * PARENT_UP:      forwarded through default_notify().
+ * Any other event is ignored.
+ *
+ * Always returns 0.
+ */
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+        xlator_t     *subvol = NULL;
+        br_private_t *priv   = NULL;
+        int           idx    = -1;
+        br_child_t   *child  = NULL;
+
+        subvol = (xlator_t *)data;
+        priv = this->private;
+
+        gf_log (this->name, GF_LOG_TRACE, "Notification received: %d",
+                event);
+
+        switch (event) {
+        case GF_EVENT_CHILD_UP:
+                /* should this be done under lock? or is it ok to do it
+                   without lock? */
+                idx = br_find_child_index (this, subvol);
+
+                pthread_mutex_lock (&priv->lock);
+                {
+                        if (idx < 0) {
+                                gf_log (this->name, GF_LOG_ERROR, "got child "
+                                        "up from invalid subvolume");
+                        } else {
+                                child = &priv->children[idx];
+
+                                /* count only down -> up transitions so that
+                                 * repeated CHILD_UP events stay balanced
+                                 * with the single decrement done on
+                                 * CHILD_DOWN */
+                                if (child->child_up != 1) {
+                                        child->child_up = 1;
+                                        priv->up_children++;
+                                }
+
+                                if (!child->xl)
+                                        child->xl = subvol;
+                                if (!child->table)
+                                        child->table = inode_table_new (4096,
+                                                                        subvol);
+
+                                if (!child->table) {
+                                        /* br_brick_connect() dereferences
+                                         * the table, do not queue */
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "failed to allocate inode "
+                                                "table (subvolume: %s)",
+                                                subvol->name);
+                                } else {
+                                        /* adding a node that is already
+                                         * linked corrupts the list */
+                                        if (list_empty (&child->list))
+                                                list_add_tail (&child->list,
+                                                               &priv->bricks);
+                                        pthread_cond_signal (&priv->cond);
+                                }
+                        }
+                }
+                pthread_mutex_unlock (&priv->lock);
+                break;
+
+        case GF_EVENT_CHILD_MODIFIED:
+                idx = br_find_child_index (this, subvol);
+                if (idx < 0) {
+                        gf_log (this->name, GF_LOG_ERROR, "received child "
+                                "modified from invalid subvolume");
+                        goto out;
+                }
+                /* nothing else to do for now */
+                break;
+
+        case GF_EVENT_CHILD_DOWN:
+                idx = br_find_child_index (this, subvol);
+                if (idx < 0) {
+                        gf_log (this->name, GF_LOG_ERROR, "received child down "
+                                "from invalid subvolume");
+                        goto out;
+                }
+
+                pthread_mutex_lock (&priv->lock);
+                {
+                        if (priv->children[idx].child_up == 1) {
+                                priv->children[idx].child_up = 0;
+                                priv->up_children--;
+                        }
+                }
+                pthread_mutex_unlock (&priv->lock);
+                break;
+
+        case GF_EVENT_PARENT_UP:
+                default_notify (this, GF_EVENT_PARENT_UP, data);
+                break;
+
+        default:
+                /* remaining events are intentionally not handled */
+                break;
+        }
+
+out:
+        return 0;
+}
+
+/**
+ * Initialize the bit-rot xlator.
+ *
+ * Allocates the private structure, initializes the changelog library
+ * context, reads the "expiry-time" option, sets up per-child state (one
+ * timer mem-pool per subvolume), spawns the CHILD_UP handler thread,
+ * initializes the timer wheel and finally spawns BR_WORKERS signer
+ * threads.
+ *
+ * Returns 0 on success and a non-zero value on failure. On failure the
+ * private structure is released and this->private is reset to NULL.
+ *
+ * NOTE: threads that were already spawned when a later step fails are not
+ * stopped here, and the per-child timer mem-pools are not released.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int            i    = 0;
+        int32_t        ret  = -1;
+        br_private_t  *priv = NULL;
+        xlator_list_t *trav = NULL;
+
+        if (!this->children) {
+                gf_log (this->name, GF_LOG_ERROR, "FATAL: no children");
+                goto out;
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_br_mt_br_private_t);
+        if (!priv) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to allocate memory (->priv)");
+                goto out;
+        }
+
+        /* initialize gfchangelog xlator context */
+        ret = gf_changelog_init (this);
+        if (ret)
+                goto out;
+
+        /* ret is 0 at this point: re-arm it so that the failure paths
+         * below do not report success */
+        ret = -1;
+
+        GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, out);
+
+        priv->child_count = xlator_subvolume_count (this);
+        priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children),
+                                    gf_br_mt_br_child_t);
+        if (!priv->children)
+                goto out;
+
+        trav = this->children;
+        while (trav) {
+                priv->children[i].this = this;
+                priv->children[i].xl = trav->xlator;
+
+                priv->children[i].timer_pool =
+                        mem_pool_new (struct gf_tw_timer_list, 4096);
+                if (!priv->children[i].timer_pool) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to allocate mem-pool for timer");
+                        errno = ENOMEM;
+                        goto out;
+                }
+
+                i++;
+                trav = trav->next;
+        }
+
+        pthread_mutex_init (&priv->lock, NULL);
+        pthread_cond_init (&priv->cond, NULL);
+
+        for (i = 0; i < priv->child_count; i++)
+                INIT_LIST_HEAD (&priv->children[i].list);
+        INIT_LIST_HEAD (&priv->bricks);
+
+        /* the threads spawned below reach the state via this->private */
+        this->private = priv;
+
+        ret = gf_thread_create (&priv->thread, NULL, br_handle_events,
+                                this);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "thread creation failed (%s)", strerror (errno));
+                goto out;
+        }
+
+        /* ret is 0 again after the successful thread creation */
+        ret = -1;
+
+        priv->timer_wheel = gf_tw_init_timers ();
+        if (!priv->timer_wheel) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to initialize the "
+                        "timer wheel");
+                goto out;
+        }
+
+        pthread_cond_init (&priv->object_cond, NULL);
+        priv->obj_queue = GF_CALLOC (1, sizeof (*priv->obj_queue),
+                                     gf_br_mt_br_ob_n_wk_t);
+        if (!priv->obj_queue) {
+                gf_log (this->name, GF_LOG_ERROR, "memory allocation failed");
+                goto out;
+        }
+
+        INIT_LIST_HEAD (&priv->obj_queue->objects);
+
+        for (i = 0; i < BR_WORKERS; i++) {
+                /* capture the result, otherwise the check below is dead */
+                ret = gf_thread_create (&priv->obj_queue->workers[i], NULL,
+                                        br_process_object, this);
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "thread creation failed (%s)",
+                                strerror (errno));
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+out:
+        if (ret) {
+                /* priv is NULL when we bailed out before (or while)
+                 * allocating it */
+                if (priv) {
+                        if (priv->obj_queue)
+                                GF_FREE (priv->obj_queue);
+                        if (priv->children)
+                                GF_FREE (priv->children);
+                        if (priv->timer_wheel)
+                                gf_tw_cleanup_timers (priv->timer_wheel);
+                        GF_FREE (priv);
+                }
+                /* never leave a pointer to freed memory behind */
+                this->private = NULL;
+                return ret;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG, "bit-rot xlator loaded");
+        return ret;
+}
+
+/**
+ * Tear down the bit-rot xlator: release the per-child state, clean up the
+ * timer wheel (when one was created) and free the private structure.
+ * Does nothing when the xlator has no private structure.
+ */
+void
+fini (xlator_t *this)
+{
+        br_private_t *priv = this->private;
+
+        if (priv != NULL) {
+                br_free_children (this);
+
+                if (priv->timer_wheel != NULL)
+                        gf_tw_cleanup_timers (priv->timer_wheel);
+
+                /* detach before freeing so no stale pointer is left */
+                this->private = NULL;
+                GF_FREE (priv);
+        }
+}
+
+/* No file operation is overridden: the table has no initializer and is
+ * therefore zero-initialized. */
+struct xlator_fops fops;
+
+/* No callback is overridden: the table has no initializer and is
+ * therefore zero-initialized. */
+struct xlator_cbks cbks;
+
+/* Options understood by this xlator; the list is terminated by an entry
+ * with a NULL key. "expiry-time" is read into priv->expiry_time by init()
+ * (the unit is presumably seconds -- TODO confirm). */
+struct volume_options options[] = {
+ { .key = {"expiry-time"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "120",
+ .description = "default time duration for which an object waits "
+ "before it is signed",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
new file mode 100644
index 00000000000..ab9fd806232
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -0,0 +1,126 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __BIT_ROT_H__
+#define __BIT_ROT_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "syncop.h"
+#include "syncop-utils.h"
+#include "changelog.h"
+#include "timer-wheel.h"
+
+#include "bit-rot-common.h"
+#include "bit-rot-stub-mem-types.h"
+
+#include <openssl/sha.h>
+
+/* TODO: make this configurable */
+#define BR_WORKERS 8
+
+/* Size of a signature carrying a hash of 'hl' bytes: the br_isignature_t
+ * header, the hash itself and one extra byte. The argument is
+ * parenthesized so that expression arguments expand correctly. */
+#define signature_size(hl) (sizeof (br_isignature_t) + (hl) + 1)
+
+/* Per-subvolume state of the bit-rot daemon; one entry per child of the
+ * bit-rot xlator (see br_private.children). */
+struct br_child {
+ char child_up; /* Indicates whether this child is
+ up or not */
+ xlator_t *xl; /* client xlator corresponding to
+ this child */
+ inode_table_t *table; /* inode table for this child */
+ char brick_path[PATH_MAX]; /* brick export directory of this
+ child */
+ struct list_head list; /* hook to attach to the list of
+ UP children */
+ xlator_t *this; /* Bit rot xlator */
+
+ pthread_t thread; /* initial crawler for unsigned
+ object(s) */
+ int threadrunning; /* 1 once the crawler thread has been
+ spawned successfully, 0 otherwise */
+
+ struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */
+
+ struct timeval tv; /* stub birth time fetched from the brick
+ when connecting to it */
+};
+
+typedef struct br_child br_child_t;
+
+/* Queue of objects that are ready to be signed, together with the worker
+ * threads that consume the queue. */
+struct br_obj_n_workers {
+ struct list_head objects; /* queue of objects expired from the
+ timer wheel and ready to be picked
+ up for signing */
+ pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects
+ from the above queue and start
+ signing each object */
+};
+
+typedef struct br_obj_n_workers br_obj_n_workers_t;
+
+/* Private state of the bit-rot xlator (xlator_t.private). */
+struct br_private {
+ pthread_mutex_t lock; /* taken by notify() and the CHILD_UP
+ handler thread around accesses to
+ 'bricks', the children's up state
+ and 'up_children' */
+
+ struct list_head bricks; /* list of bricks from which CHILD_UP
+ has been received */
+
+ pthread_cond_t cond; /* handling CHILD_UP notifications */
+ pthread_cond_t object_cond; /* handling signing of objects */
+ int child_count; /* number of entries in 'children' */
+ br_child_t *children; /* list of subvolumes */
+ int up_children; /* number of children currently marked
+ up */
+ pthread_t thread; /* thread for connecting each UP
+ child with changelog */
+ struct tvec_base *timer_wheel; /* timer wheel where the objects which
+ changelog has sent sits and waits
+ for expiry */
+ br_obj_n_workers_t *obj_queue; /* place holder for all the objects
+ that are expired from timer wheel
+ and ready to be picked up for
+ signing and the workers which sign
+ the objects */
+ int32_t expiry_time; /* objects "wait" time */
+};
+
+typedef struct br_private br_private_t;
+
+/* An object (identified by its gfid) that is waiting to be signed. */
+struct br_object {
+ xlator_t *this; /* NOTE(review): presumably the bit-rot
+ xlator -- confirm against callers */
+
+ uuid_t gfid; /* identifier of the object */
+
+ unsigned long signedversion; /* version against which this object will
+ be signed */
+ br_child_t *child; /* object's subvolume */
+
+ struct list_head list; /* hook to add to the queue once the
+ object is expired from timer wheel */
+ void *data; /* opaque pointer; its use is not visible
+ in this header */
+};
+
+typedef struct br_object br_object_t;
+
+void
+br_log_object (xlator_t *, char *, uuid_t, int32_t);
+
+void
+br_log_object_path (xlator_t *, char *, const char *, int32_t);
+
+int32_t
+br_object_checksum (unsigned char *, br_object_t *, fd_t *, struct iatt *);
+
+int32_t
+br_prepare_loc (xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);
+
+#endif /* __BIT_ROT_H__ */
diff --git a/xlators/features/bit-rot/src/stub/Makefile.am b/xlators/features/bit-rot/src/stub/Makefile.am
index 9abcbb76db2..ec6b1ef4506 100644
--- a/xlators/features/bit-rot/src/stub/Makefile.am
+++ b/xlators/features/bit-rot/src/stub/Makefile.am
@@ -8,9 +8,7 @@ bitrot_stub_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = bit-rot-stub.h bit-rot-common.h bit-rot-stub-mem-types.h
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/xlators/features/changelog/lib/src
-
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
index 64779923fd6..492278639b4 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -15,9 +15,13 @@
enum br_mem_types {
gf_br_stub_mt_private_t = gf_common_mt_end + 1,
- gf_br_stub_mt_version_t = gf_common_mt_end + 2,
- gf_br_stub_mt_inode_ctx_t = gf_common_mt_end + 3,
- gf_br_stub_mt_signature_t = gf_common_mt_end + 4,
+ gf_br_stub_mt_version_t,
+ gf_br_stub_mt_inode_ctx_t,
+ gf_br_stub_mt_signature_t,
+ gf_br_mt_br_private_t,
+ gf_br_mt_br_child_t,
+ gf_br_mt_br_object_t,
+ gf_br_mt_br_ob_n_wk_t,
gf_br_stub_mt_end
};
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
index 306306bd585..456e211b89d 100644
--- a/xlators/features/changelog/lib/src/Makefile.am
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -32,8 +32,6 @@ noinst_HEADERS = gf-changelog-helpers.h gf-changelog-rpc.h gf-changelog-journal.
$(CONTRIBDIR)/uuid/uuidd.h $(CONTRIBDIR)/uuid/uuid.h \
$(CONTRIBDIR)/uuid/uuidP.h $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
-libgfchangelog_HEADERS = changelog.h
-
CLEANFILES =
CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
diff --git a/xlators/features/changelog/lib/src/changelog.h b/xlators/features/changelog/lib/src/changelog.h
deleted file mode 100644
index 08307810704..00000000000
--- a/xlators/features/changelog/lib/src/changelog.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _GF_CHANGELOG_H
-#define _GF_CHANGELOG_H
-
-struct gf_brick_spec;
-
-/**
- * Max bit shiter for event selection
- */
-#define CHANGELOG_EV_SELECTION_RANGE 5
-
-#define CHANGELOG_OP_TYPE_JOURNAL (1<<0)
-#define CHANGELOG_OP_TYPE_OPEN (1<<1)
-#define CHANGELOG_OP_TYPE_CREATE (1<<2)
-#define CHANGELOG_OP_TYPE_RELEASE (1<<3)
-#define CHANGELOG_OP_TYPE_BR_RELEASE (1<<4) /* logical release (last close()),
- sent by bitrot stub */
-#define CHANGELOG_OP_TYPE_MAX (1<<CHANGELOG_EV_SELECTION_RANGE)
-
-
-struct ev_open {
- unsigned char gfid[16];
- int32_t flags;
-};
-
-struct ev_creat {
- unsigned char gfid[16];
- int32_t flags;
-};
-
-struct ev_release {
- unsigned char gfid[16];
-};
-
-struct ev_release_br {
- int32_t flags;
- unsigned long version;
- unsigned char gfid[16];
-};
-
-struct ev_changelog {
- char path[PATH_MAX];
-};
-
-typedef struct changelog_event {
- unsigned int ev_type;
-
- union {
- struct ev_open open;
- struct ev_creat create;
- struct ev_release release;
- struct ev_changelog journal;
- struct ev_release_br releasebr;
- } u;
-} changelog_event_t;
-
-#define CHANGELOG_EV_SIZE (sizeof (changelog_event_t))
-
-/**
- * event callback, connected & disconnection defs
- */
-typedef void (CALLBACK) (void *, char *,
- void *, changelog_event_t *);
-typedef void *(INIT) (void *, struct gf_brick_spec *);
-typedef void (FINI) (void *, char *, void *);
-typedef void (CONNECT) (void *, char *, void *);
-typedef void (DISCONNECT) (void *, char *, void *);
-
-struct gf_brick_spec {
- char *brick_path;
- unsigned int filter;
-
- INIT *init;
- FINI *fini;
- CALLBACK *callback;
- CONNECT *connected;
- DISCONNECT *disconnected;
-
- void *ptr;
-};
-
-/* API set */
-
-int
-gf_changelog_register (char *brick_path, char *scratch_dir,
- char *log_file, int log_levl, int max_reconnects);
-ssize_t
-gf_changelog_scan ();
-
-int
-gf_changelog_start_fresh ();
-
-ssize_t
-gf_changelog_next_change (char *bufptr, size_t maxlen);
-
-int
-gf_changelog_done (char *file);
-
-/* newer flexible API */
-int
-gf_changelog_init (void *xl);
-
-int
-gf_changelog_register_generic (struct gf_brick_spec *bricks, int count,
- int ordered, char *logfile, int lvl, void *xl);
-
-#endif