19 files changed, 10347 insertions, 0 deletions
diff --git a/xlators/features/bit-rot/src/Makefile.am b/xlators/features/bit-rot/src/Makefile.am
new file mode 100644
index 00000000000..b5e4a7d62a0
--- /dev/null
+++ b/xlators/features/bit-rot/src/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = stub bitd
diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am
new file mode 100644
index 00000000000..6db800e6565
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/Makefile.am
@@ -0,0 +1,23 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = bit-rot.la
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+bit_rot_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+	-I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \
+	-I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/timer-wheel \
+	-I$(top_srcdir)/xlators/features/bit-rot/src/stub
+
+bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-ssm.c \
+		     bit-rot-scrub-status.c
+bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+	$(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la
+
+noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-bitd-messages.h bit-rot-ssm.h \
+		 bit-rot-scrub-status.h
+
+AM_CFLAGS = -Wall -DBR_RATE_LIMIT_SIGNER $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
new file mode 100644
index 00000000000..5bc5103a27c
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _BITROT_BITD_MESSAGES_H_
+#define _BITROT_BITD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(BITROT_BITD, BRB_MSG_FD_CREATE_FAILED, BRB_MSG_READV_FAILED,
+           BRB_MSG_BLOCK_READ_FAILED, BRB_MSG_CALC_CHECKSUM_FAILED,
+           BRB_MSG_NO_MEMORY, BRB_MSG_GET_SIGN_FAILED, BRB_MSG_SET_SIGN_FAILED,
+           BRB_MSG_OP_FAILED, BRB_MSG_READ_AND_SIGN_FAILED, BRB_MSG_SIGN_FAILED,
+           BRB_MSG_GET_SUBVOL_FAILED, BRB_MSG_SET_TIMER_FAILED,
+           BRB_MSG_GET_INFO_FAILED, BRB_MSG_PATH_FAILED, BRB_MSG_MARK_BAD_FILE,
+           BRB_MSG_TRIGGER_SIGN, BRB_MSG_REGISTER_FAILED,
+           BRB_MSG_CRAWLING_START, BRB_MSG_SPAWN_FAILED,
+           BRB_MSG_INVALID_SUBVOL_CHILD, BRB_MSG_SKIP_OBJECT, BRB_MSG_NO_CHILD,
+           BRB_MSG_CHECKSUM_MISMATCH, BRB_MSG_MARK_CORRUPTED,
+           BRB_MSG_CRAWLING_FINISH, BRB_MSG_CALC_ERROR, BRB_MSG_LOOKUP_FAILED,
+           BRB_MSG_PARTIAL_VERSION_PRESENCE, BRB_MSG_MEM_ACNT_FAILED,
+           BRB_MSG_TIMER_WHEEL_UNAVAILABLE, BRB_MSG_BITROT_LOADED,
+           BRB_MSG_SCALE_DOWN_FAILED, BRB_MSG_SCALE_UP_FAILED,
+           BRB_MSG_SCALE_DOWN_SCRUBBER, BRB_MSG_SCALING_UP_SCRUBBER,
+           BRB_MSG_UNKNOWN_THROTTLE, BRB_MSG_RATE_LIMIT_INFO,
+           BRB_MSG_SCRUB_INFO, BRB_MSG_CONNECTED_TO_BRICK, BRB_MSG_BRICK_INFO,
+           BRB_MSG_SUBVOL_CONNECT_FAILED, BRB_MSG_INVALID_SUBVOL,
+           BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, BRB_MSG_SCRUB_START,
+           BRB_MSG_SCRUB_FINISH, BRB_MSG_SCRUB_RUNNING,
+           BRB_MSG_SCRUB_RESCHEDULED, BRB_MSG_SCRUB_TUNABLE,
+           BRB_MSG_SCRUB_THREAD_CLEANUP, BRB_MSG_SCRUBBER_CLEANED,
+           BRB_MSG_GENERIC_SSM_INFO, BRB_MSG_ZERO_TIMEOUT_BUG,
+           BRB_MSG_BAD_OBJ_READDIR_FAIL, BRB_MSG_SSM_FAILED,
+           BRB_MSG_SCRUB_WAIT_FAILED, BRB_MSG_TRIGGER_SIGN_FAILED,
+           BRB_MSG_EVENT_UNHANDLED, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB,
+           BRB_MSG_THREAD_CREATION_FAILED, BRB_MSG_MEM_POOL_ALLOC,
+           BRB_MSG_SAVING_HASH_FAILED);
+
+#define BRB_MSG_FD_CREATE_FAILED_STR "failed to create fd for the inode"
+#define BRB_MSG_READV_FAILED_STR "readv failed"
+#define BRB_MSG_BLOCK_READ_FAILED_STR "reading block failed"
+#define BRB_MSG_NO_MEMORY_STR "failed to allocate memory"
+#define BRB_MSG_CALC_CHECKSUM_FAILED_STR "calculating checksum failed"
+#define BRB_MSG_GET_SIGN_FAILED_STR "failed to get the signature"
+#define BRB_MSG_SET_SIGN_FAILED_STR "signing failed"
+#define BRB_MSG_OP_FAILED_STR "failed on object"
+#define BRB_MSG_TRIGGER_SIGN_FAILED_STR "Could not trigger signing"
+#define BRB_MSG_READ_AND_SIGN_FAILED_STR "reading and signing of object failed"
+#define BRB_MSG_SET_TIMER_FAILED_STR "Failed to allocate object expiry timer"
+#define BRB_MSG_GET_SUBVOL_FAILED_STR                                          \
+    "failed to get the subvolume for the brick"
+#define BRB_MSG_PATH_FAILED_STR "path failed"
+#define BRB_MSG_SKIP_OBJECT_STR "Entry is marked corrupted. skipping"
+#define BRB_MSG_PARTIAL_VERSION_PRESENCE_STR                                   \
+    "Partial version xattr presence detected, ignoring"
+#define BRB_MSG_TRIGGER_SIGN_STR "Triggering signing"
+#define BRB_MSG_CRAWLING_START_STR                                             \
+    "Crawling brick, scanning for unsigned objects"
+#define BRB_MSG_CRAWLING_FINISH_STR "Completed crawling brick"
+#define BRB_MSG_REGISTER_FAILED_STR "Register to changelog failed"
+#define BRB_MSG_SPAWN_FAILED_STR "failed to spawn"
+#define BRB_MSG_CONNECTED_TO_BRICK_STR "Connected to brick"
+#define BRB_MSG_LOOKUP_FAILED_STR "lookup on root failed"
+#define BRB_MSG_GET_INFO_FAILED_STR "failed to get stub info"
+#define BRB_MSG_SCRUB_THREAD_CLEANUP_STR "Error cleaning up scanner thread"
+#define BRB_MSG_SCRUBBER_CLEANED_STR "cleaned up scrubber for brick"
+#define BRB_MSG_SUBVOL_CONNECT_FAILED_STR                                      \
+    "callback handler for subvolume failed"
+#define BRB_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
+#define BRB_MSG_EVENT_UNHANDLED_STR "Event unhandled for child"
+#define BRB_MSG_INVALID_SUBVOL_STR "Got event from invalid subvolume"
+#define BRB_MSG_RESCHEDULE_SCRUBBER_FAILED_STR                                 \
+    "on demand scrub schedule failed. Scrubber is not in pending state."
+#define BRB_MSG_COULD_NOT_SCHEDULE_SCRUB_STR                                   \
+    "Could not schedule ondemand scrubbing. Scrubbing will continue "          \
+    "according to old frequency."
+#define BRB_MSG_THREAD_CREATION_FAILED_STR "thread creation failed"
+#define BRB_MSG_RATE_LIMIT_INFO_STR "Rate Limit Info"
+#define BRB_MSG_MEM_POOL_ALLOC_STR "failed to allocate mem-pool for timer"
+#define BRB_MSG_NO_CHILD_STR "FATAL: no children"
+#define BRB_MSG_TIMER_WHEEL_UNAVAILABLE_STR "global timer wheel unavailable"
+#define BRB_MSG_BITROT_LOADED_STR "bit-rot xlator loaded"
+#define BRB_MSG_SAVING_HASH_FAILED_STR                                         \
+    "failed to allocate memory for saving hash of the object"
+#endif /* !_BITROT_BITD_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
new file mode 100644
index 00000000000..5cef2ffa5e5
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
@@ -0,0 +1,78 @@
+/*
+  Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+#include <stdio.h>
+
+#include "bit-rot-scrub-status.h"
+
+void
+br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat)
+{
+    if (!scrub_stat)
+        return;
+
+    pthread_mutex_lock(&scrub_stat->lock);
+    {
+        scrub_stat->unsigned_files++;
+    }
+    pthread_mutex_unlock(&scrub_stat->lock);
+}
+
+void
+br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat)
+{
+    if (!scrub_stat)
+        return;
+
+    pthread_mutex_lock(&scrub_stat->lock);
+    {
+        scrub_stat->scrubbed_files++;
+    }
+    pthread_mutex_unlock(&scrub_stat->lock);
+}
+
+void
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time)
+{
+    if (!scrub_stat)
+        return;
+
+    pthread_mutex_lock(&scrub_stat->lock);
+    {
+        scrub_stat->scrub_start_time = time;
+    }
+    pthread_mutex_unlock(&scrub_stat->lock);
+}
+
+void
+br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
+                            time_t time)
+{
+    int lst_size = 0;
+
+    if (!scrub_stat)
+        return;
+
+    lst_size = sizeof(scrub_stat->last_scrub_time);
+    if (strlen(timestr) >= lst_size)
+        return;
+
+    pthread_mutex_lock(&scrub_stat->lock);
+    {
+        scrub_stat->scrub_end_time = time;
+
+        scrub_stat->scrub_duration = scrub_stat->scrub_end_time -
+                                     scrub_stat->scrub_start_time;
+
+        snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr);
+    }
+    pthread_mutex_unlock(&scrub_stat->lock);
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
new file mode 100644
index 00000000000..f022aa831eb
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
@@ -0,0 +1,50 @@
+/*
+   Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SCRUB_STATUS_H__
+#define __BIT_ROT_SCRUB_STATUS_H__
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+#include <glusterfs/common-utils.h>
+
+struct br_scrub_stats {
+    uint64_t scrubbed_files; /* Total number of scrubbed files. */
+
+    uint64_t unsigned_files; /* Total number of unsigned files. */
+
+    uint64_t scrub_duration; /* Duration of last scrub. */
+
+    char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */
+
+    time_t scrub_start_time; /* Scrubbing starting time. */
+
+    time_t scrub_end_time; /* Scrubbing finishing time. */
+
+    int8_t scrub_running; /* Whether scrub running or not. */
+
+    pthread_mutex_t lock;
+};
+
+typedef struct br_scrub_stats br_scrub_stats_t;
+
+void
+br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat);
+void
+br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat);
+void
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time);
+void
+br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
+                            time_t time);
+
+#endif /* __BIT_ROT_SCRUB_STATUS_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
new file mode 100644
index 00000000000..289dd53f610
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -0,0 +1,2070 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#include <math.h>
+#include <ctype.h>
+#include <sys/uio.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+
+#include "bit-rot-scrub.h"
+#include <pthread.h>
+#include "bit-rot-bitd-messages.h"
+#include "bit-rot-scrub-status.h"
+#include <glusterfs/events.h>
+
+struct br_scrubbers {
+    pthread_t scrubthread;
+
+    struct list_head list;
+};
+
+struct br_fsscan_entry {
+    void *data;
+
+    loc_t parent;
+
+    gf_dirent_t *entry;
+
+    struct br_scanfs *fsscan; /* backpointer to subvolume scanner */
+
+    struct list_head list;
+};
+
+/**
+ * fetch signature extended attribute from an object's fd.
+ * NOTE: On success @xattr is not unref'd as @sign points
+ * to the dictionary value.
+ */
+static int32_t
+bitd_fetch_signature(xlator_t *this, br_child_t *child, fd_t *fd,
+                     dict_t **xattr, br_isignature_out_t **sign)
+{
+    int32_t ret = -1;
+
+    ret = syncop_fgetxattr(child->xl, fd, xattr, GLUSTERFS_GET_OBJECT_SIGNATURE,
+                           NULL, NULL);
+    if (ret < 0) {
+        br_log_object(this, "fgetxattr", fd->inode->gfid, -ret);
+        goto out;
+    }
+
+    ret = dict_get_ptr(*xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)sign);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+               "failed to extract signature info [GFID: %s]",
+               uuid_utoa(fd->inode->gfid));
+        goto unref_dict;
+    }
+
+    return 0;
+
+unref_dict:
+    dict_unref(*xattr);
+out:
+    return -1;
+}
+
+/**
+ * POST COMPUTE CHECK
+ *
+ * Re-fetch the signature after checksumming. Returns -1 (skip the object) if
+ * the fetch or copy allocation fails, the signature is stale, or its version
+ * differs from @version. On success returns 0 and *signature is a GF_MALLOC'd
+ * copy of the signature that the caller must GF_FREE().
+ */
+
+int32_t
+bitd_scrub_post_compute_check(xlator_t *this, br_child_t *child, fd_t *fd,
+                              unsigned long version,
+                              br_isignature_out_t **signature,
+                              br_scrub_stats_t *scrub_stat,
+                              gf_boolean_t skip_stat)
+{
+    int32_t ret = 0;
+    size_t signlen = 0;
+    dict_t *xattr = NULL;
+    br_isignature_out_t *signptr = NULL;
+
+    ret = bitd_fetch_signature(this, child, fd, &xattr, &signptr);
+    if (ret < 0) {
+        if (!skip_stat)
+            br_inc_unsigned_file_count(scrub_stat);
+        goto out;
+    }
+
+    /**
+     * Either the object got dirtied while the signature was calculated OR
+     * the version saved during the pre-compute check no longer matches,
+     * i.e. the object got dirtied and re-signed between the scrubber's pre
+     * and post compute checks (checksum window).
+     */
+    if (signptr->stale || (signptr->version != version)) {
+        if (!skip_stat)
+            br_inc_unsigned_file_count(scrub_stat);
+        gf_msg_debug(this->name, 0,
+                     "<STAGE: POST> Object [GFID: %s] "
+                     "either has a stale signature OR underwent "
+                     "signing during checksumming {Stale: %d | "
+                     "Version: %lu,%lu}",
+                     uuid_utoa(fd->inode->gfid), (signptr->stale) ? 1 : 0,
+                     version, signptr->version);
+        ret = -1;
+        goto unref_dict;
+    }
+
+    signlen = signptr->signaturelen;
+    *signature = GF_MALLOC(sizeof(br_isignature_out_t) + signlen,
+                           gf_common_mt_char);
+    if (!*signature) {
+        ret = -1; /* allocation failed: do not copy into a NULL buffer */
+        goto unref_dict;
+    }
+    (void)memcpy(*signature, signptr, sizeof(br_isignature_out_t) + signlen);
+    (*signature)->signaturelen = signlen;
+
+unref_dict:
+    dict_unref(xattr);
+out:
+    return ret;
+}
+
+static int32_t
+bitd_signature_staleness(xlator_t *this, br_child_t *child, fd_t *fd,
+                         int *stale, unsigned long *version,
+                         br_scrub_stats_t *scrub_stat, gf_boolean_t skip_stat)
+{
+    int32_t ret = -1;
+    dict_t *xattr = NULL;
+    br_isignature_out_t *signptr = NULL;
+
+    ret = bitd_fetch_signature(this, child, fd, &xattr, &signptr);
+    if (ret < 0) {
+        if (!skip_stat)
+            br_inc_unsigned_file_count(scrub_stat);
+        goto out;
+    }
+
+    /**
+     * save version for validation in post compute stage
+     * c.f. bitd_scrub_post_compute_check()
+     */
+    *stale = signptr->stale ? 1 : 0;
+    *version = signptr->version;
+
+    dict_unref(xattr);
+
+out:
+    return ret;
+}
+
+/**
+ * PRE COMPUTE CHECK
+ *
+ * Checks to be performed before initiating object signature calculation.
+ * An object is skipped if:
+ *  - it's already marked corrupted
+ *  - has stale signature
+ */
+int32_t
+bitd_scrub_pre_compute_check(xlator_t *this, br_child_t *child, fd_t *fd,
+                             unsigned long *version,
+                             br_scrub_stats_t *scrub_stat,
+                             gf_boolean_t skip_stat)
+{
+    int stale = 0;
+    int32_t ret = -1;
+
+    if (bitd_is_bad_file(this, child, NULL, fd)) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT,
+               "Object [GFID: %s] is marked corrupted, skipping..",
+               uuid_utoa(fd->inode->gfid));
+        goto out;
+    }
+
+    ret = bitd_signature_staleness(this, child, fd, &stale, version, scrub_stat,
+                                   skip_stat);
+    if (!ret && stale) {
+        if (!skip_stat)
+            br_inc_unsigned_file_count(scrub_stat);
+        gf_msg_debug(this->name, 0,
+                     "<STAGE: PRE> Object [GFID: %s] "
+                     "has stale signature",
+                     uuid_utoa(fd->inode->gfid));
+        ret = -1;
+    }
+
+out:
+    return ret;
+}
+
+/**
+ * Compare stored signature @sign with computed digest @md: returns 0 on a
+ * match, else marks the object bad and returns the marking result (or -1).
+ */
+int
+bitd_compare_ckum(xlator_t *this, br_isignature_out_t *sign, unsigned char *md,
+                  inode_t *linked_inode, gf_dirent_t *entry, fd_t *fd,
+                  br_child_t *child, loc_t *loc)
+{
+    int ret = -1;
+    dict_t *xattr = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, sign, out);
+    GF_VALIDATE_OR_GOTO(this->name, fd, out);
+    GF_VALIDATE_OR_GOTO(this->name, child, out);
+    GF_VALIDATE_OR_GOTO(this->name, linked_inode, out);
+    GF_VALIDATE_OR_GOTO(this->name, md, out);
+    GF_VALIDATE_OR_GOTO(this->name, entry, out);
+
+    /* Digests are raw bytes and may contain NULs, so compare with memcmp():
+     * strncmp() would stop at the first NUL and report a false match. */
+    if (memcmp(sign->signature, md, sign->signaturelen) == 0) {
+        gf_msg_debug(this->name, 0,
+                     "%s [GFID: %s | Brick: %s] matches calculated checksum",
+                     loc->path, uuid_utoa(linked_inode->gfid),
+                     child->brick_path);
+        return 0;
+    }
+
+    gf_msg(this->name, GF_LOG_DEBUG, 0, BRB_MSG_CHECKSUM_MISMATCH,
+           "Object checksum mismatch: %s [GFID: %s | Brick: %s]", loc->path,
+           uuid_utoa(linked_inode->gfid), child->brick_path);
+    gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_CHECKSUM_MISMATCH,
+           "CORRUPTION DETECTED: Object %s {Brick: %s | GFID: %s}", loc->path,
+           child->brick_path, uuid_utoa(linked_inode->gfid));
+
+    /* Perform bad-file marking */
+    xattr = dict_new();
+    if (!xattr)
+        goto out; /* ret is still -1 */
+
+    ret = dict_set_int32(xattr, BITROT_OBJECT_BAD_KEY, _gf_true);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE,
+               "Error setting bad-file marker for %s [GFID: %s | Brick: %s]",
+               loc->path, uuid_utoa(linked_inode->gfid), child->brick_path);
+        goto dictfree;
+    }
+
+    gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_MARK_CORRUPTED,
+           "Marking %s [GFID: %s | Brick: %s] as corrupted..", loc->path,
+           uuid_utoa(linked_inode->gfid), child->brick_path);
+    gf_event(EVENT_BITROT_BAD_FILE, "gfid=%s;path=%s;brick=%s",
+             uuid_utoa(linked_inode->gfid), loc->path, child->brick_path);
+    ret = syncop_fsetxattr(child->xl, fd, xattr, 0, NULL, NULL);
+    if (ret)
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE,
+               "Error marking object %s [GFID: %s] as corrupted", loc->path,
+               uuid_utoa(linked_inode->gfid));
+
+dictfree:
+    dict_unref(xattr);
+out:
+    return ret;
+}
+
+/**
+ * "The Scrubber"
+ *
+ * Perform signature validation for a given object with the assumption
+ * that the signature is SHA256 (because signer as of now _always_
+ * signs with SHA256).
+ * Returns 0 for non-candidates and completed comparisons; non-zero when a
+ * pre/post compute check skips the object or an operation fails.
+ */
+int
+br_scrubber_scrub_begin(xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+    int32_t ret = -1;
+    fd_t *fd = NULL;
+    loc_t loc = {0};
+    struct iatt iatt = {0};
+    struct iatt parent_buf = {0};
+    pid_t pid = 0;
+    br_child_t *child = NULL;
+    unsigned char *md = NULL;
+    inode_t *linked_inode = NULL;
+    br_isignature_out_t *sign = NULL;
+    unsigned long signedversion = 0;
+    gf_dirent_t *entry = NULL;
+    br_private_t *priv = NULL;
+    loc_t *parent = NULL;
+    gf_boolean_t skip_stat = _gf_false;
+    uuid_t shard_root_gfid = {0};
+
+    GF_VALIDATE_OR_GOTO("bit-rot", fsentry, out);
+
+    entry = fsentry->entry;
+    parent = &fsentry->parent;
+    child = fsentry->data;
+
+    priv = this->private;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", entry, out);
+    GF_VALIDATE_OR_GOTO("bit-rot", parent, out);
+    GF_VALIDATE_OR_GOTO("bit-rot", child, out);
+    GF_VALIDATE_OR_GOTO("bit-rot", priv, out);
+
+    pid = GF_CLIENT_PID_SCRUB;
+
+    ret = br_prepare_loc(this, child, parent, entry, &loc);
+    if (!ret)
+        goto out;
+
+    syncopctx_setfspid(&pid);
+
+    ret = syncop_lookup(child->xl, &loc, &iatt, &parent_buf, NULL, NULL);
+    if (ret) {
+        br_log_object_path(this, "lookup", loc.path, -ret);
+        goto out;
+    }
+
+    linked_inode = inode_link(loc.inode, parent->inode, loc.name, &iatt);
+    if (!linked_inode) {
+        ret = -1; /* nothing to scrub (or unref) without a linked inode */
+        goto out;
+    }
+    inode_lookup(linked_inode);
+
+    gf_msg_debug(this->name, 0, "Scrubbing object %s [GFID: %s]", entry->d_name,
+                 uuid_utoa(linked_inode->gfid));
+
+    if (iatt.ia_type != IA_IFREG) {
+        gf_msg_debug(this->name, 0, "%s is not a regular file", entry->d_name);
+        ret = 0;
+        goto unref_inode;
+    }
+
+    if (IS_DHT_LINKFILE_MODE((&iatt))) {
+        gf_msg_debug(this->name, 0, "%s is a dht sticky bit file",
+                     entry->d_name);
+        ret = 0;
+        goto unref_inode;
+    }
+
+    /* skip updating scrub statistics for shard entries */
+    gf_uuid_parse(SHARD_ROOT_GFID, shard_root_gfid);
+    if (gf_uuid_compare(loc.pargfid, shard_root_gfid) == 0)
+        skip_stat = _gf_true;
+
+    /**
+     * open() an fd for subsequent operations
+     */
+    fd = fd_create(linked_inode, 0);
+    if (!fd) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+               "failed to create fd for inode %s",
+               uuid_utoa(linked_inode->gfid));
+        ret = -1;
+        goto unref_inode;
+    }
+
+    ret = syncop_open(child->xl, &loc, O_RDWR, fd, NULL, NULL);
+    if (ret) {
+        br_log_object(this, "open", linked_inode->gfid, -ret);
+        ret = -1;
+        goto unrefd;
+    }
+
+    fd_bind(fd);
+
+    /**
+     * perform pre compute checks before initiating checksum
+     * computation
+     *  - presence of bad object
+     *  - signature staleness
+     */
+    ret = bitd_scrub_pre_compute_check(this, child, fd, &signedversion,
+                                       &priv->scrub_stat, skip_stat);
+    if (ret)
+        goto unrefd; /* skip this object */
+
+    /* if all's good, proceed to calculate the hash */
+    md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char);
+    if (!md) {
+        ret = -1;
+        goto unrefd;
+    }
+
+    ret = br_calculate_obj_checksum(md, child, fd, &iatt);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_ERROR,
+               "error calculating hash for object [GFID: %s]",
+               uuid_utoa(fd->inode->gfid));
+        ret = -1;
+        goto free_md;
+    }
+
+    /**
+     * perform post compute checks as an object's signature may have
+     * become stale while scrubber calculated checksum.
+     */
+    ret = bitd_scrub_post_compute_check(this, child, fd, signedversion, &sign,
+                                        &priv->scrub_stat, skip_stat);
+    if (ret)
+        goto free_md;
+
+    ret = bitd_compare_ckum(this, sign, md, linked_inode, entry, fd, child,
+                            &loc);
+
+    if (!skip_stat)
+        br_inc_scrubbed_file(&priv->scrub_stat);
+
+    GF_FREE(sign); /* allocated on post-compute */
+
+    /** fd_unref() takes care of closing fd.. like syncop_close() */
+
+free_md:
+    GF_FREE(md);
+unrefd:
+    fd_unref(fd);
+unref_inode:
+    inode_unref(linked_inode);
+out:
+    loc_wipe(&loc);
+    return ret;
+}
+
+static void
+_br_lock_cleaner(void *arg)
+{
+    pthread_mutex_t *mutex = arg;
+
+    pthread_mutex_unlock(mutex);
+}
+
+static void
+wait_for_scrubbing(xlator_t *this, struct br_scanfs *fsscan)
+{
+    br_private_t *priv = NULL;
+    struct br_scrubber *fsscrub = NULL;
+
+    priv = this->private;
+    fsscrub = &priv->fsscrub;
+
+    pthread_cleanup_push(_br_lock_cleaner, &fsscan->waitlock);
+    pthread_mutex_lock(&fsscan->waitlock);
+    {
+        pthread_cleanup_push(_br_lock_cleaner, &fsscrub->mutex);
+        pthread_mutex_lock(&fsscrub->mutex);
+        {
+            list_replace_init(&fsscan->queued, &fsscan->ready);
+
+            /* wake up scrubbers */
+            pthread_cond_broadcast(&fsscrub->cond);
+        }
+        pthread_mutex_unlock(&fsscrub->mutex);
+        pthread_cleanup_pop(0);
+
+        while (fsscan->entries != 0)
+            pthread_cond_wait(&fsscan->waitcond, &fsscan->waitlock);
+    }
+    pthread_mutex_unlock(&fsscan->waitlock);
+    pthread_cleanup_pop(0);
+}
+
+static void
+_br_fsscan_inc_entry_count(struct br_scanfs *fsscan)
+{
+    fsscan->entries++;
+}
+
+static void
+_br_fsscan_dec_entry_count(struct br_scanfs *fsscan)
+{
+    if (--fsscan->entries == 0) {
+        pthread_mutex_lock(&fsscan->waitlock);
+        {
+            pthread_cond_signal(&fsscan->waitcond);
+        }
+        pthread_mutex_unlock(&fsscan->waitlock);
+    }
+}
+
+static void
+_br_fsscan_collect_entry(struct br_scanfs *fsscan,
+                         struct br_fsscan_entry *fsentry)
+{
+    list_add_tail(&fsentry->list, &fsscan->queued);
+    _br_fsscan_inc_entry_count(fsscan);
+}
+
+#define NR_ENTRIES (1 << 7) /* ..bulk scrubbing */
+
+/* Queue a copy of @entry for scrubbing; returns 0 on success, -1 on error. */
+int
+br_fsscanner_handle_entry(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                          void *data)
+{
+    int32_t ret = -1;
+    int scrub = 0;
+    br_child_t *child = NULL;
+    xlator_t *this = NULL;
+    struct br_scanfs *fsscan = NULL;
+    struct br_fsscan_entry *fsentry = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", subvol, error_return);
+    GF_VALIDATE_OR_GOTO("bit-rot", data, error_return);
+
+    child = data;
+    this = child->this;
+    fsscan = &child->fsscan;
+
+    _mask_cancellation();
+
+    fsentry = GF_CALLOC(1, sizeof(*fsentry), gf_br_mt_br_fsscan_entry_t);
+    if (!fsentry)
+        goto unmask;
+
+    fsentry->data = data;
+    fsentry->fsscan = &child->fsscan;
+
+    /* copy parent loc */
+    ret = loc_copy(&fsentry->parent, parent);
+    if (ret)
+        goto dealloc;
+
+    /* copy child entry */
+    fsentry->entry = entry_copy(entry);
+    if (!fsentry->entry)
+        goto locwipe;
+    INIT_LIST_HEAD(&fsentry->list);
+
+    LOCK(&fsscan->entrylock);
+    {
+        _br_fsscan_collect_entry(fsscan, fsentry);
+
+        /**
+         * need not be an equality check as entries may be pushed
+         * back onto the scanned queue when thread(s) are cleaned.
+         */
+        if (fsscan->entries >= NR_ENTRIES)
+            scrub = 1;
+    }
+    UNLOCK(&fsscan->entrylock);
+
+    _unmask_cancellation();
+
+    if (scrub)
+        wait_for_scrubbing(this, fsscan);
+
+    return 0;
+
+locwipe:
+    loc_wipe(&fsentry->parent);
+dealloc:
+    GF_FREE(fsentry);
+unmask:
+    _unmask_cancellation(); /* pair with _mask_cancellation() on failures too */
+error_return:
+    return -1;
+}
+
+int32_t
+br_fsscan_deactivate(xlator_t *this)
+{
+    int ret = 0;
+    br_private_t *priv = NULL;
+    br_scrub_state_t nstate = 0;
+    struct br_monitor *scrub_monitor = NULL;
+
+    priv = this->private;
+    scrub_monitor = &priv->scrub_monitor;
+
+    ret = gf_tw_del_timer(priv->timer_wheel, scrub_monitor->timer);
+    if (ret == 0) {
+        nstate = BR_SCRUB_STATE_STALLED;
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+               "Volume is under active scrubbing. Pausing scrub..");
+    } else {
+        nstate = BR_SCRUB_STATE_PAUSED;
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+               "Scrubber paused");
+    }
+
+    _br_monitor_set_scrub_state(scrub_monitor, nstate);
+
+    return 0;
+}
+
+static void
+br_scrubber_log_time(xlator_t *this, const char *sfx)
+{
+    char timestr[GF_TIMESTR_SIZE] = {
+        0,
+    };
+    br_private_t *priv = NULL;
+    time_t now = 0;
+
+    now = gf_time();
+    priv = this->private;
+
+    gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
+
+    if (strcasecmp(sfx, "started") == 0) {
+        br_update_scrub_start_time(&priv->scrub_stat, now);
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
+               "Scrubbing %s at %s", sfx, timestr);
+    } else {
+        br_update_scrub_finish_time(&priv->scrub_stat, timestr, now);
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
+               "Scrubbing %s at %s", sfx, timestr);
+    }
+}
+
+static void
+br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx)
+{
+    char timestr[GF_TIMESTR_SIZE] = {
+        0,
+    };
+    time_t now = 0;
+
+    now = gf_time();
+    gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
+
+    if (strcasecmp(sfx, "started") == 0) {
+        gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s",
+                     child->brick_path, sfx, timestr);
+    } else {
+        gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s",
+                     child->brick_path, sfx, timestr);
+    }
+}
+
+void
+br_child_set_scrub_state(br_child_t *child, gf_boolean_t state)
+{
+    child->active_scrubbing = state;
+}
+
+static void
+br_fsscanner_wait_until_kicked(xlator_t *this, br_child_t *child)
+{
+    br_private_t *priv = NULL;
+    struct br_monitor *scrub_monitor = NULL;
+
+    priv = this->private;
+    scrub_monitor = &priv->scrub_monitor;
+
+    pthread_cleanup_push(_br_lock_cleaner, &scrub_monitor->wakelock);
+    pthread_mutex_lock(&scrub_monitor->wakelock);
+    {
+        while (!scrub_monitor->kick)
+            pthread_cond_wait(&scrub_monitor->wakecond,
+                              &scrub_monitor->wakelock);
+
+        /* Child lock is to synchronize with disconnect events */
+        pthread_cleanup_push(_br_lock_cleaner, &child->lock);
+        pthread_mutex_lock(&child->lock);
+        {
+            scrub_monitor->active_child_count++;
+            br_child_set_scrub_state(child, _gf_true);
+        }
+        pthread_mutex_unlock(&child->lock);
+        pthread_cleanup_pop(0);
+    }
+    pthread_mutex_unlock(&scrub_monitor->wakelock);
+    pthread_cleanup_pop(0);
+}
+
+/**
+ * Prologue of a scrub run. With the monitor lock held: promote a PENDING
+ * monitor to ACTIVE (any other state is left untouched), log the start
+ * time and mark the scrub statistics as running.
+ */
+static void
+br_scrubber_entry_control(xlator_t *this)
+{
+    br_private_t *conf = this->private;
+    struct br_monitor *monitor = &conf->scrub_monitor;
+
+    LOCK(&monitor->lock);
+    {
+        if (BR_SCRUB_STATE_PENDING == monitor->state) {
+            /* PENDING -> ACTIVE */
+            monitor->state = BR_SCRUB_STATE_ACTIVE;
+        }
+
+        br_scrubber_log_time(this, "started");
+        conf->scrub_stat.scrub_running = 1;
+    }
+    UNLOCK(&monitor->lock);
+}
+
+/**
+ * Epilogue of a scrub run. With the monitor lock held: log the finish
+ * time, clear the running flag and, when the monitor is still ACTIVE,
+ * re-arm the scrub timer through br_fsscan_activate(). In any other
+ * state nothing is re-armed and, once the lock has been dropped, an
+ * informational message is logged instead.
+ */
+static void
+br_scrubber_exit_control(xlator_t *this)
+{
+    br_private_t *conf = this->private;
+    struct br_monitor *monitor = &conf->scrub_monitor;
+    gf_boolean_t rearm = _gf_false;
+
+    LOCK(&monitor->lock);
+    {
+        br_scrubber_log_time(this, "finished");
+        conf->scrub_stat.scrub_running = 0;
+
+        if (monitor->state == BR_SCRUB_STATE_ACTIVE) {
+            rearm = _gf_true;
+            (void)br_fsscan_activate(this);
+        }
+    }
+    UNLOCK(&monitor->lock);
+
+    if (!rearm) {
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+               "Volume waiting to get rescheduled..");
+    }
+}
+
+/**
+ * Per-brick precursor of a filesystem scan: logs the scan start time for
+ * @child via br_fsscanner_log_time().
+ */
+static void
+br_fsscanner_entry_control(xlator_t *this, br_child_t *child)
+{
+    br_fsscanner_log_time(this, child, "started");
+}
+
+/**
+ * Per-brick exit path of a filesystem scan.
+ *
+ * Warns if @child got disconnected meanwhile, logs the finish time, and
+ * then acts as a barrier between scanner threads: every thread drops its
+ * share of ->active_child_count under ->wakelock; the thread bringing the
+ * count to zero clears ->kick, wakes the waiting siblings and signals the
+ * monitor through ->donecond, while all other threads wait on ->wakecond
+ * until the count reaches zero.
+ *
+ * Lock order: wakelock -> child->lock (released) -> donelock.
+ */
+static void
+br_fsscanner_exit_control(xlator_t *this, br_child_t *child)
+{
+    br_private_t *priv = NULL;
+    struct br_monitor *scrub_monitor = NULL;
+
+    priv = this->private;
+    scrub_monitor = &priv->scrub_monitor;
+
+    if (!_br_is_child_connected(child)) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SCRUB_INFO,
+               "Brick [%s] disconnected while scrubbing. Scrubbing "
+               "might be incomplete",
+               child->brick_path);
+    }
+
+    br_fsscanner_log_time(this, child, "finished");
+
+    pthread_cleanup_push(_br_lock_cleaner, &scrub_monitor->wakelock);
+    pthread_mutex_lock(&scrub_monitor->wakelock);
+    {
+        scrub_monitor->active_child_count--;
+        /* child->lock protects the per-child scrub state flag */
+        pthread_cleanup_push(_br_lock_cleaner, &child->lock);
+        pthread_mutex_lock(&child->lock);
+        {
+            br_child_set_scrub_state(child, _gf_false);
+        }
+        pthread_mutex_unlock(&child->lock);
+        pthread_cleanup_pop(0);
+
+        if (scrub_monitor->active_child_count == 0) {
+            /* The last child has finished scrubbing.
+             * Set the kick to false and wake up other
+             * children who are waiting for the last
+             * child to complete scrubbing.
+             */
+            scrub_monitor->kick = _gf_false;
+            pthread_cond_broadcast(&scrub_monitor->wakecond);
+
+            /* Signal the monitor thread waiting for all
+             * the children to finish scrubbing.
+             */
+            pthread_cleanup_push(_br_lock_cleaner, &scrub_monitor->donelock);
+            pthread_mutex_lock(&scrub_monitor->donelock);
+            {
+                scrub_monitor->done = _gf_true;
+                pthread_cond_signal(&scrub_monitor->donecond);
+            }
+            pthread_mutex_unlock(&scrub_monitor->donelock);
+            pthread_cleanup_pop(0);
+        } else {
+            /* not the last one: wait for the remaining children */
+            while (scrub_monitor->active_child_count)
+                pthread_cond_wait(&scrub_monitor->wakecond,
+                                  &scrub_monitor->wakelock);
+        }
+    }
+    pthread_mutex_unlock(&scrub_monitor->wakelock);
+    pthread_cleanup_pop(0);
+}
+
+/**
+ * Thread body of the per-brick filesystem scanner; @arg is the br_child_t
+ * to scan. The thread loops forever: wait for a kick, log the start, walk
+ * the brick from its root inode with syncop_ftw() (each entry is handed
+ * to br_fsscanner_handle_entry), wait for entries still queued, then run
+ * the exit control. The return statement is never reached.
+ */
+void *
+br_fsscanner(void *arg)
+{
+    br_child_t *child = arg;
+    xlator_t *this = child->this;
+    struct br_scanfs *fsscan = &child->fsscan;
+    loc_t root = {
+        0,
+    };
+
+    THIS = this;
+    root.inode = child->table->root;
+
+    for (;;) {
+        br_fsscanner_wait_until_kicked(this, child);
+
+        /* precursor for scrub */
+        br_fsscanner_entry_control(this, child);
+
+        /* scrub: the walk result is deliberately ignored */
+        (void)syncop_ftw(child->xl, &root, GF_CLIENT_PID_SCRUB, child,
+                         br_fsscanner_handle_entry);
+        if (!list_empty(&fsscan->queued)) {
+            wait_for_scrubbing(this, fsscan);
+        }
+
+        /* scrub exit criteria */
+        br_fsscanner_exit_control(this, child);
+    }
+
+    return NULL;
+}
+
+/**
+ * Timer callback that starts a scrub run; @data is the struct br_monitor
+ * the timer was armed with (@timer and @calltime are unused).
+ *
+ * Keep this routine extremely simple and never acquire child->lock in
+ * here: doing so may lead to deadlock. Scrubber state is modified in
+ * br_fsscanner(). An intermediate state change to pause moves the scrub
+ * state to the _correct_ state by identifying a non-pending timer.
+ */
+void
+br_kickstart_scanner(struct gf_tw_timer_list *timer, void *data,
+                     unsigned long calltime)
+{
+    struct br_monitor *monitor = data;
+    xlator_t *this = monitor->this;
+    br_private_t *conf = NULL;
+
+    THIS = this;
+    conf = this->private;
+
+    /* start the run with clean scrub statistics */
+    conf->scrub_stat.scrubbed_files = 0;
+    conf->scrub_stat.unsigned_files = 0;
+
+    /* PENDING -> ACTIVE */
+    br_scrubber_entry_control(this);
+
+    /* release every scanner thread blocked on ->wakecond */
+    pthread_mutex_lock(&monitor->wakelock);
+    {
+        monitor->kick = _gf_true;
+        GF_ASSERT(monitor->active_child_count == 0);
+        pthread_cond_broadcast(&monitor->wakecond);
+    }
+    pthread_mutex_unlock(&monitor->wakelock);
+}
+
+/**
+ * Map a scrub interval to the timer delta to use. Currently the identity:
+ * @times is returned unchanged.
+ */
+static uint32_t
+br_fsscan_calculate_delta(uint32_t times)
+{
+    return times;
+}
+
+/*
+ * Scrub intervals, expressed in seconds (a "month" is 30 days). They are
+ * used as timer expiry values and added to gf_time() when logging the
+ * next scheduled run.
+ */
+#define BR_SCRUB_ONDEMAND (1)
+#define BR_SCRUB_MINUTE (60)
+#define BR_SCRUB_HOURLY (60 * 60)
+#define BR_SCRUB_DAILY (1 * 24 * 60 * 60)
+#define BR_SCRUB_WEEKLY (7 * 24 * 60 * 60)
+#define BR_SCRUB_BIWEEKLY (14 * 24 * 60 * 60)
+#define BR_SCRUB_MONTHLY (30 * 24 * 60 * 60)
+
+/**
+ * Translate a scrub frequency into the timer timeout to arm.
+ *
+ * Returns the matching BR_SCRUB_* interval passed through
+ * br_fsscan_calculate_delta(), or 0 for any frequency without an
+ * interval; callers treat 0 as an error.
+ */
+static unsigned int
+br_fsscan_calculate_timeout(scrub_freq_t freq)
+{
+    switch (freq) {
+        case BR_FSSCRUB_FREQ_MINUTE:
+            return br_fsscan_calculate_delta(BR_SCRUB_MINUTE);
+        case BR_FSSCRUB_FREQ_HOURLY:
+            return br_fsscan_calculate_delta(BR_SCRUB_HOURLY);
+        case BR_FSSCRUB_FREQ_DAILY:
+            return br_fsscan_calculate_delta(BR_SCRUB_DAILY);
+        case BR_FSSCRUB_FREQ_WEEKLY:
+            return br_fsscan_calculate_delta(BR_SCRUB_WEEKLY);
+        case BR_FSSCRUB_FREQ_BIWEEKLY:
+            return br_fsscan_calculate_delta(BR_SCRUB_BIWEEKLY);
+        case BR_FSSCRUB_FREQ_MONTHLY:
+            return br_fsscan_calculate_delta(BR_SCRUB_MONTHLY);
+        default:
+            break;
+    }
+
+    /* no interval is defined for this frequency */
+    return 0;
+}
+
+/**
+ * Allocate the scrub timer and arm it for the first run.
+ *
+ * Records the current time in scrub_monitor->boot, derives the timeout
+ * from the configured frequency, adds a freshly allocated timer (firing
+ * br_kickstart_scanner) to the timer wheel and moves the monitor to
+ * PENDING.
+ *
+ * Returns 0 on success, -1 when the timeout is zero or the timer cannot
+ * be allocated.
+ */
+int32_t
+br_fsscan_schedule(xlator_t *this)
+{
+    br_private_t *conf = this->private;
+    struct br_scrubber *fsscrub = &conf->fsscrub;
+    struct br_monitor *monitor = &conf->scrub_monitor;
+    struct gf_tw_timer_list *timer = NULL;
+    uint32_t timo = 0;
+    char when[GF_TIMESTR_SIZE] = {
+        0,
+    };
+
+    monitor->boot = gf_time();
+
+    timo = br_fsscan_calculate_timeout(fsscrub->frequency);
+    if (!timo) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
+               "BUG: Zero schedule timeout");
+        return -1;
+    }
+
+    monitor->timer = GF_CALLOC(1, sizeof(*monitor->timer),
+                               gf_br_stub_mt_br_scanner_freq_t);
+    timer = monitor->timer;
+    if (!timer) {
+        return -1;
+    }
+
+    INIT_LIST_HEAD(&timer->entry);
+    timer->data = monitor;
+    timer->expires = timo;
+    timer->function = br_kickstart_scanner;
+
+    gf_tw_add_timer(conf->timer_wheel, timer);
+    _br_monitor_set_scrub_state(monitor, BR_SCRUB_STATE_PENDING);
+
+    gf_time_fmt(when, sizeof(when), (monitor->boot + timo), gf_timefmt_FT);
+    gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+           "Scrubbing is "
+           "scheduled to run at %s",
+           when);
+
+    return 0;
+}
+
+/**
+ * Re-arm the scrub timer for the next periodic run.
+ *
+ * Clears ->done under ->donelock, modifies the existing timer to expire
+ * after the timeout derived from the configured frequency and moves the
+ * monitor to PENDING.
+ *
+ * Returns 0 on success, -1 when the computed timeout is zero.
+ */
+int32_t
+br_fsscan_activate(xlator_t *this)
+{
+    br_private_t *conf = this->private;
+    struct br_scrubber *fsscrub = &conf->fsscrub;
+    struct br_monitor *monitor = &conf->scrub_monitor;
+    time_t start = 0;
+    uint32_t timo = 0;
+    char when[GF_TIMESTR_SIZE] = {
+        0,
+    };
+
+    start = gf_time();
+    timo = br_fsscan_calculate_timeout(fsscrub->frequency);
+    if (!timo) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
+               "BUG: Zero schedule timeout");
+        return -1;
+    }
+
+    pthread_mutex_lock(&monitor->donelock);
+    monitor->done = _gf_false;
+    pthread_mutex_unlock(&monitor->donelock);
+
+    (void)gf_tw_mod_timer(conf->timer_wheel, monitor->timer, timo);
+    _br_monitor_set_scrub_state(monitor, BR_SCRUB_STATE_PENDING);
+
+    gf_time_fmt(when, sizeof(when), start + timo, gf_timefmt_FT);
+    gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+           "Scrubbing is "
+           "rescheduled to run at %s",
+           when);
+
+    return 0;
+}
+
+/**
+ * Apply a changed scrub frequency to the pending timer.
+ *
+ * Does nothing (returns 0) unless ->frequency_reconf is set. Otherwise
+ * ->done is cleared and the timer is modified through
+ * gf_tw_mod_timer_pending(); a zero result from that call is reported as
+ * "scrubber currently running", any other result moves the monitor to
+ * PENDING and logs the new run time.
+ *
+ * Returns 0, or -1 when the computed timeout is zero.
+ */
+int32_t
+br_fsscan_reschedule(xlator_t *this)
+{
+    br_private_t *conf = this->private;
+    struct br_scrubber *fsscrub = &conf->fsscrub;
+    struct br_monitor *monitor = &conf->scrub_monitor;
+    time_t start = 0;
+    uint32_t timo = 0;
+    int32_t modified = 0;
+    char when[GF_TIMESTR_SIZE] = {
+        0,
+    };
+
+    if (!fsscrub->frequency_reconf) {
+        return 0;
+    }
+
+    start = gf_time();
+    timo = br_fsscan_calculate_timeout(fsscrub->frequency);
+    if (!timo) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
+               "BUG: Zero schedule timeout");
+        return -1;
+    }
+
+    gf_time_fmt(when, sizeof(when), start + timo, gf_timefmt_FT);
+
+    pthread_mutex_lock(&monitor->donelock);
+    monitor->done = _gf_false;
+    pthread_mutex_unlock(&monitor->donelock);
+
+    modified = gf_tw_mod_timer_pending(conf->timer_wheel, monitor->timer,
+                                       timo);
+    if (modified != 0) {
+        _br_monitor_set_scrub_state(monitor, BR_SCRUB_STATE_PENDING);
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+               "Scrubbing rescheduled to run at %s", when);
+    } else {
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+               "Scrubber is currently running and would be "
+               "rescheduled after completion");
+    }
+
+    return 0;
+}
+
+/**
+ * Request an on-demand scrub by pulling the pending timer forward to
+ * BR_SCRUB_ONDEMAND.
+ *
+ * ->done is cleared and the timer is modified through
+ * gf_tw_mod_timer_pending(); a zero result from that call is reported as
+ * "scrubber currently running", any other result moves the monitor to
+ * PENDING and logs the run time. Always returns 0.
+ */
+int32_t
+br_fsscan_ondemand(xlator_t *this)
+{
+    br_private_t *conf = this->private;
+    struct br_monitor *monitor = &conf->scrub_monitor;
+    uint32_t timo = BR_SCRUB_ONDEMAND;
+    time_t start = gf_time();
+    int32_t modified = 0;
+    char when[GF_TIMESTR_SIZE] = {
+        0,
+    };
+
+    gf_time_fmt(when, sizeof(when), start + timo, gf_timefmt_FT);
+
+    pthread_mutex_lock(&monitor->donelock);
+    monitor->done = _gf_false;
+    pthread_mutex_unlock(&monitor->donelock);
+
+    modified = gf_tw_mod_timer_pending(conf->timer_wheel, monitor->timer,
+                                       timo);
+    if (modified != 0) {
+        _br_monitor_set_scrub_state(monitor, BR_SCRUB_STATE_PENDING);
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+               "Ondemand Scrubbing scheduled to run at %s", when);
+    } else {
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+               "Scrubber is currently running and would be "
+               "rescheduled after completion");
+    }
+
+    return 0;
+}
+
+/*
+ * Per-throttle exponents: the scrubber thread count is computed as
+ * child_count * pow(M_E, <exponent>) in br_scrubber_calc_scale().
+ */
+#define BR_SCRUB_THREAD_SCALE_LAZY 0
+#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4
+#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0
+
+/* fallback value of e for when no M_E macro has been defined */
+#ifndef M_E
+#define M_E 2.718
+#endif
+
+/**
+ * Compute how many scrubber threads a throttle setting asks for.
+ *
+ * This is a simple exponential scale, child_count * e^x, with x fixed per
+ * throttle config. We probably need to be smarter and take the number of
+ * processor cores into account too.
+ *
+ * Returns 0 for the VOID and STALLED throttles and, after logging an
+ * error, for unknown throttle values.
+ */
+static unsigned int
+br_scrubber_calc_scale(xlator_t *this, br_private_t *priv,
+                       scrub_throttle_t throttle)
+{
+    unsigned int nr_threads = 0;
+    double exponent = 0;
+
+    switch (throttle) {
+        case BR_SCRUB_THROTTLE_VOID:
+        case BR_SCRUB_THROTTLE_STALLED:
+            return 0;
+        case BR_SCRUB_THROTTLE_LAZY:
+            exponent = BR_SCRUB_THREAD_SCALE_LAZY;
+            break;
+        case BR_SCRUB_THROTTLE_NORMAL:
+            exponent = BR_SCRUB_THREAD_SCALE_NORMAL;
+            break;
+        case BR_SCRUB_THROTTLE_AGGRESSIVE:
+            exponent = BR_SCRUB_THREAD_SCALE_AGGRESSIVE;
+            break;
+        default:
+            gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_UNKNOWN_THROTTLE,
+                   "Unknown throttle %d", throttle);
+            return 0;
+    }
+
+    /* the fractional part is dropped by the conversion */
+    nr_threads = priv->child_count * pow(M_E, exponent);
+
+    return nr_threads;
+}
+
+/**
+ * Return the child at the head of fsscrub->scrublist and rotate the list
+ * left by one, so successive calls hand out the children round-robin.
+ * The list must not be empty.
+ */
+static br_child_t *
+_br_scrubber_get_next_child(struct br_scrubber *fsscrub)
+{
+    struct list_head *queue = &fsscrub->scrublist;
+    br_child_t *head = list_first_entry(queue, br_child_t, list);
+
+    list_rotate_left(queue);
+
+    return head;
+}
+
+/**
+ * Detach the first entry of @child's ->ready list and store it in
+ * *@fsentry. *@fsentry is left untouched when the list is empty.
+ */
+static void
+_br_scrubber_get_entry(br_child_t *child, struct br_fsscan_entry **fsentry)
+{
+    struct list_head *ready = &child->fsscan.ready;
+
+    if (!list_empty(ready)) {
+        *fsentry = list_first_entry(ready, struct br_fsscan_entry, list);
+        list_del_init(&(*fsentry)->list);
+    }
+}
+
+/**
+ * Find the next entry to scrub, blocking until one is available.
+ *
+ * Waits on fsscrub->cond (with fsscrub->mutex, which the caller must
+ * hold) while the scrub list is empty, then polls each child of the list
+ * round-robin for a ready entry. If no child has one, waits on the
+ * condition again and retries. On return *@fsentry is non-NULL; the
+ * caller is expected to have set it to NULL beforehand.
+ */
+static void
+_br_scrubber_find_scrubbable_entry(struct br_scrubber *fsscrub,
+                                   struct br_fsscan_entry **fsentry)
+{
+    br_child_t *child = NULL;
+    br_child_t *firstchild = NULL;
+
+    while (1) {
+        while (list_empty(&fsscrub->scrublist))
+            pthread_cond_wait(&fsscrub->cond, &fsscrub->mutex);
+
+        /*
+         * One full round over the rotating list: the first child seen is
+         * remembered and the walk stops once it comes up again.
+         */
+        firstchild = NULL;
+        for (child = _br_scrubber_get_next_child(fsscrub); child != firstchild;
+             child = _br_scrubber_get_next_child(fsscrub)) {
+            if (!firstchild)
+                firstchild = child;
+
+            _br_scrubber_get_entry(child, fsentry);
+            if (*fsentry)
+                break;
+        }
+
+        if (*fsentry)
+            break;
+
+        /* nothing to work on.. wait till available */
+        pthread_cond_wait(&fsscrub->cond, &fsscrub->mutex);
+    }
+}
+
+/**
+ * Fetch the next scrubbable entry into *@fsentry, blocking until one is
+ * available. The lookup runs under fsscrub->mutex; the mutex is also
+ * registered with a cleanup handler (_br_lock_cleaner) for the duration,
+ * presumably so a thread cancelled while waiting releases it.
+ */
+static void
+br_scrubber_pick_entry(struct br_scrubber *fsscrub,
+                       struct br_fsscan_entry **fsentry)
+{
+    pthread_cleanup_push(_br_lock_cleaner, &fsscrub->mutex);
+
+    pthread_mutex_lock(&fsscrub->mutex);
+    {
+        /* reset first: the finder loops until this becomes non-NULL */
+        *fsentry = NULL;
+        _br_scrubber_find_scrubbable_entry(fsscrub, fsentry);
+    }
+    pthread_mutex_unlock(&fsscrub->mutex);
+
+    pthread_cleanup_pop(0);
+}
+
+/* Argument handed to the br_scrubber_entry_handle() cleanup handler. */
+struct br_scrub_entry {
+    /* set once scrubbing of ->fsentry has run to completion */
+    gf_boolean_t scrubbed;
+    /* the entry being scrubbed */
+    struct br_fsscan_entry *fsentry;
+};
+
+/**
+ * Cleanup handler that finalizes a scrub attempt; @arg is a
+ * struct br_scrub_entry.
+ *
+ * We need to be a bit careful here. Scrubber threads are prone to
+ * cancellation when threads are scaled down (depending on the throttling
+ * value configured) and when scrub is paused. A thread can get cancelled
+ * while it is waiting for entries in the ->pending queue or while an
+ * object is undergoing scrubbing.
+ *
+ * Hence, under the scanner's entrylock: an entry that was fully scrubbed
+ * is accounted for and released, anything else is queued again.
+ */
+static void
+br_scrubber_entry_handle(void *arg)
+{
+    struct br_scrub_entry *sentry = arg;
+    struct br_fsscan_entry *item = sentry->fsentry;
+    struct br_scanfs *owner = item->fsscan;
+
+    LOCK(&owner->entrylock);
+    {
+        if (!sentry->scrubbed) {
+            /* (re)queue the entry again for scrub */
+            _br_fsscan_collect_entry(owner, sentry->fsentry);
+        } else {
+            _br_fsscan_dec_entry_count(owner);
+
+            /* tear down the entry before freeing it */
+            item->data = NULL;
+            item->fsscan = NULL;
+            loc_wipe(&item->parent);
+            gf_dirent_entry_free(item->entry);
+
+            GF_FREE(sentry->fsentry);
+        }
+    }
+    UNLOCK(&owner->entrylock);
+}
+
+/**
+ * Scrub a single entry with br_scrubber_entry_handle() installed as the
+ * cleanup handler. The handler runs on normal completion too
+ * (pthread_cleanup_pop(1)); ->scrubbed is only set after
+ * br_scrubber_scrub_begin() returns, so if the handler runs before that
+ * point it sees the entry as not scrubbed.
+ */
+static void
+br_scrubber_scrub_entry(xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+    struct br_scrub_entry sentry = {
+        0,
+    };
+
+    sentry.scrubbed = 0;
+    sentry.fsentry = fsentry;
+
+    pthread_cleanup_push(br_scrubber_entry_handle, &sentry);
+    {
+        /* the scrub result itself is ignored here */
+        (void)br_scrubber_scrub_begin(this, fsentry);
+        sentry.scrubbed = 1;
+    }
+    pthread_cleanup_pop(1);
+}
+
+/**
+ * Thread body of a scrubber worker; @arg is the shared struct
+ * br_scrubber. Loops forever picking the next ready entry, scrubbing it
+ * and then sleeping for one second. The return statement is never
+ * reached.
+ */
+void *
+br_scrubber_proc(void *arg)
+{
+    struct br_scrubber *fsscrub = arg;
+    xlator_t *this = fsscrub->this;
+    struct br_fsscan_entry *next = NULL;
+
+    THIS = this;
+
+    for (;;) {
+        br_scrubber_pick_entry(fsscrub, &next);
+        br_scrubber_scrub_entry(this, next);
+        sleep(1);
+    }
+
+    return NULL;
+}
+
+/**
+ * Spawn additional scrubber threads to grow the pool from @v1 to @v2.
+ *
+ * One bookkeeping object is allocated per thread and linked on
+ * fsscrub->scrubbers once its thread is running.
+ *
+ * Returns -1 when a bookkeeping object cannot be allocated. A thread
+ * creation failure is treated as degraded scaling: the unused object is
+ * released, a warning is logged and 0 is returned.
+ */
+static int32_t
+br_scrubber_scale_up(xlator_t *this, struct br_scrubber *fsscrub,
+                     unsigned int v1, unsigned int v2)
+{
+    int i = 0;
+    int32_t ret = -1;
+    int diff = 0;
+    gf_boolean_t nomem = _gf_false;
+    struct br_scrubbers *scrub = NULL;
+
+    diff = (int)(v2 - v1);
+
+    gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCALING_UP_SCRUBBER,
+           "Scaling up scrubbers [%d => %d]", v1, v2);
+
+    for (i = 0; i < diff; i++) {
+        /* exactly one object per thread, not 'diff' of them */
+        scrub = GF_CALLOC(1, sizeof(*scrub), gf_br_mt_br_scrubber_t);
+        if (!scrub) {
+            nomem = _gf_true;
+            break;
+        }
+
+        INIT_LIST_HEAD(&scrub->list);
+        ret = gf_thread_create(&scrub->scrubthread, NULL, br_scrubber_proc,
+                               fsscrub, "brsproc");
+        if (ret) {
+            /* no thread owns this object: do not leak it */
+            GF_FREE(scrub);
+            scrub = NULL;
+            break;
+        }
+
+        fsscrub->nr_scrubbers++;
+        list_add_tail(&scrub->list, &fsscrub->scrubbers);
+    }
+
+    if (nomem)
+        goto error_return;
+
+    if (i != diff) /* degraded scaling.. */
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SCALE_UP_FAILED,
+               "Could not fully scale up to %d scrubber(s). Spawned "
+               "%d/%d [total scrubber(s): %d]",
+               v2, i, diff, (v1 + i));
+
+    return 0;
+
+error_return:
+    return -1;
+}
+
+/**
+ * Terminate scrubber threads to shrink the pool from @v1 to @v2.
+ *
+ * Threads are taken from the head of fsscrub->scrubbers and stopped with
+ * gf_thread_cleanup_xint(). If a thread cannot be stopped, its
+ * bookkeeping object is put back on the list so that the list and
+ * ->nr_scrubbers stay in agreement, and scaling stops there.
+ *
+ * Always returns 0; a partial scale down is only logged as a warning.
+ */
+static int32_t
+br_scrubber_scale_down(xlator_t *this, struct br_scrubber *fsscrub,
+                       unsigned int v1, unsigned int v2)
+{
+    int i = 0;
+    int diff = 0;
+    int32_t ret = -1;
+    struct br_scrubbers *scrub = NULL;
+
+    diff = (int)(v1 - v2);
+
+    gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCALE_DOWN_SCRUBBER,
+           "Scaling down scrubbers [%d => %d]", v1, v2);
+
+    for (i = 0; i < diff; i++) {
+        /* list_first_entry() on an empty list is not a valid object */
+        if (list_empty(&fsscrub->scrubbers)) {
+            ret = -1;
+            break;
+        }
+
+        scrub = list_first_entry(&fsscrub->scrubbers, struct br_scrubbers,
+                                 list);
+
+        list_del_init(&scrub->list);
+        ret = gf_thread_cleanup_xint(scrub->scrubthread);
+        if (ret) {
+            /* still counted in ->nr_scrubbers: keep it tracked */
+            list_add(&scrub->list, &fsscrub->scrubbers);
+            break;
+        }
+        GF_FREE(scrub);
+
+        fsscrub->nr_scrubbers--;
+    }
+
+    if (ret) {
+        /* target is v2; v1 - i scrubbers are left after i terminations */
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SCALE_DOWN_FAILED,
+               "Could not fully scale down "
+               "to %d scrubber(s). Terminated %d/%d [total "
+               "scrubber(s): %d]",
+               v2, i, diff, (v1 - i));
+        ret = 0;
+    }
+
+    return ret;
+}
+
+/**
+ * Resize the scrubber thread pool to what throttle @nthrottle asks for.
+ *
+ * Returns 0 when the pool already has the right size, otherwise the
+ * result of the scale up/down routine.
+ */
+static int32_t
+br_scrubber_configure(xlator_t *this, br_private_t *priv,
+                      struct br_scrubber *fsscrub, scrub_throttle_t nthrottle)
+{
+    unsigned int current = fsscrub->nr_scrubbers;
+    unsigned int wanted = br_scrubber_calc_scale(this, priv, nthrottle);
+
+    if (current == wanted) {
+        return 0;
+    }
+
+    if (current < wanted) {
+        return br_scrubber_scale_up(this, fsscrub, current, wanted);
+    }
+
+    return br_scrubber_scale_down(this, fsscrub, current, wanted);
+}
+
+/**
+ * Read string option @opt into *@value: from @options when a dict is
+ * given (reconfigure), otherwise through the init-time option macro.
+ *
+ * Returns 0 on success, -1 when the option macro jumps to the error
+ * label.
+ */
+static int32_t
+br_scrubber_fetch_option(xlator_t *this, char *opt, dict_t *options,
+                         char **value)
+{
+    if (!options) {
+        GF_OPTION_INIT(opt, *value, str, error_return);
+    } else {
+        GF_OPTION_RECONF(opt, *value, options, str, error_return);
+    }
+
+    return 0;
+
+error_return:
+    return -1;
+}
+
+/* internal "throttle" override */
+#define BR_SCRUB_STALLED "STALLED"
+
+/**
+ * Apply the "scrub-throttle" option.
+ *
+ * The option value (or the internal STALLED override when @scrubstall is
+ * set) is matched case-insensitively against the known throttle names and
+ * the scrubber pool is resized accordingly. ->throttle_reconf tells
+ * whether the throttle differs from the previous one.
+ *
+ * Returns 0 on success, -1 on a fetch failure, an unknown value or a
+ * failed reconfiguration; on failure the old throttling value is
+ * preserved.
+ *
+ * TODO: token bucket spec
+ */
+static int32_t
+br_scrubber_handle_throttle(xlator_t *this, br_private_t *priv, dict_t *options,
+                            gf_boolean_t scrubstall)
+{
+    static const struct {
+        const char *name;
+        scrub_throttle_t level;
+    } known[] = {
+        {"lazy", BR_SCRUB_THROTTLE_LAZY},
+        {"normal", BR_SCRUB_THROTTLE_NORMAL},
+        {"aggressive", BR_SCRUB_THROTTLE_AGGRESSIVE},
+        {BR_SCRUB_STALLED, BR_SCRUB_THROTTLE_STALLED},
+    };
+    const size_t nr_known = sizeof(known) / sizeof(known[0]);
+    struct br_scrubber *fsscrub = &priv->fsscrub;
+    scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID;
+    char *name = NULL;
+    size_t idx = 0;
+
+    fsscrub->throttle_reconf = _gf_false;
+
+    if (br_scrubber_fetch_option(this, "scrub-throttle", options, &name)) {
+        return -1;
+    }
+
+    if (scrubstall) {
+        name = BR_SCRUB_STALLED;
+    }
+
+    for (idx = 0; idx < nr_known; idx++) {
+        if (strcasecmp(name, known[idx].name) == 0) {
+            break;
+        }
+    }
+    if (idx == nr_known) {
+        return -1;
+    }
+    nthrottle = known[idx].level;
+
+    /* on failure old throttling value is preserved */
+    if (br_scrubber_configure(this, priv, fsscrub, nthrottle)) {
+        return -1;
+    }
+
+    if (fsscrub->throttle != nthrottle) {
+        fsscrub->throttle_reconf = _gf_true;
+    }
+    fsscrub->throttle = nthrottle;
+
+    return 0;
+}
+
+/**
+ * Read the "scrub-state" option and set *@scrubstall to true when it is
+ * "pause" (case-insensitive). Any other value means active and leaves
+ * *@scrubstall as the caller initialized it.
+ *
+ * Returns 0 on success, -1 when the option cannot be fetched.
+ */
+static int32_t
+br_scrubber_handle_stall(xlator_t *this, br_private_t *priv, dict_t *options,
+                         gf_boolean_t *scrubstall)
+{
+    char *state = NULL;
+
+    if (br_scrubber_fetch_option(this, "scrub-state", options, &state)) {
+        return -1;
+    }
+
+    if (!strcasecmp(state, "pause")) {
+        *scrubstall = _gf_true;
+    }
+
+    return 0;
+}
+
+/**
+ * Apply the "scrub-freq" option.
+ *
+ * The option value (or the internal STALLED override when @scrubstall is
+ * set) is matched case-insensitively against the known frequency names.
+ * ->frequency_reconf is set on entry and cleared only when the resulting
+ * frequency equals the current one; otherwise the new frequency is
+ * stored.
+ *
+ * Returns 0 on success, -1 on a fetch failure or an unknown value.
+ */
+static int32_t
+br_scrubber_handle_freq(xlator_t *this, br_private_t *priv, dict_t *options,
+                        gf_boolean_t scrubstall)
+{
+    static const struct {
+        const char *name;
+        scrub_freq_t freq;
+    } known[] = {
+        {"hourly", BR_FSSCRUB_FREQ_HOURLY},
+        {"daily", BR_FSSCRUB_FREQ_DAILY},
+        {"weekly", BR_FSSCRUB_FREQ_WEEKLY},
+        {"biweekly", BR_FSSCRUB_FREQ_BIWEEKLY},
+        {"monthly", BR_FSSCRUB_FREQ_MONTHLY},
+        {"minute", BR_FSSCRUB_FREQ_MINUTE},
+        {BR_SCRUB_STALLED, BR_FSSCRUB_FREQ_STALLED},
+    };
+    const size_t nr_known = sizeof(known) / sizeof(known[0]);
+    struct br_scrubber *fsscrub = &priv->fsscrub;
+    scrub_freq_t frequency = BR_FSSCRUB_FREQ_HOURLY;
+    char *name = NULL;
+    size_t idx = 0;
+
+    fsscrub->frequency_reconf = _gf_true;
+
+    if (br_scrubber_fetch_option(this, "scrub-freq", options, &name)) {
+        return -1;
+    }
+
+    if (scrubstall) {
+        name = BR_SCRUB_STALLED;
+    }
+
+    for (idx = 0; idx < nr_known; idx++) {
+        if (strcasecmp(name, known[idx].name) == 0) {
+            break;
+        }
+    }
+    if (idx == nr_known) {
+        return -1;
+    }
+    frequency = known[idx].freq;
+
+    if (fsscrub->frequency != frequency) {
+        fsscrub->frequency = frequency;
+    } else {
+        fsscrub->frequency_reconf = _gf_false;
+    }
+
+    return 0;
+}
+
+/**
+ * Log the effective scrub tunables (frequency and throttle).
+ *
+ * Nothing is logged when scrubbing is stalled (that is logged as a
+ * pause), when neither tunable was reconfigured, or while the throttle is
+ * still BR_SCRUB_THROTTLE_VOID.
+ */
+static void
+br_scrubber_log_option(xlator_t *this, br_private_t *priv,
+                       gf_boolean_t scrubstall)
+{
+    struct br_scrubber *fsscrub = &priv->fsscrub;
+    char *throttle_names[] = {
+        [BR_SCRUB_THROTTLE_LAZY] = "lazy",
+        [BR_SCRUB_THROTTLE_NORMAL] = "normal",
+        [BR_SCRUB_THROTTLE_AGGRESSIVE] = "aggressive",
+        [BR_SCRUB_THROTTLE_STALLED] = "stalled",
+    };
+    char *freq_names[] = {
+        [0] = "",
+        [BR_FSSCRUB_FREQ_HOURLY] = "hourly",
+        [BR_FSSCRUB_FREQ_DAILY] = "daily",
+        [BR_FSSCRUB_FREQ_WEEKLY] = "weekly",
+        [BR_FSSCRUB_FREQ_BIWEEKLY] = "biweekly",
+        [BR_FSSCRUB_FREQ_MONTHLY] = "monthly (30 days)",
+        [BR_FSSCRUB_FREQ_MINUTE] = "every minute",
+    };
+
+    if (scrubstall) {
+        return; /* logged as pause */
+    }
+
+    if (!fsscrub->frequency_reconf && !fsscrub->throttle_reconf) {
+        return;
+    }
+
+    if (fsscrub->throttle == BR_SCRUB_THROTTLE_VOID) {
+        return;
+    }
+
+    gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_TUNABLE,
+           "SCRUB TUNABLES:: [Frequency: %s, Throttle: %s]",
+           freq_names[fsscrub->frequency],
+           throttle_names[fsscrub->throttle]);
+}
+
+/**
+ * Process all scrubber options in order: scrub state (pause/active),
+ * throttle, frequency. Processing stops at the first failure. When
+ * everything succeeded the effective tunables are logged.
+ *
+ * Returns 0 on success, -1 as soon as any handler fails.
+ */
+int32_t
+br_scrubber_handle_options(xlator_t *this, br_private_t *priv, dict_t *options)
+{
+    gf_boolean_t stalled = _gf_false; /* not as dangerous as it sounds */
+
+    if (br_scrubber_handle_stall(this, priv, options, &stalled)) {
+        return -1;
+    }
+
+    if (br_scrubber_handle_throttle(this, priv, options, stalled)) {
+        return -1;
+    }
+
+    if (br_scrubber_handle_freq(this, priv, options, stalled)) {
+        return -1;
+    }
+
+    br_scrubber_log_option(this, priv, stalled);
+
+    return 0;
+}
+
+/**
+ * Look up the bad-objects directory identified by @gfid on @child and
+ * link the resulting inode into the child's inode table.
+ *
+ * Returns the linked inode on success, NULL on invalid arguments, when a
+ * new inode cannot be allocated, when the lookup fails or when linking
+ * yields no inode.
+ */
+inode_t *
+br_lookup_bad_obj_dir(xlator_t *this, br_child_t *child, uuid_t gfid)
+{
+    struct iatt statbuf = {
+        0,
+    };
+    inode_table_t *table = NULL;
+    int32_t ret = -1;
+    loc_t loc = {
+        0,
+    };
+    inode_t *linked_inode = NULL;
+    int32_t op_errno = 0;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-scrubber", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+    GF_VALIDATE_OR_GOTO(this->name, child, out);
+
+    table = child->table;
+
+    loc.inode = inode_new(table);
+    if (!loc.inode) {
+        /* note the space after "for": the literals are concatenated */
+        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+               "failed to allocate a new inode for "
+               "bad object directory");
+        goto out;
+    }
+
+    gf_uuid_copy(loc.gfid, gfid);
+
+    ret = syncop_lookup(child->xl, &loc, &statbuf, NULL, NULL, NULL);
+    if (ret < 0) {
+        /* the negated return value is used as the errno */
+        op_errno = -ret;
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_LOOKUP_FAILED,
+               "failed to lookup the bad "
+               "objects directory (gfid: %s (%s))",
+               uuid_utoa(gfid), strerror(op_errno));
+        goto out;
+    }
+
+    linked_inode = inode_link(loc.inode, NULL, NULL, &statbuf);
+    if (linked_inode)
+        inode_lookup(linked_inode);
+
+out:
+    loc_wipe(&loc);
+    return linked_inode;
+}
+
+/**
+ * Read the bad-objects directory open on @fd and record its entries in
+ * @dict as "quarantine-<n>" => entry name, plus the number of recorded
+ * entries under "count". Any dict handed back by readdir is merged into
+ * @dict as well.
+ *
+ * Returns the negative readdir result on failure; otherwise the result
+ * of storing "count" in @dict.
+ */
+int32_t
+br_read_bad_object_dir(xlator_t *this, br_child_t *child, fd_t *fd,
+                       dict_t *dict)
+{
+    gf_dirent_t entries;
+    gf_dirent_t *entry = NULL;
+    int32_t ret = -1;
+    off_t offset = 0;
+    int32_t count = 0;
+    char key[32] = {
+        0,
+    };
+    dict_t *out_dict = NULL;
+
+    INIT_LIST_HEAD(&entries.list);
+
+    while ((ret = syncop_readdir(child->xl, fd, 131072, offset, &entries, NULL,
+                                 &out_dict))) {
+        if (ret < 0)
+            goto out;
+
+        list_for_each_entry(entry, &entries.list, list)
+        {
+            /* resume the next readdir after the last entry seen */
+            offset = entry->d_off;
+
+            snprintf(key, sizeof(key), "quarantine-%d", count);
+
+            /*
+             * ignore the dict_set errors for now. The intention is
+             * to get as many bad objects as possible instead of
+             * erroring out at the first failure.
+             */
+            ret = dict_set_dynstr_with_alloc(dict, key, entry->d_name);
+            if (!ret)
+                count++;
+        }
+
+        /* merge what readdir handed back, once per batch */
+        if (out_dict) {
+            dict_copy(out_dict, dict);
+            dict_unref(out_dict);
+            out_dict = NULL;
+        }
+
+        gf_dirent_free(&entries);
+    }
+
+    ret = dict_set_int32_sizen(dict, "count", count);
+
+out:
+    /* only non-NULL when readdir failed after handing a dict back */
+    if (out_dict)
+        dict_unref(out_dict);
+    return ret;
+}
+
+/**
+ * Collect the bad objects of one brick into @dict.
+ *
+ * Finds the bad-objects container inode in the child's inode table
+ * (looking it up on the brick if it is not known yet), opens it as a
+ * directory and reads it with br_read_bad_object_dir().
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+int32_t
+br_get_bad_objects_from_child(xlator_t *this, dict_t *dict, br_child_t *child)
+{
+    inode_t *inode = NULL;
+    inode_table_t *table = NULL;
+    fd_t *fd = NULL;
+    int32_t ret = -1;
+    loc_t loc = {
+        0,
+    };
+    int32_t op_errno = 0;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-scrubber", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+    GF_VALIDATE_OR_GOTO(this->name, child, out);
+    GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+    table = child->table;
+
+    inode = inode_find(table, BR_BAD_OBJ_CONTAINER);
+    if (!inode) {
+        inode = br_lookup_bad_obj_dir(this, child, BR_BAD_OBJ_CONTAINER);
+        if (!inode)
+            goto out;
+    }
+
+    /*
+     * Hand the inode over to loc right away, so that loc_wipe() at 'out'
+     * disposes of it on every path, including an fd_create() failure.
+     */
+    loc.inode = inode;
+    gf_uuid_copy(loc.gfid, inode->gfid);
+
+    fd = fd_create(inode, 0);
+    if (!fd) {
+        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_FD_CREATE_FAILED,
+               "fd creation for the bad "
+               "objects directory failed (gfid: %s)",
+               uuid_utoa(BR_BAD_OBJ_CONTAINER));
+        goto out;
+    }
+
+    ret = syncop_opendir(child->xl, &loc, fd, NULL, NULL);
+    if (ret < 0) {
+        op_errno = -ret;
+        /* drop the fd here; 'out' must not unref it a second time */
+        fd_unref(fd);
+        fd = NULL;
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_FD_CREATE_FAILED,
+               "failed to open the bad "
+               "objects directory %s",
+               uuid_utoa(BR_BAD_OBJ_CONTAINER));
+        goto out;
+    }
+
+    fd_bind(fd);
+
+    ret = br_read_bad_object_dir(this, child, fd, dict);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BAD_OBJ_READDIR_FAIL,
+               "readdir of the bad "
+               "objects directory (%s) failed ",
+               uuid_utoa(BR_BAD_OBJ_CONTAINER));
+        goto out;
+    }
+
+    ret = 0;
+
+out:
+    loc_wipe(&loc);
+    if (fd)
+        fd_unref(fd);
+    return ret;
+}
+
+/**
+ * Merge the bad objects of one brick (@child_dict) into the aggregate
+ * @dict.
+ *
+ * For every "quarantine-<j>" entry of @child_dict a string of the form
+ * "<entry> ==> BRICK: <brick path>\n path: <path>" is stored in @dict
+ * under "quarantine-<n>", with n counting up from @total_count. Entries
+ * that are missing, do not fit into PATH_MAX or cannot be stored are
+ * skipped. An entry without a path in @child_dict is reported with the
+ * path "(null)".
+ *
+ * Returns the new total count, or the non-zero dict error when
+ * @child_dict has no "count".
+ */
+int32_t
+br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
+                                dict_t *child_dict, int32_t total_count)
+{
+    int32_t ret = -1;
+    int32_t count = 0;
+    char key[32] = {
+        0,
+    };
+    char main_key[32] = {
+        0,
+    };
+    int32_t j = 0;
+    int32_t tmp_count = 0;
+    char *entry = NULL;
+    char tmp[PATH_MAX] = {
+        0,
+    };
+    char *path = NULL;
+    int32_t len = 0;
+
+    ret = dict_get_int32_sizen(child_dict, "count", &count);
+    if (ret)
+        goto out;
+
+    tmp_count = total_count;
+
+    for (j = 0; j < count; j++) {
+        /* never carry a path over from a previous iteration */
+        path = NULL;
+
+        len = snprintf(key, sizeof(key), "quarantine-%d", j);
+        ret = dict_get_strn(child_dict, key, len, &entry);
+        if (ret)
+            continue;
+
+        ret = dict_get_str(child_dict, entry, &path);
+        if (ret || !path)
+            path = "(null)"; /* a NULL argument for %s is undefined */
+
+        len = snprintf(tmp, PATH_MAX, "%s ==> BRICK: %s\n path: %s", entry,
+                       child->brick_path, path);
+        if ((len < 0) || (len >= PATH_MAX)) {
+            continue;
+        }
+        snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count);
+
+        ret = dict_set_dynstr_with_alloc(dict, main_key, tmp);
+        if (!ret)
+            tmp_count++;
+    }
+
+    ret = tmp_count;
+
+out:
+    return ret;
+}
+
+/**
+ * Gather the bad objects of every connected brick into @dict and store
+ * the overall number under "total-count".
+ *
+ * A failure on one brick (dict allocation, fetching or merging its list)
+ * only skips that brick; the remaining children are still asked.
+ *
+ * Returns the result of storing "total-count" in @dict.
+ */
+int32_t
+br_collect_bad_objects_from_children(xlator_t *this, dict_t *dict)
+{
+    br_private_t *conf = this->private;
+    br_child_t *child = NULL;
+    dict_t *per_child = NULL;
+    int32_t total = 0;
+    int32_t ret = -1;
+    int32_t idx = 0;
+
+    for (idx = 0; idx < conf->child_count; idx++) {
+        child = &conf->children[idx];
+        GF_ASSERT(child);
+        if (!_br_is_child_connected(child)) {
+            continue;
+        }
+
+        per_child = dict_new();
+        if (!per_child) {
+            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+                   "failed to allocate dict");
+            continue;
+        }
+
+        /*
+         * Keep asking the remaining children even when one of them
+         * fails to deliver its list.
+         */
+        ret = br_get_bad_objects_from_child(this, per_child, child);
+        if (!ret) {
+            ret = br_collect_bad_objects_of_child(this, child, dict, per_child,
+                                                  total);
+            if (ret >= 0) {
+                total = ret;
+            }
+        }
+
+        dict_unref(per_child);
+        per_child = NULL;
+    }
+
+    ret = dict_set_int32(dict, "total-count", total);
+
+    return ret;
+}
+
+/**
+ * Collect the bad objects of all children into *@dict. When the caller
+ * passes in a NULL dict one is allocated and stored in *@dict before the
+ * collection starts, so it stays with the caller even if collection fails.
+ *
+ * Returns -1 on invalid arguments or allocation failure, otherwise the
+ * result of br_collect_bad_objects_from_children().
+ */
+int32_t
+br_get_bad_objects_list(xlator_t *this, dict_t **dict)
+{
+    int32_t ret = -1;
+    dict_t *tmp_dict = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-scrubber", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+    tmp_dict = *dict;
+    if (!tmp_dict) {
+        tmp_dict = dict_new();
+        if (!tmp_dict) {
+            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+                   "failed to allocate dict");
+            goto out;
+        }
+        *dict = tmp_dict;
+    }
+
+    ret = br_collect_bad_objects_from_children(this, tmp_dict);
+
+out:
+    return ret;
+}
+
+/**
+ * Block the calling thread until the scrub monitor's "done" flag is set.
+ *
+ * Returns 0 once the flag has been observed, -1 when @this or its private
+ * data is missing.
+ */
+static int
+wait_for_scrub_to_finish(xlator_t *this)
+{
+    int ret = -1;
+    br_private_t *priv = NULL;
+    struct br_monitor *scrub_monitor = NULL;
+
+    /* validate before anything is dereferenced */
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    priv = this->private;
+    scrub_monitor = &priv->scrub_monitor;
+
+    gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+           "Waiting for all children to start and finish scrub");
+
+    pthread_mutex_lock(&scrub_monitor->donelock);
+    {
+        /* loop guards against spurious wakeups */
+        while (!scrub_monitor->done)
+            pthread_cond_wait(&scrub_monitor->donecond,
+                              &scrub_monitor->donelock);
+    }
+    pthread_mutex_unlock(&scrub_monitor->donelock);
+    ret = 0;
+out:
+    return ret;
+}
+
+/**
+ * Entry point of the scrubber monitor thread, which looks after the scrub
+ * state machine. @arg is the bit-rot xlator. Always returns NULL.
+ */
+void *
+br_monitor_thread(void *arg)
+{
+    xlator_t *this = arg;
+    br_private_t *priv = this->private;
+    struct br_monitor *monitor = &priv->scrub_monitor;
+    int32_t ret = 0;
+
+    /*
+     * bit-rot is the topmost xlator, so nothing (not even STACK_WIND)
+     * sets THIS on its behalf. Every spawned thread has to do it itself,
+     * else "THIS" would evaluate to global_xlator's pointer.
+     */
+    THIS = this;
+
+    /* hold off until the monitor has been flagged as initialised */
+    pthread_mutex_lock(&monitor->mutex);
+    while (!monitor->inited)
+        pthread_cond_wait(&monitor->cond, &monitor->mutex);
+    pthread_mutex_unlock(&monitor->mutex);
+
+    /* this needs to be serialized with reconfigure() */
+    pthread_mutex_lock(&priv->lock);
+    ret = br_scrub_state_machine(this, _gf_false);
+    pthread_mutex_unlock(&priv->lock);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SSM_FAILED,
+               "Scrub state machine failed");
+        return NULL;
+    }
+
+    for (;;) {
+        /* sleep until every child is through with its scrub */
+        ret = wait_for_scrub_to_finish(this);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SCRUB_WAIT_FAILED,
+                   "Scrub wait failed");
+            break;
+        }
+
+        /* scrub exit criteria: Move the state to PENDING */
+        br_scrubber_exit_control(this);
+    }
+
+    return NULL;
+}
+
+/* Set the monitor's scrub state while holding the monitor lock. */
+static void
+br_set_scrub_state(struct br_monitor *scrub_monitor, br_scrub_state_t state)
+{
+    LOCK(&scrub_monitor->lock);
+    _br_monitor_set_scrub_state(scrub_monitor, state);
+    UNLOCK(&scrub_monitor->lock);
+}
+
+/**
+ * Prepare priv->scrub_monitor (locks, condition variables, flags, initial
+ * INACTIVE state) and spawn the monitor thread.
+ *
+ * Returns 0 on success. If the thread cannot be created, everything set up
+ * here is destroyed again and -1 is returned.
+ */
+int32_t
+br_scrubber_monitor_init(xlator_t *this, br_private_t *priv)
+{
+    struct br_monitor *monitor = &priv->scrub_monitor;
+    int ret = 0;
+
+    LOCK_INIT(&monitor->lock);
+    monitor->this = this;
+
+    /* "inited" handshake */
+    monitor->inited = _gf_false;
+    pthread_mutex_init(&monitor->mutex, NULL);
+    pthread_cond_init(&monitor->cond, NULL);
+
+    /* wake-up bookkeeping */
+    monitor->kick = _gf_false;
+    monitor->active_child_count = 0;
+    pthread_mutex_init(&monitor->wakelock, NULL);
+    pthread_cond_init(&monitor->wakecond, NULL);
+
+    /* completion notification */
+    monitor->done = _gf_false;
+    pthread_mutex_init(&monitor->donelock, NULL);
+    pthread_cond_init(&monitor->donecond, NULL);
+
+    /* the scrubber starts out INACTIVE */
+    br_set_scrub_state(&priv->scrub_monitor, BR_SCRUB_STATE_INACTIVE);
+
+    /* spawn the monitor thread */
+    ret = gf_thread_create(&monitor->thread, NULL, br_monitor_thread, this,
+                           "brmon");
+    if (ret == 0)
+        return 0;
+
+    gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED,
+           "monitor thread creation failed");
+
+    /* undo the initialisation done above */
+    pthread_mutex_destroy(&monitor->mutex);
+    pthread_cond_destroy(&monitor->cond);
+
+    pthread_mutex_destroy(&monitor->wakelock);
+    pthread_cond_destroy(&monitor->wakecond);
+
+    pthread_mutex_destroy(&monitor->donelock);
+    pthread_cond_destroy(&monitor->donecond);
+
+    LOCK_DESTROY(&monitor->lock);
+
+    return -1;
+}
+
+/**
+ * Initialise the scrubber side of @priv: the tbf instance, the scrub
+ * monitor and the fsscrub bookkeeping (lock, condition, empty lists).
+ *
+ * Returns 0 on success, -1 if tbf_init() or the monitor setup fails.
+ */
+int32_t
+br_scrubber_init(xlator_t *this, br_private_t *priv)
+{
+    struct br_scrubber *scrubber = NULL;
+
+    priv->tbf = tbf_init(NULL, 0);
+    if (priv->tbf == NULL)
+        return -1;
+
+    if (br_scrubber_monitor_init(this, priv) != 0)
+        return -1;
+
+    scrubber = &priv->fsscrub;
+    scrubber->this = this;
+    scrubber->throttle = BR_SCRUB_THROTTLE_VOID;
+
+    pthread_mutex_init(&scrubber->mutex, NULL);
+    pthread_cond_init(&scrubber->cond, NULL);
+
+    /* no scrubbers yet, nothing queued */
+    scrubber->nr_scrubbers = 0;
+    INIT_LIST_HEAD(&scrubber->scrubbers);
+    INIT_LIST_HEAD(&scrubber->scrublist);
+
+    return 0;
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
new file mode 100644
index 00000000000..4e5f67bc021
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
@@ -0,0 +1,46 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SCRUB_H__
+#define __BIT_ROT_SCRUB_H__
+
+#include <glusterfs/xlator.h>
+#include "bit-rot.h"
+
+void *
+br_fsscanner(void *);
+
+int32_t
+br_fsscan_schedule(xlator_t *);
+int32_t
+br_fsscan_reschedule(xlator_t *);
+int32_t
+br_fsscan_activate(xlator_t *);
+int32_t
+br_fsscan_deactivate(xlator_t *);
+int32_t
+br_fsscan_ondemand(xlator_t *);
+
+int32_t
+br_scrubber_handle_options(xlator_t *, br_private_t *, dict_t *);
+
+int32_t
+br_scrubber_monitor_init(xlator_t *, br_private_t *);
+
+int32_t
+br_scrubber_init(xlator_t *, br_private_t *);
+
+int32_t
+br_collect_bad_objects_from_children(xlator_t *this, dict_t *dict);
+
+void
+br_child_set_scrub_state(br_child_t *, gf_boolean_t);
+
+#endif /* __BIT_ROT_SCRUB_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
new file mode 100644
index 00000000000..753e31a3b23
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
@@ -0,0 +1,124 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#include "bit-rot-ssm.h"
+#include "bit-rot-scrub.h"
+#include "bit-rot-bitd-messages.h"
+
+/* State machine action for transitions that need no work; returns 0. */
+int
+br_scrub_ssm_noop(xlator_t *this)
+{
+    (void)this;
+
+    return 0;
+}
+
+/* State machine action: log the pause and switch the monitor to PAUSED. */
+int
+br_scrub_ssm_state_pause(xlator_t *this)
+{
+    br_private_t *priv = this->private;
+    struct br_monitor *monitor = &priv->scrub_monitor;
+
+    gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+           "Scrubber paused");
+    _br_monitor_set_scrub_state(monitor, BR_SCRUB_STATE_PAUSED);
+
+    return 0;
+}
+
+/* State machine action: log the pause and switch the monitor to IPAUSED. */
+int
+br_scrub_ssm_state_ipause(xlator_t *this)
+{
+    br_private_t *priv = this->private;
+    struct br_monitor *monitor = &priv->scrub_monitor;
+
+    gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+           "Scrubber paused");
+    _br_monitor_set_scrub_state(monitor, BR_SCRUB_STATE_IPAUSED);
+
+    return 0;
+}
+
+/*
+ * State machine action for leaving the stalled state: when the monitor's
+ * "done" flag is set the scan is (re)activated through
+ * br_fsscan_activate(), otherwise the state simply goes back to ACTIVE.
+ * Always returns 0.
+ */
+int
+br_scrub_ssm_state_active(xlator_t *this)
+{
+    br_private_t *priv = this->private;
+    struct br_monitor *monitor = &priv->scrub_monitor;
+
+    if (!monitor->done) {
+        gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+               "Scrubbing resumed");
+        _br_monitor_set_scrub_state(monitor, BR_SCRUB_STATE_ACTIVE);
+        return 0;
+    }
+
+    (void)br_fsscan_activate(this);
+    return 0;
+}
+
+/* State machine action: log the stall and switch the monitor to STALLED. */
+int
+br_scrub_ssm_state_stall(xlator_t *this)
+{
+    br_private_t *priv = this->private;
+    struct br_monitor *monitor = &priv->scrub_monitor;
+
+    gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+           "Volume is under active scrubbing. Pausing scrub..");
+    _br_monitor_set_scrub_state(monitor, BR_SCRUB_STATE_STALLED);
+
+    return 0;
+}
+
+/*
+ * Scrub state machine dispatch table, indexed as [current state][event].
+ * Rows follow the order of br_scrub_state_t; the three columns follow the
+ * order of br_scrub_event_t: SCHEDULE, PAUSE, ONDEMAND.
+ */
+static br_scrub_ssm_call *br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] =
+    {
+        /* INACTIVE */
+        {br_fsscan_schedule, br_scrub_ssm_state_ipause, br_scrub_ssm_noop},
+        /* PENDING  */
+        {br_fsscan_reschedule, br_fsscan_deactivate, br_fsscan_ondemand},
+        /* ACTIVE   */
+        {br_scrub_ssm_noop, br_scrub_ssm_state_stall, br_scrub_ssm_noop},
+        /* PAUSED   */
+        {br_fsscan_activate, br_scrub_ssm_noop, br_scrub_ssm_noop},
+        /* IPAUSED  */
+        {br_fsscan_schedule, br_scrub_ssm_noop, br_scrub_ssm_noop},
+        /* STALLED  */
+        {br_scrub_ssm_state_active, br_scrub_ssm_noop, br_scrub_ssm_noop},
+};
+
+/**
+ * Run one step of the scrub state machine: pick the action registered for
+ * the monitor's current state and the pending event, and execute it.
+ *
+ * With @scrub_ondemand set the event is BR_SCRUB_EVENT_ONDEMAND, otherwise
+ * it is whatever _br_child_get_scrub_event() reports.
+ *
+ * Returns the value of the invoked action.
+ */
+int32_t
+br_scrub_state_machine(xlator_t *this, gf_boolean_t scrub_ondemand)
+{
+    br_private_t *priv = this->private;
+    struct br_scrubber *scrubber = &priv->fsscrub;
+    struct br_monitor *monitor = &priv->scrub_monitor;
+    br_scrub_state_t state = monitor->state;
+    br_scrub_event_t event = BR_SCRUB_EVENT_ONDEMAND;
+
+    if (!scrub_ondemand)
+        event = _br_child_get_scrub_event(scrubber);
+
+    return br_scrub_ssm[state][event](this);
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
new file mode 100644
index 00000000000..37b45a42eac
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
@@ -0,0 +1,38 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SSM_H__
+#define __BIT_ROT_SSM_H__
+
+#include <glusterfs/xlator.h>
+
+/*
+ * States of the scrub state machine. The numeric values are used as the
+ * row index of the dispatch table in bit-rot-ssm.c, so the order matters.
+ */
+typedef enum br_scrub_state {
+    BR_SCRUB_STATE_INACTIVE = 0,
+    BR_SCRUB_STATE_PENDING,
+    BR_SCRUB_STATE_ACTIVE,
+    BR_SCRUB_STATE_PAUSED,
+    BR_SCRUB_STATE_IPAUSED,
+    BR_SCRUB_STATE_STALLED,
+    BR_SCRUB_MAXSTATES, /* number of states, not a state itself */
+} br_scrub_state_t;
+
+/*
+ * Events fed to the scrub state machine. The numeric values are used as
+ * the column index of the dispatch table in bit-rot-ssm.c.
+ */
+typedef enum br_scrub_event {
+    BR_SCRUB_EVENT_SCHEDULE = 0,
+    BR_SCRUB_EVENT_PAUSE,
+    BR_SCRUB_EVENT_ONDEMAND,
+    BR_SCRUB_MAXEVENTS, /* number of events, not an event itself */
+} br_scrub_event_t;
+
+struct br_monitor;
+
+int32_t
+br_scrub_state_machine(xlator_t *, gf_boolean_t);
+
+#endif /* __BIT_ROT_SSM_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
new file mode 100644
index 00000000000..a2f1c343a1d
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -0,0 +1,2232 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#include <ctype.h>
+
+#include <glusterfs/logging.h>
+#include <glusterfs/compat-errno.h>
+
+#include "bit-rot.h"
+#include "bit-rot-scrub.h"
+#include <pthread.h>
+#include "bit-rot-bitd-messages.h"
+
+#define BR_HASH_CALC_READ_SIZE (128 * 1024)
+
+/* Handler signature: takes the xlator and a child, returns an int32_t. */
+typedef int32_t(br_child_handler)(xlator_t *, br_child_t *);
+
+/*
+ * Bundles a handler with the xlator and the child it applies to; @list
+ * allows the event to be linked into a list.
+ * NOTE(review): presumably queued for deferred child event processing --
+ * the users are not in this part of the file, confirm there.
+ */
+struct br_child_event {
+    xlator_t *this;
+
+    br_child_t *child;
+
+    br_child_handler *call;
+
+    struct list_head list;
+};
+
+/**
+ * Return the position of @child in priv->children (matched on the child's
+ * xlator pointer), or -1 when it is not found or an argument is invalid.
+ */
+static int
+br_find_child_index(xlator_t *this, xlator_t *child)
+{
+    br_private_t *priv = NULL;
+    int pos = 0;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", this, notfound);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, notfound);
+    GF_VALIDATE_OR_GOTO(this->name, child, notfound);
+
+    priv = this->private;
+
+    for (pos = 0; pos < priv->child_count; pos++) {
+        if (priv->children[pos].xl == child)
+            return pos;
+    }
+
+notfound:
+    return -1;
+}
+
+/**
+ * Look up, under priv->lock, the child whose brick_path equals
+ * @brick_path. Returns the child, or NULL when there is no match or an
+ * argument is invalid.
+ */
+br_child_t *
+br_get_child_from_brick_path(xlator_t *this, char *brick_path)
+{
+    br_private_t *priv = NULL;
+    br_child_t *found = NULL;
+    int idx = 0;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+    GF_VALIDATE_OR_GOTO(this->name, brick_path, out);
+
+    priv = this->private;
+
+    pthread_mutex_lock(&priv->lock);
+    for (idx = 0; idx < priv->child_count; idx++) {
+        if (strcmp(priv->children[idx].brick_path, brick_path) == 0) {
+            found = &priv->children[idx];
+            break;
+        }
+    }
+    pthread_mutex_unlock(&priv->lock);
+
+out:
+    return found;
+}
+
+/**
+ * Brick init hook: for now the brick spec itself is handed back as the
+ * per-brick data. It may get wrapped in a structure of our own later,
+ * should that become necessary.
+ */
+void *
+br_brick_init(void *xl, struct gf_brick_spec *brick)
+{
+    void *data = brick;
+
+    return data;
+}
+
+/**
+ * Brick fini hook: nothing to release at present. Cleanup belongs here
+ * once br_brick_init() starts allocating.
+ */
+void
+br_brick_fini(void *xl, char *brick, void *data)
+{
+}
+
+/**
+ * Allocate and fill a signature for @object from the @hashlen bytes of
+ * hash in @sign, tagged with @hashtype. The buffer holds one extra byte
+ * after the hash, set to '\0'.
+ *
+ * Returns the signature (to be released with GF_FREE by the caller), or
+ * NULL on allocation failure.
+ *
+ * TODO: Signature can contain null terminators which causes bitrot
+ * stub to store truncated hash as it depends on string length of
+ * the hash.
+ *
+ * FIX: Send the string length as part of the signature struct and
+ *      change stub to handle this change.
+ */
+static br_isignature_t *
+br_prepare_signature(const unsigned char *sign, unsigned long hashlen,
+                     int8_t hashtype, br_object_t *object)
+{
+    br_isignature_t *signature = NULL;
+
+    /* TODO: use mem-pool */
+    signature = GF_CALLOC(1, signature_size(hashlen + 1),
+                          gf_br_stub_mt_signature_t);
+    if (!signature)
+        return NULL;
+
+    /* object version */
+    signature->signedversion = object->signedversion;
+
+    /* signature length & type */
+    signature->signaturelen = hashlen;
+    signature->signaturetype = hashtype;
+
+    /*
+     * signature itself: hashlen bytes of hash followed by the terminator.
+     * Only hashlen + 1 bytes were allocated, so the terminator lives at
+     * index hashlen (hashlen + 1 would be one byte past the buffer).
+     */
+    memcpy(signature->signature, (char *)sign, hashlen);
+    signature->signature[hashlen] = '\0';
+
+    return signature;
+}
+
+/**
+ * Tell whether an object is marked bad, i.e. whether fetching the
+ * BITROT_OBJECT_BAD_KEY xattr from @child succeeds. The xattr is fetched
+ * through @fd when given, else through @loc.
+ *
+ * Returns _gf_true when the fetch succeeds; _gf_false otherwise, including
+ * when @this or @child is NULL or neither @loc nor @fd is supplied.
+ */
+gf_boolean_t
+bitd_is_bad_file(xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd)
+{
+    int32_t ret = -1;
+    dict_t *xattr = NULL;
+    inode_t *inode = NULL;
+    gf_boolean_t bad_file = _gf_false;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, child, out);
+
+    /* without a loc or an fd there is nothing to query (or dereference) */
+    if (!loc && !fd)
+        goto out;
+
+    inode = (loc) ? loc->inode : fd->inode;
+
+    if (fd)
+        ret = syncop_fgetxattr(child->xl, fd, &xattr, BITROT_OBJECT_BAD_KEY,
+                               NULL, NULL);
+    else
+        ret = syncop_getxattr(child->xl, loc, &xattr, BITROT_OBJECT_BAD_KEY,
+                              NULL, NULL);
+
+    if (!ret) {
+        gf_msg_debug(this->name, 0, "[GFID: %s] is marked corrupted",
+                     uuid_utoa(inode->gfid));
+        bad_file = _gf_true;
+    }
+
+    if (xattr)
+        dict_unref(xattr);
+
+out:
+    return bad_file;
+}
+
+/**
+ * Do a lookup on the gfid present within the object and link the result
+ * into the child's inode table.
+ *
+ * On success 0 is returned, @iatt is filled in and *@linked_inode is the
+ * linked inode (unref'd by the caller when done). On failure a negative
+ * value is returned and *@linked_inode must not be used.
+ */
+static int32_t
+br_object_lookup(xlator_t *this, br_object_t *object, struct iatt *iatt,
+                 inode_t **linked_inode)
+{
+    int ret = -EINVAL;
+    loc_t loc = {
+        0,
+    };
+    inode_t *inode = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, object, out);
+
+    /* reuse the inode when the table already knows the gfid */
+    inode = inode_find(object->child->table, object->gfid);
+
+    if (inode)
+        loc.inode = inode;
+    else
+        loc.inode = inode_new(object->child->table);
+
+    if (!loc.inode) {
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    gf_uuid_copy(loc.gfid, object->gfid);
+
+    ret = syncop_lookup(object->child->xl, &loc, iatt, NULL, NULL, NULL);
+    if (ret < 0)
+        goto out;
+
+    /*
+     * The file might have been deleted by the application
+     * after getting the event, but before doing a lookup.
+     * So use linked_inode after inode_link is done.
+     */
+    *linked_inode = inode_link(loc.inode, NULL, NULL, iatt);
+    if (!*linked_inode) {
+        /* never report success without an inode for the caller to use */
+        ret = -EINVAL;
+        goto out;
+    }
+    inode_lookup(*linked_inode);
+
+out:
+    loc_wipe(&loc);
+    return ret;
+}
+
+/**
+ * Open the object read-only (O_RDONLY) on its child and hand the fd back
+ * through @openfd.
+ *
+ * Open question: the brick cannot be told that this open comes from bitd,
+ * as the syncop framework does not allow passing xdata -- perhaps
+ * frame->root->pid itself could be used for that.
+ *
+ * Returns 0 on success; -1 on invalid arguments, -EINVAL when no fd could
+ * be created, else the failure value of syncop_open(). *@openfd is only
+ * written on success.
+ */
+static int32_t
+br_object_open(xlator_t *this, br_object_t *object, inode_t *inode,
+               fd_t **openfd)
+{
+    loc_t loc = {
+        0,
+    };
+    fd_t *handle = NULL;
+    int32_t ret = -1;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, object, out);
+    GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+    handle = fd_create(inode, 0);
+    if (!handle) {
+        ret = -EINVAL;
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+                "gfid=%s", uuid_utoa(inode->gfid), NULL);
+        goto out;
+    }
+
+    loc.inode = inode_ref(inode);
+    gf_uuid_copy(loc.gfid, inode->gfid);
+
+    ret = syncop_open(object->child->xl, &loc, O_RDONLY, handle, NULL, NULL);
+    if (ret == 0) {
+        fd_bind(handle);
+        *openfd = handle;
+    } else {
+        br_log_object(this, "open", inode->gfid, -ret);
+        fd_unref(handle);
+        handle = NULL;
+    }
+
+    loc_wipe(&loc);
+
+out:
+    return ret;
+}
+
+/**
+ * Read up to @size bytes of the object behind @fd, starting at @offset,
+ * and feed what was read into the running @sha256 context. Hashing is
+ * throttled through priv->tbf.
+ *
+ * Returns the (positive) value of syncop_readv() when data was read, 0
+ * when nothing was read, and -1 on invalid arguments or read failure.
+ */
+static int32_t
+br_object_read_block_and_sign(xlator_t *this, fd_t *fd, br_child_t *child,
+                              off_t offset, size_t size, SHA256_CTX *sha256)
+{
+    int32_t ret = -1;
+    tbf_t *tbf = NULL;
+    struct iovec *iovec = NULL;
+    struct iobref *iobref = NULL;
+    br_private_t *priv = NULL;
+    int count = 0;
+    int i = 0;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, fd, out);
+    GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+    GF_VALIDATE_OR_GOTO(this->name, child, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    priv = this->private;
+
+    GF_VALIDATE_OR_GOTO(this->name, priv->tbf, out);
+    tbf = priv->tbf;
+
+    ret = syncop_readv(child->xl, fd, size, offset, 0, &iovec, &count, &iobref,
+                       NULL, NULL, NULL);
+
+    if (ret < 0) {
+        /* the failure reason is carried in ret, as for the other syncops */
+        gf_smsg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_READV_FAILED,
+                "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        ret = -1;
+        goto out;
+    }
+
+    /* nothing read: nothing to hash */
+    if (ret == 0)
+        goto out;
+
+    for (i = 0; i < count; i++) {
+        /* every BEGIN is closed by its matching END */
+        TBF_THROTTLE_BEGIN(tbf, TBF_OP_HASH, iovec[i].iov_len);
+        {
+            SHA256_Update(sha256, (const unsigned char *)(iovec[i].iov_base),
+                          iovec[i].iov_len);
+        }
+        TBF_THROTTLE_END(tbf, TBF_OP_HASH, iovec[i].iov_len);
+    }
+
+out:
+    if (iovec)
+        GF_FREE(iovec);
+
+    if (iobref)
+        iobref_unref(iobref);
+
+    return ret;
+}
+
+/**
+ * Compute the SHA256 digest of the object behind @fd by reading it in
+ * chunks of BR_HASH_CALC_READ_SIZE bytes until a read returns 0. The
+ * digest is written to @md only when the whole object was read.
+ *
+ * Returns 0 on success, a negative value on invalid arguments or when a
+ * block could not be read.
+ */
+int32_t
+br_calculate_obj_checksum(unsigned char *md, br_child_t *child, fd_t *fd,
+                          struct iatt *iatt)
+{
+    SHA256_CTX sha256;
+    xlator_t *this = NULL;
+    off_t offset = 0;
+    int32_t ret = -1;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", child, out);
+    GF_VALIDATE_OR_GOTO("bit-rot", iatt, out);
+    GF_VALIDATE_OR_GOTO("bit-rot", fd, out);
+
+    this = child->this;
+
+    SHA256_Init(&sha256);
+
+    for (;;) {
+        ret = br_object_read_block_and_sign(this, fd, child, offset,
+                                            BR_HASH_CALC_READ_SIZE, &sha256);
+        if (ret <= 0) {
+            if (ret < 0)
+                gf_smsg(this->name, GF_LOG_ERROR, 0,
+                        BRB_MSG_BLOCK_READ_FAILED, "offset=%" PRIu64, offset,
+                        "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+            break;
+        }
+
+        /* continue right after what was just consumed */
+        offset += ret;
+    }
+
+    if (ret == 0)
+        SHA256_Final(md, &sha256);
+
+out:
+    return ret;
+}
+
+/* Checksum @object's data via br_calculate_obj_checksum() on its child. */
+static int32_t
+br_object_checksum(unsigned char *md, br_object_t *object, fd_t *fd,
+                   struct iatt *iatt)
+{
+    br_child_t *child = object->child;
+
+    return br_calculate_obj_checksum(md, child, fd, iatt);
+}
+
+/**
+ * Checksum the object behind @fd, wrap the digest into a signature and
+ * store it on the object with an fsetxattr of
+ * GLUSTERFS_SET_OBJECT_SIGNATURE.
+ *
+ * Returns 0 only when the signature was set. Every failure (invalid
+ * argument, allocation, checksum, signature/dict preparation, fsetxattr)
+ * yields a non-zero value.
+ */
+static int32_t
+br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object,
+                    struct iatt *iatt)
+{
+    int32_t ret = -1;
+    xlator_t *this = NULL;
+    dict_t *xattr = NULL;
+    unsigned char *md = NULL;
+    br_isignature_t *sign = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", object, out);
+    GF_VALIDATE_OR_GOTO("bit-rot", linked_inode, out);
+    GF_VALIDATE_OR_GOTO("bit-rot", fd, out);
+
+    this = object->this;
+
+    md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char);
+    if (!md) {
+        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_SAVING_HASH_FAILED,
+                "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        goto out;
+    }
+
+    ret = br_object_checksum(md, object, fd, iatt);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED,
+                "object-gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
+        goto free_md;
+    }
+
+    sign = br_prepare_signature(md, SHA256_DIGEST_LENGTH,
+                                BR_SIGNATURE_TYPE_SHA256, object);
+    if (!sign) {
+        /* ret is still 0 from the checksum: turn this into a failure */
+        ret = -ENOMEM;
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+                "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        goto free_md;
+    }
+
+    xattr = dict_for_key_value(GLUSTERFS_SET_OBJECT_SIGNATURE, (void *)sign,
+                               signature_size(SHA256_DIGEST_LENGTH), _gf_true);
+
+    if (!xattr) {
+        /* same here: do not report success without having signed */
+        ret = -ENOMEM;
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+                "dict-allocation object-gfid=%s", uuid_utoa(fd->inode->gfid),
+                NULL);
+        goto free_sign;
+    }
+
+    ret = syncop_fsetxattr(object->child->xl, fd, xattr, 0, NULL, NULL);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+                "fsetxattr object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        goto unref_dict;
+    }
+
+    ret = 0;
+
+unref_dict:
+    dict_unref(xattr);
+free_sign:
+    GF_FREE(sign);
+free_md:
+    GF_FREE(md);
+out:
+    return ret;
+}
+
+/*
+ * Return 1 when @op_errno is one of ENOENT, ESTALE or ENODATA (errors the
+ * callers log at debug level only), 0 for anything else.
+ */
+static int
+br_object_sign_softerror(int32_t op_errno)
+{
+    if (op_errno == ENOENT)
+        return 1;
+
+    if (op_errno == ESTALE)
+        return 1;
+
+    return (op_errno == ENODATA);
+}
+
+/**
+ * Log a failed operation @op on the object identified by @gfid. Soft
+ * errors (see br_object_sign_softerror()) are logged at debug level,
+ * everything else as an error.
+ */
+void
+br_log_object(xlator_t *this, char *op, uuid_t gfid, int32_t op_errno)
+{
+    if (!br_object_sign_softerror(op_errno)) {
+        gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s",
+                op, "gfid=%s", uuid_utoa(gfid), NULL);
+        return;
+    }
+
+    gf_msg_debug(this->name, 0,
+                 "%s() failed on object %s "
+                 "[reason: %s]",
+                 op, uuid_utoa(gfid), strerror(op_errno));
+}
+
+/**
+ * Log a failed operation @op on the object at @path. Soft errors (see
+ * br_object_sign_softerror()) are logged at debug level, everything else
+ * as an error.
+ */
+void
+br_log_object_path(xlator_t *this, char *op, const char *path, int32_t op_errno)
+{
+    if (!br_object_sign_softerror(op_errno)) {
+        gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s",
+                op, "path=%s", path, NULL);
+        return;
+    }
+
+    gf_msg_debug(this->name, 0,
+                 "%s() failed on object %s "
+                 "[reason: %s]",
+                 op, path, strerror(op_errno));
+}
+
+/**
+ * Trigger signing of an object on @child: open it O_RDWR and send an
+ * fsetxattr carrying BR_REOPEN_SIGN_HINT_KEY, whose value is
+ * BR_OBJECT_REOPEN when @need_reopen is _gf_true and BR_OBJECT_RESIGN
+ * otherwise. Any failure on the way ends in a warning.
+ */
+static void
+br_trigger_sign(xlator_t *this, br_child_t *child, inode_t *linked_inode,
+                loc_t *loc, gf_boolean_t need_reopen)
+{
+    pid_t pid = GF_CLIENT_PID_BITD;
+    dict_t *hint = NULL;
+    fd_t *fd = NULL;
+    uint32_t val = 0;
+    int32_t ret = -1;
+
+    syncopctx_setfspid(&pid);
+
+    if (need_reopen == _gf_true)
+        val = BR_OBJECT_REOPEN;
+    else
+        val = BR_OBJECT_RESIGN;
+
+    hint = dict_new();
+    if (!hint)
+        goto warn;
+
+    ret = dict_set_uint32(hint, BR_REOPEN_SIGN_HINT_KEY, val);
+    if (ret)
+        goto drop_hint;
+
+    fd = fd_create(linked_inode, 0);
+    if (!fd) {
+        ret = -1;
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+                "gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
+        goto drop_hint;
+    }
+
+    ret = syncop_open(child->xl, loc, O_RDWR, fd, NULL, NULL);
+    if (ret) {
+        br_log_object(this, "open", linked_inode->gfid, -ret);
+        goto drop_fd;
+    }
+
+    fd_bind(fd);
+
+    ret = syncop_fsetxattr(child->xl, fd, hint, 0, NULL, NULL);
+    if (ret)
+        br_log_object(this, "fsetxattr", linked_inode->gfid, -ret);
+
+    /* success and failure both fall through to the fd release */
+
+drop_fd:
+    fd_unref(fd);
+drop_hint:
+    dict_unref(hint);
+warn:
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN_FAILED,
+                "gfid=%s", uuid_utoa(linked_inode->gfid), "reopen-hint-val=%d",
+                val, NULL);
+    }
+}
+
+/**
+ * Trigger a re-sign (no reopen) of @object: build a loc for
+ * @linked_inode and hand it to br_trigger_sign().
+ */
+static void
+br_object_resign(xlator_t *this, br_object_t *object, inode_t *linked_inode)
+{
+    loc_t target = {
+        0,
+    };
+
+    gf_uuid_copy(target.gfid, linked_inode->gfid);
+    target.inode = inode_ref(linked_inode);
+
+    br_trigger_sign(this, object->child, linked_inode, &target, _gf_false);
+
+    /* drops the reference taken above */
+    loc_wipe(&target);
+}
+
+/**
+ * Sign one object: look it up, then either trigger a re-sign (objects in
+ * BR_SIGN_REOPEN_WAIT) or open it, checksum it and store the signature.
+ *
+ * Signing runs full throttle. Some form of priority scheduling and/or
+ * read burstness is needed to avoid starving (or kicking) client I/O.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+static int32_t
+br_sign_object(br_object_t *object)
+{
+    struct iatt iatt = {
+        0,
+    };
+    br_sign_state_t sign_info = BR_SIGN_NORMAL;
+    pid_t pid = GF_CLIENT_PID_BITD;
+    inode_t *linked_inode = NULL;
+    xlator_t *this = NULL;
+    fd_t *fd = NULL;
+    int32_t ret = -1;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", object, out);
+
+    this = object->this;
+
+    /**
+     * FIXME: signing is restricted to clients carrying the special
+     * frame->root->pid, hence the pid override. The way the client
+     * pid gets set should change.
+     */
+    syncopctx_setfspid(&pid);
+
+    ret = br_object_lookup(this, object, &iatt, &linked_inode);
+    if (ret) {
+        br_log_object(this, "lookup", object->gfid, -ret);
+        goto out;
+    }
+
+    /**
+     * fd's that notified for reopening get an explicit open()
+     * followed by a dummy write() call, which is what triggers the
+     * actual signing of the object.
+     */
+    sign_info = ntohl(object->sign_info);
+    if (sign_info == BR_SIGN_REOPEN_WAIT) {
+        br_object_resign(this, object, linked_inode);
+        goto put_inode;
+    }
+
+    ret = br_object_open(this, object, linked_inode, &fd);
+    if (!fd) {
+        br_log_object(this, "open", object->gfid, -ret);
+        goto put_inode;
+    }
+
+    /**
+     * The object is open now: from here on file operation errors
+     * are no longer treated generously.
+     */
+    gf_msg_debug(this->name, 0, "Signing object [%s]",
+                 uuid_utoa(linked_inode->gfid));
+
+    ret = br_object_read_sign(linked_inode, fd, object, &iatt);
+    if (ret)
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED,
+                "gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
+
+    fd_unref(fd);
+put_inode:
+    inode_unref(linked_inode);
+out:
+    return ret;
+}
+
+/**
+ * Detach and return the first object of the signing queue, waiting on
+ * priv->object_cond (with priv->lock) for as long as the queue is empty.
+ */
+static br_object_t *
+__br_pick_object(br_private_t *priv)
+{
+    struct list_head *queue = &priv->obj_queue->objects;
+    br_object_t *picked = NULL;
+
+    while (list_empty(queue))
+        pthread_cond_wait(&priv->object_cond, &priv->lock);
+
+    picked = list_first_entry(queue, br_object_t, list);
+    list_del_init(&picked->list);
+
+    return picked;
+}
+
+/**
+ * Thread routine that triggers the signing of objects: it loops forever,
+ * taking one object at a time off the queue, signing it and freeing it.
+ * @arg is the bit-rot xlator.
+ */
+void *
+br_process_object(void *arg)
+{
+    xlator_t *this = arg;
+    br_private_t *priv = this->private;
+    br_object_t *object = NULL;
+    int32_t ret = -1;
+
+    THIS = this;
+
+    while (1) {
+        pthread_mutex_lock(&priv->lock);
+        object = __br_pick_object(priv);
+        pthread_mutex_unlock(&priv->lock);
+
+        /* soft errors are not worth an error message */
+        ret = br_sign_object(object);
+        if (ret != 0 && br_object_sign_softerror(-ret) == 0)
+            gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+                    "gfid=%s", uuid_utoa(object->gfid), NULL);
+        GF_FREE(object);
+    }
+
+    return NULL;
+}
+
+/**
+ * Timer-wheel callback, run once an object's timer has expired: the
+ * object received via changelog notification (@data) is appended to the
+ * queue the signing threads pick from, and the waiters are woken up.
+ * A non-NULL @timer is handed back with mem_put().
+ *
+ * This could become much lighter with an alternate timer-wheel API that
+ * dispatches _all_ expired objects in one shot instead of one object at
+ * a time -- the routine would then boil down to list_splice_tail().
+ *
+ * NOTE: use call_time to instrument signing time in br_sign_object().
+ */
+void
+br_add_object_to_queue(struct gf_tw_timer_list *timer, void *data,
+                       unsigned long call_time)
+{
+    br_object_t *object = data;
+    xlator_t *this = object->this;
+    br_private_t *priv = this->private;
+
+    THIS = this;
+
+    pthread_mutex_lock(&priv->lock);
+    list_add_tail(&object->list, &priv->obj_queue->objects);
+    pthread_cond_broadcast(&priv->object_cond);
+    pthread_mutex_unlock(&priv->lock);
+
+    if (timer)
+        mem_put(timer);
+}
+
+/**
+ * Allocate a br_object_t and fill it from a changelog release event.
+ *
+ * @this:  bit-rot xlator, stored in the object
+ * @child: subvolume the event belongs to, stored in the object
+ * @ev:    changelog event; gfid, version and sign_info are copied
+ *
+ * Returns the new object, or NULL if the allocation failed.
+ */
+static br_object_t *
+br_initialize_object(xlator_t *this, br_child_t *child, changelog_event_t *ev)
+{
+    br_object_t *obj = GF_CALLOC(1, sizeof(*obj), gf_br_mt_br_object_t);
+
+    if (obj == NULL)
+        return NULL;
+
+    INIT_LIST_HEAD(&obj->list);
+    obj->this = this;
+    obj->child = child;
+    gf_uuid_copy(obj->gfid, ev->u.releasebr.gfid);
+
+    /* NOTE: both values are copied as-is (BE), no conversion done here */
+    obj->signedversion = ev->u.releasebr.version;
+    obj->sign_info = ev->u.releasebr.sign_info;
+
+    return obj;
+}
+
+/**
+ * Allocate a timer from the child's timer pool, arm it with the
+ * configured expiry time and add it to the timer wheel. On expiry
+ * br_add_object_to_queue() is invoked with @object as its data.
+ *
+ * @ev is not used here.
+ *
+ * Returns the armed timer, or NULL if no timer could be allocated.
+ */
+static struct gf_tw_timer_list *
+br_initialize_timer(xlator_t *this, br_object_t *object, br_child_t *child,
+                    changelog_event_t *ev)
+{
+    br_private_t *priv = this->private;
+    struct gf_tw_timer_list *tw_timer = mem_get0(child->timer_pool);
+
+    if (tw_timer == NULL)
+        return NULL;
+
+    INIT_LIST_HEAD(&tw_timer->entry);
+
+    /* an expiry of zero is bumped to 1 */
+    tw_timer->expires = priv->expiry_time;
+    if (tw_timer->expires == 0)
+        tw_timer->expires = 1;
+
+    tw_timer->function = br_add_object_to_queue;
+    tw_timer->data = object;
+    gf_tw_add_timer(priv->timer_wheel, tw_timer);
+
+    return tw_timer;
+}
+
+/**
+ * Defer signing of @object by placing it on the timer wheel.
+ *
+ * Returns 0 when the timer was armed, -1 (after logging) otherwise.
+ */
+static int32_t
+br_schedule_object_reopen(xlator_t *this, br_object_t *object,
+                          br_child_t *child, changelog_event_t *ev)
+{
+    if (br_initialize_timer(this, object, child, ev) != NULL)
+        return 0;
+
+    gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED, "gfid=%s",
+            uuid_utoa(object->gfid), NULL);
+    return -1;
+}
+
+/**
+ * Queue @object for signing right away, bypassing the timer wheel:
+ * br_add_object_to_queue() is called directly with no timer and a
+ * call time of zero. Always returns 0.
+ */
+static int32_t
+br_object_quicksign(xlator_t *this, br_object_t *object)
+{
+    br_add_object_to_queue(NULL, object, 0ULL);
+    return 0;
+}
+
+/**
+ * This callback function registered with the changelog is executed
+ * whenever a notification from the changelog is received. It creates
+ * an object for the gfid the notification refers to and either adds it
+ * to the timer-wheel with some expiry time (sign_info is
+ * BR_SIGN_REOPEN_WAIT) or queues it for signing immediately.
+ *
+ * @xl:    the bit-rot xlator
+ * @brick: brick path the event originated from
+ * @data:  unused here
+ * @ev:    the changelog event (expected to be a BR release event)
+ *
+ * On success ownership of the object passes to the timer/queue; on
+ * failure the object is freed here.
+ *
+ * TODO: use mem-pool for allocations and maybe allocate timer and
+ * object as a single alloc and bifurcate their respective pointers.
+ */
+void
+br_brick_callback(void *xl, char *brick, void *data, changelog_event_t *ev)
+{
+    int32_t ret = 0;
+    uuid_t gfid = {
+        0,
+    };
+    xlator_t *this = NULL;
+    br_object_t *object = NULL;
+    br_child_t *child = NULL;
+    br_sign_state_t sign_info = BR_SIGN_INVALID;
+
+    this = xl;
+
+    /* validate @this first: the remaining checks dereference it */
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, ev, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    GF_ASSERT(ev->ev_type == CHANGELOG_OP_TYPE_BR_RELEASE);
+    GF_ASSERT(!gf_uuid_is_null(ev->u.releasebr.gfid));
+
+    gf_uuid_copy(gfid, ev->u.releasebr.gfid);
+
+    gf_msg_debug(this->name, 0, "RELEASE EVENT [GFID %s]", uuid_utoa(gfid));
+
+    child = br_get_child_from_brick_path(this, brick);
+    if (!child) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED,
+                "brick=%s", brick, NULL);
+        goto out;
+    }
+
+    object = br_initialize_object(this, child, ev);
+    if (!object) {
+        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+                "object-gfid=%s", uuid_utoa(gfid), NULL);
+        goto out;
+    }
+
+    /* sanity check */
+    sign_info = ntohl(object->sign_info);
+    GF_ASSERT(sign_info != BR_SIGN_NORMAL);
+
+    if (sign_info == BR_SIGN_REOPEN_WAIT)
+        ret = br_schedule_object_reopen(this, object, child, ev);
+    else
+        ret = br_object_quicksign(this, object);
+
+    if (ret)
+        goto free_object;
+
+    /* the object now belongs to the timer/queue: do not touch it */
+    gf_msg_debug(this->name, 0, "->callback: brick [%s], type [%d]\n", brick,
+                 ev->ev_type);
+    return;
+
+free_object:
+    GF_FREE(object);
+out:
+    return;
+}
+
+/**
+ * Populate a changelog brick specification for bit-rot: a private copy
+ * of @path, the BR release event filter and the bit-rot init/fini/
+ * callback hooks. No connect/disconnect hooks are installed.
+ *
+ * NOTE(review): the gf_strdup() result is stored unchecked, so
+ * brick->brick_path may be NULL on return.
+ */
+void
+br_fill_brick_spec(struct gf_brick_spec *brick, char *path)
+{
+    /* event selection */
+    brick->filter = CHANGELOG_OP_TYPE_BR_RELEASE;
+    brick->brick_path = gf_strdup(path);
+
+    /* hooks */
+    brick->callback = br_brick_callback;
+    brick->init = br_brick_init;
+    brick->fini = br_brick_fini;
+    brick->disconnected = NULL;
+    brick->connected = NULL;
+}
+
+/**
+ * Decide from the signature xattr whether an object needs signing.
+ *
+ * @xattr is expected to carry GLUSTERFS_GET_OBJECT_SIGNATURE; the
+ * object is reported as needing a signature when that signature is
+ * marked stale.
+ *
+ * Returns _gf_true if the signature is stale; _gf_false otherwise,
+ * including when an argument is NULL or the signature cannot be
+ * fetched from @xattr.
+ */
+static gf_boolean_t
+br_check_object_need_sign(xlator_t *this, dict_t *xattr, br_child_t *child)
+{
+    int32_t ret = -1;
+    gf_boolean_t need_sign = _gf_false;
+    br_isignature_out_t *sign = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, xattr, out);
+    GF_VALIDATE_OR_GOTO(this->name, child, out);
+
+    ret = dict_get_ptr(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)&sign);
+    if (ret != 0) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+                "object-info", NULL);
+        goto out;
+    }
+
+    /* a stale signature means the object has to be signed (again) */
+    need_sign = sign->stale ? _gf_true : _gf_false;
+
+out:
+    return need_sign;
+}
+
+/**
+ * Build @loc for the directory entry @entry living under @parent.
+ *
+ * The inode is taken from the child's inode table when already known,
+ * otherwise a fresh inode is created. Parent inode, parent gfid, path
+ * and name are filled in as well.
+ *
+ * Returns:
+ *    1  - @loc is ready to be used
+ *    0  - the entry is already known not to be a regular file (skip it)
+ *   <0  - failure (inode allocation or path construction failed)
+ *
+ * In every case the caller is expected to loc_wipe() @loc.
+ */
+int32_t
+br_prepare_loc(xlator_t *this, br_child_t *child, loc_t *parent,
+               gf_dirent_t *entry, loc_t *loc)
+{
+    int32_t ret = -1;
+    inode_t *inode = NULL;
+
+    inode = inode_grep(child->table, parent->inode, entry->d_name);
+    if (!inode) {
+        loc->inode = inode_new(child->table);
+        if (!loc->inode) {
+            /* nothing sensible can be looked up without an inode */
+            gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+                    "name=%s", entry->d_name, NULL);
+            goto out;
+        }
+    } else {
+        loc->inode = inode;
+        if (loc->inode->ia_type != IA_IFREG) {
+            gf_msg_debug(this->name, 0,
+                         "%s is not a regular "
+                         "file",
+                         entry->d_name);
+            ret = 0;
+            goto out;
+        }
+    }
+
+    loc->parent = inode_ref(parent->inode);
+    gf_uuid_copy(loc->pargfid, parent->inode->gfid);
+
+    ret = inode_path(parent->inode, entry->d_name, (char **)&loc->path);
+    if (ret < 0 || !loc->path) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED,
+                "inode_path=%s", entry->d_name, "parent-gfid=%s",
+                uuid_utoa(parent->inode->gfid), NULL);
+        goto out;
+    }
+
+    /* name points at the last path component inside loc->path */
+    loc->name = strrchr(loc->path, '/');
+    if (loc->name)
+        loc->name++;
+
+    ret = 1;
+
+out:
+    return ret;
+}
+
+/**
+ * Oneshot crawler
+ * ---------------
+ * This is a catchup mechanism. Objects that remained unsigned from the
+ * last run for whatever reason (node crashes, reboots, etc..) become
+ * candidates for signing. This allows the signature to "catch up" with
+ * the current state of the object. Triggering signing is easy: perform
+ * an open() followed by a close() thereby resulting in call boomerang.
+ * (though not back to itself :))
+ *
+ * Invoked once per directory entry by the filesystem walker.
+ *
+ * @subvol: subvolume being crawled
+ * @entry:  directory entry to examine
+ * @parent: loc of the directory containing @entry
+ * @data:   the br_child_t being crawled
+ *
+ * Returns 0 when the entry was handled or skipped, non-zero when the
+ * entry could not be processed.
+ */
+int
+bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                   void *data)
+{
+    int op_errno = 0;
+    br_child_t *child = NULL;
+    xlator_t *this = NULL;
+    loc_t loc = {
+        0,
+    };
+    struct iatt iatt = {
+        0,
+    };
+    struct iatt parent_buf = {
+        0,
+    };
+    dict_t *xattr = NULL;
+    int32_t ret = -1;
+    inode_t *linked_inode = NULL;
+    gf_boolean_t need_signing = _gf_false;
+    gf_boolean_t need_reopen = _gf_true;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
+    GF_VALIDATE_OR_GOTO("bit-rot", data, out);
+
+    child = data;
+    this = child->this;
+
+    /*
+     * 0 means "skip this entry", a negative value means the loc could
+     * not be built; in neither case is there anything to look up.
+     */
+    ret = br_prepare_loc(this, child, parent, entry, &loc);
+    if (ret <= 0)
+        goto out;
+
+    ret = syncop_lookup(child->xl, &loc, &iatt, &parent_buf, NULL, NULL);
+    if (ret) {
+        br_log_object_path(this, "lookup", loc.path, -ret);
+        goto out;
+    }
+
+    linked_inode = inode_link(loc.inode, parent->inode, loc.name, &iatt);
+    if (!linked_inode) {
+        /* everything below dereferences the linked inode */
+        gf_msg_debug(this->name, 0, "failed to link inode for %s",
+                     entry->d_name);
+        ret = -1;
+        goto out;
+    }
+    inode_lookup(linked_inode);
+
+    if (iatt.ia_type != IA_IFREG) {
+        gf_msg_debug(this->name, 0,
+                     "%s is not a regular file, "
+                     "skipping..",
+                     entry->d_name);
+        ret = 0;
+        goto unref_inode;
+    }
+
+    /**
+     * As of now, 2 cases  are possible and handled.
+     * 1) GlusterFS is upgraded from a previous version which does not
+     *    have any idea about bit-rot and have data in the filesystem.
+     *    In this case syncop_getxattr fails with ENODATA and the object
+     *    is signed. (In real, when crawler sends lookup, bit-rot-stub
+     *    creates the xattrs before returning lookup reply)
+     * 2) Bit-rot was not enabled or BitD was down for some reason, during
+     *    which some files were created, but since BitD was down, were not
+     *    signed.
+     * If the file was just created and was being written some data when
+     * the down BitD came up, then bit-rot stub should be intelligent to
+     * identify this case (by comparing the ongoing version or by checking
+     * if there are any fds present for that inode) and handle properly.
+     */
+
+    if (bitd_is_bad_file(this, child, &loc, NULL)) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, "path=%s",
+                loc.path, NULL);
+        goto unref_inode;
+    }
+
+    ret = syncop_getxattr(child->xl, &loc, &xattr,
+                          GLUSTERFS_GET_OBJECT_SIGNATURE, NULL, NULL);
+    if (ret < 0) {
+        op_errno = -ret;
+        br_log_object(this, "getxattr", linked_inode->gfid, op_errno);
+
+        /**
+         * No need to sign the zero byte objects as the signing
+         * happens upon first modification of the object.
+         */
+        if (op_errno == ENODATA && (iatt.ia_size != 0))
+            need_signing = _gf_true;
+        if (op_errno == EINVAL)
+            gf_smsg(this->name, GF_LOG_WARNING, 0,
+                    BRB_MSG_PARTIAL_VERSION_PRESENCE, "gfid=%s",
+                    uuid_utoa(linked_inode->gfid), NULL);
+    } else {
+        need_signing = br_check_object_need_sign(this, xattr, child);
+
+        /*
+         * If we are here means, bitrot daemon has started. Is it just
+         * a simple restart of the daemon or is it started because the
+         * feature is enabled is something hard to determine. Hence,
+         * if need_signing is false (because bit-rot version and signature
+         * are present), then still go ahead and sign it.
+         */
+        if (!need_signing) {
+            need_signing = _gf_true;
+            need_reopen = _gf_true;
+        }
+    }
+
+    if (!need_signing)
+        goto unref_dict;
+
+    gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, "path=%s",
+            loc.path, "gfid=%s", uuid_utoa(linked_inode->gfid), "Brick-path=%s",
+            child->brick_path, NULL);
+    br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
+
+    ret = 0;
+
+unref_dict:
+    if (xattr)
+        dict_unref(xattr);
+unref_inode:
+    inode_unref(linked_inode);
+out:
+    loc_wipe(&loc);
+
+    return ret;
+}
+
+/*
+ * Throttling parameters handed to syncop_ftw_throttle() by the oneshot
+ * crawler (presumably: number of entries per batch and the pause
+ * between batches -- confirm against syncop_ftw_throttle()).
+ */
+#define BR_CRAWL_THROTTLE_COUNT 50
+#define BR_CRAWL_THROTTLE_ZZZ 5
+
+/**
+ * Thread body of the oneshot crawler: walks the child's filesystem
+ * from the root of its inode table, calling bitd_oneshot_crawl() for
+ * the entries found. Start and finish of the crawl are logged; the
+ * result of the walk itself is ignored.
+ *
+ * @arg: the br_child_t to crawl.
+ */
+void *
+br_oneshot_signer(void *arg)
+{
+    br_child_t *child = arg;
+    xlator_t *this = child->this;
+    loc_t root_loc = {
+        0,
+    };
+
+    THIS = this;
+
+    gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, "brick-path=%s",
+            child->brick_path, NULL);
+
+    root_loc.inode = child->table->root;
+    (void)syncop_ftw_throttle(child->xl, &root_loc, GF_CLIENT_PID_BITD, child,
+                              bitd_oneshot_crawl, BR_CRAWL_THROTTLE_COUNT,
+                              BR_CRAWL_THROTTLE_ZZZ);
+
+    gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH,
+            "brick-path=%s", child->brick_path, NULL);
+
+    return NULL;
+}
+
+/**
+ * Locked wrapper around _br_set_child_state(): updates the state of
+ * @child while holding child->lock.
+ */
+static void
+br_set_child_state(br_child_t *child, br_child_state_t state)
+{
+    pthread_mutex_lock(&child->lock);
+    _br_set_child_state(child, state);
+    pthread_mutex_unlock(&child->lock);
+}
+
+/**
+ * At this point a thread is spawned to crawl the filesystem (in
+ * tortoise pace) to sign objects that were not signed in previous run(s).
+ * Such objects are identified by examining it's dirtyness and timestamp.
+ *
+ *    pick object:
+ *       signature_is_stale() && (object_timestamp() <= stub_init_time())
+ *
+ * Also, we register to the changelog library to subscribe for event
+ * notifications.
+ *
+ * @this:  bit-rot xlator
+ * @child: subvolume to start signing for
+ * @stub:  stub information of the brick; its export path is used for
+ *         the changelog registration
+ *
+ * Returns 0 on success (even if the crawler thread could not be
+ * spawned), -1 if the brick spec could not be set up or registered.
+ */
+static int32_t
+br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
+{
+    int32_t ret = 0;
+    br_private_t *priv = NULL;
+    struct gf_brick_spec *brick = NULL;
+
+    priv = this->private;
+
+    brick = GF_CALLOC(1, sizeof(struct gf_brick_spec),
+                      gf_common_mt_gf_brick_spec_t);
+    if (!brick)
+        goto error_return;
+
+    br_fill_brick_spec(brick, stub->export);
+    if (!brick->brick_path) {
+        /* the path copy failed; do not register a brick without path */
+        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+                "brick-path=%s", stub->export, NULL);
+        goto dealloc;
+    }
+
+    ret = gf_changelog_register_generic(brick, 1, 1,
+                                        this->ctx->cmd_args.log_file, -1, this);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED, NULL);
+        goto dealloc;
+    }
+
+    child->threadrunning = 0;
+    ret = gf_thread_create(&child->thread, NULL, br_oneshot_signer, child,
+                           "brosign");
+    if (ret)
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED,
+                "FS-crawler-thread", NULL);
+    else
+        child->threadrunning = 1;
+
+    /* it's OK to continue, "old" objects would be signed when modified */
+    list_add_tail(&child->list, &priv->signing);
+    return 0;
+
+dealloc:
+    /* the spec owns the duplicated path: release it along with the spec */
+    GF_FREE(brick->brick_path);
+    GF_FREE(brick);
+error_return:
+    return -1;
+}
+
+/**
+ * Spawn the filesystem scanner thread for @child, kick the scrub
+ * monitor on first use and add the child to the scrubber's list.
+ *
+ * @fsscan is not used here.
+ *
+ * Returns 0 on success, -1 if the scanner thread could not be created.
+ */
+static int32_t
+br_launch_scrubber(xlator_t *this, br_child_t *child, struct br_scanfs *fsscan,
+                   struct br_scrubber *fsscrub)
+{
+    br_private_t *priv = this->private;
+    struct br_monitor *scrub_monitor = &priv->scrub_monitor;
+    int32_t ret = -1;
+
+    ret = gf_thread_create(&child->thread, NULL, br_fsscanner, child,
+                           "brfsscan");
+    if (ret != 0) {
+        gf_smsg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
+                "bitrot-scrubber-daemon Brick-path=%s", child->brick_path,
+                NULL);
+        return -1;
+    }
+
+    /* Signal monitor to kick off state machine (first launch only) */
+    pthread_mutex_lock(&scrub_monitor->mutex);
+    if (!scrub_monitor->inited)
+        pthread_cond_signal(&scrub_monitor->cond);
+    scrub_monitor->inited = _gf_true;
+    pthread_mutex_unlock(&scrub_monitor->mutex);
+
+    /* Everything has been setup.. add this subvolume to scrubbers list. */
+    pthread_mutex_lock(&fsscrub->mutex);
+    list_add_tail(&child->list, &fsscrub->scrublist);
+    pthread_cond_broadcast(&fsscrub->cond);
+    pthread_mutex_unlock(&fsscrub->mutex);
+
+    return 0;
+}
+
+/**
+ * Prepare @child for scrubbing and launch its scrubber.
+ *
+ * On the first connection of a child its scanner locks, condition
+ * variable and entry lists are initialized before launching; on later
+ * connections only the launch is repeated. If the first launch fails
+ * the freshly initialized synchronization objects are destroyed again.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int32_t
+br_enact_scrubber(xlator_t *this, br_child_t *child)
+{
+    br_private_t *priv = this->private;
+    struct br_scanfs *fsscan = &child->fsscan;
+    struct br_scrubber *fsscrub = &priv->fsscrub;
+
+    /**
+     * if this child already witnesses a successful connection earlier
+     * there's no need to initialize mutexes, condvars, etc..
+     */
+    if (_br_child_witnessed_connection(child))
+        return br_launch_scrubber(this, child, fsscan, fsscrub);
+
+    LOCK_INIT(&fsscan->entrylock);
+    pthread_mutex_init(&fsscan->waitlock, NULL);
+    pthread_cond_init(&fsscan->waitcond, NULL);
+
+    fsscan->entries = 0;
+    INIT_LIST_HEAD(&fsscan->queued);
+    INIT_LIST_HEAD(&fsscan->ready);
+
+    if (br_launch_scrubber(this, child, fsscan, fsscrub) == 0)
+        return 0;
+
+    /* launch failed: undo the initialization done above */
+    LOCK_DESTROY(&fsscan->entrylock);
+    pthread_mutex_destroy(&fsscan->waitlock);
+    pthread_cond_destroy(&fsscan->waitcond);
+
+    return -1;
+}
+
+/**
+ * Bring @child into service as scrubber or signer, depending on the
+ * role of this process, with child->lock held throughout. On success
+ * the child is marked as having witnessed a connection and moved to
+ * the CONNECTED state.
+ *
+ * Returns the result of the scrubber/signer enaction.
+ */
+static int32_t
+br_child_enaction(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
+{
+    br_private_t *priv = this->private;
+    int32_t ret = -1;
+
+    pthread_mutex_lock(&child->lock);
+
+    if (priv->iamscrubber)
+        ret = br_enact_scrubber(this, child);
+    else
+        ret = br_enact_signer(this, child, stub);
+
+    if (ret == 0) {
+        child->witnessed = 1;
+        _br_set_child_state(child, BR_CHILD_STATE_CONNECTED);
+        gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK,
+                "brick-path=%s", child->brick_path, NULL);
+    }
+
+    pthread_mutex_unlock(&child->lock);
+
+    return ret;
+}
+
+/**
+ * This routine fetches various attributes associated with a child which
+ * is basically a subvolume. Attributes include brick path and the stub
+ * birth time. This is done by performing a lookup on the root followed
+ * by getxattr() on a virtual key. Depending on the configuration, the
+ * process either acts as a signer or a scrubber.
+ *
+ * @this:  bit-rot xlator
+ * @child: subvolume that came up
+ *
+ * Returns 0 on success; non-zero on failure, in which case the child
+ * (when there is one) is moved to the CONNFAILED state.
+ */
+int32_t
+br_brick_connect(xlator_t *this, br_child_t *child)
+{
+    int32_t ret = -1;
+    loc_t loc = {
+        0,
+    };
+    struct iatt buf = {
+        0,
+    };
+    struct iatt parent = {
+        0,
+    };
+    br_stub_init_t *stub = NULL;
+    dict_t *xattr = NULL;
+    int op_errno = 0;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, child, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    br_child_set_scrub_state(child, _gf_false);
+    br_set_child_state(child, BR_CHILD_STATE_INITIALIZING);
+
+    loc.inode = inode_ref(child->table->root);
+    gf_uuid_copy(loc.gfid, loc.inode->gfid);
+    loc.path = gf_strdup("/");
+
+    ret = syncop_lookup(child->xl, &loc, &buf, &parent, NULL, NULL);
+    if (ret) {
+        op_errno = -ret;
+        ret = -1;
+        gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED,
+                NULL);
+        goto wipeloc;
+    }
+
+    ret = syncop_getxattr(child->xl, &loc, &xattr,
+                          GLUSTERFS_GET_BR_STUB_INIT_TIME, NULL, NULL);
+    if (ret) {
+        op_errno = -ret;
+        ret = -1;
+        gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED,
+                NULL);
+        goto wipeloc;
+    }
+
+    ret = dict_get_ptr(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, (void **)&stub);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, NULL);
+        goto free_dict;
+    }
+
+    /* NOTE(review): copy is bounded only by the stub's NUL terminator */
+    memcpy(child->brick_path, stub->export, strlen(stub->export) + 1);
+    child->tv.tv_sec = ntohl(stub->timebuf[0]);
+    child->tv.tv_usec = ntohl(stub->timebuf[1]);
+
+    ret = br_child_enaction(this, child, stub);
+
+free_dict:
+    dict_unref(xattr);
+wipeloc:
+    loc_wipe(&loc);
+out:
+    /* the validation above may have bailed out because child is NULL */
+    if (ret && child)
+        br_set_child_state(child, BR_CHILD_STATE_CONNFAILED);
+    return ret;
+}
+
+/**
+ * Signer counterpart of br_cleanup_scrubber(), called when a child
+ * disconnects. Currently a stub: nothing is cleaned up and 0 is
+ * always returned.
+ *
+ * TODO: cleanup signer
+ */
+static int32_t
+br_cleanup_signer(xlator_t *this, br_child_t *child)
+{
+    return 0;
+}
+
+/**
+ * Take a disconnected child out of scrubbing: drop it from the active
+ * scrub accounting, unlink it from the scrubber's list and clean up
+ * its scanner thread. Always returns 0; a failed thread cleanup is
+ * only logged.
+ */
+static int32_t
+br_cleanup_scrubber(xlator_t *this, br_child_t *child)
+{
+    br_private_t *priv = this->private;
+    struct br_scrubber *fsscrub = &priv->fsscrub;
+    struct br_monitor *scrub_monitor = &priv->scrub_monitor;
+    int32_t ret = 0;
+
+    if (_br_is_child_scrub_active(child)) {
+        scrub_monitor->active_child_count--;
+        br_child_set_scrub_state(child, _gf_false);
+    }
+
+    /**
+     * 0x0: child (brick) goes out of rotation
+     *
+     * This is fully safe w.r.t. entries for this child being actively
+     * scrubbed. Each of the scrubber thread(s) would finish scrubbing
+     * the entry (probably failing due to disconnection) and either
+     * putting the entry back into the queue or continuing further.
+     * Either way, pending entries for this child's queue need not be
+     * drained; entries just sit there in the queued/ready list to be
+     * consumed later upon re-connection.
+     */
+    pthread_mutex_lock(&fsscrub->mutex);
+    list_del_init(&child->list);
+    pthread_mutex_unlock(&fsscrub->mutex);
+
+    /**
+     * 0x1: cleanup scanner thread
+     *
+     * The pending timer needs to be removed _after_ cleaning up the
+     * filesystem scanner (scheduling the next scrub time is not a
+     * cancellation point).
+     */
+    ret = gf_thread_cleanup_xint(child->thread);
+    if (ret != 0)
+        gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP, NULL);
+
+    gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED,
+            "brick-path=%s", child->brick_path, NULL);
+
+    return 0;
+}
+
+/**
+ * OK.. this child has made up its mind to go down the drain. So,
+ * let's clean up what it touched. (NOTE: there's no need to clean
+ * the inode table, it's just reused taking care of stale inodes)
+ *
+ * Nothing is done if the child is not in the connected state.
+ * Otherwise the child is marked DISCONNECTED and the scrubber or
+ * signer cleanup is run, depending on the role of this process.
+ *
+ * Returns the result of the cleanup routine (0 if nothing was done).
+ */
+int32_t
+br_brick_disconnect(xlator_t *this, br_child_t *child)
+{
+    int32_t ret = 0;
+    struct br_monitor *scrub_monitor = NULL;
+    br_private_t *priv = this->private;
+
+    scrub_monitor = &priv->scrub_monitor;
+
+    /* Lock order must be wakelock first and then the child lock, to
+     * avoid deadlocks.
+     */
+    pthread_mutex_lock(&scrub_monitor->wakelock);
+    {
+        pthread_mutex_lock(&child->lock);
+        {
+            if (!_br_is_child_connected(child))
+                goto unblock;
+
+            /* child is on death row.. */
+            _br_set_child_state(child, BR_CHILD_STATE_DISCONNECTED);
+
+            if (priv->iamscrubber)
+                ret = br_cleanup_scrubber(this, child);
+            else
+                ret = br_cleanup_signer(this, child);
+        }
+    unblock:
+        pthread_mutex_unlock(&child->lock);
+    }
+    pthread_mutex_unlock(&scrub_monitor->wakelock);
+
+    return ret;
+}
+
+/**
+ * Event handling thread. It waits for child events queued on
+ * priv->bricks (e.g. for a brick that reported CHILD_UP), dequeues
+ * them one at a time and runs the handler attached to each event
+ * outside of priv->lock. The event is freed after its handler ran.
+ *
+ * @arg: the bit-rot xlator (xlator_t *).
+ */
+void *
+br_handle_events(void *arg)
+{
+    xlator_t *this = arg;
+    br_private_t *priv = this->private;
+    struct br_child_event *childev = NULL;
+    br_child_t *child = NULL;
+    int32_t ret = 0;
+
+    /*
+     * Since, this is the topmost xlator, THIS has to be set by bit-rot
+     * xlator itself (STACK_WIND won't help in this case). Also it has
+     * to be done for each thread that gets spawned. Otherwise, a new
+     * thread will get global_xlator's pointer when it does "THIS".
+     */
+    THIS = this;
+
+    for (;;) {
+        pthread_mutex_lock(&priv->lock);
+
+        while (list_empty(&priv->bricks))
+            pthread_cond_wait(&priv->cond, &priv->lock);
+
+        childev = list_first_entry(&priv->bricks, struct br_child_event, list);
+        list_del_init(&childev->list);
+
+        pthread_mutex_unlock(&priv->lock);
+
+        child = childev->child;
+        ret = childev->call(this, child);
+        if (ret != 0) {
+            gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED,
+                    "name=%s", child->xl->name, NULL);
+        }
+
+        GF_FREE(childev);
+    }
+
+    return NULL;
+}
+
+/**
+ * Initialize memory accounting for this xlator.
+ *
+ * Returns -1 when @this is NULL, otherwise the result of
+ * xlator_mem_acct_init() (a failure is logged as a warning).
+ */
+int32_t
+mem_acct_init(xlator_t *this)
+{
+    int32_t ret = -1;
+
+    if (this == NULL)
+        return -1;
+
+    ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1);
+    if (ret != 0)
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, NULL);
+
+    return ret;
+}
+
+/**
+ * Queue an event for @child on priv->bricks; @call is the handler the
+ * event thread will run for it. The list itself is not locked here --
+ * notify() calls this with priv->lock held.
+ *
+ * If the event cannot be allocated the failure is logged and the
+ * event is dropped.
+ */
+static void
+_br_qchild_event(xlator_t *this, br_child_t *child, br_child_handler *call)
+{
+    br_private_t *priv = this->private;
+    struct br_child_event *event = NULL;
+
+    event = GF_CALLOC(1, sizeof(*event), gf_br_mt_br_child_event_t);
+    if (event == NULL) {
+        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_EVENT_UNHANDLED,
+                "Brick-name=%s", child->xl->name, NULL);
+        return;
+    }
+
+    event->this = this;
+    event->child = child;
+    event->call = call;
+    INIT_LIST_HEAD(&event->list);
+
+    list_add_tail(&event->list, &priv->bricks);
+}
+
+/**
+ * Fill *@dict with the scrubber status: the list of bad objects plus
+ * the scrub statistics (running flag, scrubbed/unsigned file counts,
+ * scrub duration and last scrub time).
+ *
+ * Every step is attempted regardless of earlier failures, which are
+ * only logged at debug level. The return value is that of the last
+ * step (setting "last-scrub-time"), or -1 if the xlator has no
+ * private data.
+ */
+int
+br_scrubber_status_get(xlator_t *this, dict_t **dict)
+{
+    int ret = -1;
+    br_private_t *priv = this->private;
+    struct br_scrub_stats *stats = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot", priv, out);
+
+    stats = &priv->scrub_stat;
+
+    ret = br_get_bad_objects_list(this, dict);
+    if (ret != 0)
+        gf_msg_debug(this->name, 0, "Failed to collect corrupt files");
+
+    ret = dict_set_int8(*dict, "scrub-running", stats->scrub_running);
+    if (ret != 0)
+        gf_msg_debug(this->name, 0,
+                     "Failed setting scrub_running entry to the dictionary");
+
+    ret = dict_set_uint64(*dict, "scrubbed-files", stats->scrubbed_files);
+    if (ret != 0)
+        gf_msg_debug(this->name, 0,
+                     "Failed to setting scrubbed file entry to the dictionary");
+
+    ret = dict_set_uint64(*dict, "unsigned-files", stats->unsigned_files);
+    if (ret != 0)
+        gf_msg_debug(this->name, 0,
+                     "Failed to set unsigned file count entry to the dictionary");
+
+    ret = dict_set_uint64(*dict, "scrub-duration", stats->scrub_duration);
+    if (ret != 0)
+        gf_msg_debug(this->name, 0,
+                     "Failed to set scrub duration entry to the dictionary");
+
+    ret = dict_set_dynstr_with_alloc(*dict, "last-scrub-time",
+                                     stats->last_scrub_time);
+    if (ret != 0)
+        gf_msg_debug(this->name, 0, "Failed to set last scrub time value");
+
+out:
+    return ret;
+}
+
+/**
+ * Event notification entry point of the bit-rot xlator.
+ *
+ *  - GF_EVENT_CHILD_UP / GF_EVENT_CHILD_DOWN: update the child's up
+ *    state and the up-children count under priv->lock, queue a
+ *    connect/disconnect event for the event thread, and propagate the
+ *    event once all children are up / all children are down.
+ *  - GF_EVENT_SCRUB_STATUS: fill the dictionary passed as the first
+ *    variadic argument with the scrubber status.
+ *  - GF_EVENT_SCRUB_ONDEMAND: run the scrub state machine on demand.
+ *  - anything else is handed to default_notify().
+ *
+ * @data is the subvolume the event refers to.
+ *
+ * Returns 0, except for an on-demand scrub request arriving while the
+ * scrubber is not in the PENDING state, which returns -2.
+ */
+int
+notify(xlator_t *this, int32_t event, void *data, ...)
+{
+    int idx = -1;
+    int ret = -1;
+    xlator_t *subvol = NULL;
+    br_child_t *child = NULL;
+    br_private_t *priv = NULL;
+    dict_t *output = NULL;
+    va_list ap;
+    struct br_monitor *scrub_monitor = NULL;
+
+    subvol = (xlator_t *)data;
+    priv = this->private;
+    scrub_monitor = &priv->scrub_monitor;
+
+    gf_msg_trace(this->name, 0, "Notification received: %d", event);
+
+    idx = br_find_child_index(this, subvol);
+
+    switch (event) {
+        case GF_EVENT_CHILD_UP:
+            if (idx < 0) {
+                gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
+                        "event=%d", event, NULL);
+                goto out;
+            }
+
+            pthread_mutex_lock(&priv->lock);
+            {
+                child = &priv->children[idx];
+                /* duplicate CHILD_UP: nothing to do */
+                if (child->child_up == 1)
+                    goto unblock_0;
+                priv->up_children++;
+
+                child->child_up = 1;
+                child->xl = subvol;
+                if (!child->table)
+                    child->table = inode_table_new(4096, subvol);
+
+                _br_qchild_event(this, child, br_brick_connect);
+                pthread_cond_signal(&priv->cond);
+            }
+        unblock_0:
+            pthread_mutex_unlock(&priv->lock);
+
+            if (priv->up_children == priv->child_count)
+                default_notify(this, event, data);
+            break;
+
+        case GF_EVENT_CHILD_DOWN:
+            if (idx < 0) {
+                gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
+                        "event=%d", event, NULL);
+                goto out;
+            }
+
+            pthread_mutex_lock(&priv->lock);
+            {
+                child = &priv->children[idx];
+                /* duplicate CHILD_DOWN: nothing to do */
+                if (child->child_up == 0)
+                    goto unblock_1;
+
+                child->child_up = 0;
+                priv->up_children--;
+
+                _br_qchild_event(this, child, br_brick_disconnect);
+                pthread_cond_signal(&priv->cond);
+            }
+        unblock_1:
+            pthread_mutex_unlock(&priv->lock);
+
+            if (priv->up_children == 0)
+                default_notify(this, event, data);
+            break;
+
+        case GF_EVENT_SCRUB_STATUS:
+            /* second argument is an errnum, not a log level */
+            gf_msg_debug(this->name, 0,
+                         "BitRot scrub status "
+                         "called");
+            va_start(ap, data);
+            output = va_arg(ap, dict_t *);
+            va_end(ap);
+
+            ret = br_scrubber_status_get(this, &output);
+            gf_msg_debug(this->name, 0, "returning %d", ret);
+            break;
+
+        case GF_EVENT_SCRUB_ONDEMAND:
+            gf_log(this->name, GF_LOG_INFO,
+                   "BitRot scrub ondemand "
+                   "called");
+
+            if (scrub_monitor->state != BR_SCRUB_STATE_PENDING) {
+                gf_smsg(this->name, GF_LOG_ERROR, 0,
+                        BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, "Current-state=%d",
+                        scrub_monitor->state, NULL);
+                return -2;
+            }
+
+            /* Needs synchronization with reconfigure thread */
+            pthread_mutex_lock(&priv->lock);
+            {
+                ret = br_scrub_state_machine(this, _gf_true);
+            }
+            pthread_mutex_unlock(&priv->lock);
+
+            if (ret) {
+                gf_smsg(this->name, GF_LOG_ERROR, 0,
+                        BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, NULL);
+            }
+            gf_msg_debug(this->name, 0, "returning %d", ret);
+            break;
+        default:
+            default_notify(this, event, data);
+    }
+
+out:
+    return 0;
+}
+
+/**
+ * Tear down the signer: clean up every worker thread, free the array
+ * of worker thread ids and destroy the object condition variable.
+ * Does nothing when @priv is NULL.
+ *
+ * NOTE(review): priv->obj_queue itself (and whatever is still linked
+ * on its object list) is not released here -- confirm whether that is
+ * intentional.
+ */
+static void
+br_fini_signer(xlator_t *this, br_private_t *priv)
+{
+    int idx;
+
+    if (!priv)
+        return;
+
+    for (idx = 0; idx < priv->signer_th_count; idx++)
+        (void)gf_thread_cleanup_xint(priv->obj_queue->workers[idx]);
+
+    GF_FREE(priv->obj_queue->workers);
+
+    pthread_cond_destroy(&priv->object_cond);
+}
+
+/**
+ * Initialize signer specific structures, spawn worker threads.
+ *
+ * Sets up the gfchangelog context, the object condition variable, the
+ * object queue and priv->signer_th_count worker threads running
+ * br_process_object().
+ *
+ * Returns 0 on success, -1 on failure. On failure everything set up
+ * by this routine is undone and priv->obj_queue is reset to NULL.
+ */
+
+static int32_t
+br_init_signer(xlator_t *this, br_private_t *priv)
+{
+    int i = 0;
+    int32_t ret = -1;
+
+    /* initialize gfchangelog xlator context */
+    ret = gf_changelog_init(this);
+    if (ret)
+        goto out;
+
+    pthread_cond_init(&priv->object_cond, NULL);
+
+    priv->obj_queue = GF_CALLOC(1, sizeof(*priv->obj_queue),
+                                gf_br_mt_br_ob_n_wk_t);
+    if (!priv->obj_queue)
+        goto cleanup_cond;
+    INIT_LIST_HEAD(&priv->obj_queue->objects);
+
+    priv->obj_queue->workers = GF_CALLOC(
+        priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t);
+    if (!priv->obj_queue->workers)
+        goto cleanup_obj_queue;
+
+    for (i = 0; i < priv->signer_th_count; i++) {
+        ret = gf_thread_create(&priv->obj_queue->workers[i], NULL,
+                               br_process_object, this, "brpobj");
+        if (ret != 0) {
+            gf_smsg(this->name, GF_LOG_ERROR, -ret,
+                    BRB_MSG_THREAD_CREATION_FAILED, NULL);
+            ret = -1;
+            goto cleanup_threads;
+        }
+    }
+
+    return 0;
+
+cleanup_threads:
+    /* only the workers [0, i) were actually started */
+    for (i--; i >= 0; i--) {
+        (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]);
+    }
+    GF_FREE(priv->obj_queue->workers);
+
+cleanup_obj_queue:
+    GF_FREE(priv->obj_queue);
+    /* do not leave a dangling pointer behind in priv */
+    priv->obj_queue = NULL;
+
+cleanup_cond:
+    /* that's explicit */
+    pthread_cond_destroy(&priv->object_cond);
+out:
+    return -1;
+}
+
+/**
+ * For signer, only rate limit CPU usage (during hash calculation) when
+ * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full
+ * throttle.
+ *
+ * @this:        bit-rot xlator; the token bucket is stored in its
+ *               private data
+ * @child_count: number of children handled by this process
+ * @numbricks:   total number of bricks
+ *
+ * When rate limiting is compiled in, the rate is BR_HASH_CALC_READ_SIZE
+ * scaled by (1 - child_count / numbricks). A share that is not a
+ * positive number (all bricks local, no bricks at all, or more children
+ * than bricks) falls back to the full BR_HASH_CALC_READ_SIZE.
+ *
+ * Returns 0 if the token bucket filter was initialized, -1 otherwise.
+ */
+static int32_t
+br_rate_limit_signer(xlator_t *this, int child_count, int numbricks)
+{
+    br_private_t *priv = NULL;
+    tbf_opspec_t spec = {
+        0,
+    };
+
+    priv = this->private;
+
+    spec.op = TBF_OP_HASH;
+    spec.rate = 0;
+    spec.maxlimit = 0;
+
+    /**
+     * OK. Most implementations of TBF I've come across generate tokens
+     * every second (UML, etc..) and some chose sub-second granularity
+     * (blk-iothrottle cgroups). TBF algorithm itself does not enforce
+     * any logic for choosing generation interval and it seems pretty
+     * logical as one could jack up token count per interval w.r.t.
+     * generation rate.
+     *
+     * Value used here is chosen based on a series of test(s) performed
+     * to balance object signing time and not maxing out on all available
+     * CPU cores. It's obvious to have seconds granularity and jack up
+     * token count per interval, thereby achieving close to similar
+     * results. Let's stick to this as it seems to be working fine for
+     * the set of ops that are throttled.
+     **/
+    spec.token_gen_interval = 600000; /* In usec */
+
+#ifdef BR_RATE_LIMIT_SIGNER
+
+    double contribution = 1;
+
+    if (numbricks > 0)
+        contribution = ((double)1 - ((double)child_count / (double)numbricks));
+    /* zero, negative or NaN share: use the full read size as rate */
+    if (!(contribution > 0))
+        contribution = 1;
+    spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
+    spec.maxlimit = priv->signer_th_count * BR_HASH_CALC_READ_SIZE;
+
+#endif
+
+    if (!spec.rate)
+        gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+                "FULL THROTTLE", NULL);
+    else
+        gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+                "tokens/sec-rate=%lu", spec.rate, "maxlimit=%lu", spec.maxlimit,
+                NULL);
+
+    priv->tbf = tbf_init(&spec, 1);
+    return priv->tbf ? 0 : -1;
+}
+
+/**
+ * Load "expiry-time" and "signer-threads" into @priv. When @options is
+ * NULL the values are read with GF_OPTION_INIT, otherwise they are
+ * re-read from @options with GF_OPTION_RECONF.
+ *
+ * Returns 0 on success, -1 if either option could not be read.
+ *
+ * NOTE(review): on the reconfigure path priv->signer_th_count changes
+ * while the worker array keeps its original size - confirm that this is
+ * handled elsewhere.
+ */
+static int32_t
+br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options)
+{
+    if (options == NULL) {
+        GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
+        GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
+                       error_return);
+        return 0;
+    }
+
+    GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32,
+                     error_return);
+    GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options, uint32,
+                     error_return);
+    return 0;
+
+error_return:
+    return -1;
+}
+
+/**
+ * Signer-mode initialisation: read the signer options, create the token
+ * bucket filter and then start the signer machinery.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int32_t
+br_signer_init(xlator_t *this, br_private_t *priv)
+{
+    int numbricks = 0;
+
+    GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
+    GF_OPTION_INIT("brick-count", numbricks, int32, error_return);
+    GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
+                   error_return);
+
+    if (br_rate_limit_signer(this, priv->child_count, numbricks) != 0)
+        goto error_return;
+
+    if (br_init_signer(this, priv) == 0)
+        return 0;
+
+    /* NOTE(review): priv->tbf created above is not released on this
+     * path; no TBF cleanup is performed here. */
+error_return:
+    return -1;
+}
+
+/**
+ * Tear down the scrub monitor embedded in @priv: remove and free its
+ * timer (if any), stop the monitor thread and destroy the monitor's
+ * mutexes, condition variables and lock.
+ */
+static void
+br_free_scrubber_monitor(xlator_t *this, br_private_t *priv)
+{
+    struct br_monitor *mon = &priv->scrub_monitor;
+
+    if (mon->timer != NULL) {
+        (void)gf_tw_del_timer(priv->timer_wheel, mon->timer);
+
+        GF_FREE(mon->timer);
+        mon->timer = NULL;
+    }
+
+    /* the thread goes first, the primitives it may use afterwards */
+    (void)gf_thread_cleanup_xint(mon->thread);
+
+    pthread_mutex_destroy(&mon->mutex);
+    pthread_cond_destroy(&mon->cond);
+
+    pthread_mutex_destroy(&mon->wakelock);
+    pthread_cond_destroy(&mon->wakecond);
+
+    pthread_mutex_destroy(&mon->donelock);
+    pthread_cond_destroy(&mon->donecond);
+
+    LOCK_DESTROY(&mon->lock);
+}
+
+/**
+ * Release the first @count entries of priv->children (walking from the
+ * last one down to index 0): destroy each child's timer mem-pool and
+ * lock, then free the array itself and reset the pointer.
+ */
+static void
+br_free_children(xlator_t *this, br_private_t *priv, int count)
+{
+    br_child_t *cur = NULL;
+
+    while (count-- > 0) {
+        cur = &priv->children[count];
+        mem_pool_destroy(cur->timer_pool);
+        pthread_mutex_destroy(&cur->lock);
+    }
+
+    GF_FREE(priv->children);
+    priv->children = NULL;
+}
+
+/**
+ * Allocate priv->children (one entry per subvolume of @this) and
+ * initialise each entry: lock, initial DISCONNECTED state, back pointers
+ * to the bit-rot xlator and the subvolume, timer mem-pool and list hook.
+ *
+ * Returns 0 on success. On failure returns -1 with every child set up so
+ * far released; errno is ENOMEM when the timer mem-pool could not be
+ * created.
+ */
+static int
+br_init_children(xlator_t *this, br_private_t *priv)
+{
+    int i = 0;
+    br_child_t *child = NULL;
+    xlator_list_t *trav = NULL;
+
+    priv->child_count = xlator_subvolume_count(this);
+    priv->children = GF_CALLOC(priv->child_count, sizeof(*priv->children),
+                               gf_br_mt_br_child_t);
+    if (!priv->children)
+        goto err;
+
+    for (trav = this->children; trav; trav = trav->next, i++) {
+        child = &priv->children[i];
+
+        pthread_mutex_init(&child->lock, NULL);
+        child->witnessed = 0;
+
+        br_set_child_state(child, BR_CHILD_STATE_DISCONNECTED);
+
+        child->this = this;
+        child->xl = trav->xlator;
+
+        child->timer_pool = mem_pool_new(struct gf_tw_timer_list, 4096);
+        if (!child->timer_pool) {
+            gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_MEM_POOL_ALLOC,
+                    NULL);
+            /* br_free_children() below only covers children [0, i);
+             * this one already owns an initialised lock, so release
+             * it here. */
+            pthread_mutex_destroy(&child->lock);
+            goto freechild;
+        }
+
+        INIT_LIST_HEAD(&child->list);
+    }
+
+    return 0;
+
+freechild:
+    br_free_children(this, priv, i);
+    /* set after the cleanup so that it cannot be overwritten by it */
+    errno = ENOMEM;
+err:
+    return -1;
+}
+
+/**
+ * xlator init entry point. Allocates the private structure, sets up the
+ * per-subvolume state, acquires the context timer wheel and then
+ * initialises either the signer or the scrubber depending on the
+ * "scrubber" option. Finally the event handling thread is started.
+ *
+ * Returns 0 on success. On failure returns -1 with this->private reset
+ * to NULL.
+ */
+int32_t
+init(xlator_t *this)
+{
+    int32_t ret = -1;
+    br_private_t *priv = NULL;
+
+    if (!this->children) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, NULL);
+        goto out;
+    }
+
+    priv = GF_CALLOC(1, sizeof(*priv), gf_br_mt_br_private_t);
+    if (!priv) {
+        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, NULL);
+        goto out;
+    }
+
+    GF_OPTION_INIT("scrubber", priv->iamscrubber, bool, free_priv);
+
+    ret = br_init_children(this, priv);
+    if (ret)
+        goto free_priv;
+
+    pthread_mutex_init(&priv->lock, NULL);
+    pthread_cond_init(&priv->cond, NULL);
+
+    INIT_LIST_HEAD(&priv->bricks);
+    INIT_LIST_HEAD(&priv->signing);
+
+    priv->timer_wheel = glusterfs_ctx_tw_get(this->ctx);
+    if (!priv->timer_wheel) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE,
+                NULL);
+        goto cleanup;
+    }
+
+    /* mode specific init and the threads it starts use this->private */
+    this->private = priv;
+
+    if (!priv->iamscrubber) {
+        ret = br_signer_init(this, priv);
+        if (!ret)
+            ret = br_signer_handle_options(this, priv, NULL);
+    } else {
+        ret = br_scrubber_init(this, priv);
+        if (!ret)
+            ret = br_scrubber_handle_options(this, priv, NULL);
+    }
+
+    if (ret)
+        goto cleanup;
+
+    ret = gf_thread_create(&priv->thread, NULL, br_handle_events, this,
+                           "brhevent");
+    if (ret != 0) {
+        gf_smsg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_THREAD_CREATION_FAILED,
+                NULL);
+        ret = -1;
+    }
+
+    if (!ret) {
+        gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, "mode=%s",
+                (priv->iamscrubber) ? "SCRUBBER" : "SIGNER", NULL);
+        return 0;
+    }
+
+    /*
+     * NOTE(review): when the failure happens after the signer/scrubber
+     * was initialised successfully, its threads and resources are not
+     * torn down here before priv is freed - confirm and handle.
+     */
+
+cleanup:
+    /* drop the timer wheel reference taken above (it is NULL when the
+     * acquisition itself failed); fini() does the same on teardown */
+    if (priv->timer_wheel)
+        glusterfs_ctx_tw_put(this->ctx);
+
+    (void)pthread_cond_destroy(&priv->cond);
+    (void)pthread_mutex_destroy(&priv->lock);
+
+    br_free_children(this, priv, priv->child_count);
+
+free_priv:
+    GF_FREE(priv);
+out:
+    this->private = NULL;
+    return -1;
+}
+
+/**
+ * xlator fini entry point. Does nothing without a private structure;
+ * otherwise tears down the mode specific state (scrub monitor or
+ * signer), releases the per-subvolume state, frees the private structure
+ * and drops the timer wheel reference.
+ */
+void
+fini(xlator_t *this)
+{
+    br_private_t *priv = this->private;
+
+    if (priv == NULL)
+        return;
+
+    if (priv->iamscrubber)
+        br_free_scrubber_monitor(this, priv);
+    else
+        br_fini_signer(this, priv);
+
+    br_free_children(this, priv, priv->child_count);
+
+    this->private = NULL;
+    GF_FREE(priv);
+
+    glusterfs_ctx_tw_put(this->ctx);
+}
+
+/**
+ * Run the scrub state machine (second argument _gf_false) and log an
+ * error when it reports a failure.
+ */
+static void
+br_reconfigure_monitor(xlator_t *this)
+{
+    if (br_scrub_state_machine(this, _gf_false) == 0)
+        return;
+
+    gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB,
+            NULL);
+}
+
+/**
+ * Reconfigure path for scrubber mode: apply @options under priv->lock
+ * and, when that succeeded, re-run the monitor (again under priv->lock).
+ *
+ * Returns the result of br_scrubber_handle_options().
+ */
+static int
+br_reconfigure_scrubber(xlator_t *this, dict_t *options)
+{
+    br_private_t *priv = this->private;
+    int32_t rc = -1;
+
+    pthread_mutex_lock(&priv->lock);
+    rc = br_scrubber_handle_options(this, priv, options);
+    pthread_mutex_unlock(&priv->lock);
+
+    if (rc != 0)
+        return rc;
+
+    /* change state for all _up_ subvolume(s) */
+    pthread_mutex_lock(&priv->lock);
+    br_reconfigure_monitor(this);
+    pthread_mutex_unlock(&priv->lock);
+
+    return rc;
+}
+
+/**
+ * Reconfigure path for signer mode: re-read the signer options from
+ * @options. Returns the result of br_signer_handle_options().
+ */
+static int
+br_reconfigure_signer(xlator_t *this, dict_t *options)
+{
+    return br_signer_handle_options(this, this->private, options);
+}
+
+/**
+ * xlator reconfigure entry point: dispatch to the scrubber or signer
+ * reconfigure routine depending on the mode this instance runs in, and
+ * return its result.
+ */
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+    br_private_t *priv = this->private;
+
+    return priv->iamscrubber ? br_reconfigure_scrubber(this, options)
+                             : br_reconfigure_signer(this, options);
+}
+
+/* fop table referenced by xlator_api; no entries are set here */
+struct xlator_fops fops;
+
+/* callback table referenced by xlator_api; no entries are set here */
+struct xlator_cbks cbks;
+
+/*
+ * Options understood by this xlator. The table is terminated by an entry
+ * with a NULL key.
+ */
+struct volume_options options[] = {
+    /* read into priv->expiry_time by the signer */
+    {
+        .key = {"expiry-time"},
+        .type = GF_OPTION_TYPE_INT,
+        .default_value = SIGNING_TIMEOUT,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .flags = OPT_FLAG_SETTABLE,
+        .description = "Waiting time for an object on which it waits "
+                       "before it is signed",
+    },
+    /* declared as a string, but read as int32 by br_signer_init();
+     * there is no default value */
+    {
+        .key = {"brick-count"},
+        .type = GF_OPTION_TYPE_STR,
+        .description = "Total number of bricks for the current node for "
+                       "all volumes in the trusted storage pool.",
+    },
+    /* read into priv->iamscrubber by init(); selects scrubber vs signer */
+    {
+        .key = {"scrubber", "scrub"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .default_value = "false",
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE,
+        .description = "option to run as a scrubber",
+    },
+    {
+        .key = {"scrub-throttle"},
+        .type = GF_OPTION_TYPE_STR,
+        .default_value = "lazy",
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .flags = OPT_FLAG_SETTABLE,
+        .description = "Scrub-throttle value is a measure of how fast "
+                       "or slow the scrubber scrubs the filesystem for "
+                       "volume <VOLNAME>",
+    },
+    {
+        .key = {"scrub-freq"},
+        .type = GF_OPTION_TYPE_STR,
+        .default_value = "biweekly",
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .flags = OPT_FLAG_SETTABLE,
+        .description = "Scrub frequency for volume <VOLNAME>",
+    },
+    {
+        .key = {"scrub-state"},
+        .type = GF_OPTION_TYPE_STR,
+        .default_value = "active",
+        .op_version = {GD_OP_VERSION_4_0_0},
+        .flags = OPT_FLAG_SETTABLE,
+        .description = "Pause/Resume scrub. Upon resume, scrubber "
+                       "continues from where it left off.",
+    },
+    /* read into priv->signer_th_count: number of signer worker threads */
+    {
+        .key = {"signer-threads"},
+        .type = GF_OPTION_TYPE_INT,
+        .default_value = BR_WORKERS,
+        .op_version = {GD_OP_VERSION_8_0},
+        .flags = OPT_FLAG_SETTABLE,
+        .description = "Number of signing process threads. As a best "
+                       "practice, set this to the number of processor cores",
+    },
+    {.key = {NULL}},
+};
+
+/*
+ * xlator descriptor: ties the entry points, the fop/callback tables and
+ * the option table defined in this file together under the identifier
+ * "bit-rot".
+ */
+xlator_api_t xlator_api = {
+    .init = init,
+    .fini = fini,
+    .notify = notify,
+    .reconfigure = reconfigure,
+    .mem_acct_init = mem_acct_init,
+    .op_version = {1}, /* Present from the initial version */
+    .fops = &fops,
+    .cbks = &cbks,
+    .options = options,
+    .identifier = "bit-rot",
+    .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
new file mode 100644
index 00000000000..8ac7dcdac3d
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -0,0 +1,302 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_H__
+#define __BIT_ROT_H__
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include "changelog.h"
+#include "timer-wheel.h"
+
+#include <glusterfs/throttle-tbf.h>
+#include "bit-rot-ssm.h"
+
+#include "bit-rot-common.h"
+#include "bit-rot-stub-mem-types.h"
+#include "bit-rot-scrub-status.h"
+
+#include <openssl/sha.h>
+
+/*
+ * Scrubber throttle levels (stored in struct br_scrubber 'throttle').
+ * NOTE(review): presumably the parsed form of the "scrub-throttle"
+ * option, with VOID meaning "no valid value" - confirm in the option
+ * handling code.
+ */
+typedef enum scrub_throttle {
+    BR_SCRUB_THROTTLE_VOID = -1,
+    BR_SCRUB_THROTTLE_LAZY = 0,
+    BR_SCRUB_THROTTLE_NORMAL = 1,
+    BR_SCRUB_THROTTLE_AGGRESSIVE = 2,
+    BR_SCRUB_THROTTLE_STALLED = 3,
+} scrub_throttle_t;
+
+/*
+ * Scrub frequency (stored in struct br_scrubber 'frequency'). Values
+ * start at 1. BR_FSSCRUB_FREQ_STALLED is the value for which
+ * _br_child_get_scrub_event() yields a pause event instead of a schedule
+ * event.
+ */
+typedef enum scrub_freq {
+    BR_FSSCRUB_FREQ_HOURLY = 1,
+    BR_FSSCRUB_FREQ_DAILY,
+    BR_FSSCRUB_FREQ_WEEKLY,
+    BR_FSSCRUB_FREQ_BIWEEKLY,
+    BR_FSSCRUB_FREQ_MONTHLY,
+    BR_FSSCRUB_FREQ_MINUTE,
+    BR_FSSCRUB_FREQ_STALLED,
+} scrub_freq_t;
+
+/*
+ * Bytes needed for a br_isignature_t followed by 'hl' bytes of signature
+ * plus one extra byte. The argument is parenthesised so that expression
+ * arguments (e.g. a conditional) expand correctly.
+ */
+#define signature_size(hl) (sizeof(br_isignature_t) + (hl) + 1)
+
+/*
+ * Filesystem scanner state, embedded in struct br_child as 'fsscan'.
+ * NOTE(review): locking roles below are inferred from the field names -
+ * confirm against the scrubber code.
+ */
+struct br_scanfs {
+    gf_lock_t entrylock; /* presumably guards 'entries' and the lists */
+
+    pthread_mutex_t waitlock; /* mutex paired with 'waitcond' */
+    pthread_cond_t waitcond;
+
+    unsigned int entries;
+    struct list_head queued;
+    struct list_head ready;
+};
+
+/* the four states used to track child (subvolume) connection status;
+ * values start at 1 */
+typedef enum br_child_state {
+    BR_CHILD_STATE_CONNECTED = 1,
+    BR_CHILD_STATE_INITIALIZING,
+    BR_CHILD_STATE_CONNFAILED,
+    BR_CHILD_STATE_DISCONNECTED,
+} br_child_state_t;
+
+/*
+ * Per-subvolume state. One entry exists for every child of the bit-rot
+ * xlator; the entries live in the br_private 'children' array.
+ */
+struct br_child {
+    pthread_mutex_t lock;     /* protects child state */
+    char witnessed;           /* witnessed at least one successful
+                                 connection */
+    br_child_state_t c_state; /* current state of this child */
+
+    char child_up;             /* Indicates whether this child is
+                                  up or not */
+    xlator_t *xl;              /* client xlator corresponding to
+                                  this child */
+    inode_table_t *table;      /* inode table for this child */
+    char brick_path[PATH_MAX]; /* brick export directory of this
+                                  child */
+    struct list_head list;     /* hook to attach to the list of
+                                  UP children */
+    xlator_t *this;            /* Bit rot xlator */
+
+    pthread_t thread;  /* initial crawler for unsigned
+                          object(s) or scrub crawler */
+    int threadrunning; /* active thread */
+
+    struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */
+
+    struct timeval tv;
+
+    struct br_scanfs fsscan; /* per subvolume FS scanner */
+
+    gf_boolean_t active_scrubbing; /* Actively scrubbing or not */
+};
+
+typedef struct br_child br_child_t;
+
+/*
+ * Signer work queue together with the worker threads draining it.
+ * 'workers' is an array with one pthread_t per signer thread.
+ */
+struct br_obj_n_workers {
+    struct list_head objects; /* queue of objects expired from the
+                                 timer wheel and ready to be picked
+                                 up for signing */
+    pthread_t *workers;       /* Threads which pick up the objects
+                                 from the above queue and start
+                                 signing each object */
+};
+
+/*
+ * Scrubber configuration and bookkeeping, embedded in br_private as
+ * 'fsscrub'.
+ */
+struct br_scrubber {
+    xlator_t *this;
+
+    scrub_throttle_t throttle;
+
+    /**
+     * frequency of scanning for this subvolume. this should
+     * normally be per-child, but since all children follow the
+     * same frequency for a volume, this option ends up here
+     * instead of br_child_t.
+     */
+    scrub_freq_t frequency;
+
+    /* NOTE(review): presumably set when the respective option changed
+     * on reconfigure - confirm */
+    gf_boolean_t frequency_reconf;
+    gf_boolean_t throttle_reconf;
+
+    pthread_mutex_t mutex;
+    pthread_cond_t cond;
+
+    unsigned int nr_scrubbers;
+    struct list_head scrubbers;
+
+    /**
+     * list of "rotatable" subvolume(s) undergoing scrubbing
+     */
+    struct list_head scrublist;
+};
+
+/*
+ * Scrub monitor state, embedded in br_private as 'scrub_monitor'. It
+ * holds the monitor thread, three mutex/condition pairs, the scrub timer
+ * and the current scrub state.
+ */
+struct br_monitor {
+    gf_lock_t lock;
+    pthread_t thread; /* Monitor thread */
+
+    gf_boolean_t inited;
+    pthread_mutex_t mutex;
+    pthread_cond_t cond; /* Thread starts and will be waiting on cond.
+                            First child which is up wakes this up */
+
+    xlator_t *this;
+    /* scheduler */
+    uint32_t boot;
+
+    int32_t active_child_count; /* Number of children currently scrubbing */
+    gf_boolean_t kick;          /* This variable tracks the scrubber is
+                                 * kicked or not. Both 'kick' and
+                                 * 'active_child_count' uses the same pair
+                                 * of mutex-cond variable, i.e, wakelock and
+                                 * wakecond. */
+
+    pthread_mutex_t wakelock;
+    pthread_cond_t wakecond;
+
+    gf_boolean_t done;
+    pthread_mutex_t donelock; /* mutex paired with 'donecond' */
+    pthread_cond_t donecond;
+
+    struct gf_tw_timer_list *timer; /* timer on the timer wheel, or NULL */
+    br_scrub_state_t state; /* current scrub state */
+};
+
+typedef struct br_obj_n_workers br_obj_n_workers_t;
+
+/* declared ahead of the structure so that the callback type below can
+ * refer to it */
+typedef struct br_private br_private_t;
+
+/* callback taking the private structure; returns nothing */
+typedef void (*br_scrubbed_file_update)(br_private_t *priv);
+
+/*
+ * Private data of the bit-rot xlator (this->private). It carries the
+ * state shared by both modes plus the signer specific and the scrubber
+ * specific parts; 'iamscrubber' tells which mode is active.
+ */
+struct br_private {
+    pthread_mutex_t lock;
+
+    struct list_head bricks; /* list of bricks from which events
+                                have been received */
+
+    struct list_head signing;
+
+    pthread_cond_t object_cond; /* handling signing of objects */
+    int child_count;
+    br_child_t *children; /* list of subvolumes */
+    int up_children;
+
+    pthread_cond_t cond; /* handling CHILD_UP notifications */
+    pthread_t thread;    /* thread for connecting each UP
+                            child with changelog */
+
+    struct tvec_base *timer_wheel; /* timer wheel where the objects which
+                                      changelog has sent sits and waits
+                                      for expiry */
+    br_obj_n_workers_t *obj_queue; /* place holder for all the objects
+                                      that are expired from timer wheel
+                                      and ready to be picked up for
+                                      signing and the workers which sign
+                                      the objects */
+
+    uint32_t expiry_time; /* objects "wait" time */
+
+    uint32_t signer_th_count; /* Number of signing process threads */
+
+    tbf_t *tbf; /* token bucket filter */
+
+    gf_boolean_t iamscrubber; /* function as a fs scrubber */
+
+    struct br_scrub_stats scrub_stat; /* statistics of scrub*/
+
+    struct br_scrubber fsscrub; /* scrubbers for this subvolume */
+
+    struct br_monitor scrub_monitor; /* scrubber monitor */
+};
+
+/*
+ * An object waiting to be signed. It is identified by its gfid and
+ * belongs to one subvolume ('child').
+ */
+struct br_object {
+    xlator_t *this;
+
+    uuid_t gfid;
+
+    unsigned long signedversion; /* version against which this object will
+                                    be signed */
+    br_child_t *child;           /* object's subvolume */
+
+    int sign_info;
+
+    struct list_head list; /* hook to add to the queue once the
+                              object is expired from timer wheel */
+    void *data;
+};
+
+typedef struct br_object br_object_t;
+/* function type taking the xlator and returning an int32_t status */
+typedef int32_t(br_scrub_ssm_call)(xlator_t *);
+
+void
+br_log_object(xlator_t *, char *, uuid_t, int32_t);
+
+void
+br_log_object_path(xlator_t *, char *, const char *, int32_t);
+
+int32_t
+br_calculate_obj_checksum(unsigned char *, br_child_t *, fd_t *, struct iatt *);
+
+int32_t
+br_prepare_loc(xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);
+
+gf_boolean_t
+bitd_is_bad_file(xlator_t *, br_child_t *, loc_t *, fd_t *);
+
+/* Store @state as the child's current state. No locking is done here. */
+static inline void
+_br_set_child_state(br_child_t *child, br_child_state_t state)
+{
+    child->c_state = state;
+}
+
+/* Returns 1 when the child is in BR_CHILD_STATE_CONNECTED, else 0.
+ * No locking is done here. */
+static inline int
+_br_is_child_connected(br_child_t *child)
+{
+    if (child->c_state != BR_CHILD_STATE_CONNECTED)
+        return 0;
+
+    return 1;
+}
+
+/* Returns the child's 'active_scrubbing' flag as an int. No locking is
+ * done here. */
+static inline int
+_br_is_child_scrub_active(br_child_t *child)
+{
+    int active = child->active_scrubbing;
+
+    return active;
+}
+
+/* Returns 1 when the child is in BR_CHILD_STATE_CONNFAILED, else 0.
+ * No locking is done here. */
+static inline int
+_br_child_failed_conn(br_child_t *child)
+{
+    if (child->c_state != BR_CHILD_STATE_CONNFAILED)
+        return 0;
+
+    return 1;
+}
+
+/* Returns 1 when the child's 'witnessed' flag equals 1, else 0.
+ * No locking is done here. */
+static inline int
+_br_child_witnessed_connection(br_child_t *child)
+{
+    if (child->witnessed != 1)
+        return 0;
+
+    return 1;
+}
+
+/* scrub state: store @state as the monitor's current scrub state.
+ * No locking is done here. */
+static inline void
+_br_monitor_set_scrub_state(struct br_monitor *scrub_monitor,
+                            br_scrub_state_t state)
+{
+    scrub_monitor->state = state;
+}
+
+/* Map the configured scrub frequency to a scrub event: a stalled
+ * frequency gives BR_SCRUB_EVENT_PAUSE, everything else
+ * BR_SCRUB_EVENT_SCHEDULE. */
+static inline br_scrub_event_t
+_br_child_get_scrub_event(struct br_scrubber *fsscrub)
+{
+    if (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED)
+        return BR_SCRUB_EVENT_PAUSE;
+
+    return BR_SCRUB_EVENT_SCHEDULE;
+}
+
+int32_t
+br_get_bad_objects_list(xlator_t *this, dict_t **dict);
+
+#endif /* __BIT_ROT_H__ */
diff --git a/xlators/features/bit-rot/src/stub/Makefile.am b/xlators/features/bit-rot/src/stub/Makefile.am
new file mode 100644
index 00000000000..f13de7145fc
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/Makefile.am
@@ -0,0 +1,20 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = bitrot-stub.la
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+bitrot_stub_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+bitrot_stub_la_SOURCES = bit-rot-stub-helpers.c bit-rot-stub.c
+bitrot_stub_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = bit-rot-stub.h bit-rot-common.h bit-rot-stub-mem-types.h \
+	bit-rot-object-version.h bit-rot-stub-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+	-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+	-I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h
new file mode 100644
index 00000000000..20561aa7764
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h
@@ -0,0 +1,178 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_COMMON_H__
+#define __BIT_ROT_COMMON_H__
+
+#include <glusterfs/glusterfs.h>
+#include "bit-rot-object-version.h"
+
+/* bits recording which versioning xattr is MISSING for an object (see
+ * br_version_xattr_state()) */
+#define BR_VXATTR_VERSION (1 << 0)
+#define BR_VXATTR_SIGNATURE (1 << 1)
+
+/* the combinations of missing xattrs that are considered meaningful */
+#define BR_VXATTR_SIGN_MISSING (BR_VXATTR_SIGNATURE)
+#define BR_VXATTR_ALL_MISSING (BR_VXATTR_VERSION | BR_VXATTR_SIGNATURE)
+
+/* fixed uuid: fifteen zero bytes followed by 8 */
+#define BR_BAD_OBJ_CONTAINER                                                   \
+    (uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }
+
+/*
+ * Result of br_version_xattr_state():
+ *   FULL     - version and signature xattrs are both present
+ *   MISSING  - both are missing
+ *   UNSIGNED - only the signature xattr is missing
+ *   INVALID  - any other combination
+ */
+typedef enum br_vxattr_state {
+    BR_VXATTR_STATUS_FULL = 0,
+    BR_VXATTR_STATUS_MISSING = 1,
+    BR_VXATTR_STATUS_UNSIGNED = 2,
+    BR_VXATTR_STATUS_INVALID = 3,
+} br_vxattr_status_t;
+
+/*
+ * Signing states of an object.
+ * NOTE(review): the exact meaning of each state is defined by the users
+ * of this enum, which are not part of this header - confirm there.
+ */
+typedef enum br_sign_state {
+    BR_SIGN_INVALID = -1,
+    BR_SIGN_NORMAL = 0,
+    BR_SIGN_REOPEN_WAIT = 1,
+    BR_SIGN_QUICK = 2,
+} br_sign_state_t;
+
+/**
+ * Classify an object by the versioning xattrs found in @xattr.
+ *
+ * @obuf and @sbuf are handed to dict_get_bin() for the current-version
+ * and signing-version keys. @objbad is set to _gf_true when the
+ * bad-object key is present and to _gf_false otherwise.
+ *
+ * Returns FULL when both xattrs are present, UNSIGNED when only the
+ * signature is missing, MISSING when both are missing and INVALID for
+ * the remaining combination.
+ */
+static inline br_vxattr_status_t
+br_version_xattr_state(dict_t *xattr, br_version_t **obuf,
+                       br_signature_t **sbuf, gf_boolean_t *objbad)
+{
+    int32_t missing = 0;
+    void *badmark = NULL;
+
+    /**
+     * The key being present in the dict indicates the xattr was set on
+     * disk. The presence of the xattr itself is, as of now, sufficient
+     * to say that the object is bad.
+     */
+    if (dict_get_bin(xattr, BITROT_OBJECT_BAD_KEY, (void **)&badmark) == 0)
+        *objbad = _gf_true;
+    else
+        *objbad = _gf_false;
+
+    if (dict_get_bin(xattr, BITROT_CURRENT_VERSION_KEY, (void **)obuf) != 0)
+        missing |= BR_VXATTR_VERSION;
+
+    if (dict_get_bin(xattr, BITROT_SIGNING_VERSION_KEY, (void **)sbuf) != 0)
+        missing |= BR_VXATTR_SIGNATURE;
+
+    if (missing == 0)
+        return BR_VXATTR_STATUS_FULL;
+    if (missing == BR_VXATTR_SIGN_MISSING)
+        return BR_VXATTR_STATUS_UNSIGNED;
+    if (missing == BR_VXATTR_ALL_MISSING)
+        return BR_VXATTR_STATUS_MISSING;
+
+    /* version missing while a signature exists */
+    return BR_VXATTR_STATUS_INVALID;
+}
+
+/**
+ * in-memory representation of signature used by signer for object
+ * signing. The signature bytes follow the fixed part of the structure
+ * (zero-length trailing array).
+ */
+typedef struct br_isignature_in {
+    int8_t signaturetype; /* signature type            */
+
+    unsigned long signedversion; /* version against which the
+                                    object was signed         */
+
+    size_t signaturelen; /* signature length          */
+    char signature[0];   /* object signature          */
+} br_isignature_t;
+
+/**
+ * in-memory representation of signature used by scrubber for object
+ * verification. The signature bytes follow the fixed part of the
+ * structure (zero-length trailing array).
+ */
+typedef struct br_isignature_out {
+    char stale; /* stale signature?          */
+
+    unsigned long version; /* current signed version    */
+
+    uint32_t time[2]; /* time when the object
+                         got dirtied               */
+
+    int8_t signaturetype; /* hash type                 */
+    size_t signaturelen;  /* signature length          */
+    char signature[0];    /* signature (hash)          */
+} br_isignature_out_t;
+
+/*
+ * A two-word time buffer plus an export path.
+ * NOTE(review): presumably the payload of the stub-init virtual xattr
+ * (GLUSTERFS_GET_BR_STUB_INIT_TIME) - confirm against the stub code.
+ */
+typedef struct br_stub_init {
+    uint32_t timebuf[2];
+    char export[PATH_MAX];
+} br_stub_init_t;
+
+/*
+ * Signature (hash) types. ZERO and MAX are exclusive boundaries: only
+ * values strictly between them are accepted by
+ * br_is_signature_type_valid().
+ */
+typedef enum {
+    BR_SIGNATURE_TYPE_VOID = -1,  /* object is not signed       */
+    BR_SIGNATURE_TYPE_ZERO = 0,   /* min boundary               */
+    BR_SIGNATURE_TYPE_SHA256 = 1, /* signed with SHA256         */
+    BR_SIGNATURE_TYPE_MAX = 2,    /* max boundary               */
+} br_signature_type;
+
+/* BitRot stub start time (virtual xattr) */
+#define GLUSTERFS_GET_BR_STUB_INIT_TIME "trusted.glusterfs.bit-rot.stub-init"
+
+/* signing/reopen hint */
+#define BR_OBJECT_RESIGN 0
+#define BR_OBJECT_REOPEN 1
+#define BR_REOPEN_SIGN_HINT_KEY "trusted.glusterfs.bit-rot.reopen-hint"
+
+/* Returns 1 when @signaturetype lies strictly between the ZERO and MAX
+ * boundaries, 0 otherwise. */
+static inline int
+br_is_signature_type_valid(int8_t signaturetype)
+{
+    if (signaturetype <= BR_SIGNATURE_TYPE_ZERO)
+        return 0;
+    if (signaturetype >= BR_SIGNATURE_TYPE_MAX)
+        return 0;
+
+    return 1;
+}
+
+/* Fill @buf with the default current version and the two time words
+ * taken from @tv. */
+static inline void
+br_set_default_ongoingversion(br_version_t *buf, uint32_t *tv)
+{
+    int slot = 0;
+
+    buf->ongoingversion = BITROT_DEFAULT_CURRENT_VERSION;
+    for (slot = 0; slot < 2; slot++)
+        buf->timebuf[slot] = tv[slot];
+}
+
+/* Fill @buf with the "not signed" defaults and report through @size the
+ * size of the structure without any signature bytes. */
+static inline void
+br_set_default_signature(br_signature_t *buf, size_t *size)
+{
+    /* no signature */
+    *size = sizeof(br_signature_t);
+
+    buf->signedversion = BITROT_DEFAULT_SIGNING_VERSION;
+    buf->signaturetype = (int8_t)BR_SIGNATURE_TYPE_VOID;
+}
+
+/* Fill @buf with @version and the two time words taken from @tv. */
+static inline void
+br_set_ongoingversion(br_version_t *buf, unsigned long version, uint32_t *tv)
+{
+    int slot = 0;
+
+    for (slot = 0; slot < 2; slot++)
+        buf->timebuf[slot] = tv[slot];
+    buf->ongoingversion = version;
+}
+
+/*
+ * Build the on-disk signature @buf from the in-memory signature @sign:
+ * the type is copied, the signed version goes through ntohl() and
+ * @signaturelen signature bytes are copied. @size receives the total
+ * size of the result.
+ *
+ * NOTE(review): ntohl() works on 32-bit values while 'signedversion' is
+ * an unsigned long - confirm that versions never exceed 32 bits.
+ */
+static inline void
+br_set_signature(br_signature_t *buf, br_isignature_t *sign,
+                 size_t signaturelen, size_t *size)
+{
+    const size_t total = sizeof(br_signature_t) + signaturelen;
+
+    buf->signaturetype = sign->signaturetype;
+    buf->signedversion = ntohl(sign->signedversion);
+    memcpy(buf->signature, sign->signature, signaturelen);
+
+    *size = total;
+}
+
+#endif /* __BIT_ROT_COMMON_H__ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-object-version.h b/xlators/features/bit-rot/src/stub/bit-rot-object-version.h
new file mode 100644
index 00000000000..7ae6a5200df
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-object-version.h
@@ -0,0 +1,30 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_OBJECT_VERSION_H
+#define __BIT_ROT_OBJECT_VERSION_H
+
+/**
+ * on-disk formats for ongoing version and object signature.
+ */
+
+/* on-disk ongoing version: the version number plus two time words */
+typedef struct br_version {
+    unsigned long ongoingversion;
+    uint32_t timebuf[2];
+} br_version_t;
+
+/*
+ * on-disk object signature. The structure is packed, so no padding is
+ * inserted between the fields; the signature bytes follow the fixed part
+ * (zero-length trailing array).
+ */
+typedef struct __attribute__((__packed__)) br_signature {
+    int8_t signaturetype;
+
+    unsigned long signedversion;
+
+    char signature[0];
+} br_signature_t;
+
+#endif
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
new file mode 100644
index 00000000000..8ac13a09941
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
@@ -0,0 +1,796 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#include "bit-rot-stub.h"
+
+/**
+ * Allocate a zero-filled bit-rot stub fd context.
+ *
+ * Returns the new context, or NULL when the allocation fails.
+ */
+br_stub_fd_t *
+br_stub_fd_new(void)
+{
+    return GF_CALLOC(1, sizeof(br_stub_fd_t), gf_br_stub_mt_br_stub_fd_t);
+}
+
+/**
+ * Store @br_stub_fd as this xlator's context on @fd using the unlocked
+ * __fd_ctx_set(). br_stub_fd_ctx_set() is the wrapper that calls this with
+ * fd->lock held.
+ *
+ * Returns the result of __fd_ctx_set(), or -1 when any argument is NULL.
+ */
+int
+__br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd)
+{
+    int rc = -1;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, done);
+    GF_VALIDATE_OR_GOTO(this->name, fd, done);
+    GF_VALIDATE_OR_GOTO(this->name, br_stub_fd, done);
+
+    /* the context slot is a 64-bit integer, so the pointer is stored as one */
+    rc = __fd_ctx_set(fd, this, (uint64_t)(long)br_stub_fd);
+
+done:
+    return rc;
+}
+
+/**
+ * Fetch this xlator's context from @fd using the unlocked __fd_ctx_get().
+ * br_stub_fd_ctx_get() is the wrapper that calls this with fd->lock held.
+ *
+ * Returns the stored context, or NULL when an argument is NULL or the
+ * lookup fails.
+ */
+br_stub_fd_t *
+__br_stub_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+    uint64_t raw = 0;
+    br_stub_fd_t *ctx = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, done);
+    GF_VALIDATE_OR_GOTO(this->name, fd, done);
+
+    /* the slot holds the pointer as a 64-bit integer; convert it back */
+    if (__fd_ctx_get(fd, this, &raw) == 0)
+        ctx = (br_stub_fd_t *)((long)raw);
+
+done:
+    return ctx;
+}
+
+/**
+ * Locked variant of __br_stub_fd_ctx_get(): looks up this xlator's context
+ * on @fd while holding fd->lock.
+ *
+ * Returns the stored context, or NULL when an argument is NULL or the
+ * unlocked lookup returns NULL.
+ */
+br_stub_fd_t *
+br_stub_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+    br_stub_fd_t *ctx = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unlocked);
+    GF_VALIDATE_OR_GOTO(this->name, fd, unlocked);
+
+    LOCK(&fd->lock);
+    ctx = __br_stub_fd_ctx_get(this, fd);
+    UNLOCK(&fd->lock);
+
+unlocked:
+    return ctx;
+}
+
+/**
+ * Locked variant of __br_stub_fd_ctx_set(): stores @br_stub_fd as this
+ * xlator's context on @fd while holding fd->lock.
+ *
+ * Returns the result of the unlocked setter, or -1 when any argument is
+ * NULL.
+ */
+int32_t
+br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd)
+{
+    int32_t rc = -1;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unlocked);
+    GF_VALIDATE_OR_GOTO(this->name, fd, unlocked);
+    GF_VALIDATE_OR_GOTO(this->name, br_stub_fd, unlocked);
+
+    LOCK(&fd->lock);
+    rc = __br_stub_fd_ctx_set(this, fd, br_stub_fd);
+    UNLOCK(&fd->lock);
+
+unlocked:
+    return rc;
+}
+
+/**
+ * Adds an entry to the bad objects directory.
+ *
+ * The entry is a hard link named after the gfid, pointing at the
+ * "stub-<container gfid>" file inside priv->stub_basepath.
+ *
+ * @this: bit-rot stub xlator
+ * @gfid: gfid of the bad object being added to the bad objects directory
+ *
+ * Returns 0 when the entry already exists, when the link was created, or
+ * when linking failed with ENOENT, EMLINK or EEXIST (tolerated and logged
+ * as a warning). Returns -1 for a null gfid or any other link failure.
+ */
+int
+br_stub_add(xlator_t *this, uuid_t gfid)
+{
+    char gfid_path[BR_PATH_MAX_PLUS] = {0};
+    char bad_gfid_path[BR_PATH_MAX_PLUS] = {0};
+    int ret = 0;
+    br_stub_private_t *priv = NULL;
+    struct stat st = {0};
+
+    priv = this->private;
+    GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(gfid), out,
+                                  errno, EINVAL);
+
+    snprintf(gfid_path, sizeof(gfid_path), "%s/%s", priv->stub_basepath,
+             uuid_utoa(gfid));
+
+    /*
+     * The object is already recorded as bad: nothing left to do. This is
+     * a success, not a failure, so do not take the error exit.
+     */
+    ret = sys_stat(gfid_path, &st);
+    if (!ret)
+        return 0;
+
+    snprintf(bad_gfid_path, sizeof(bad_gfid_path), "%s/stub-%s",
+             priv->stub_basepath, uuid_utoa(priv->bad_object_dir_gfid));
+
+    ret = sys_link(bad_gfid_path, gfid_path);
+    if (ret) {
+        if ((errno != ENOENT) && (errno != EMLINK) && (errno != EEXIST))
+            goto out;
+
+        /*
+         * Continue with success. At least we'll have half of the
+         * functionality, in the sense, object is marked bad and
+         * would be inaccessible. It's only scrub status that would
+         * show up less number of objects. That's fine as we'll have
+         * the log files that will have the missing information.
+         */
+        gf_smsg(this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL, "gfid=%s",
+                uuid_utoa(gfid), NULL);
+    }
+
+    return 0;
+out:
+    return -1;
+}
+
+/**
+ * Removes the entry named after @gfid from the bad objects directory
+ * (priv->stub_basepath).
+ *
+ * @this: bit-rot stub xlator
+ * @gfid: gfid of the object whose entry is removed
+ *
+ * Returns 0 when the entry was removed or did not exist (ENOENT), and a
+ * negative errno value when the unlink fails for any other reason.
+ * NOTE(review): a null gfid appears to leave through "out" with ret still
+ * 0 - confirm that is what callers expect.
+ */
+int
+br_stub_del(xlator_t *this, uuid_t gfid)
+{
+    int32_t op_errno __attribute__((unused)) = 0;
+    br_stub_private_t *priv = NULL;
+    int ret = 0;
+    char gfid_path[BR_PATH_MAX_PLUS] = {0};
+
+    priv = this->private;
+    GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(gfid), out,
+                                  op_errno, EINVAL);
+    snprintf(gfid_path, sizeof(gfid_path), "%s/%s", priv->stub_basepath,
+             uuid_utoa(gfid));
+    ret = sys_unlink(gfid_path);
+    if (ret && (errno != ENOENT)) {
+        /*
+         * Snapshot errno before logging: the logging call may overwrite
+         * it, and a clobbered value of 0 would turn this failure into a
+         * success return.
+         */
+        op_errno = errno;
+        gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+                BRS_MSG_BAD_OBJ_UNLINK_FAIL, "path=%s", gfid_path, NULL);
+        ret = -op_errno;
+        goto out;
+    }
+
+    ret = 0;
+
+out:
+    return ret;
+}
+
+/**
+ * Make sure @fullpath exists and is a directory.
+ *
+ * If it is missing (ENOENT), the old quarantine directory under
+ * priv->export is renamed to @fullpath when that one exists; otherwise
+ * @fullpath is created with mkdir_p().
+ *
+ * Returns 0 on success. Returns -1 when @fullpath exists but is not a
+ * directory or stat fails with anything but ENOENT; otherwise returns the
+ * result of the create/rename step.
+ */
+static int
+br_stub_check_stub_directory(xlator_t *this, char *fullpath)
+{
+    br_stub_private_t *priv = this->private;
+    char legacy_path[BR_PATH_MAX_PLUS] = {0};
+    struct stat sb = {
+        0,
+    };
+    int rc = 0;
+
+    snprintf(legacy_path, sizeof(legacy_path), "%s/%s", priv->export,
+             OLD_BR_STUB_QUARANTINE_DIR);
+
+    if (sys_stat(fullpath, &sb) == 0) {
+        if (!S_ISDIR(sb.st_mode))
+            goto verify_failed;
+        return 0;
+    }
+
+    if (errno != ENOENT)
+        goto verify_failed;
+
+    /* prefer migrating the old directory over creating a fresh one */
+    if (sys_stat(legacy_path, &sb) != 0)
+        rc = mkdir_p(fullpath, 0600, _gf_true);
+    else
+        rc = sys_rename(legacy_path, fullpath);
+
+    if (rc)
+        gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+                "create-path=%s", fullpath, NULL);
+    return rc;
+
+verify_failed:
+    gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+            "verify-path=%s", fullpath, NULL);
+    return -1;
+}
+
+/**
+ * Make sure the container file for bad objects exists at @path inside the
+ * bad objects directory, creating it (with mode 0) when it is missing.
+ *
+ * Returns 0 when @path is, or now is, a regular file. Returns -1 when
+ * @path exists but is not a regular file or stat fails with anything but
+ * ENOENT. When creation fails, the failed stat's return value is returned.
+ */
+static int
+br_stub_check_stub_file(xlator_t *this, char *path)
+{
+    struct stat sb = {
+        0,
+    };
+    int stat_rc = 0;
+    int newfd = -1;
+
+    stat_rc = sys_stat(path, &sb);
+    if (stat_rc == 0) {
+        if (S_ISREG(sb.st_mode))
+            return 0;
+        goto verify_failed;
+    }
+
+    if (errno != ENOENT)
+        goto verify_failed;
+
+    newfd = sys_creat(path, 0);
+    if (newfd < 0) {
+        gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+                "create-path=%s", path, NULL);
+        return stat_rc;
+    }
+
+    /* only the existence of the file matters; the descriptor is not kept */
+    sys_close(newfd);
+    return 0;
+
+verify_failed:
+    gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+            "verify-path=%s", path, NULL);
+    return -1;
+}
+
+/**
+ * Set up the bad objects directory (priv->stub_basepath) and the
+ * "stub-<container gfid>" file inside it. Also initialises
+ * priv->bad_object_dir_gfid from BR_BAD_OBJ_CONTAINER.
+ *
+ * Returns 0 on success, -1 when a path does not fit its buffer or either
+ * the directory or the file check fails.
+ */
+int
+br_stub_dir_create(xlator_t *this, br_stub_private_t *priv)
+{
+    char dir_path[BR_PATH_MAX_PLUS] = {
+        0,
+    };
+    char container_path[BR_PATH_MAX_PLUS] = {
+        0,
+    };
+    int len = 0;
+
+    gf_uuid_copy(priv->bad_object_dir_gfid, BR_BAD_OBJ_CONTAINER);
+
+    /* a result >= the buffer size means the path was truncated */
+    len = snprintf(dir_path, sizeof(dir_path), "%s", priv->stub_basepath);
+    if (len >= sizeof(dir_path))
+        return -1;
+
+    len = snprintf(container_path, sizeof(container_path), "%s/stub-%s",
+                   priv->stub_basepath, uuid_utoa(priv->bad_object_dir_gfid));
+    if (len >= sizeof(container_path))
+        return -1;
+
+    if (br_stub_check_stub_directory(this, dir_path))
+        return -1;
+
+    if (br_stub_check_stub_file(this, container_path))
+        return -1;
+
+    return 0;
+}
+
+/**
+ * Detach and return the first call stub of @callstubs, or NULL when the
+ * list is empty. No locking is done here; br_stub_worker() calls this
+ * with the queue lock held.
+ */
+call_stub_t *
+__br_stub_dequeue(struct list_head *callstubs)
+{
+    call_stub_t *head = NULL;
+
+    if (list_empty(callstubs))
+        return NULL;
+
+    head = list_entry(callstubs->next, call_stub_t, list);
+    list_del_init(&head->list);
+
+    return head;
+}
+
+/**
+ * Append @stub to the tail of @callstubs. No locking is done here;
+ * br_stub_worker_enqueue() calls this with the queue lock held.
+ */
+void
+__br_stub_enqueue(struct list_head *callstubs, call_stub_t *stub)
+{
+    list_add_tail(&stub->list, callstubs);
+}
+
+/**
+ * Queue @stub on the bad-object work queue and signal the condition
+ * variable that br_stub_worker() waits on. Both steps happen under
+ * priv->container.bad_lock.
+ */
+void
+br_stub_worker_enqueue(xlator_t *this, call_stub_t *stub)
+{
+    br_stub_private_t *priv = this->private;
+
+    pthread_mutex_lock(&priv->container.bad_lock);
+    __br_stub_enqueue(&priv->container.bad_queue, stub);
+    pthread_cond_signal(&priv->container.bad_cond);
+    pthread_mutex_unlock(&priv->container.bad_lock);
+}
+
+/**
+ * Thread body: waits for call stubs on the bad-object queue and resumes
+ * them one at a time, forever.
+ *
+ * @data: the xlator (xlator_t *) this worker belongs to
+ */
+void *
+br_stub_worker(void *data)
+{
+    xlator_t *this = data;
+    br_stub_private_t *priv = NULL;
+    call_stub_t *next = NULL;
+
+    THIS = data;
+    priv = this->private;
+
+    while (1) {
+        pthread_mutex_lock(&priv->container.bad_lock);
+
+        /* re-check the queue after every wakeup before dequeuing */
+        while (list_empty(&priv->container.bad_queue))
+            (void)pthread_cond_wait(&priv->container.bad_cond,
+                                    &priv->container.bad_lock);
+
+        next = __br_stub_dequeue(&priv->container.bad_queue);
+
+        pthread_mutex_unlock(&priv->container.bad_lock);
+
+        /* resume outside the lock so enqueuers are not blocked */
+        if (next)
+            call_resume(next);
+    }
+
+    return NULL;
+}
+
+/**
+ * Lookup handler for the bad object container directory.
+ *
+ * Answers only when loc->gfid equals priv->bad_object_dir_gfid: the
+ * attributes come from an lstat of priv->stub_basepath, with the gfid
+ * replaced by the container gfid. Every other request, and every failure,
+ * is unwound with op_ret = -1.
+ *
+ * @frame:     call frame, always unwound here
+ * @this:      bit-rot stub xlator
+ * @loc:       location being looked up
+ * @xattr_req: not used by this function
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int32_t
+br_stub_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       dict_t *xattr_req)
+{
+    br_stub_private_t *priv = NULL;
+    struct stat lstatbuf = {0};
+    int ret = 0;
+    int32_t op_errno = EINVAL;
+    int32_t op_ret = -1;
+    struct iatt stbuf = {
+        0,
+    };
+    struct iatt postparent = {
+        0,
+    };
+    dict_t *xattr = NULL;
+    gf_boolean_t ver_enabled = _gf_false;
+
+    BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+    priv = this->private;
+    BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), done);
+
+    VALIDATE_OR_GOTO(loc, done);
+    if (gf_uuid_compare(loc->gfid, priv->bad_object_dir_gfid))
+        goto done;
+
+    ret = sys_lstat(priv->stub_basepath, &lstatbuf);
+    if (ret) {
+        /* capture errno first: the logging call may overwrite it */
+        op_errno = errno;
+        gf_msg_debug(this->name, op_errno,
+                     "Stat failed on stub bad "
+                     "object dir");
+        goto done;
+    } else if (!S_ISDIR(lstatbuf.st_mode)) {
+        /* lstat succeeded, so errno is stale here; report ENOTDIR */
+        op_errno = ENOTDIR;
+        gf_msg_debug(this->name, op_errno,
+                     "bad object container is not "
+                     "a directory");
+        goto done;
+    }
+
+    iatt_from_stat(&stbuf, &lstatbuf);
+    gf_uuid_copy(stbuf.ia_gfid, priv->bad_object_dir_gfid);
+
+    op_ret = op_errno = 0;
+    xattr = dict_new();
+    if (!xattr) {
+        op_ret = -1;
+        op_errno = ENOMEM;
+    }
+
+done:
+    /* loc is NULL when the validation above jumped here; do not touch it */
+    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno,
+                        loc ? loc->inode : NULL, &stbuf, xattr, &postparent);
+    if (xattr)
+        dict_unref(xattr);
+    return 0;
+}
+
+/**
+ * Tell whether @filename is the container file name for @gfid, i.e.
+ * exactly "stub-<gfid>".
+ *
+ * Returns 1 on a match, 0 otherwise.
+ */
+static int
+is_bad_gfid_file_current(char *filename, uuid_t gfid)
+{
+    char expected[GF_UUID_BUF_SIZE + 16] = {
+        0,
+    };
+
+    snprintf(expected, sizeof(expected), "stub-%s", uuid_utoa(gfid));
+
+    return strcmp(filename, expected) == 0;
+}
+
+/**
+ * Remove a stale container file from the bad objects directory.
+ *
+ * @filename is left alone when it names the current container file.
+ * Otherwise it is unlinked, but only if it has a link count of 1.
+ * Failures of stat/unlink are ignored.
+ */
+static void
+check_delete_stale_bad_file(xlator_t *this, char *filename)
+{
+    br_stub_private_t *priv = this->private;
+    char stale_path[BR_PATH_MAX_PLUS] = {0};
+    struct stat sb = {0};
+
+    if (is_bad_gfid_file_current(filename, priv->bad_object_dir_gfid))
+        return;
+
+    snprintf(stale_path, sizeof(stale_path), "%s/%s", priv->stub_basepath,
+             filename);
+
+    if (sys_stat(stale_path, &sb) != 0)
+        return;
+
+    /* a link count of 1 means no gfid entry is linked to this file */
+    if (sb.st_nlink == 1)
+        sys_unlink(stale_path);
+}
+
+/**
+ * Read entries of the bad objects directory into @entries.
+ *
+ * Positions @dir at @off (rewinds when @off is 0) and appends one
+ * gf_dirent_t per directory entry until the next entry would push the
+ * accounted size past @size. "." and ".." are skipped. Names starting
+ * with "stub-" are skipped too, after being handed to
+ * check_delete_stale_bad_file().
+ *
+ * @fd:      not used in this function
+ * @fctx:    fd context; bad_object.dir_eof is read and updated here
+ * @dir:     open directory stream to read from
+ * @off:     offset to resume from, 0 for the beginning
+ * @size:    byte budget for the entries
+ * @entries: list head the new entries are appended to
+ *
+ * Returns the number of entries appended. On non-Linux hosts a seek that
+ * lands elsewhere returns -1 with errno set to EINVAL. When telldir, a
+ * readdir with EBADF, or the entry allocation fails, the count gathered
+ * so far is returned. When the end of the directory is detected, errno is
+ * set to ENOENT for the caller.
+ */
+static int
+br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
+                     size_t size, gf_dirent_t *entries)
+{
+    off_t in_case = -1;
+    off_t last_off = 0;
+    size_t filled = 0;
+    int count = 0;
+    int32_t this_size = -1;
+    gf_dirent_t *this_entry = NULL;
+    xlator_t *this = NULL;
+    struct dirent *entry = NULL;
+    struct dirent scratch[2] = {
+        {
+            0,
+        },
+    };
+
+    this = THIS;
+    if (!off) {
+        rewinddir(dir);
+    } else {
+        seekdir(dir, off);
+#ifndef GF_LINUX_HOST_OS
+        /* the remembered EOF offset is accepted even if telldir differs */
+        if ((u_long)telldir(dir) != off && off != fctx->bad_object.dir_eof) {
+            gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+                    BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "off=(0x%llx)", off,
+                    "dir=%p", dir, NULL);
+            errno = EINVAL;
+            count = -1;
+            goto out;
+        }
+#endif /* GF_LINUX_HOST_OS */
+    }
+
+    while (filled <= size) {
+        /* remember where this entry starts so it can be re-read later */
+        in_case = (u_long)telldir(dir);
+
+        if (in_case == -1) {
+            gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+                    BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL, "dir=%p", dir, "err=%s",
+                    strerror(errno), NULL);
+            goto out;
+        }
+
+        /* cleared so that a NULL entry with errno still 0 means EOF */
+        errno = 0;
+        entry = sys_readdir(dir, scratch);
+        if (!entry || errno != 0) {
+            if (errno == EBADF) {
+                gf_smsg(THIS->name, GF_LOG_WARNING, 0,
+                        BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, "dir=%p", dir,
+                        "err=%s", strerror(errno), NULL);
+                goto out;
+            }
+            break;
+        }
+
+        if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+            continue;
+
+        /* container files are never reported; stale ones get cleaned up */
+        if (!strncmp(entry->d_name, "stub-", strlen("stub-"))) {
+            check_delete_stale_bad_file(this, entry->d_name);
+            continue;
+        }
+
+        this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) +
+                    strlen(entry->d_name) + 1;
+
+        /* entry does not fit: step back so the next call reads it again */
+        if (this_size + filled > size) {
+            seekdir(dir, in_case);
+#ifndef GF_LINUX_HOST_OS
+            if ((u_long)telldir(dir) != in_case &&
+                in_case != fctx->bad_object.dir_eof) {
+                gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "in_case=(0x%llx)",
+                        in_case, "dir=%p", dir, NULL);
+                errno = EINVAL;
+                count = -1;
+                goto out;
+            }
+#endif /* GF_LINUX_HOST_OS */
+            break;
+        }
+
+        this_entry = gf_dirent_for_name(entry->d_name);
+
+        if (!this_entry) {
+            gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+                    BRS_MSG_CREATE_GF_DIRENT_FAILED, "entry-name=%s",
+                    entry->d_name, "err=%s", strerror(errno), NULL);
+            goto out;
+        }
+        /*
+         * we store the offset of next entry here, which is
+         * probably not intended, but code using syncop_readdir()
+         * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it
+         * for directory read resumption.
+         */
+        last_off = (u_long)telldir(dir);
+        this_entry->d_off = last_off;
+        this_entry->d_ino = entry->d_ino;
+
+        list_add_tail(&this_entry->list, &entries->list);
+
+        filled += this_size;
+        count++;
+    }
+
+    /*
+     * Probe one more entry to detect EOF. NOTE(review): errno is not
+     * cleared before this probe, so a non-zero value left by an earlier
+     * call hides the EOF indication - confirm this is intended.
+     */
+    if ((!sys_readdir(dir, scratch) && (errno == 0))) {
+        /* Indicate EOF */
+        errno = ENOENT;
+        /* Remember EOF offset for later detection */
+        fctx->bad_object.dir_eof = last_off;
+    }
+out:
+    return count;
+}
+
+/**
+ * Readdir handler for the bad objects directory.
+ *
+ * Reads up to @size bytes worth of entries starting at @off from the
+ * directory stream kept in the fd context, then asks
+ * br_stub_bad_objects_path() to add the object paths to the reply dict.
+ *
+ * @frame: call frame, always unwound here
+ * @this:  bit-rot stub xlator
+ * @fd:    fd whose context holds the open directory stream
+ * @size:  byte budget for the returned entries
+ * @off:   offset to resume reading from
+ * @xdata: optional dict; when NULL, a dict created by the path helper is
+ *         sent instead and released after the unwind
+ *
+ * Unwinds with op_ret = number of entries (or -1) and op_errno = errno as
+ * left by br_stub_fill_readdir(), where ENOENT marks end of directory.
+ * A missing fd context is reported as EBADF, a NULL directory stream as
+ * EINVAL. Always returns 0.
+ */
+int32_t
+br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                        size_t size, off_t off, dict_t *xdata)
+{
+    br_stub_fd_t *fctx = NULL;
+    DIR *dir = NULL;
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    int count = 0;
+    gf_dirent_t entries;
+    gf_boolean_t xdata_unref = _gf_false;
+    dict_t *dict = NULL;
+
+    INIT_LIST_HEAD(&entries.list);
+
+    fctx = br_stub_fd_ctx_get(this, fd);
+    if (!fctx) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_FD_CONTEXT_FAILED,
+                "fd=%p", fd, NULL);
+        /*
+         * This used to be "-ret" with ret never assigned, which always
+         * yielded 1 (EPERM). Report the real condition instead: the fd
+         * carries no usable context.
+         */
+        op_errno = EBADF;
+        goto done;
+    }
+
+    dir = fctx->bad_object.dir;
+
+    if (!dir) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_HANDLE_DIR_NULL,
+                "fd=%p", fd, NULL);
+        op_errno = EINVAL;
+        goto done;
+    }
+
+    count = br_stub_fill_readdir(fd, fctx, dir, off, size, &entries);
+
+    /* pick ENOENT to indicate EOF */
+    op_errno = errno;
+    op_ret = count;
+
+    /* best effort: a failure here only means paths are missing */
+    dict = xdata;
+    (void)br_stub_bad_objects_path(this, fd, &entries, &dict);
+    if (!xdata && dict) {
+        /* the helper created the dict, so release it after unwinding */
+        xdata = dict;
+        xdata_unref = _gf_true;
+    }
+
+done:
+    STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, xdata);
+    gf_dirent_free(&entries);
+    if (xdata_unref)
+        dict_unref(xdata);
+    return 0;
+}
+
+/**
+ * This function is called to mainly obtain the paths of the corrupt
+ * objects (files as of now). Currently scrub status prints only the
+ * gfid of the corrupted files. Reason is, bitrot-stub maintains the
+ * list of the corrupted objects as entries inside the quarantine
+ * directory (<brick export>/.glusterfs/quarantine)
+ *
+ * And the name of each entry in the quarantine directory is the gfid
+ * of the corrupted object. So scrub status will just show that info.
+ * But it helps the users a lot if the actual path to the object is
+ * also reported. Hence the below function to get that information.
+ * The function allocates a new dict to be returned (if it does not
+ * get one from the caller of readdir i.e. scrubber as of now), and
+ * stores the paths of each corrupted gfid there. The gfid is used as
+ * the key and path is used as the value.
+ *
+ * NOTE: The path will be there in following situations
+ * 1) gfid2path option has been enabled (posix xlator option)
+ *    and the corrupted file contains the path as an extended
+ *    attribute.
+ * 2) If the gfid2path option is not enabled, OR if the xattr
+ *    is absent, then the inode table should have it.
+ *    The path will be there if a name based lookup has happened
+ *    on the file which has been corrupted. With lookup a inode and
+ *    dentry would be created in the inode table. And the path is
+ *    constructed using the in memory inode and dentry. If a lookup
+ *    has not happened OR the inode corresponding to the corrupted
+ *    file does not exist in the inode table (because it got purged
+ *    as lru limit of the inodes exceeded) OR a nameless lookup had
+ *    happened to populate the inode in the inode table, then the
+ *    path will not be printed in scrub and only the gfid will be there.
+ *
+ * @this:    bit-rot stub xlator
+ * @fd:      fd of the quarantine directory; its inode table is searched
+ * @entries: directory entries whose names are the gfids to resolve
+ * @dict:    in/out; *dict is used when non-NULL, otherwise a new dict is
+ *           created and handed back through it
+ *
+ * Returns 0 on success (including an empty @entries list) and -1 when a
+ * dict had to be created but the allocation failed.
+ **/
+int
+br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
+                         dict_t **dict)
+{
+    gf_dirent_t *entry = NULL;
+    inode_t *inode = NULL;
+    char *hpath = NULL;
+    uuid_t gfid = {0};
+    int ret = -1;
+    dict_t *tmp_dict = NULL;
+    char str_gfid[64] = {0};
+
+    if (list_empty(&entries->list))
+        return 0;
+
+    tmp_dict = *dict;
+
+    if (!tmp_dict) {
+        tmp_dict = dict_new();
+        /*
+         * If the allocation of dict fails then no need treat it
+         * it as a error. This path (or function) is executed when
+         * "gluster volume bitrot <volume name> scrub status" is
+         * executed, to get the list of the corrupted objects.
+         * And the motive of this function is to get the paths of
+         * the corrupted objects. If the dict allocation fails, then
+         * the scrub status will only show the gfids of those corrupted
+         * objects (which is the behavior as of the time of this patch
+         * being worked upon). So just return and only the gfids will
+         * be shown.
+         */
+        if (!tmp_dict) {
+            gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_FAILED, NULL);
+            goto out;
+        }
+    }
+
+    list_for_each_entry(entry, &entries->list, list)
+    {
+        gf_uuid_clear(gfid);
+        gf_uuid_parse(entry->d_name, gfid);
+
+        inode = inode_find(fd->inode->table, gfid);
+
+        /* No need to check the return value here.
+         * Because @hpath is examined.
+         */
+        (void)br_stub_get_path_of_gfid(this, fd->inode, inode, gfid, &hpath);
+
+        if (hpath) {
+            gf_msg_debug(this->name, 0,
+                         "path of the corrupted "
+                         "object (gfid: %s) is %s",
+                         uuid_utoa(gfid), hpath);
+            br_stub_entry_xattr_fill(this, hpath, entry, tmp_dict);
+        } else
+            gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+                    "gfid=%s", uuid_utoa_r(gfid, str_gfid), NULL);
+
+        /*
+         * inode_find() hands back a referenced inode. Drop that
+         * reference, otherwise every listed object pins its inode.
+         */
+        if (inode) {
+            inode_unref(inode);
+            inode = NULL;
+        }
+        hpath = NULL;
+    }
+
+    ret = 0;
+    *dict = tmp_dict;
+
+out:
+    return ret;
+}
+
+/**
+ * Resolve @gfid to a path, first from the backend and then from memory.
+ *
+ * @this:   bit-rot stub xlator
+ * @parent: inode whose table is used for the resolution
+ * @inode:  inode of @gfid from the inode table, may be NULL
+ * @gfid:   gfid to resolve
+ * @path:   out; receives the resolved path on success
+ *
+ * Returns the result of the last resolution attempt (negative on
+ * failure), or -1 when @this, @parent or @path is NULL.
+ */
+int
+br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode,
+                         uuid_t gfid, char **path)
+{
+    int32_t ret = -1;
+    char gfid_str[64] = {0};
+
+    GF_VALIDATE_OR_GOTO("bitrot-stub", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, parent, out);
+    GF_VALIDATE_OR_GOTO(this->name, path, out);
+
+    /* Above, No need to validate the @inode for hard resolution. Because
+     * inode can be NULL and if it is NULL, then syncop_gfid_to_path_hard
+     * will allocate a new inode and proceed. So no need to bother about
+     * @inode. Because we need it only to send a syncop_getxattr call
+     * from inside syncop_gfid_to_path_hard. And getxattr fetches the
+     * path from the backend.
+     */
+
+    ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid,
+                                   inode, path, _gf_true);
+    /*
+     * Use the message written for this exact case (xattr lookup failed,
+     * falling back to memory) so it can be told apart from the final
+     * failure logged below.
+     */
+    if (ret < 0)
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_XATTR_GET_FAILED,
+                "gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL);
+
+    /*
+     * Try with soft resolution of path if hard resolve fails. Because
+     * checking the xattr on disk to get the path of a inode (or gfid)
+     * is dependent on whether that option is enabled in the posix
+     * xlator or not. If it is not enabled, then hard resolution by
+     * checking the on disk xattr fails.
+     *
+     * Thus in such situations fall back to the soft resolution which
+     * mainly depends on the inode_path() function. And for using
+     * inode_path, @inode has to be linked i.e. a successful lookup should
+     * have happened on the gfid (or the path) to link the inode to the
+     * inode table. And if @inode is NULL, means, the inode has not been
+     * found in the inode table and better not to do inode_path() on the
+     * inode which has not been linked.
+     */
+    if (ret < 0 && inode) {
+        ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid,
+                                       inode, path, _gf_false);
+        if (ret < 0)
+            gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+                    "from-memory  gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL);
+    }
+
+out:
+    return ret;
+}
+
+/**
+ * Record the path of a corrupted object in @dict, keyed by the entry name
+ * (the object's gfid).
+ *
+ * NOTE: If the file has multiple hardlinks (in gluster volume
+ * namespace), the path would be one of the hardlinks. Its up to
+ * the user to find the remaining hardlinks (using find -samefile)
+ * and remove them.
+ **/
+void
+br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry,
+                         dict_t *dict)
+{
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, done);
+    GF_VALIDATE_OR_GOTO(this->name, hpath, done);
+
+    /*
+     * A failed dict_set_dynstr() is only logged: the scrubber still gets
+     * the gfid of the corrupted object from the entry itself, it just
+     * will not have the path to go with it.
+     */
+    if (dict_set_dynstr(dict, entry->d_name, hpath) != 0)
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_DICT_SET_FAILED,
+                "path=%s", hpath, "object-name=%s", entry->d_name, NULL);
+
+done:
+    return;
+}
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
new file mode 100644
index 00000000000..9d93caf069f
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -0,0 +1,36 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef _BR_MEM_TYPES_H
+#define _BR_MEM_TYPES_H
+
+#include <glusterfs/mem-types.h>
+
+/*
+ * Memory accounting types for the bit-rot xlators. Numbering continues
+ * right after the common types (gf_common_mt_end).
+ */
+enum br_mem_types {
+    gf_br_stub_mt_private_t = gf_common_mt_end + 1,
+    gf_br_stub_mt_version_t,
+    gf_br_stub_mt_inode_ctx_t,
+    gf_br_stub_mt_signature_t,
+    gf_br_mt_br_private_t,
+    gf_br_mt_br_child_t,
+    gf_br_mt_br_object_t,
+    gf_br_mt_br_ob_n_wk_t,
+    gf_br_mt_br_scrubber_t,
+    gf_br_mt_br_fsscan_entry_t,
+    gf_br_stub_mt_br_stub_fd_t,
+    gf_br_stub_mt_br_scanner_freq_t,
+    gf_br_stub_mt_sigstub_t,
+    gf_br_mt_br_child_event_t,
+    gf_br_stub_mt_misc,
+    gf_br_mt_br_worker_t,
+    /* end marker, used to size the accounting table; keep it last */
+    gf_br_stub_mt_end,
+};
+
+#endif
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
new file mode 100644
index 00000000000..6c15a166f18
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -0,0 +1,117 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _BITROT_STUB_MESSAGES_H_
+#define _BITROT_STUB_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
+           BRS_MSG_MEM_ACNT_FAILED, BRS_MSG_CREATE_FRAME_FAILED,
+           BRS_MSG_SET_CONTEXT_FAILED, BRS_MSG_CHANGE_VERSION_FAILED,
+           BRS_MSG_ADD_FD_TO_LIST_FAILED, BRS_MSG_SET_FD_CONTEXT_FAILED,
+           BRS_MSG_CREATE_ANONYMOUS_FD_FAILED, BRS_MSG_NO_CHILD,
+           BRS_MSG_STUB_ALLOC_FAILED, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+           BRS_MSG_CANCEL_SIGN_THREAD_FAILED, BRS_MSG_ADD_FD_TO_INODE,
+           BRS_MSG_SIGN_VERSION_ERROR, BRS_MSG_BAD_OBJ_MARK_FAIL,
+           BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, BRS_MSG_REMOVE_INTERNAL_XATTR,
+           BRS_MSG_SET_INTERNAL_XATTR, BRS_MSG_BAD_OBJECT_ACCESS,
+           BRS_MSG_BAD_CONTAINER_FAIL, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+           BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL,
+           BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, BRS_MSG_GET_FD_CONTEXT_FAILED,
+           BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+           BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
+           BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
+           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
+           BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
+           BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL,
+           BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED,
+           BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED,
+           BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED,
+           BRS_MSG_VERSION_PREPARE_FAIL);
+
+#define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
+#define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed"
+#define BRS_MSG_USING_DEFAULT_THREAD_SIZE_STR "Using default thread stack size"
+#define BRS_MSG_NO_CHILD_STR "FATAL: no children"
+#define BRS_MSG_SPAWN_SIGN_THRD_FAILED_STR                                     \
+    "failed to create the new thread for signer"
+#define BRS_MSG_BAD_CONTAINER_FAIL_STR                                         \
+    "failed to launch the thread for storing bad gfids"
+#define BRS_MSG_CANCEL_SIGN_THREAD_FAILED_STR                                  \
+    "Could not cancel sign serializer thread"
+#define BRS_MSG_KILL_SIGN_THREAD_STR "killed the signer thread"
+#define BRS_MSG_GET_INODE_CONTEXT_FAILED_STR                                   \
+    "failed to init the inode context for the inode"
+#define BRS_MSG_ADD_FD_TO_INODE_STR "failed to add fd to the inode"
+#define BRS_MSG_NO_MEMORY_STR "local allocation failed"
+#define BRS_MSG_BAD_OBJECT_ACCESS_STR "bad object accessed. Returning"
+#define BRS_MSG_SIGN_VERSION_ERROR_STR "Signing version exceeds current version"
+#define BRS_MSG_NON_BITD_PID_STR                                               \
+    "PID from where signature request came, does not belong to bit-rot "       \
+    "daemon. Unwinding the fop"
+#define BRS_MSG_SIGN_PREPARE_FAIL_STR                                          \
+    "failed to prepare the signature. Unwinding the fop"
+#define BRS_MSG_VERSION_PREPARE_FAIL_STR                                       \
+    "failed to prepare the version. Unwinding the fop"
+#define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding"
+#define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad"
+#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR                                     \
+    "bad object marking is not from the scrubber"
+#define BRS_MSG_ALLOC_MEM_FAILED_STR "failed to allocate memory"
+#define BRS_MSG_SET_INTERNAL_XATTR_STR "called on the internal xattr"
+#define BRS_MSG_REMOVE_INTERNAL_XATTR_STR "removexattr called on internal xattr"
+#define BRS_MSG_CREATE_ANONYMOUS_FD_FAILED_STR                                 \
+    "failed to create anonymous fd for the inode"
+#define BRS_MSG_ADD_FD_TO_LIST_FAILED_STR "failed to add fd to the list"
+#define BRS_MSG_SET_FD_CONTEXT_FAILED_STR                                      \
+    "failed to set the fd context for the file"
+#define BRS_MSG_NULL_LOCAL_STR "local is NULL"
+#define BRS_MSG_DICT_ALLOC_FAILED_STR                                          \
+    "dict allocation failed: cannot send IPC FOP to changelog"
+#define BRS_MSG_SET_EVENT_FAILED_STR "cannot set release event in dict"
+#define BRS_MSG_CREATE_FRAME_FAILED_STR "create_frame() failure"
+#define BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL_STR "closedir error"
+#define BRS_MSG_LINK_FAIL_STR "failed to record gfid"
+#define BRS_MSG_BAD_OBJ_UNLINK_FAIL_STR                                        \
+    "failed to delete bad object link from quarantine directory"
+#define BRS_MSG_BAD_OBJECT_DIR_FAIL_STR "failed stub directory"
+#define BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL_STR                                   \
+    "seekdir failed. Invalid argument (offset reused from another DIR * "      \
+    "structure)"
+#define BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL_STR "telldir failed on dir"
+#define BRS_MSG_BAD_OBJECT_DIR_READ_FAIL_STR "readdir failed on dir"
+#define BRS_MSG_CREATE_GF_DIRENT_FAILED_STR "could not create gf_dirent"
+#define BRS_MSG_GET_FD_CONTEXT_FAILED_STR "pfd is NULL"
+#define BRS_MSG_BAD_HANDLE_DIR_NULL_STR "dir is NULL"
+#define BRS_MSG_ALLOC_FAILED_STR                                               \
+    "failed to allocate new dict for saving the paths of the corrupted "       \
+    "objects. Scrub status will only display the gfid"
+#define BRS_MSG_PATH_GET_FAILED_STR "failed to get the path"
+#define BRS_MSG_PATH_XATTR_GET_FAILED_STR                                      \
+    "failed to get the path xattr from disk for the gfid. Trying to get path " \
+    "from the memory"
+#define BRS_MSG_DICT_SET_FAILED_STR                                            \
+    "failed to set the actual path as the value in the dict for the "          \
+    "corrupted object"
+#define BRS_MSG_SET_CONTEXT_FAILED_STR                                         \
+    "could not set fd context for release callback"
+#define BRS_MSG_CHANGE_VERSION_FAILED_STR "change version failed"
+#endif /* !_BITROT_STUB_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
new file mode 100644
index 00000000000..447dd47ff41
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -0,0 +1,3590 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#include <ctype.h>
+#include <sys/uio.h>
+#include <signal.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include "changelog.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/call-stub.h>
+
+#include "bit-rot-stub.h"
+#include "bit-rot-stub-mem-types.h"
+#include "bit-rot-stub-messages.h"
+#include "bit-rot-common.h"
+
+#define BR_STUB_REQUEST_COOKIE 0x1
+
+/**
+ * Thread cancellation cleanup handler: unlocks the mutex passed in @arg.
+ * The signer thread registers it with pthread_cleanup_push() so that a
+ * cancelled thread does not leave its mutex locked.
+ */
+void
+br_stub_lock_cleaner(void *arg)
+{
+    pthread_mutex_unlock((pthread_mutex_t *)arg);
+}
+
+void *
+br_stub_signth(void *);
+
+struct br_stub_signentry { /* one queued object-signing request */
+    unsigned long v; /* signed version (host order); sort key in orderq() */
+
+    call_stub_t *stub; /* fsetxattr stub resumed by the signer thread */
+
+    struct list_head list; /* linkage into the private signing queue */
+};
+
+/**
+ * Set up memory accounting for this translator.
+ *
+ * @this: translator instance; a NULL pointer is rejected.
+ *
+ * Returns the result of xlator_mem_acct_init(), or -1 when @this is NULL.
+ * A non-zero result is logged as a warning.
+ */
+int32_t
+mem_acct_init(xlator_t *this)
+{
+    int32_t status;
+
+    if (this == NULL)
+        return -1;
+
+    status = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1);
+    if (status != 0)
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED, NULL);
+
+    return status;
+}
+
+/**
+ * Initialise the bad object container: its condition variable, its mutex,
+ * its request queue, the directory set up by br_stub_dir_create() and the
+ * worker thread running br_stub_worker().
+ *
+ * @this: translator instance, handed to the worker thread.
+ * @priv: private structure owning the container.
+ *
+ * Returns 0 on success and -1 on failure. On failure every pthread object
+ * initialised here is destroyed again. The thread attribute object is
+ * always destroyed before returning, since it is only needed while the
+ * thread is being created.
+ */
+int
+br_stub_bad_object_container_init(xlator_t *this, br_stub_private_t *priv)
+{
+    pthread_attr_t w_attr;
+    int ret = -1;
+
+    ret = pthread_cond_init(&priv->container.bad_cond, NULL);
+    if (ret != 0) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+                "cond_init ret=%d", ret, NULL);
+        goto out;
+    }
+
+    ret = pthread_mutex_init(&priv->container.bad_lock, NULL);
+    if (ret != 0) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+                "mutex_init ret=%d", ret, NULL);
+        goto cleanup_cond;
+    }
+
+    ret = pthread_attr_init(&w_attr);
+    if (ret != 0) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+                "attr_init ret=%d", ret, NULL);
+        goto cleanup_lock;
+    }
+
+    /* an unsupported stack size is not fatal: the default size is used */
+    ret = pthread_attr_setstacksize(&w_attr, BAD_OBJECT_THREAD_STACK_SIZE);
+    if (ret == EINVAL) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0,
+                BRS_MSG_USING_DEFAULT_THREAD_SIZE, NULL);
+    }
+
+    INIT_LIST_HEAD(&priv->container.bad_queue);
+    ret = br_stub_dir_create(this, priv);
+    if (ret < 0)
+        goto cleanup_attr; /* w_attr is initialised by now: release it too */
+
+    ret = gf_thread_create(&priv->container.thread, &w_attr, br_stub_worker,
+                           this, "brswrker");
+    if (ret)
+        goto cleanup_attr;
+
+    /* the attribute object is no longer needed once the thread exists */
+    pthread_attr_destroy(&w_attr);
+
+    return 0;
+
+cleanup_attr:
+    pthread_attr_destroy(&w_attr);
+cleanup_lock:
+    pthread_mutex_destroy(&priv->container.bad_lock);
+cleanup_cond:
+    pthread_cond_destroy(&priv->container.bad_cond);
+out:
+    return -1;
+}
+
+/**
+ * Translator entry point: allocate and populate the private structure and,
+ * when the "bitrot" option is enabled, start the signer thread and the bad
+ * object container.
+ *
+ * Returns 0 on success and -1 on failure. Failure paths that allocated the
+ * private structure release everything set up here (including a signer
+ * thread that was already spawned) and reset this->private to NULL.
+ */
+int32_t
+init(xlator_t *this)
+{
+    int ret = 0;
+    int len = 0;
+    char *tmp = NULL;
+    struct timeval tv = {
+        0,
+    };
+    br_stub_private_t *priv = NULL;
+
+    if (!this->children) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD, NULL);
+        goto error_return;
+    }
+
+    priv = GF_CALLOC(1, sizeof(*priv), gf_br_stub_mt_private_t);
+    if (!priv)
+        goto error_return;
+
+    priv->local_pool = mem_pool_new(br_stub_local_t, 512);
+    if (!priv->local_pool)
+        goto free_priv;
+
+    GF_OPTION_INIT("bitrot", priv->do_versioning, bool, free_mempool);
+
+    GF_OPTION_INIT("export", tmp, str, free_mempool);
+
+    /* reject truncated paths as well as snprintf() errors (negative value) */
+    len = snprintf(priv->export, PATH_MAX, "%s", tmp);
+    if (len < 0 || len >= PATH_MAX)
+        goto free_mempool;
+
+    len = snprintf(priv->stub_basepath, sizeof(priv->stub_basepath), "%s/%s",
+                   priv->export, BR_STUB_QUARANTINE_DIR);
+    if (len < 0 || (size_t)len >= sizeof(priv->stub_basepath))
+        goto free_mempool;
+
+    (void)gettimeofday(&tv, NULL);
+
+    /* boot time is in network endian format */
+    priv->boot[0] = htonl(tv.tv_sec);
+    priv->boot[1] = htonl(tv.tv_usec);
+
+    pthread_mutex_init(&priv->lock, NULL);
+    pthread_cond_init(&priv->cond, NULL);
+    INIT_LIST_HEAD(&priv->squeue);
+
+    /* Thread creations need 'this' to be passed so that THIS can be
+     * assigned inside the thread. So setting this->private here.
+     */
+    this->private = priv;
+    if (!priv->do_versioning)
+        return 0;
+
+    ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
+                           "brssign");
+    if (ret != 0) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+                NULL);
+        goto cleanup_lock;
+    }
+
+    ret = br_stub_bad_object_container_init(this, priv);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL, NULL);
+        goto cleanup_signth;
+    }
+
+    gf_msg_debug(this->name, 0, "bit-rot stub loaded");
+
+    return 0;
+
+cleanup_signth:
+    /* The signer thread works on priv->lock, priv->cond and priv->squeue:
+     * it has to be stopped before those are destroyed and priv is freed.
+     */
+    if (gf_thread_cleanup_xint(priv->signth)) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+                NULL);
+    }
+    priv->signth = 0;
+cleanup_lock:
+    pthread_cond_destroy(&priv->cond);
+    pthread_mutex_destroy(&priv->lock);
+free_mempool:
+    mem_pool_destroy(priv->local_pool);
+    priv->local_pool = NULL;
+free_priv:
+    GF_FREE(priv);
+    this->private = NULL;
+error_return:
+    return -1;
+}
+
+/* TODO:
+ * As of now enabling bitrot option does 2 things.
+ * 1) Start the Bitrot Daemon which signs the objects (currently files only)
+ *    upon getting notified by the stub.
+ * 2) Enable versioning of the objects. Object versions (again files only) are
+ *    incremented upon modification.
+ * So object versioning is tied to bitrot daemon's signing. In future, object
+ * versioning might be necessary for other things as well apart from bit-rot
+ * detection (well that's the objective of bringing in object-versioning :)).
+ * In that case, better to make versioning a new option and letting it to be
+ * enabled despite bit-rot detection is not needed.
+ * Ex: ICAP.
+ */
+/**
+ * Apply a changed "bitrot" option at runtime.
+ *
+ *  - bitrot enabled and no signer thread yet: spawn the signer thread and
+ *    initialise the bad object container.
+ *  - bitrot disabled: stop the signer thread and the bad object worker.
+ *  - bitrot enabled and the signer thread already running: nothing to do;
+ *    the running threads are left alone.
+ *
+ * Returns 0 on success. On failure both helper threads are stopped (when
+ * present) and -1 is returned.
+ */
+int32_t
+reconfigure(xlator_t *this, dict_t *options)
+{
+    int32_t ret = -1;
+    br_stub_private_t *priv = NULL;
+
+    priv = this->private;
+
+    GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
+    if (priv->do_versioning && !priv->signth) {
+        ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
+                               "brssign");
+        if (ret != 0) {
+            gf_smsg(this->name, GF_LOG_WARNING, 0,
+                    BRS_MSG_SPAWN_SIGN_THRD_FAILED, NULL);
+            goto err;
+        }
+
+        ret = br_stub_bad_object_container_init(this, priv);
+        if (ret) {
+            gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL,
+                    NULL);
+            goto err;
+        }
+    } else if (!priv->do_versioning) {
+        /* Only tear the threads down when bitrot really got disabled; a
+         * reconfigure with bitrot still enabled must not kill them.
+         */
+        if (priv->signth) {
+            if (gf_thread_cleanup_xint(priv->signth)) {
+                gf_smsg(this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+            } else {
+                gf_smsg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
+                        NULL);
+                priv->signth = 0;
+            }
+        }
+
+        if (priv->container.thread) {
+            if (gf_thread_cleanup_xint(priv->container.thread)) {
+                gf_smsg(this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+            }
+            priv->container.thread = 0;
+        }
+    }
+
+    ret = 0;
+    return ret;
+err:
+    if (priv->signth) {
+        if (gf_thread_cleanup_xint(priv->signth)) {
+            gf_smsg(this->name, GF_LOG_ERROR, 0,
+                    BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+        }
+        priv->signth = 0;
+    }
+
+    if (priv->container.thread) {
+        if (gf_thread_cleanup_xint(priv->container.thread)) {
+            gf_smsg(this->name, GF_LOG_ERROR, 0,
+                    BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+        }
+        priv->container.thread = 0;
+    }
+    ret = -1;
+    return ret;
+}
+
+/**
+ * Forward an event through default_notify(), provided the translator and
+ * its private structure exist. Always returns 0.
+ */
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+    if (this && this->private)
+        default_notify(this, event, data);
+
+    return 0;
+}
+
+/**
+ * Tear down the translator.
+ *
+ * With versioning enabled the signer thread and the bad object worker are
+ * stopped, their pending queues are drained and the container mutex and
+ * condition variable are destroyed. If stopping either thread fails, the
+ * function returns early and leaves the remaining state untouched.
+ * Finally the signing lock/cond, the local pool and the private structure
+ * are released.
+ */
+void
+fini(xlator_t *this)
+{
+    br_stub_private_t *priv = this->private;
+    struct br_stub_signentry *entry = NULL;
+    call_stub_t *bad_stub = NULL;
+
+    if (priv == NULL)
+        return;
+
+    if (priv->do_versioning) {
+        if (gf_thread_cleanup_xint(priv->signth) != 0) {
+            gf_smsg(this->name, GF_LOG_ERROR, 0,
+                    BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+            return;
+        }
+        priv->signth = 0;
+
+        /* drop signing requests that were never dispatched */
+        while (!list_empty(&priv->squeue)) {
+            entry = list_first_entry(&priv->squeue, struct br_stub_signentry,
+                                     list);
+            list_del_init(&entry->list);
+
+            call_stub_destroy(entry->stub);
+            GF_FREE(entry);
+        }
+
+        if (gf_thread_cleanup_xint(priv->container.thread) != 0) {
+            gf_smsg(this->name, GF_LOG_ERROR, 0,
+                    BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+            return;
+        }
+        priv->container.thread = 0;
+
+        /* drop bad object requests that were never served */
+        while (!list_empty(&priv->container.bad_queue)) {
+            bad_stub = list_first_entry(&priv->container.bad_queue,
+                                        call_stub_t, list);
+            list_del_init(&bad_stub->list);
+            call_stub_destroy(bad_stub);
+        }
+
+        pthread_mutex_destroy(&priv->container.bad_lock);
+        pthread_cond_destroy(&priv->container.bad_cond);
+    }
+
+    pthread_mutex_destroy(&priv->lock);
+    pthread_cond_destroy(&priv->cond);
+
+    if (priv->local_pool) {
+        mem_pool_destroy(priv->local_pool);
+        priv->local_pool = NULL;
+    }
+
+    this->private = NULL;
+    GF_FREE(priv);
+}
+
+/**
+ * Allocate one zeroed buffer holding a br_version_t (when @obuf is given)
+ * followed by a br_signature_t plus @signaturelen bytes (when @sbuf is
+ * given), and hand out pointers into it.
+ *
+ * Returns 0 on success, -1 when the allocation fails.
+ */
+static int
+br_stub_alloc_versions(br_version_t **obuf, br_signature_t **sbuf,
+                       size_t signaturelen)
+{
+    char *cursor = NULL;
+    size_t total = 0;
+
+    total += obuf ? sizeof(br_version_t) : 0;
+    total += sbuf ? sizeof(br_signature_t) + signaturelen : 0;
+
+    cursor = GF_CALLOC(1, total, gf_br_stub_mt_version_t);
+    if (cursor == NULL)
+        return -1;
+
+    if (obuf) {
+        *obuf = (br_version_t *)cursor;
+        cursor += sizeof(br_version_t);
+    }
+    if (sbuf)
+        *sbuf = (br_signature_t *)cursor;
+
+    return 0;
+}
+
+/**
+ * Free a version/signature buffer; thin wrapper around GF_FREE().
+ */
+static void
+br_stub_dealloc_versions(void *mem)
+{
+    GF_FREE(mem);
+    return;
+}
+
+/**
+ * Fetch a br_stub_local_t from the translator's local pool via mem_get0().
+ * Returns whatever mem_get0() returns.
+ */
+static br_stub_local_t *
+br_stub_alloc_local(xlator_t *this)
+{
+    br_stub_local_t *local = NULL;
+    br_stub_private_t *priv = this->private;
+
+    local = mem_get0(priv->local_pool);
+
+    return local;
+}
+
+/**
+ * Return a local to its memory pool; a NULL pointer is ignored.
+ */
+static void
+br_stub_dealloc_local(br_stub_local_t *ptr)
+{
+    if (ptr)
+        mem_put(ptr);
+}
+
+/**
+ * Fill @obuf with @oversion and the translator's boot time, then store it
+ * in @dict under BITROT_CURRENT_VERSION_KEY.
+ *
+ * Returns the result of dict_set_bin().
+ */
+static int
+br_stub_prepare_version_request(xlator_t *this, dict_t *dict,
+                                br_version_t *obuf, unsigned long oversion)
+{
+    br_stub_private_t *priv = this->private;
+    int status;
+
+    br_set_ongoingversion(obuf, oversion, priv->boot);
+
+    status = dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
+                          sizeof(br_version_t));
+    return status;
+}
+
+/**
+ * Build the signature in @sbuf from @sign via br_set_signature() and store
+ * it in @dict under BITROT_SIGNING_VERSION_KEY, using the size reported by
+ * br_set_signature().
+ *
+ * Returns the result of dict_set_bin().
+ */
+static int
+br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf,
+                                br_isignature_t *sign, size_t signaturelen)
+{
+    size_t xattr_len = 0;
+    int status;
+
+    br_set_signature(sbuf, sign, signaturelen, &xattr_len);
+
+    status = dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf,
+                          xattr_len);
+    return status;
+}
+
+/**
+ * Create and attach a fresh inode context.
+ *
+ * The context starts at ongoing version @version, is marked dirty or
+ * synced according to @markdirty and is flagged bad when @bad_object is
+ * set. When @fd is given it is added to the context's fd list. On success
+ * the context address is stored in @ctx_addr (when non-NULL).
+ *
+ * Returns 0 on success and -1 on failure (the context is freed again).
+ */
+static int
+br_stub_init_inode_versions(xlator_t *this, fd_t *fd, inode_t *inode,
+                            unsigned long version, gf_boolean_t markdirty,
+                            gf_boolean_t bad_object, uint64_t *ctx_addr)
+{
+    br_stub_inode_ctx_t *ctx = NULL;
+
+    ctx = GF_CALLOC(1, sizeof(br_stub_inode_ctx_t), gf_br_stub_mt_inode_ctx_t);
+    if (ctx == NULL)
+        return -1;
+
+    INIT_LIST_HEAD(&ctx->fd_list);
+    if (markdirty)
+        __br_stub_mark_inode_dirty(ctx);
+    else
+        __br_stub_mark_inode_synced(ctx);
+    __br_stub_set_ongoing_version(ctx, version);
+
+    if (bad_object)
+        __br_stub_mark_object_bad(ctx);
+
+    if (fd && br_stub_add_fd_to_inode(this, fd, ctx) != 0)
+        goto discard;
+
+    if (br_stub_set_inode_ctx(this, inode, ctx) != 0)
+        goto discard;
+
+    if (ctx_addr)
+        *ctx_addr = (uint64_t)(uintptr_t)ctx;
+    return 0;
+
+discard:
+    GF_FREE(ctx);
+    return -1;
+}
+
+/**
+ * Update the ongoing version of an inode under inode->lock.
+ *
+ * The version is only replaced (and the inode marked synced) when the
+ * inode is currently dirty.
+ *
+ * Returns 0 when the inode context was found, -1 otherwise.
+ */
+static int
+br_stub_mod_inode_versions(xlator_t *this, fd_t *fd, inode_t *inode,
+                           unsigned long version)
+{
+    int32_t status = -1;
+    br_stub_inode_ctx_t *ctx = NULL;
+
+    LOCK(&inode->lock);
+    {
+        ctx = __br_stub_get_ongoing_version_ctx(this, inode, NULL);
+        if (ctx != NULL) {
+            if (__br_stub_is_inode_dirty(ctx)) {
+                __br_stub_set_ongoing_version(ctx, version);
+                __br_stub_mark_inode_synced(ctx);
+            }
+            status = 0;
+        }
+    }
+    UNLOCK(&inode->lock);
+
+    return status;
+}
+
+/**
+ * Populate a frame local for a versioning request. References are taken
+ * on @fd and @inode when they are non-NULL; br_stub_cleanup_local() drops
+ * them again.
+ */
+static void
+br_stub_fill_local(br_stub_local_t *local, call_stub_t *stub, fd_t *fd,
+                   inode_t *inode, uuid_t gfid, int versioningtype,
+                   unsigned long memversion)
+{
+    local->fopstub = stub;
+    local->versioningtype = versioningtype;
+    local->u.context.version = memversion;
+
+    if (fd != NULL)
+        local->u.context.fd = fd_ref(fd);
+
+    if (inode != NULL)
+        local->u.context.inode = inode_ref(inode);
+
+    gf_uuid_copy(local->u.context.gfid, gfid);
+}
+
+/**
+ * Reset a frame local: clear its fields and drop the fd/inode references
+ * it holds. A NULL @local is ignored. The local itself is not freed.
+ */
+static void
+br_stub_cleanup_local(br_stub_local_t *local)
+{
+    if (local == NULL)
+        return;
+
+    local->fopstub = NULL;
+    local->versioningtype = 0;
+    local->u.context.version = 0;
+
+    if (local->u.context.fd != NULL) {
+        fd_unref(local->u.context.fd);
+        local->u.context.fd = NULL;
+    }
+
+    if (local->u.context.inode != NULL) {
+        inode_unref(local->u.context.inode);
+        local->u.context.inode = NULL;
+    }
+
+    memset(local->u.context.gfid, '\0', sizeof(uuid_t));
+}
+
+/**
+ * Report whether the inode behind @fd is dirty (*versioning) and whether
+ * it has been modified (*modified). The inode context is handed back
+ * through @ctx when that pointer is non-NULL.
+ *
+ * Returns 0 on success, -1 when no inode context could be obtained.
+ */
+static int
+br_stub_need_versioning(xlator_t *this, fd_t *fd, gf_boolean_t *versioning,
+                        gf_boolean_t *modified, br_stub_inode_ctx_t **ctx)
+{
+    uint64_t ctx_addr = 0;
+    br_stub_inode_ctx_t *ictx = NULL;
+
+    *versioning = _gf_false;
+    *modified = _gf_false;
+
+    /* The inode context used to be set up only in the lookup, create and
+     * mknod callbacks. Since versioning became optional, it can be turned
+     * on while I/O is in flight, in which case a fop such as writev may
+     * arrive without a preceding lookup and find no context. Create a
+     * default one in that case; the same is done wherever applicable.
+     */
+    if (br_stub_get_inode_ctx(this, fd->inode, &ctx_addr) < 0) {
+        if (br_stub_init_inode_versions(this, fd, fd->inode,
+                                        BITROT_DEFAULT_CURRENT_VERSION,
+                                        _gf_true, _gf_false, &ctx_addr)) {
+            gf_smsg(this->name, GF_LOG_ERROR, 0,
+                    BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+                    uuid_utoa(fd->inode->gfid), NULL);
+            return -1;
+        }
+    }
+
+    ictx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+    LOCK(&fd->inode->lock);
+    {
+        if (__br_stub_is_inode_dirty(ictx))
+            *versioning = _gf_true;
+        if (__br_stub_is_inode_modified(ictx))
+            *modified = _gf_true;
+    }
+    UNLOCK(&fd->inode->lock);
+
+    if (ctx != NULL)
+        *ctx = ictx;
+
+    return 0;
+}
+
+/**
+ * Make sure @fd has a stub fd context; when it has none, the fd is added
+ * to the inode context @ctx.
+ *
+ * Returns 0 on success, or the failure code of br_stub_add_fd_to_inode().
+ */
+static int32_t
+br_stub_anon_fd_ctx(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
+{
+    int32_t status = 0;
+
+    if (br_stub_fd_ctx_get(this, fd) != NULL)
+        return 0;
+
+    status = br_stub_add_fd_to_inode(this, fd, ctx);
+    if (status) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_INODE,
+                "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        return status;
+    }
+
+    return 0;
+}
+
+/**
+ * Prepare @frame for a versioning operation: allocate a local, make sure
+ * an anonymous @fd has a stub fd context, and attach the local to the
+ * frame.
+ *
+ * Returns 0 on success, -1 on failure (the local is released again).
+ */
+static int
+br_stub_versioning_prep(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                        br_stub_inode_ctx_t *ctx)
+{
+    br_stub_local_t *local = br_stub_alloc_local(this);
+
+    if (local == NULL) {
+        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY, "gfid=%s",
+                uuid_utoa(fd->inode->gfid), NULL);
+        return -1;
+    }
+
+    if (fd_is_anonymous(fd) && br_stub_anon_fd_ctx(this, fd, ctx) != 0) {
+        br_stub_dealloc_local(local);
+        return -1;
+    }
+
+    frame->local = local;
+
+    return 0;
+}
+
+/**
+ * Flag the inode of the fd stored in @local as modified. A default inode
+ * context is created first when the inode has none.
+ *
+ * Returns 0 on success, -1 when no inode context could be obtained.
+ */
+static int
+br_stub_mark_inode_modified(xlator_t *this, br_stub_local_t *local)
+{
+    fd_t *fd = local->u.context.fd;
+    uint64_t ctx_addr = 0;
+    br_stub_inode_ctx_t *ictx = NULL;
+
+    if (br_stub_get_inode_ctx(this, fd->inode, &ctx_addr) < 0) {
+        if (br_stub_init_inode_versions(this, fd, fd->inode,
+                                        BITROT_DEFAULT_CURRENT_VERSION,
+                                        _gf_true, _gf_false, &ctx_addr))
+            return -1;
+    }
+
+    ictx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+    LOCK(&fd->inode->lock);
+    {
+        __br_stub_set_inode_modified(ictx);
+    }
+    UNLOCK(&fd->inode->lock);
+
+    return 0;
+}
+
+/**
+ * Check an inode against the bad object marker, based on the result of
+ * br_stub_is_bad_object():
+ *   -2 => the object is marked bad: *op_ret/*op_errno are set to -1/EIO
+ *         and -2 is returned.
+ *   -1 => there is no inode context: a default one is created. If that
+ *         fails, *op_ret/*op_errno are set to -1/EINVAL. The result of
+ *         the context creation is returned.
+ *   any other value is returned unchanged.
+ */
+static int
+br_stub_check_bad_object(xlator_t *this, inode_t *inode, int32_t *op_ret,
+                         int32_t *op_errno)
+{
+    int status = br_stub_is_bad_object(this, inode);
+
+    switch (status) {
+        case -2:
+            gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS,
+                    "gfid=%s", uuid_utoa(inode->gfid), NULL);
+            *op_ret = -1;
+            *op_errno = EIO;
+            break;
+
+        case -1:
+            status = br_stub_init_inode_versions(
+                this, NULL, inode, BITROT_DEFAULT_CURRENT_VERSION, _gf_true,
+                _gf_false, NULL);
+            if (status) {
+                gf_smsg(this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+                        uuid_utoa(inode->gfid), NULL);
+                *op_ret = -1;
+                *op_errno = EINVAL;
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    return status;
+}
+
+/**
+ * Callback of the versioning fsetxattr.
+ *
+ * On success the in-memory inode version is updated and the stored fop
+ * stub is resumed. On failure (of the fsetxattr, or of the in-memory
+ * update, which is reported as EINVAL) the stub is unwound with an error
+ * and the frame local is released.
+ */
+int
+br_stub_fd_incversioning_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                             int op_ret, int op_errno, dict_t *xdata)
+{
+    br_stub_local_t *local = (br_stub_local_t *)frame->local;
+
+    if (op_ret >= 0) {
+        op_ret = br_stub_mod_inode_versions(this, local->u.context.fd,
+                                            local->u.context.inode,
+                                            local->u.context.version);
+        if (op_ret < 0)
+            op_errno = EINVAL;
+    }
+
+    if (op_ret >= 0) {
+        call_resume(local->fopstub);
+        return 0;
+    }
+
+    frame->local = NULL;
+    call_unwind_error(local->fopstub, -1, op_errno);
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+
+    return 0;
+}
+
+/**
+ * Initial object versioning
+ *
+ * Version persists two (2) extended attributes as explained below:
+ *   1. Current (ongoing) version: This is incremented on a writev()
+ *      or truncate() and is the running version for an object.
+ *   2. Signing version: This is the version against which an object
+ *      was signed (checksummed).
+ *
+ * During initial versioning, both ongoing and signing versions are
+ * set to one and zero respectively. A write() call increments the
+ * ongoing version as an indication of modification to the object.
+ * Additionally this needs to be persisted on disk and needs to be
+ * durable: fsync().. :-/
+ * As an optimization only the first write() synchronizes the ongoing
+ * version to disk, subsequent write()s before the *last* release()
+ * are no-op's.
+ *
+ * create(), just like lookup() initializes the object versions to
+ * the default. As an optimization this is not a durable operation:
+ * in case of a crash, hard reboot etc.. absence of versioning xattrs
+ * is ignored in scrubber along with the one time crawler explicitly
+ * triggering signing for such objects.
+ *
+ * c.f. br_stub_writev() / br_stub_truncate()
+ */
+
+/**
+ * Perform full or incremental versioning on the inode pointed to by an
+ * fd by winding an fsetxattr carrying @dict to the first child.
+ *
+ * The request is tagged as an internal fop and, when @durable is
+ * non-zero, additionally carries GLUSTERFS_DURABLE_OP. The frame local is
+ * filled with @stub, the fd, its inode and gfid, @versioningtype and
+ * @memversion before winding; @callback receives the reply.
+ *
+ * Returns 0 once the fop has been wound, non-zero when the request xdata
+ * could not be prepared (nothing is wound in that case).
+ */
+
+int
+br_stub_fd_versioning(xlator_t *this, call_frame_t *frame, call_stub_t *stub,
+                      dict_t *dict, fd_t *fd, br_stub_version_cbk *callback,
+                      unsigned long memversion, int versioningtype, int durable)
+{
+    int32_t status = -1;
+    dict_t *xdata = dict_new();
+
+    if (xdata == NULL)
+        return -1;
+
+    status = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+    if (status == 0 && durable)
+        status = dict_set_int32(xdata, GLUSTERFS_DURABLE_OP, 0);
+
+    if (status == 0) {
+        br_stub_fill_local(frame->local, stub, fd, fd->inode, fd->inode->gfid,
+                           versioningtype, memversion);
+
+        STACK_WIND(frame, callback, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->fsetxattr, fd, dict, 0, xdata);
+    }
+
+    dict_unref(xdata);
+
+    return status;
+}
+
+/**
+ * Kick off incremental versioning for @fd: build a version xattr carrying
+ * the writeback version of @ctx and send it through
+ * br_stub_fd_versioning() with br_stub_fd_incversioning_cbk() as callback.
+ *
+ * Returns 0 when the request was wound. On any failure @stub is unwound
+ * with ENOMEM, the frame local (if any) is detached and released, and a
+ * non-zero value is returned.
+ */
+static int
+br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame,
+                              call_stub_t *stub, fd_t *fd,
+                              br_stub_inode_ctx_t *ctx)
+{
+    int32_t ret = -1;
+    int op_errno = ENOMEM;
+    dict_t *request = NULL;
+    br_version_t *obuf = NULL;
+    unsigned long wb_version = 0;
+    br_stub_local_t *local = frame->local;
+
+    wb_version = __br_stub_writeback_version(ctx);
+
+    request = dict_new();
+    if (request == NULL)
+        goto out;
+
+    ret = br_stub_alloc_versions(&obuf, NULL, 0);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+                "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        goto out;
+    }
+
+    ret = br_stub_prepare_version_request(this, request, obuf, wb_version);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL,
+                "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        br_stub_dealloc_versions(obuf);
+        goto out;
+    }
+
+    ret = br_stub_fd_versioning(this, frame, stub, request, fd,
+                                br_stub_fd_incversioning_cbk, wb_version,
+                                BR_STUB_INCREMENTAL_VERSIONING,
+                                !WRITEBACK_DURABLE);
+out:
+    if (request != NULL)
+        dict_unref(request);
+
+    if (ret == 0)
+        return 0;
+
+    /* failure: detach the local before unwinding, release it afterwards */
+    if (local != NULL)
+        frame->local = NULL;
+
+    call_unwind_error(stub, -1, op_errno);
+
+    if (local != NULL) {
+        br_stub_cleanup_local(local);
+        br_stub_dealloc_local(local);
+    }
+
+    return ret;
+}
+
+/** {{{ */
+
+/* fsetxattr() */
+
+/**
+ * Resume function of a queued signing stub: wind the fsetxattr to the
+ * first child with default_fsetxattr_cbk() as callback, then drop one
+ * reference on @xdata. Always returns 0.
+ */
+int32_t
+br_stub_perform_objsign(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                        dict_t *dict, int flags, dict_t *xdata)
+{
+    xlator_t *child = FIRST_CHILD(this);
+
+    STACK_WIND(frame, default_fsetxattr_cbk, child, child->fops->fsetxattr, fd,
+               dict, flags, xdata);
+
+    dict_unref(xdata);
+
+    return 0;
+}
+
+/**
+ * Signer thread body.
+ *
+ * Loops forever: waits until the signing queue (priv->squeue) is
+ * non-empty, detaches its first entry under priv->lock, then resumes the
+ * entry's call stub outside the lock and frees the entry.
+ *
+ * @arg: the translator instance (xlator_t *).
+ */
+void *
+br_stub_signth(void *arg)
+{
+    xlator_t *this = arg;
+    br_stub_private_t *priv = this->private;
+    struct br_stub_signentry *entry = NULL;
+
+    THIS = this;
+    for (;;) {
+        /*
+         * Disabling the bit-rot feature makes reconfigure clean this
+         * thread up through gf_thread_cleanup_xint (pthread_cancel
+         * followed by pthread_join). If the thread held &priv->lock
+         * when it got cancelled, the mutex would stay locked forever:
+         * a signer thread spawned when bit-rot is enabled again could
+         * never take it, and br_stub_handle_object_signature, which
+         * also takes &priv->lock whenever a file has to be signed,
+         * would block as well.
+         *
+         * Hence register a cleanup handler that unlocks the mutex,
+         * using pthread_cleanup_push and pthread_cleanup_pop.
+         */
+        pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
+        pthread_mutex_lock(&priv->lock);
+        {
+            while (list_empty(&priv->squeue))
+                pthread_cond_wait(&priv->cond, &priv->lock);
+
+            entry = list_first_entry(&priv->squeue, struct br_stub_signentry,
+                                     list);
+            list_del_init(&entry->list);
+        }
+        pthread_mutex_unlock(&priv->lock);
+        pthread_cleanup_pop(0);
+
+        call_resume(entry->stub);
+
+        GF_FREE(entry);
+    }
+
+    return NULL;
+}
+
+/**
+ * Tell whether @dict carries any of the bit-rot xattr keys checked below.
+ * Returns _gf_true when at least one of them is present.
+ */
+static gf_boolean_t
+br_stub_internal_xattr(dict_t *dict)
+{
+    if (dict_get(dict, GLUSTERFS_SET_OBJECT_SIGNATURE))
+        return _gf_true;
+    if (dict_get(dict, GLUSTERFS_GET_OBJECT_SIGNATURE))
+        return _gf_true;
+    if (dict_get(dict, BR_REOPEN_SIGN_HINT_KEY))
+        return _gf_true;
+    if (dict_get(dict, BITROT_OBJECT_BAD_KEY))
+        return _gf_true;
+    if (dict_get(dict, BITROT_SIGNING_VERSION_KEY))
+        return _gf_true;
+    if (dict_get(dict, BITROT_CURRENT_VERSION_KEY))
+        return _gf_true;
+
+    return _gf_false;
+}
+
+/**
+ * Ordering predicate for the signing queue (passed to list_add_order()).
+ * Returns 1 when the version of @elem1 is greater than that of @elem2,
+ * 0 otherwise.
+ */
+int
+orderq(struct list_head *elem1, struct list_head *elem2)
+{
+    struct br_stub_signentry *lhs = list_entry(elem1, struct br_stub_signentry,
+                                               list);
+    struct br_stub_signentry *rhs = list_entry(elem2, struct br_stub_signentry,
+                                               list);
+
+    return (lhs->v > rhs->v);
+}
+
+/**
+ * Compare the version being signed (@sbuf->signedversion) with the
+ * inode's current version.
+ *
+ *  - signing version ahead of the current version: invalid, a warning is
+ *    logged and -1 is returned.
+ *  - signing version behind the current version: *fakesuccess is set to 1
+ *    and 0 is returned.
+ *  - both equal: 0 is returned.
+ *
+ * When the inode has no context, BITROT_SIGNING_VERSION_KEY is removed
+ * from @dict and the lookup failure is returned. -1 is returned when any
+ * of @this, @inode, @sbuf or @dict is NULL.
+ */
+static int
+br_stub_compare_sign_version(xlator_t *this, inode_t *inode,
+                             br_signature_t *sbuf, dict_t *dict,
+                             int *fakesuccess)
+{
+    int32_t ret = -1;
+    uint64_t tmp_ctx = 0;
+    unsigned long currentversion = 0;
+    br_stub_inode_ctx_t *ctx = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, inode, out);
+    GF_VALIDATE_OR_GOTO(this->name, sbuf, out);
+    GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+    ret = br_stub_get_inode_ctx(this, inode, &tmp_ctx);
+    if (ret) {
+        dict_del(dict, BITROT_SIGNING_VERSION_KEY);
+        goto out;
+    }
+
+    ctx = (br_stub_inode_ctx_t *)(long)tmp_ctx;
+
+    /* take one snapshot under the lock, so that the comparison and the
+     * log messages below all refer to the same value
+     */
+    LOCK(&inode->lock);
+    {
+        currentversion = ctx->currentversion;
+    }
+    UNLOCK(&inode->lock);
+
+    if (currentversion < sbuf->signedversion) {
+        ret = -1;
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_VERSION_ERROR,
+                "Signing-ver=%lu", sbuf->signedversion, "current-ver=%lu",
+                currentversion, NULL);
+    } else if (currentversion > sbuf->signedversion) {
+        /* arguments follow the order of the message: signing, current */
+        gf_msg_debug(this->name, 0,
+                     "\"Signing version\" "
+                     "(%lu) lower than \"Current version \" "
+                     "(%lu)",
+                     sbuf->signedversion, currentversion);
+        *fakesuccess = 1;
+    }
+
+out:
+    return ret;
+}
+
+/**
+ * Turn the incoming signature @sign into a signing xattr stored in @dict
+ * and check its version against the inode's current version.
+ *
+ * Returns -1 when the signature type is invalid or when allocating or
+ * storing the xattr fails; otherwise the result of
+ * br_stub_compare_sign_version(), which may also set *fakesuccess.
+ */
+static int
+br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode,
+                          br_isignature_t *sign, int *fakesuccess)
+{
+    size_t signaturelen = 0;
+    br_signature_t *sbuf = NULL;
+
+    if (!br_is_signature_type_valid(sign->signaturetype))
+        return -1;
+
+    signaturelen = sign->signaturelen;
+
+    if (br_stub_alloc_versions(NULL, &sbuf, signaturelen)) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+                "gfid=%s", uuid_utoa(inode->gfid), NULL);
+        return -1;
+    }
+
+    if (br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen)) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+                "gfid=%s", uuid_utoa(inode->gfid), NULL);
+        br_stub_dealloc_versions(sbuf);
+        return -1;
+    }
+
+    /* sbuf now belongs to dict: it is freed together with the dict data */
+    return br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess);
+}
+
+/**
+ * Handle a "set object signature" fsetxattr.
+ *
+ * Only requests whose pid is GF_CLIENT_PID_BITD are accepted. The
+ * signature is converted into the signing xattr; if the version check
+ * asks for a fake success the fop is unwound with success right away.
+ * Otherwise an fsetxattr stub marked GLUSTERFS_DURABLE_OP is queued, in
+ * signed version order, for the signer thread.
+ *
+ * Every failure unwinds the fop with -1/EINVAL.
+ */
+static void
+br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                                dict_t *dict, br_isignature_t *sign,
+                                dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    int fakesuccess = 0;
+    br_stub_private_t *priv = this->private;
+    struct br_stub_signentry *entry = NULL;
+
+    if (frame->root->pid != GF_CLIENT_PID_BITD) {
+        gf_smsg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
+                "PID=%d", frame->root->pid, NULL);
+        goto unwind;
+    }
+
+    if (br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess)) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+                "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        goto unwind;
+    }
+
+    if (fakesuccess) {
+        op_ret = 0;
+        op_errno = 0;
+        goto unwind;
+    }
+
+    dict_del(dict, GLUSTERFS_SET_OBJECT_SIGNATURE);
+
+    /* hold our own reference on xdata, creating the dict when absent */
+    if (xdata) {
+        dict_ref(xdata);
+    } else {
+        xdata = dict_new();
+        if (!xdata)
+            goto unwind;
+    }
+
+    if (dict_set_int32(xdata, GLUSTERFS_DURABLE_OP, 0))
+        goto drop_xdata;
+
+    /* prepare dispatch stub to order object signing */
+    entry = GF_CALLOC(1, sizeof(*entry), gf_br_stub_mt_sigstub_t);
+    if (!entry)
+        goto drop_xdata;
+
+    INIT_LIST_HEAD(&entry->list);
+    entry->v = ntohl(sign->signedversion);
+    entry->stub = fop_fsetxattr_stub(frame, br_stub_perform_objsign, fd, dict,
+                                     0, xdata);
+    if (!entry->stub)
+        goto free_entry;
+
+    pthread_mutex_lock(&priv->lock);
+    {
+        list_add_order(&entry->list, &priv->squeue, orderq);
+        pthread_cond_signal(&priv->cond);
+    }
+    pthread_mutex_unlock(&priv->lock);
+
+    return;
+
+free_entry:
+    GF_FREE(entry);
+drop_xdata:
+    dict_unref(xdata);
+unwind:
+    STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+}
+
+/**
+ * fsetxattr callback used as the resume point of the call stub created in
+ * br_stub_handle_object_reopen().
+ *
+ * Detaches the local from the frame, marks the inode as modified and unwinds
+ * the fsetxattr. If marking the inode fails the result is turned into
+ * -1/EINVAL. The local is released only after the unwind.
+ *
+ * Always returns 0.
+ */
+int32_t
+br_stub_fsetxattr_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    br_stub_local_t *local = frame->local;
+
+    /* the frame must not hold the local across the unwind */
+    frame->local = NULL;
+
+    if (br_stub_mark_inode_modified(this, local) != 0) {
+        op_errno = EINVAL;
+        op_ret = -1;
+    }
+
+    STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
+
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+
+    return 0;
+}
+
+/**
+ * Handles object reopens. Object reopens can be of 3 types. 2 are from
+ * oneshot crawler and 1 from the regular signer.
+ * ONESHOT CRAWLER:
+ * For those objects which were created before bitrot was enabled. oneshot
+ * crawler crawls the namespace and signs all the objects. It has to do
+ * the versioning before making bit-rot-stub send a sign notification.
+ * So it sends fsetxattr with BR_OBJECT_REOPEN as the value. And bit-rot-stub
+ * upon getting BR_OBJECT_REOPEN value checks if the version has to be
+ * increased or not. By default the version will be increased. But if the
+ * object is modified before BR_OBJECT_REOPEN from oneshot crawler, then
+ * versioning need not be done. In that case simply a success is returned.
+ * SIGNER:
+ * Signer waits for 2 minutes upon getting the notification from bit-rot-stub
+ * and then it sends a dummy write (in reality a fsetxattr) call, to change
+ * the state of the inode from REOPEN_WAIT to SIGN_QUICK. The funny part here
+ * is though the inode's state is REOPEN_WAIT, the call sent by signer is
+ * BR_OBJECT_RESIGN. Once the state is changed to SIGN_QUICK, then yet another
+ * notification is sent upon release (RESIGN would have happened via fsetxattr,
+ * so a fd is needed) and the object is signed truly this time.
+ * There is a challenge in the above RESIGN method by signer. After sending
+ * the 1st notification, the inode could be forgotten before RESIGN request
+ * is received. In that case, the inode's context (the newly looked up inode)
+ * would not indicate the inode as being modified (it would be in the default
+ * state) and because of this, a SIGN_QUICK notification to truly sign the
+ * object would not be sent. So, this is how it's handled.
+ * if (request == RESIGN) {
+ *    if (inode->sign_info == NORMAL) {
+ *        mark_inode_non_dirty;
+ *        mark_inode_modified;
+ *    }
+ *    GOBACK (means unwind without doing versioning)
+ * }
+ */
+static void
+br_stub_handle_object_reopen(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                             uint32_t val)
+{
+    int32_t ret = -1;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    call_stub_t *stub = NULL;
+    gf_boolean_t inc_version = _gf_false;
+    gf_boolean_t modified = _gf_false;
+    br_stub_inode_ctx_t *ctx = NULL;
+    br_stub_local_t *local = NULL;
+    /* default: unwind without versioning; only a REOPEN can clear this */
+    gf_boolean_t goback = _gf_true;
+
+    ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx);
+    if (ret)
+        goto unwind;
+
+    LOCK(&fd->inode->lock);
+    {
+        /* REOPEN proceeds to versioning only if a version bump is needed */
+        if ((val == BR_OBJECT_REOPEN) && inc_version)
+            goback = _gf_false;
+        /*
+         * RESIGN on an inode whose sign state is NORMAL: mark it synced
+         * and modified (see the function comment for the rationale).
+         */
+        if (val == BR_OBJECT_RESIGN && ctx->info_sign == BR_SIGN_NORMAL) {
+            __br_stub_mark_inode_synced(ctx);
+            __br_stub_set_inode_modified(ctx);
+        }
+        /* return value intentionally ignored */
+        (void)__br_stub_inode_sign_state(ctx, GF_FOP_FSETXATTR, fd);
+    }
+    UNLOCK(&fd->inode->lock);
+
+    /* nothing to version: report success straight away */
+    if (goback) {
+        op_ret = op_errno = 0;
+        goto unwind;
+    }
+
+    ret = br_stub_versioning_prep(frame, this, fd, ctx);
+    if (ret)
+        goto unwind;
+    /* picked up here so that cleanup_local can release it */
+    local = frame->local;
+
+    /* the stub resumes in br_stub_fsetxattr_resume, which unwinds the fop */
+    stub = fop_fsetxattr_cbk_stub(frame, br_stub_fsetxattr_resume, 0, 0, NULL);
+    if (!stub) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+                "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        goto cleanup_local;
+    }
+
+    /* frame and stub are handed over; no unwind from here on */
+    (void)br_stub_perform_incversioning(this, frame, stub, fd, ctx);
+    return;
+
+cleanup_local:
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+
+unwind:
+    /* detach the local from the frame before unwinding */
+    frame->local = NULL;
+    STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+}
+
+/**
+ * This function only handles bad file identification. Instead of checking in
+ * fops like open, readv, writev whether the object is bad or not by doing
+ * getxattr calls, better to catch them when scrubber marks it as bad.
+ * So this callback is called only when the fsetxattr is sent by the scrubber
+ * to mark the object as bad.
+ */
+int
+br_stub_fsetxattr_bad_object_cbk(call_frame_t *frame, void *cookie,
+                                 xlator_t *this, int32_t op_ret,
+                                 int32_t op_errno, dict_t *xdata)
+{
+    br_stub_local_t *local = NULL;
+    int32_t ret = -1;
+
+    /* detach the local so it can be released after the unwind */
+    local = frame->local;
+    frame->local = NULL;
+
+    /* the fsetxattr itself failed: nothing to mark */
+    if (op_ret < 0)
+        goto unwind;
+
+    /*
+     * What to do if marking the object as bad fails? (i.e. in memory
+     * marking within the inode context. If we are here means fsetxattr
+     * fop has succeeded on disk and the bad object xattr has been set).
+     * We can return failure to scrubber, but there is nothing the scrubber
+     * can do with it (it might assume that the on disk setxattr itself has
+     * failed). The main purpose of this operation is to help identify the
+     * bad object by checking the inode context itself (thus avoiding the
+     * necessity of doing a getxattr fop on the disk).
+     *
+     * So as of now, success itself is being returned even though inode
+     * context set operation fails.
+     * In future if there is any change in the policy which can handle this,
+     * then appropriate response should be sent (i.e. success or error).
+     */
+    ret = br_stub_mark_object_bad(this, local->u.context.inode);
+    if (ret)
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL,
+                "gfid=%s", uuid_utoa(local->u.context.inode->gfid), NULL);
+
+    /*
+     * NOTE(review): the result of br_stub_add() is stored but never
+     * examined, so a failure here is silent -- confirm this is intended.
+     */
+    ret = br_stub_add(this, local->u.context.inode->gfid);
+
+unwind:
+    /* op_ret/op_errno from the child are passed up unchanged */
+    STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+    return 0;
+}
+
+/**
+ * Handle an fsetxattr that carries BITROT_OBJECT_BAD_KEY.
+ *
+ * Only frames whose pid is GF_CLIENT_PID_SCRUB may set this key; any other
+ * caller is unwound with -1/EINVAL. For the scrubber a local describing the
+ * fd and its inode is attached to the frame and the fsetxattr is wound to
+ * the first child with br_stub_fsetxattr_bad_object_cbk as callback. If the
+ * local cannot be allocated the fop is unwound with -1/ENOMEM.
+ *
+ * Always returns 0.
+ */
+static int32_t
+br_stub_handle_bad_object_key(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                              dict_t *dict, int flags, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    br_stub_local_t *local = NULL;
+
+    if (frame->root->pid == GF_CLIENT_PID_SCRUB) {
+        local = br_stub_alloc_local(this);
+        if (local) {
+            br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+                               BR_STUB_NO_VERSIONING, 0);
+            frame->local = local;
+
+            STACK_WIND(frame, br_stub_fsetxattr_bad_object_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+            return 0;
+        }
+
+        /* allocation failure is reported as ENOMEM rather than EINVAL */
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+                "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        op_errno = ENOMEM;
+    } else {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NON_SCRUB_BAD_OBJ_MARK,
+                "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+    }
+
+    STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+    return 0;
+}
+
+/**
+ * As of now, versioning is done by the stub (though as a setxattr
+ * operation) as part of inode modification operations such as writev,
+ * truncate, ftruncate. And signing is done by BitD by a fsetxattr call.
+ * So any kind of setxattr coming on the versioning and the signing xattr is
+ * not allowed (i.e. BITROT_CURRENT_VERSION_KEY and BITROT_SIGNING_VERSION_KEY).
+ * In future if BitD/scrubber are allowed to change the versioning
+ * xattrs (though I cannot see a reason for it as of now), then the below
+ * function can be modified to block setxattr on version for only applications.
+ *
+ * NOTE: BitD sends sign request on GLUSTERFS_SET_OBJECT_SIGNATURE key.
+ *       BITROT_SIGNING_VERSION_KEY is the xattr used to save the signature.
+ *
+ */
+static int32_t
+br_stub_handle_internal_xattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                              char *key)
+{
+    /* the request is always refused: log it, then fail with EINVAL */
+    int32_t op_errno = EINVAL;
+    int32_t op_ret = -1;
+
+    gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
+            "setxattr key=%s", key, "inode-gfid=%s", uuid_utoa(fd->inode->gfid),
+            NULL);
+
+    STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+
+    return 0;
+}
+
+/**
+ * Log the contents of @dict as a single string.
+ *
+ * A scratch buffer of BR_STUB_DUMP_STR_SIZE bytes is allocated, filled via
+ * dict_dump_to_str() using the "(%s:%s)" format, logged, and freed again.
+ * When the buffer cannot be allocated *@op_errno is set to ENOMEM and
+ * nothing is logged; otherwise *@op_errno is left untouched.
+ */
+static void
+br_stub_dump_xattr(xlator_t *this, dict_t *dict, int *op_errno)
+{
+    char *buf = GF_CALLOC(1, BR_STUB_DUMP_STR_SIZE, gf_br_stub_mt_misc);
+
+    if (buf == NULL) {
+        *op_errno = ENOMEM;
+        return;
+    }
+
+    dict_dump_to_str(dict, buf, BR_STUB_DUMP_STR_SIZE, "(%s:%s)");
+    gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
+            "fsetxattr dump=%s", buf, NULL);
+
+    GF_FREE(buf);
+}
+
+/**
+ * fsetxattr entry point of the bit-rot stub.
+ *
+ * Callers other than BitD and the scrubber (judged by frame->root->pid) that
+ * try to set an xattr recognised by br_stub_internal_xattr() are refused
+ * with -1 (EINVAL, or ENOMEM if the dump buffer cannot be allocated).
+ * When versioning is off, or the inode is not a regular file, the call is
+ * wound down untouched. Otherwise the dict is inspected, in this order, for:
+ * a signing request, the signing/version/get-signature keys (always
+ * refused), a reopen hint, and the bad-object key. The first match decides
+ * which handler takes over the frame; with no match the call is wound down.
+ *
+ * Always returns 0; results are delivered through the unwind.
+ */
+int
+br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+                  int flags, dict_t *xdata)
+{
+    int32_t ret = 0;
+    uint32_t val = 0;
+    br_isignature_t *sign = NULL;
+    br_stub_private_t *priv = NULL;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+
+    priv = this->private;
+
+    /* internal xattrs are off limits to everyone but BitD and scrubber */
+    if ((frame->root->pid != GF_CLIENT_PID_BITD &&
+         frame->root->pid != GF_CLIENT_PID_SCRUB) &&
+        br_stub_internal_xattr(dict)) {
+        br_stub_dump_xattr(this, dict, &op_errno);
+        goto unwind;
+    }
+
+    if (!priv->do_versioning)
+        goto wind;
+
+    /* everything below applies to regular files only */
+    if (!IA_ISREG(fd->inode->ia_type))
+        goto wind;
+
+    /* object signature request */
+    ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
+    if (!ret) {
+        gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
+                     uuid_utoa(fd->inode->gfid));
+        br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
+        goto done;
+    }
+
+    /* signing xattr */
+    if (dict_get(dict, BITROT_SIGNING_VERSION_KEY)) {
+        br_stub_handle_internal_xattr(frame, this, fd,
+                                      BITROT_SIGNING_VERSION_KEY);
+        goto done;
+    }
+
+    /* version xattr */
+    if (dict_get(dict, BITROT_CURRENT_VERSION_KEY)) {
+        br_stub_handle_internal_xattr(frame, this, fd,
+                                      BITROT_CURRENT_VERSION_KEY);
+        goto done;
+    }
+
+    /* the "get signature" key is likewise refused on the set path */
+    if (dict_get(dict, GLUSTERFS_GET_OBJECT_SIGNATURE)) {
+        br_stub_handle_internal_xattr(frame, this, fd,
+                                      GLUSTERFS_GET_OBJECT_SIGNATURE);
+        goto done;
+    }
+
+    /* object reopen request */
+    ret = dict_get_uint32(dict, BR_REOPEN_SIGN_HINT_KEY, &val);
+    if (!ret) {
+        br_stub_handle_object_reopen(frame, this, fd, val);
+        goto done;
+    }
+
+    /* handle bad object */
+    if (dict_get(dict, BITROT_OBJECT_BAD_KEY)) {
+        br_stub_handle_bad_object_key(frame, this, fd, dict, flags, xdata);
+        goto done;
+    }
+
+wind:
+    STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+    return 0;
+
+unwind:
+    STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+
+    /* handlers that took over the frame land here; nothing left to do */
+done:
+    return 0;
+}
+
+/**
+ * Currently BitD and scrubber are doing fsetxattr to either sign the object
+ * or to mark it as bad. Hence setxattr on any of those keys is denied directly
+ * without checking from where the fop is coming.
+ * Later, if BitD or Scrubber does setxattr of those keys, then appropriate
+ * check has to be added below.
+ */
+int
+br_stub_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+                 int flags, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+
+    /* anything that is not an internal xattr passes straight through */
+    if (!br_stub_internal_xattr(dict)) {
+        STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                    loc, dict, flags, xdata);
+        return 0;
+    }
+
+    /* log the offending dict; op_errno becomes ENOMEM if that fails */
+    br_stub_dump_xattr(this, dict, &op_errno);
+
+    STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL);
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* {f}removexattr() */
+
+/**
+ * removexattr entry point: refuses removal of the bit-rot bad-object,
+ * signing-version and current-version xattrs (-1/EINVAL, with a warning
+ * logged); every other name is wound to the first child unchanged.
+ *
+ * Always returns 0.
+ */
+int32_t
+br_stub_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    const char *name, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    int protected_key = (strcmp(BITROT_OBJECT_BAD_KEY, name) == 0) ||
+                        (strcmp(BITROT_SIGNING_VERSION_KEY, name) == 0) ||
+                        (strcmp(BITROT_CURRENT_VERSION_KEY, name) == 0);
+
+    if (!protected_key) {
+        STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+        return 0;
+    }
+
+    gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
+            "name=%s", name, "file-path=%s", loc->path, NULL);
+
+    STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, NULL);
+    return 0;
+}
+
+/**
+ * fremovexattr entry point: refuses removal of the bit-rot bad-object,
+ * signing-version and current-version xattrs (-1/EINVAL, with a warning
+ * logged); every other name is wound to the first child unchanged.
+ *
+ * Always returns 0.
+ */
+int32_t
+br_stub_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                     const char *name, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    int protected_key = (strcmp(BITROT_OBJECT_BAD_KEY, name) == 0) ||
+                        (strcmp(BITROT_SIGNING_VERSION_KEY, name) == 0) ||
+                        (strcmp(BITROT_CURRENT_VERSION_KEY, name) == 0);
+
+    if (!protected_key) {
+        STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+        return 0;
+    }
+
+    gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
+            "name=%s", name, "inode-gfid=%s", uuid_utoa(fd->inode->gfid),
+            NULL);
+
+    STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, NULL);
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* {f}getxattr() */
+
+/**
+ * Callback used when getxattr/fgetxattr is called without a name (listing).
+ * On success br_stub_remove_vxattrs() is applied to the returned dict before
+ * it is passed up; a failed reply is unwound as it came.
+ */
+int
+br_stub_listxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                      int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+    if (op_ret >= 0)
+        br_stub_remove_vxattrs(xattr, _gf_true);
+
+    STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata);
+    return 0;
+}
+
+/**
+ * ONE SHOT CRAWLER from BitD signs the objects that it encounters while
+ * crawling, if the object is identified as stale by the stub. Stub follows
+ * the below logic to mark an object as stale or not.
+ * If the ongoing version and the signed_version match, then the object is not
+ * stale. Just return. Otherwise if they do not match, then it means one
+ * of the below things.
+ * 1) If the inode does not need write back of the version and the sign state is
+ *    NORMAL, then some active i/o is going on the object. So skip it.
+ *    A notification will be sent to trigger the sign once the release is
+ *    received on the object.
+ * 2) If inode does not need writeback of the version and the sign state is
+ *    either reopen wait or quick sign, then it means:
+ *    A) BitD restarted and it is not sure whether the object it encountered
+ *       while crawling is in its timer wheel or not. Since there is no way to
+ *       scan the timer wheel as of now, ONE SHOT CRAWLER just goes ahead and
+ *       signs the object. Since the inode does not need writeback, version will
+ *       not be incremented and directly the object will be signed.
+ * 3) If the inode needs writeback, then it means the inode was forgotten after
+ *    the versioning and it has to be signed now.
+ *
+ * This is the algorithm followed:
+ * if (ongoing_version == signed_version); then
+ *     object_is_not_stale;
+ *     return;
+ * else; then
+ *      if (!inode_needs_writeback && inode_sign_state != NORMAL); then
+ *            object_is_stale;
+ *      if (inode_needs_writeback); then
+ *            object_is_stale;
+ *
+ * For SCRUBBER, no need to check for the sign state and inode writeback.
+ * If the ondisk ongoingversion and the ondisk signed version does not match,
+ * then treat the object as stale.
+ */
+char
+br_stub_is_object_stale(xlator_t *this, call_frame_t *frame, inode_t *inode,
+                        br_version_t *obuf, br_signature_t *sbuf)
+{
+    br_stub_inode_ctx_t *ictx = NULL;
+    uint64_t addr = 0;
+    char is_stale = 0;
+
+    /* ongoing and signed versions agree: not stale */
+    if (obuf->ongoingversion == sbuf->signedversion)
+        return 0;
+
+    /* for the scrubber a version mismatch alone means stale */
+    if (frame->root->pid == GF_CLIENT_PID_SCRUB)
+        return 1;
+
+    if (br_stub_get_inode_ctx(this, inode, &addr)) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+                "gfid=%s", uuid_utoa(inode->gfid), NULL);
+        /* without a context the object is reported as not stale */
+        return 0;
+    }
+
+    ictx = (br_stub_inode_ctx_t *)(long)addr;
+
+    LOCK(&inode->lock);
+    {
+        /*
+         * Stale when the inode is dirty, or when it is clean but its sign
+         * state is anything other than NORMAL.
+         */
+        if (__br_stub_is_inode_dirty(ictx) ||
+            ictx->info_sign != BR_SIGN_NORMAL)
+            is_stale = 1;
+    }
+    UNLOCK(&inode->lock);
+
+    return is_stale;
+}
+
+/**
+ * Callback for the getxattr()/fgetxattr() calls wound by br_stub_getxattr()
+ * and br_stub_fgetxattr().
+ *
+ * A failed reply, or a reply to a request that was not tagged with
+ * BR_STUB_REQUEST_COOKIE, is unwound as received. For a tagged request
+ * (object signature query) the on-disk version and signature xattrs found in
+ * @xattr are turned into a br_isignature_out_t which is stored in @xattr
+ * under GLUSTERFS_GET_OBJECT_SIGNATURE; op_ret is then the total length of
+ * that reply. Whenever the delkeys label is reached the versioning xattrs
+ * are stripped from @xattr before unwinding.
+ *
+ * Failures of a signature query are reported with op_ret -1 and:
+ *   EIO     - the object is marked bad
+ *   EINVAL  - invalid xattr state, missing/short size key, dict_set failure,
+ *             or no local attached to the frame
+ *   ENODATA - version/signature xattrs missing or object unsigned
+ *   ENOMEM  - reply buffer could not be allocated
+ *
+ * Always returns 0.
+ */
+int
+br_stub_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+    int32_t ret = 0;
+    uint32_t xattrsize = 0;
+    size_t totallen = 0;
+    size_t signaturelen = 0;
+    br_stub_private_t *priv = NULL;
+    br_version_t *obuf = NULL;
+    br_signature_t *sbuf = NULL;
+    br_isignature_out_t *sign = NULL;
+    br_vxattr_status_t status;
+    br_stub_local_t *local = NULL;
+    inode_t *inode = NULL;
+    gf_boolean_t bad_object = _gf_false;
+    gf_boolean_t ver_enabled = _gf_false;
+
+    BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+    priv = this->private;
+
+    /*
+     * NOTE(review): on the two early exits below `local` is still NULL and
+     * frame->local has not been taken over by this function -- confirm that
+     * a local attached by the caller is released elsewhere on these paths.
+     */
+    if (op_ret < 0)
+        goto unwind;
+    BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), delkeys);
+
+    /* only signature queries need the processing below */
+    if (cookie != (void *)BR_STUB_REQUEST_COOKIE)
+        goto unwind;
+
+    local = frame->local;
+    frame->local = NULL;
+    if (!local) {
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+    inode = local->u.context.inode;
+
+    /* assume failure; op_errno is refined before every check */
+    op_ret = -1;
+    status = br_version_xattr_state(xattr, &obuf, &sbuf, &bad_object);
+
+    op_errno = EIO;
+    if (bad_object)
+        goto delkeys;
+
+    op_errno = EINVAL;
+    if (status == BR_VXATTR_STATUS_INVALID)
+        goto delkeys;
+
+    op_errno = ENODATA;
+    if ((status == BR_VXATTR_STATUS_MISSING) ||
+        (status == BR_VXATTR_STATUS_UNSIGNED))
+        goto delkeys;
+
+    /**
+     * okay.. we have enough information to satisfy the request,
+     * namely: version and signing extended attribute. what's
+     * pending is the signature length -- that's figured out
+     * indirectly via the size of the _whole_ xattr and the
+     * on-disk signing xattr header size.
+     */
+    op_errno = EINVAL;
+    /*
+     * Read into a real uint32_t: storing through a uint32_t pointer that
+     * aliases a size_t sets only part of the object (the wrong part on
+     * big-endian LP64) and violates the aliasing rules.
+     */
+    ret = dict_get_uint32(xattr, BITROT_SIGNING_XATTR_SIZE_KEY, &xattrsize);
+    if (ret)
+        goto delkeys;
+
+    /*
+     * A stored size smaller than the header cannot describe a signature;
+     * reject it rather than letting the unsigned subtraction wrap around.
+     */
+    if (xattrsize < sizeof(br_signature_t))
+        goto delkeys;
+
+    signaturelen = xattrsize - sizeof(br_signature_t);
+    totallen = sizeof(br_isignature_out_t) + signaturelen;
+
+    op_errno = ENOMEM;
+    sign = GF_CALLOC(1, totallen, gf_br_stub_mt_signature_t);
+    if (!sign)
+        goto delkeys;
+
+    sign->time[0] = obuf->timebuf[0];
+    sign->time[1] = obuf->timebuf[1];
+
+    /* Object's dirty state & current signed version */
+    sign->version = sbuf->signedversion;
+    sign->stale = br_stub_is_object_stale(this, frame, inode, obuf, sbuf);
+
+    /* Object's signature */
+    sign->signaturelen = signaturelen;
+    sign->signaturetype = sbuf->signaturetype;
+    (void)memcpy(sign->signature, sbuf->signature, signaturelen);
+
+    op_errno = EINVAL;
+    ret = dict_set_bin(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void *)sign,
+                       totallen);
+    if (ret < 0) {
+        /* the dict did not take the buffer, so it is freed here */
+        GF_FREE(sign);
+        goto delkeys;
+    }
+    op_errno = 0;
+    op_ret = totallen;
+
+delkeys:
+    br_stub_remove_vxattrs(xattr, _gf_true);
+
+unwind:
+    STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata);
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+    return 0;
+}
+
+/**
+ * Answer a GLUSTERFS_GET_BR_STUB_INIT_TIME getxattr directly from the stub.
+ *
+ * Builds a br_stub_init_t from priv->boot[] and priv->export, places it in a
+ * freshly created dict under GLUSTERFS_GET_BR_STUB_INIT_TIME and unwinds the
+ * getxattr with op_ret set to sizeof(br_stub_init_t).
+ *
+ * Failures unwind with op_ret -1 (or the negative dict_set result) and:
+ *   ENOMEM       - the reply dict could not be created
+ *   ENAMETOOLONG - priv->export does not fit into the reply's export field
+ *   EINVAL       - the value could not be stored in the dict
+ */
+static void
+br_stub_send_stub_init_time(call_frame_t *frame, xlator_t *this)
+{
+    int op_ret = 0;
+    int op_errno = 0;
+    size_t exportlen = 0;
+    dict_t *xattr = NULL;
+    br_stub_init_t stub = {
+        {
+            0,
+        },
+    };
+    br_stub_private_t *priv = NULL;
+
+    priv = this->private;
+
+    xattr = dict_new();
+    if (!xattr) {
+        op_ret = -1;
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+
+    stub.timebuf[0] = priv->boot[0];
+    stub.timebuf[1] = priv->boot[1];
+
+    /* never copy more than the reply field can hold (including the NUL) */
+    exportlen = strlen(priv->export);
+    if (exportlen >= sizeof(stub.export)) {
+        op_ret = -1;
+        op_errno = ENAMETOOLONG;
+        goto unwind;
+    }
+    memcpy(stub.export, priv->export, exportlen + 1);
+
+    /*
+     * "static" bin: the dict refers to the on-stack struct, which stays
+     * valid until the unwind and dict_unref() below have completed.
+     */
+    op_ret = dict_set_static_bin(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+                                 (void *)&stub, sizeof(br_stub_init_t));
+    if (op_ret < 0) {
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    op_ret = sizeof(br_stub_init_t);
+
+unwind:
+    STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, NULL);
+
+    if (xattr)
+        dict_unref(xattr);
+}
+
+/**
+ * getxattr entry point of the bit-rot stub.
+ *
+ * - No @name (listing): wound with br_stub_listxattr_cbk as callback.
+ * - @name recognised by br_stub_is_internal_xattr(): refused, -1/EINVAL.
+ * - node-uuid query on a regular file for which
+ *   br_stub_check_bad_object() returns non-zero: unwound with the
+ *   op_ret/op_errno that helper filled in.
+ * - GLUSTERFS_GET_BR_STUB_INIT_TIME on the root gfid: answered locally by
+ *   br_stub_send_stub_init_time().
+ * - GLUSTERFS_GET_OBJECT_SIGNATURE on a regular file: a local is attached
+ *   to the frame and the call is wound with BR_STUB_REQUEST_COOKIE so that
+ *   br_stub_getxattr_cbk builds the signature reply.
+ * - Everything else is wound with a NULL cookie.
+ *
+ * Always returns 0.
+ */
+int
+br_stub_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 const char *name, dict_t *xdata)
+{
+    void *cookie = NULL;
+    static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    br_stub_local_t *local = NULL;
+    br_stub_private_t *priv = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
+
+    /* listing request: only the callback differs */
+    if (!name) {
+        cbk = br_stub_listxattr_cbk;
+        goto wind;
+    }
+
+    if (br_stub_is_internal_xattr(name))
+        goto unwind;
+
+    priv = this->private;
+    /* presumably jumps to wind when versioning is not active -- confirm */
+    BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+    /**
+     * If xattr is node-uuid and the inode is marked bad, return EIO.
+     * Returning EIO would result in AFR to choose correct node-uuid
+     * corresponding to the subvolume * where the good copy of the
+     * file resides.
+     */
+    if (IA_ISREG(loc->inode->ia_type) && XATTR_IS_NODE_UUID(name) &&
+        br_stub_check_bad_object(this, loc->inode, &op_ret, &op_errno)) {
+        goto unwind;
+    }
+
+    /**
+     * this special extended attribute is allowed only on root
+     */
+    if (name &&
+        (strncmp(name, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+                 sizeof(GLUSTERFS_GET_BR_STUB_INIT_TIME) - 1) == 0) &&
+        ((gf_uuid_compare(loc->gfid, rootgfid) == 0) ||
+         (gf_uuid_compare(loc->inode->gfid, rootgfid) == 0))) {
+        BR_STUB_RESET_LOCAL_NULL(frame);
+        br_stub_send_stub_init_time(frame, this);
+        return 0;
+    }
+
+    /* signature queries are meaningful for regular files only */
+    if (!IA_ISREG(loc->inode->ia_type))
+        goto wind;
+
+    /* prefix match on the signature key */
+    if (name && (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+                         sizeof(GLUSTERFS_GET_OBJECT_SIGNATURE) - 1) == 0)) {
+        cookie = (void *)BR_STUB_REQUEST_COOKIE;
+
+        local = br_stub_alloc_local(this);
+        if (!local) {
+            op_ret = -1;
+            op_errno = ENOMEM;
+            goto unwind;
+        }
+
+        /* no fd is available on the path-based variant */
+        br_stub_fill_local(local, NULL, NULL, loc->inode, loc->inode->gfid,
+                           BR_STUB_NO_VERSIONING, 0);
+        frame->local = local;
+    }
+
+wind:
+    STACK_WIND_COOKIE(frame, cbk, cookie, FIRST_CHILD(this),
+                      FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+    return 0;
+unwind:
+    BR_STUB_RESET_LOCAL_NULL(frame);
+    STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, NULL, NULL);
+    return 0;
+}
+
+/**
+ * fgetxattr entry point of the bit-rot stub; fd-based counterpart of
+ * br_stub_getxattr().
+ *
+ * - No @name (listing): wound with br_stub_listxattr_cbk as callback.
+ * - @name recognised by br_stub_is_internal_xattr(): refused, -1/EINVAL.
+ * - node-uuid query on a regular file for which
+ *   br_stub_check_bad_object() returns non-zero: unwound with the
+ *   op_ret/op_errno that helper filled in.
+ * - GLUSTERFS_GET_BR_STUB_INIT_TIME on the root gfid: answered locally by
+ *   br_stub_send_stub_init_time().
+ * - GLUSTERFS_GET_OBJECT_SIGNATURE on a regular file: a local is attached
+ *   to the frame and the call is wound with BR_STUB_REQUEST_COOKIE.
+ * - Everything else is wound with a NULL cookie.
+ *
+ * Unlike br_stub_getxattr(), the arguments are not validated with
+ * GF_VALIDATE_OR_GOTO here.
+ *
+ * Always returns 0.
+ */
+int
+br_stub_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  const char *name, dict_t *xdata)
+{
+    void *cookie = NULL;
+    static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    /* the getxattr callback is reused for the fd-based variant */
+    fop_fgetxattr_cbk_t cbk = br_stub_getxattr_cbk;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    br_stub_local_t *local = NULL;
+    br_stub_private_t *priv = NULL;
+
+    priv = this->private;
+
+    /* listing request: only the callback differs */
+    if (!name) {
+        cbk = br_stub_listxattr_cbk;
+        goto wind;
+    }
+
+    if (br_stub_is_internal_xattr(name))
+        goto unwind;
+
+    /* presumably jumps to wind when versioning is not active -- confirm */
+    BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+    /**
+     * If xattr is node-uuid and the inode is marked bad, return EIO.
+     * Returning EIO would result in AFR to choose correct node-uuid
+     * corresponding to the subvolume * where the good copy of the
+     * file resides.
+     */
+    if (IA_ISREG(fd->inode->ia_type) && XATTR_IS_NODE_UUID(name) &&
+        br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno)) {
+        goto unwind;
+    }
+
+    /**
+     * this special extended attribute is allowed only on root
+     */
+    if (name &&
+        (strncmp(name, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+                 sizeof(GLUSTERFS_GET_BR_STUB_INIT_TIME) - 1) == 0) &&
+        (gf_uuid_compare(fd->inode->gfid, rootgfid) == 0)) {
+        BR_STUB_RESET_LOCAL_NULL(frame);
+        br_stub_send_stub_init_time(frame, this);
+        return 0;
+    }
+
+    /* signature queries are meaningful for regular files only */
+    if (!IA_ISREG(fd->inode->ia_type))
+        goto wind;
+
+    /* prefix match on the signature key */
+    if (name && (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+                         sizeof(GLUSTERFS_GET_OBJECT_SIGNATURE) - 1) == 0)) {
+        cookie = (void *)BR_STUB_REQUEST_COOKIE;
+
+        local = br_stub_alloc_local(this);
+        if (!local) {
+            op_ret = -1;
+            op_errno = ENOMEM;
+            goto unwind;
+        }
+
+        br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+                           BR_STUB_NO_VERSIONING, 0);
+        frame->local = local;
+    }
+
+wind:
+    STACK_WIND_COOKIE(frame, cbk, cookie, FIRST_CHILD(this),
+                      FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+    return 0;
+unwind:
+    BR_STUB_RESET_LOCAL_NULL(frame);
+    STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+    return 0;
+}
+
+/**
+ * readv entry point of the bit-rot stub.
+ *
+ * With versioning enabled the read is refused when
+ * br_stub_check_bad_object() returns non-zero for the inode; op_ret and
+ * op_errno for the unwind are whatever that helper stored. In every other
+ * case the read is wound to the first child unchanged.
+ *
+ * Always returns 0.
+ */
+int32_t
+br_stub_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+              off_t offset, uint32_t flags, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    br_stub_private_t *priv = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind);
+
+    priv = this->private;
+
+    /* the bad-object check is performed only while versioning is on */
+    if (priv->do_versioning &&
+        br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno))
+        goto unwind;
+
+    STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+                    fd, size, offset, flags, xdata);
+    return 0;
+
+unwind:
+    STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL,
+                        NULL);
+    return 0;
+}
+
+/**
+ * The first write response on the first fd in the list of fds will set
+ * the flag to indicate that the inode is modified. The subsequent write
+ * responses coming on either the first fd or some other fd will not change
+ * the fd. The inode-modified flag is unset only upon release of all the
+ * fds.
+ */
+int32_t
+br_stub_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+    br_stub_local_t *local = frame->local;
+
+    /* the frame must not hold the local across the unwind */
+    frame->local = NULL;
+
+    /*
+     * Only a successful write marks the inode as modified; if the marking
+     * fails the write is reported as failed with EINVAL.
+     */
+    if ((op_ret >= 0) && br_stub_mark_inode_modified(this, local)) {
+        op_ret = -1;
+        op_errno = EINVAL;
+    }
+
+    STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+                        xdata);
+
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+
+    return 0;
+}
+
+/**
+ * Resume function of the call stub created in br_stub_writev(): winds the
+ * stored writev to the first child with br_stub_writev_cbk as callback.
+ * Always returns 0.
+ */
+int32_t
+br_stub_writev_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                      struct iovec *vector, int32_t count, off_t offset,
+                      uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+    STACK_WIND(frame, br_stub_writev_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+               flags, iobref, xdata);
+    return 0;
+}
+
+/**
+ * This is probably the most crucial part about the whole versioning thing.
+ * There's absolutely no differentiation as such between an anonymous fd
+ * and a regular fd except the fd context initialization. Object versioning
+ * is performed when the inode is dirty. Parallel write operations are no
+ * special with each write performing object versioning followed by marking
+ * the inode as non-dirty (synced). This is followed by the actual operation
+ * (writev() in this case) which on a success marks the inode as modified.
+ * This prevents signing of objects that have not been modified.
+ */
+int32_t
+br_stub_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+               struct iovec *vector, int32_t count, off_t offset,
+               uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+    call_stub_t *stub = NULL;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    gf_boolean_t inc_version = _gf_false;
+    gf_boolean_t modified = _gf_false;
+    br_stub_inode_ctx_t *ctx = NULL;
+    int32_t ret = -1;
+    /* replaced by br_stub_writev_cbk when the write has to be tracked */
+    fop_writev_cbk_t cbk = default_writev_cbk;
+    br_stub_local_t *local = NULL;
+    br_stub_private_t *priv = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+
+    priv = this->private;
+    /* versioning disabled: plain pass-through */
+    if (!priv->do_versioning)
+        goto wind;
+
+    ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx);
+    if (ret)
+        goto unwind;
+
+    /* on refusal op_ret/op_errno are whatever the helper stored */
+    ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+    if (ret)
+        goto unwind;
+
+    /**
+     * The inode is not dirty and also witnessed at least one successful
+     * modification operation. Therefore, subsequent operations need not
+     * perform any special tracking.
+     */
+    if (!inc_version && modified)
+        goto wind;
+
+    /**
+     * okay.. so, either the inode needs versioning or the modification
+     * needs to be tracked. ->cbk is set to the appropriate callback
+     * routine for this.
+     * NOTE: ->local needs to be deallocated on failures from here on.
+     */
+    ret = br_stub_versioning_prep(frame, this, fd, ctx);
+    if (ret)
+        goto unwind;
+
+    local = frame->local;
+    /* no version bump needed: just track the modification in the cbk */
+    if (!inc_version) {
+        br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+                           BR_STUB_NO_VERSIONING, 0);
+        cbk = br_stub_writev_cbk;
+        goto wind;
+    }
+
+    /* the write itself is parked in a stub that resumes after versioning */
+    stub = fop_writev_stub(frame, br_stub_writev_resume, fd, vector, count,
+                           offset, flags, iobref, xdata);
+
+    if (!stub) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+                "write  gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        goto cleanup_local;
+    }
+
+    /* Perform Versioning */
+    return br_stub_perform_incversioning(this, frame, stub, fd, ctx);
+
+wind:
+    STACK_WIND(frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+               fd, vector, count, offset, flags, iobref, xdata);
+    return 0;
+
+cleanup_local:
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+
+unwind:
+    /* detach the local from the frame before unwinding */
+    frame->local = NULL;
+    STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, NULL, NULL, NULL);
+
+    return 0;
+}
+
+/**
+ * Callback for an ftruncate that was wound with modification tracking.
+ * On success the inode is marked as modified; if that marking fails the
+ * reply is turned into an EINVAL failure. The frame's local is detached
+ * up front and released once the reply has been unwound.
+ */
+int32_t
+br_stub_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+    br_stub_local_t *stub_local = frame->local;
+
+    frame->local = NULL;
+
+    if ((op_ret >= 0) && br_stub_mark_inode_modified(this, stub_local)) {
+        op_ret = -1;
+        op_errno = EINVAL;
+    }
+
+    STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+                        xdata);
+
+    br_stub_cleanup_local(stub_local);
+    br_stub_dealloc_local(stub_local);
+
+    return 0;
+}
+
+/**
+ * Resume routine that br_stub_ftruncate() hands to fop_ftruncate_stub():
+ * winds the ftruncate to the first child with br_stub_ftruncate_cbk as
+ * the callback.
+ */
+int32_t
+br_stub_ftruncate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                         off_t offset, dict_t *xdata)
+{
+    STACK_WIND(frame, br_stub_ftruncate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+    return 0;
+}
+
+/* c.f. br_stub_writev() for explanation */
+int32_t
+br_stub_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                  dict_t *xdata)
+{
+    br_stub_local_t *local = NULL;
+    call_stub_t *stub = NULL;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    gf_boolean_t inc_version = _gf_false;
+    gf_boolean_t modified = _gf_false;
+    br_stub_inode_ctx_t *ctx = NULL;
+    int32_t ret = -1;
+    fop_ftruncate_cbk_t cbk = default_ftruncate_cbk;
+    br_stub_private_t *priv = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+
+    /* versioning switched off: plain pass-through with the default cbk */
+    priv = this->private;
+    if (!priv->do_versioning)
+        goto wind;
+
+    ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx);
+    if (ret)
+        goto unwind;
+
+    /* on failure op_ret/op_errno carry what the helper stored in them */
+    ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+    if (ret)
+        goto unwind;
+
+    /* no version bump needed and already marked modified: nothing to track */
+    if (!inc_version && modified)
+        goto wind;
+
+    /* ->local is set up here and must be released on failures below */
+    ret = br_stub_versioning_prep(frame, this, fd, ctx);
+    if (ret)
+        goto unwind;
+
+    local = frame->local;
+    if (!inc_version) {
+        /* only the modification needs tracking: wind with our callback */
+        br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+                           BR_STUB_NO_VERSIONING, 0);
+        cbk = br_stub_ftruncate_cbk;
+        goto wind;
+    }
+
+    stub = fop_ftruncate_stub(frame, br_stub_ftruncate_resume, fd, offset,
+                              xdata);
+    if (!stub) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+                "ftruncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        /* same errno the readdir/lookup paths use for a failed stub */
+        op_errno = ENOMEM;
+        goto cleanup_local;
+    }
+
+    /* the stub is resumed via br_stub_ftruncate_resume() */
+    return br_stub_perform_incversioning(this, frame, stub, fd, ctx);
+
+wind:
+    STACK_WIND(frame, cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+    return 0;
+
+cleanup_local:
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+
+unwind:
+    /* this label is also the target of the frame validation above */
+    if (frame)
+        frame->local = NULL;
+    STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL);
+
+    return 0;
+}
+
+/**
+ * Callback for a truncate that was wound with modification tracking.
+ * A successful truncate marks the inode as modified; a failure to do so
+ * converts the reply into an EINVAL error. The frame's local is detached
+ * first and freed after the reply has been unwound.
+ */
+int32_t
+br_stub_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+    br_stub_local_t *stub_local = frame->local;
+
+    frame->local = NULL;
+
+    if ((op_ret >= 0) && br_stub_mark_inode_modified(this, stub_local)) {
+        op_ret = -1;
+        op_errno = EINVAL;
+    }
+
+    STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+                        xdata);
+
+    br_stub_cleanup_local(stub_local);
+    br_stub_dealloc_local(stub_local);
+
+    return 0;
+}
+
+/**
+ * Resume routine that br_stub_truncate() hands to fop_truncate_stub().
+ * Drops a reference on the fd stored in the frame's local and then winds
+ * the truncate to the first child. The reply is handled by
+ * br_stub_truncate_cbk so that it is unwound as a truncate (and not as
+ * an ftruncate) response.
+ */
+int32_t
+br_stub_truncate_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        off_t offset, dict_t *xdata)
+{
+    br_stub_local_t *local = frame->local;
+
+    /* NOTE(review): presumably releases the anonymous fd reference taken
+     * in br_stub_truncate() - confirm against br_stub_fill_local() */
+    fd_unref(local->u.context.fd);
+    STACK_WIND(frame, br_stub_truncate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+    return 0;
+}
+
+/**
+ * Bit-rot-stub depends heavily on fd based operations for doing
+ * versioning and sending notifications. It starts tracking an object
+ * upon getting the first fd based modify operation (by doing versioning)
+ * and sends a notification when the last fd through which the inode was
+ * modified is released.
+ * truncate(), however, carries no fd, which makes it difficult to do the
+ * versioning and send the notification. This is handled by doing the
+ * versioning on an anonymous fd. The fd stays valid till the completion
+ * of the truncate call, which guarantees that release on this anonymous
+ * fd happens after the truncate call and that the notification is sent
+ * after the truncate call.
+ *
+ * c.f. br_stub_writev() for explanation
+ */
+int32_t
+br_stub_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+                 dict_t *xdata)
+{
+    br_stub_local_t *local = NULL;
+    call_stub_t *stub = NULL;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    gf_boolean_t inc_version = _gf_false;
+    gf_boolean_t modified = _gf_false;
+    br_stub_inode_ctx_t *ctx = NULL;
+    int32_t ret = -1;
+    fd_t *fd = NULL;
+    fop_truncate_cbk_t cbk = default_truncate_cbk;
+    br_stub_private_t *priv = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
+
+    /* versioning switched off: pass through, no anonymous fd is taken */
+    priv = this->private;
+    if (!priv->do_versioning)
+        goto wind;
+
+    fd = fd_anonymous(loc->inode);
+    if (!fd) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CREATE_ANONYMOUS_FD_FAILED,
+                "inode-gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
+        goto unwind;
+    }
+
+    /* from here on every failure path has to drop the anonymous fd */
+    ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx);
+    if (ret)
+        goto cleanup_fd;
+
+    ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+    if (ret)
+        goto cleanup_fd;
+
+    /* no version bump needed and already marked modified: nothing to track */
+    if (!inc_version && modified)
+        goto wind;
+
+    ret = br_stub_versioning_prep(frame, this, fd, ctx);
+    if (ret)
+        goto cleanup_fd;
+
+    local = frame->local;
+    if (!inc_version) {
+        /* only the modification needs tracking: wind with our callback */
+        br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+                           BR_STUB_NO_VERSIONING, 0);
+        cbk = br_stub_truncate_cbk;
+        goto wind;
+    }
+
+    stub = fop_truncate_stub(frame, br_stub_truncate_resume, loc, offset,
+                             xdata);
+    if (!stub) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+                "truncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        /* same errno the readdir/lookup paths use for a failed stub */
+        op_errno = ENOMEM;
+        goto cleanup_local;
+    }
+
+    /* the fd is not unref'ed here: br_stub_truncate_resume() does that */
+    return br_stub_perform_incversioning(this, frame, stub, fd, ctx);
+
+wind:
+    STACK_WIND(frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+               loc, offset, xdata);
+    /* fd is still NULL when versioning is disabled */
+    if (fd)
+        fd_unref(fd);
+    return 0;
+
+cleanup_local:
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+cleanup_fd:
+    fd_unref(fd);
+unwind:
+    /* this label is also the target of the frame validation above */
+    if (frame)
+        frame->local = NULL;
+    STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, NULL, NULL, NULL);
+
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* open() */
+
+/**
+ * It's probably worth mentioning a bit about why some of the housekeeping
+ * work is done in open() call path, rather than the callback path.
+ * Two (or more) open()'s in parallel can race and lead to a situation
+ * where a release() gets triggered (possibly after a series of write()
+ * calls) when *other* open()'s have still not reached callback path
+ * thereby having an active fd on an inode that is in process of getting
+ * signed with the current version.
+ *
+ * Maintaining the fd list in the call path ensures that a release() would
+ * not be triggered if an open() call races ahead (followed by a close()),
+ * thereby finding a non-empty fd list.
+ */
+
+/**
+ * open() fop. While versioning is enabled this makes sure the inode has
+ * a stub context, refuses the open when br_stub_check_bad_object()
+ * reports a failure, and - except for the scrubber and for opens whose
+ * flags equal O_RDONLY - adds the fd to the inode's fd tracking list
+ * before winding the call.
+ */
+int
+br_stub_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+             fd_t *fd, dict_t *xdata)
+{
+    br_stub_private_t *priv = NULL;
+    br_stub_inode_ctx_t *ctx = NULL;
+    uint64_t ctx_addr = 0;
+    unsigned long version = BITROT_DEFAULT_CURRENT_VERSION;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    int32_t ret = -1;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind);
+
+    priv = this->private;
+    if (!priv->do_versioning)
+        goto wind;
+
+    /* context lookup failed: try to initialise the inode versions instead */
+    if (br_stub_get_inode_ctx(this, fd->inode, &ctx_addr) &&
+        br_stub_init_inode_versions(this, fd, fd->inode, version, _gf_true,
+                                    _gf_false, &ctx_addr)) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+                "path=%s", loc->path, "gfid=%s", uuid_utoa(fd->inode->gfid),
+                NULL);
+        goto unwind;
+    }
+
+    ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+    ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+    if (ret)
+        goto unwind;
+
+    /* scrubber opens and O_RDONLY opens are not added to the fd list */
+    if ((frame->root->pid == GF_CLIENT_PID_SCRUB) || (flags == O_RDONLY))
+        goto wind;
+
+    ret = br_stub_add_fd_to_inode(this, fd, ctx);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_LIST_FAILED,
+                "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        goto unwind;
+    }
+
+wind:
+    STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+    return 0;
+
+unwind:
+    STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, NULL, NULL);
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* creat() */
+
+/**
+ * This routine registers a release callback for the given fd and adds the
+ * fd to the inode context fd tracking list.
+ *
+ * @this: the bit-rot-stub translator
+ * @fd:   fd to be tracked
+ * @ctx:  stub context of fd->inode; ctx->fd_list is the head of the list
+ *
+ * Returns 0 on success and the non-zero return value of
+ * br_stub_require_release_call() on failure.
+ */
+int32_t
+br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
+{
+    int32_t ret = -1;
+    br_stub_fd_t *br_stub_fd = NULL;
+
+    ret = br_stub_require_release_call(this, fd, &br_stub_fd);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_FD_CONTEXT_FAILED,
+                "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+        goto out;
+    }
+
+    LOCK(&fd->inode->lock);
+    {
+        /*
+         * list_add_tail() takes the new node first and the list head
+         * second. Passing them the other way round would splice the head
+         * into the new fd's one-element ring and detach the fds that
+         * were queued earlier.
+         */
+        list_add_tail(&br_stub_fd->list, &ctx->fd_list);
+    }
+    UNLOCK(&fd->inode->lock);
+
+    ret = 0;
+
+out:
+    return ret;
+}
+
+/**
+ * Callback for create(). On a successful create with versioning enabled
+ * the new fd has to end up tracked for the inode: when the inode has no
+ * stub context yet, br_stub_init_inode_versions() is called with the fd;
+ * otherwise the fd is added to the existing context. A failure of either
+ * step turns the reply into an EINVAL error.
+ */
+int
+br_stub_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int op_ret, int op_errno, fd_t *fd, inode_t *inode,
+                   struct iatt *stbuf, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+    int32_t ret = 0;
+    uint64_t ctx_addr = 0;
+    br_stub_inode_ctx_t *ctx = NULL;
+    unsigned long version = BITROT_DEFAULT_CURRENT_VERSION;
+    br_stub_private_t *priv = NULL;
+
+    priv = this->private;
+
+    if (op_ret < 0)
+        goto unwind;
+
+    if (!priv->do_versioning)
+        goto unwind;
+
+    ret = br_stub_get_inode_ctx(this, fd->inode, &ctx_addr);
+    if (ret < 0) {
+        ret = br_stub_init_inode_versions(this, fd, inode, version, _gf_true,
+                                          _gf_false, &ctx_addr);
+    } else {
+        ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+        ret = br_stub_add_fd_to_inode(this, fd, ctx);
+    }
+
+    /*
+     * Both branches are treated alike: an fd that could not be tracked
+     * fails the fop, just as br_stub_open() does for the same helper.
+     */
+    if (ret) {
+        op_ret = -1;
+        op_errno = EINVAL;
+    }
+
+unwind:
+    STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf,
+                        preparent, postparent, xdata);
+    return 0;
+}
+
+/**
+ * create() fop: checks the arguments and winds the call to the first
+ * child with br_stub_create_cbk as callback. Any argument that fails
+ * validation makes the call unwind with EINVAL.
+ */
+int
+br_stub_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+               mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, invalid_args);
+    GF_VALIDATE_OR_GOTO(this->name, loc, invalid_args);
+    GF_VALIDATE_OR_GOTO(this->name, loc->inode, invalid_args);
+    GF_VALIDATE_OR_GOTO(this->name, fd, invalid_args);
+    GF_VALIDATE_OR_GOTO(this->name, fd->inode, invalid_args);
+
+    STACK_WIND(frame, br_stub_create_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+               xdata);
+    return 0;
+
+invalid_args:
+    STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+                        NULL);
+    return 0;
+}
+
+/**
+ * Callback for mknod(). When the mknod succeeded and versioning is
+ * enabled, the inode versions are initialised with the default current
+ * version; like lookup, a failure there is reported as EINVAL.
+ */
+int
+br_stub_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                  int op_errno, inode_t *inode, struct iatt *stbuf,
+                  struct iatt *preparent, struct iatt *postparent,
+                  dict_t *xdata)
+{
+    br_stub_private_t *priv = this->private;
+    unsigned long version = BITROT_DEFAULT_CURRENT_VERSION;
+
+    if ((op_ret >= 0) && priv->do_versioning) {
+        if (br_stub_init_inode_versions(this, NULL, inode, version, _gf_true,
+                                        _gf_false, NULL)) {
+            op_ret = -1;
+            op_errno = EINVAL;
+        }
+    }
+
+    STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, stbuf, preparent,
+                        postparent, xdata);
+    return 0;
+}
+
+/**
+ * mknod() fop: checks the arguments and winds the call to the first
+ * child with br_stub_mknod_cbk as callback; unwinds with EINVAL when an
+ * argument fails validation.
+ */
+int
+br_stub_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+              dev_t dev, mode_t umask, dict_t *xdata)
+{
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, invalid_args);
+    GF_VALIDATE_OR_GOTO(this->name, loc, invalid_args);
+    GF_VALIDATE_OR_GOTO(this->name, loc->inode, invalid_args);
+
+    STACK_WIND(frame, br_stub_mknod_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata);
+    return 0;
+
+invalid_args:
+    STACK_UNWIND_STRICT(mknod, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL);
+    return 0;
+}
+
+/** }}} */
+
+/**
+ * As of now, only lookup searches for bad object xattr and marks the
+ * object as bad in its inode context if the xattr is present. But there
+ * is a possibility that, at the time of the lookup the object was not
+ * marked bad (i.e. bad object xattr was not set), and later its marked
+ * as bad. In this case, object is not bad, so when a fop such as open or
+ * readv or writev comes on the object, the fop will be sent downward instead
+ * of sending as error upwards.
+ * The solution for this is to do a getxattr for the below list of fops.
+ * lookup, readdirp, open, readv, writev.
+ * But doing getxattr for each of the above fops might be costly.
+ * So another method followed is to catch the bad file marking by the scrubber
+ * and set that info within the object's inode context. In this way getxattr
+ * calls can be avoided and bad objects can be caught instantly. Fetching the
+ * xattr is needed only in lookups when there is a brick restart or inode
+ * forget.
+ *
+ * If the dict (@xattr) is NULL, then how should that be handled? Fail the
+ * lookup operation? Or let it continue with version being initialized to
+ * BITROT_DEFAULT_CURRENT_VERSION. But what if the version was different
+ * on disk (and also a right signature was there), but posix failed to
+ * successfully allocate the dict? Posix does not treat callback xdata
+ * creation failure as a lookup failure.
+ */
+static int32_t
+br_stub_lookup_version(xlator_t *this, uuid_t gfid, inode_t *inode,
+                       dict_t *xattr)
+{
+    br_version_t *obuf = NULL;
+    br_signature_t *sbuf = NULL;
+    gf_boolean_t bad_object = _gf_false;
+    br_vxattr_status_t status;
+    unsigned long version = 0;
+
+    /**
+     * versioning xattrs were requested from POSIX; classify what came
+     * back in @xattr.
+     */
+    status = br_version_xattr_state(xattr, &obuf, &sbuf, &bad_object);
+
+    /**
+     * A signature without a version is reported as INVALID. The inode
+     * context must not be initialized with a wrong version in that case.
+     */
+    if (status == BR_VXATTR_STATUS_INVALID)
+        return -1;
+
+    /**
+     * Use the on-disk ongoing version when the version xattr is
+     * available and start with the default version otherwise. As of now
+     * versions are not persisted on-disk. The inode is marked dirty, so
+     * that the first operation (such as write(), etc..) triggers
+     * synchronization to disk.
+     */
+    if ((status == BR_VXATTR_STATUS_FULL) ||
+        (status == BR_VXATTR_STATUS_UNSIGNED))
+        version = obuf->ongoingversion;
+    else
+        version = BITROT_DEFAULT_CURRENT_VERSION;
+
+    return br_stub_init_inode_versions(this, NULL, inode, version, _gf_true,
+                                       bad_object, NULL);
+}
+
+/** {{{ */
+
+/**
+ * opendir() fop. When the gfid comparison against
+ * priv->bad_object_dir_gfid is non-zero the call is simply wound to the
+ * first child. Otherwise the call is answered here: priv->stub_basepath
+ * is opened and the resulting directory handle is kept in a freshly
+ * allocated fd context that is attached to @fd.
+ */
+int32_t
+br_stub_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+                dict_t *xdata)
+{
+    br_stub_private_t *priv = this->private;
+    br_stub_fd_t *fd_ctx = NULL;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+
+    if (gf_uuid_compare(fd->inode->gfid, priv->bad_object_dir_gfid)) {
+        STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+        return 0;
+    }
+
+    fd_ctx = br_stub_fd_new();
+    if (!fd_ctx) {
+        op_errno = ENOMEM;
+        goto reply;
+    }
+
+    fd_ctx->bad_object.dir_eof = -1;
+    fd_ctx->bad_object.dir = sys_opendir(priv->stub_basepath);
+    if (!fd_ctx->bad_object.dir) {
+        op_errno = errno;
+        goto release_ctx;
+    }
+
+    /* a zero return is the success reply; op_ret carries it as-is */
+    op_ret = br_stub_fd_ctx_set(this, fd, fd_ctx);
+    if (!op_ret)
+        goto reply;
+
+    /* the context could not be attached: undo the opendir and free it */
+    sys_closedir(fd_ctx->bad_object.dir);
+
+release_ctx:
+    GF_FREE(fd_ctx);
+reply:
+    STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL);
+    return 0;
+}
+
+/**
+ * readdir() fop. The call is wound to the first child when versioning
+ * is disabled or when the gfid comparison against
+ * priv->bad_object_dir_gfid is non-zero. Otherwise a call stub that
+ * resumes in br_stub_readdir_wrapper is queued via
+ * br_stub_worker_enqueue(); a stub allocation failure unwinds ENOMEM.
+ */
+int32_t
+br_stub_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                off_t off, dict_t *xdata)
+{
+    br_stub_private_t *priv = this->private;
+    call_stub_t *stub = NULL;
+
+    if (!priv->do_versioning ||
+        gf_uuid_compare(fd->inode->gfid, priv->bad_object_dir_gfid))
+        goto wind;
+
+    stub = fop_readdir_stub(frame, br_stub_readdir_wrapper, fd, size, off,
+                            xdata);
+    if (!stub) {
+        STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+    }
+
+    br_stub_worker_enqueue(this, stub);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_readdir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+    return 0;
+}
+
+/**
+ * Callback for readdirp(). For every regular-file entry (other than "."
+ * and "..") whose inode has no stub context yet, the context is set up
+ * from the xattrs carried in the entry's dict. The stub's virtual
+ * xattrs are stripped from each processed entry's dict. A context set-up
+ * failure fails the whole request with EINVAL.
+ */
+int
+br_stub_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int op_ret, int op_errno, gf_dirent_t *entries,
+                     dict_t *dict)
+{
+    gf_dirent_t *entry = NULL;
+    uint64_t ctxaddr = 0;
+    int32_t ret = 0;
+    br_stub_private_t *priv = NULL;
+    gf_boolean_t ver_enabled = _gf_false;
+
+    BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+    priv = this->private;
+    BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), unwind);
+
+    if (op_ret < 0)
+        goto unwind;
+
+    list_for_each_entry(entry, &entries->list, list)
+    {
+        if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+            continue;
+
+        if (!IA_ISREG(entry->d_stat.ia_type))
+            continue;
+
+        /*
+         * Readdirp is, for the most part, a bulk lookup of all the
+         * entries in the directory, so ideally each entry would be
+         * handled the way the lookup callback handles it. For now the
+         * historical behaviour is kept. br_stub_remove_vxattrs() takes
+         * a flag telling whether the bad-object xattr should be removed
+         * as well (there are scenarios where only that xattr should be
+         * sent upwards); passing _gf_true here keeps what this function
+         * always did, i.e. remove all the stub related xattrs (version,
+         * signature, bad-object, ...). This may need revisiting in
+         * future.
+         */
+        ret = br_stub_get_inode_ctx(this, entry->inode, &ctxaddr);
+        if (ret < 0)
+            ctxaddr = 0;
+
+        if (ctxaddr) {
+            /* context already present: only strip the xattrs */
+            br_stub_remove_vxattrs(entry->dict, _gf_true);
+            continue;
+        }
+
+        ret = br_stub_lookup_version(this, entry->inode->gfid, entry->inode,
+                                     entry->dict);
+        br_stub_remove_vxattrs(entry->dict, _gf_true);
+
+        /**
+         * there's no per-file granularity support in case of
+         * failure. let's fail the entire request for now..
+         */
+        if (ret)
+            break;
+    }
+
+    if (ret) {
+        op_ret = -1;
+        op_errno = EINVAL;
+    }
+
+unwind:
+    STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, dict);
+
+    return 0;
+}
+
+/**
+ * readdirp() fop. When versioning is active, the version, signing and
+ * bad-object keys are added to the request dict (creating the dict if
+ * the caller passed none) before the call is wound, with
+ * br_stub_readdirp_cbk as callback.
+ *
+ * The dict reference taken (or the dict created) here is dropped on
+ * every exit path, including the error unwind.
+ */
+int
+br_stub_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t offset, dict_t *dict)
+{
+    int32_t ret = -1;
+    int op_errno = 0;
+    gf_boolean_t xref = _gf_false;
+    br_stub_private_t *priv = NULL;
+
+    priv = this->private;
+    BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+    op_errno = ENOMEM;
+    if (!dict) {
+        dict = dict_new();
+        if (!dict)
+            goto unwind;
+    } else {
+        dict = dict_ref(dict);
+    }
+
+    /* from here on this function owns one reference on dict */
+    xref = _gf_true;
+
+    op_errno = EINVAL;
+    ret = dict_set_uint32(dict, BITROT_CURRENT_VERSION_KEY, 0);
+    if (ret)
+        goto unwind;
+    ret = dict_set_uint32(dict, BITROT_SIGNING_VERSION_KEY, 0);
+    if (ret)
+        goto unwind;
+    ret = dict_set_uint32(dict, BITROT_OBJECT_BAD_KEY, 0);
+    if (ret)
+        goto unwind;
+
+wind:
+    STACK_WIND(frame, br_stub_readdirp_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+    goto unref_dict;
+
+unwind:
+    /* NOTE(review): 0x1 looks like the marker left in ->local by
+     * BR_STUB_VER_NOT_ACTIVE_THEN_GOTO - confirm against its definition */
+    if (frame->local == (void *)0x1)
+        frame->local = NULL;
+    STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL);
+    /* fall through so that the reference held on dict is not leaked */
+
+unref_dict:
+    if (xref)
+        dict_unref(dict);
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* lookup() */
+
+/**
+ * This function mainly handles the ENOENT error for the bad objects. Though
+ * br_stub_forget () handles removal of the link for the bad object from the
+ * quarantine directory, its better to handle it in lookup as well, where
+ * a failed lookup on a bad object with ENOENT, will trigger deletion of the
+ * link for the bad object from quarantine directory. So whoever comes first
+ * either forget () or lookup () will take care of removing the link.
+ */
+void
+br_stub_handle_lookup_error(xlator_t *this, inode_t *inode, int32_t op_errno)
+{
+    int32_t ret = -1;
+    int is_bad = 0;
+    uint64_t ctx_addr = 0;
+    br_stub_inode_ctx_t *ctx = NULL;
+
+    /* only ENOENT on a linked inode that has a stub context is handled */
+    if (op_errno != ENOENT)
+        goto out;
+
+    if (!inode_is_linked(inode))
+        goto out;
+
+    ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
+    if (ret)
+        goto out;
+
+    ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+    /*
+     * Sample the bad-object state exactly once, while the inode lock is
+     * held, and base both the deletion below and the context teardown
+     * on that single value.
+     */
+    LOCK(&inode->lock);
+    {
+        is_bad = __br_stub_is_bad_object(ctx);
+        if (is_bad)
+            (void)br_stub_del(this, inode->gfid);
+    }
+    UNLOCK(&inode->lock);
+
+    if (is_bad) {
+        /* File is not present, might be deleted for recovery,
+         * del the bitrot inode context
+         */
+        ctx_addr = 0;
+        inode_ctx_del(inode, this, &ctx_addr);
+        if (ctx_addr) {
+            ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+            GF_FREE(ctx);
+        }
+    }
+
+out:
+    return;
+}
+
+/**
+ * Callback for lookup(). Failed lookups are passed to
+ * br_stub_handle_lookup_error(). For successful lookups of regular
+ * files (with versioning enabled in the call path) a fresh lookup - one
+ * carrying BR_STUB_REQUEST_COOKIE - initialises the inode context from
+ * @xattr, and both fresh and revalidate lookups go through
+ * br_stub_mark_xdata_bad_object(). The stub's virtual xattrs are
+ * stripped from @xattr before unwinding, except for non-regular files.
+ */
+int
+br_stub_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+                   dict_t *xattr, struct iatt *postparent)
+{
+    int32_t ret = 0;
+    br_stub_private_t *priv = NULL;
+    gf_boolean_t ver_enabled = _gf_false;
+    gf_boolean_t remove_bad_file_marker = _gf_true;
+
+    BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+    priv = this->private;
+
+    if (op_ret < 0) {
+        br_stub_handle_lookup_error(this, inode, op_errno);
+
+        /*
+         * For errors other than ENOENT it is better to pass the bad
+         * file marker (if it has been set) on to the higher layer.
+         */
+        remove_bad_file_marker = (op_errno == ENOENT) ? _gf_true : _gf_false;
+        goto delkey;
+    }
+
+    BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), delkey);
+
+    if (!IA_ISREG(stbuf->ia_type))
+        goto unwind;
+
+    if (cookie == (void *)BR_STUB_REQUEST_COOKIE) {
+        /* fresh lookup: set up the inode context from the xattrs first */
+        ret = br_stub_lookup_version(this, stbuf->ia_gfid, inode, xattr);
+        if (ret < 0) {
+            op_ret = -1;
+            op_errno = EINVAL;
+            goto delkey;
+        }
+
+        /**
+         * If the object is bad, send "bad inode" marker back in response
+         * for xlator(s) to act accordingly (such as quick-read, etc..).
+         * The request is not satisfied when that fails.
+         */
+        ret = br_stub_mark_xdata_bad_object(this, inode, xattr);
+        if (ret) {
+            op_ret = -1;
+            op_errno = EIO;
+        }
+    } else {
+        /**
+         * If the object is bad, the "bad inode" marker has to be sent
+         * back in the response for revalidated lookups as well: some
+         * xlators such as quick-read might cache the data in a
+         * revalidated lookup, as the fresh lookup would anyway have
+         * sent the marker. In general the marker is sent for every
+         * lookup operation on the bad object.
+         */
+        ret = br_stub_mark_xdata_bad_object(this, inode, xattr);
+        if (ret) {
+            op_ret = -1;
+            op_errno = EIO;
+            /*
+             * Keep the bad file marker in the dictionary at @delkey;
+             * the other virtual xattrs (such as version, signature)
+             * are still removed.
+             */
+            remove_bad_file_marker = _gf_false;
+        }
+    }
+
+delkey:
+    br_stub_remove_vxattrs(xattr, remove_bad_file_marker);
+unwind:
+    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+                        postparent);
+
+    return 0;
+}
+
+/**
+ * lookup() fop.
+ *
+ * - Lookups whose gfid or parent gfid compares equal to
+ *   priv->bad_object_dir_gfid are queued as a call stub that resumes in
+ *   br_stub_lookup_wrapper.
+ * - Lookups on an inode that already has a stub context are wound
+ *   unchanged (revalidate).
+ * - Fresh lookups request the version, signing and bad-object keys and
+ *   are wound with BR_STUB_REQUEST_COOKIE so that the callback can tell
+ *   them apart.
+ *
+ * Failures are unwound with -1 and EINVAL (invalid argument, dict_set
+ * failure) or ENOMEM (stub or dict allocation failure).
+ */
+int
+br_stub_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    int32_t ret = 0;
+    /* EINVAL by default, so a failed validation never unwinds errno 0 */
+    int op_errno = EINVAL;
+    void *cookie = NULL;
+    uint64_t ctx_addr = 0;
+    gf_boolean_t xref = _gf_false;
+    br_stub_private_t *priv = NULL;
+    call_stub_t *stub = NULL;
+
+    GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+    GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
+
+    priv = this->private;
+
+    BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+    if (!gf_uuid_compare(loc->gfid, priv->bad_object_dir_gfid) ||
+        !gf_uuid_compare(loc->pargfid, priv->bad_object_dir_gfid)) {
+        stub = fop_lookup_stub(frame, br_stub_lookup_wrapper, loc, xdata);
+        if (!stub) {
+            op_errno = ENOMEM;
+            goto unwind;
+        }
+        br_stub_worker_enqueue(this, stub);
+        return 0;
+    }
+
+    /* an existing context means a revalidate: no keys are requested */
+    ret = br_stub_get_inode_ctx(this, loc->inode, &ctx_addr);
+    if (ret < 0)
+        ctx_addr = 0;
+    if (ctx_addr != 0)
+        goto wind;
+
+    /**
+     * fresh lookup: request version keys from POSIX
+     */
+    op_errno = ENOMEM;
+    if (!xdata) {
+        xdata = dict_new();
+        if (!xdata)
+            goto unwind;
+    } else {
+        xdata = dict_ref(xdata);
+    }
+
+    /* from here on this function owns one reference on xdata */
+    xref = _gf_true;
+
+    /**
+     * Requesting both xattrs provides a way of sanity checking the
+     * object. Anomaly checking is done in cbk by examining absence
+     * of either or both xattrs.
+     */
+    op_errno = EINVAL;
+    ret = dict_set_uint32(xdata, BITROT_CURRENT_VERSION_KEY, 0);
+    if (ret)
+        goto unwind;
+    ret = dict_set_uint32(xdata, BITROT_SIGNING_VERSION_KEY, 0);
+    if (ret)
+        goto unwind;
+    ret = dict_set_uint32(xdata, BITROT_OBJECT_BAD_KEY, 0);
+    if (ret)
+        goto unwind;
+    cookie = (void *)BR_STUB_REQUEST_COOKIE;
+
+wind:
+    STACK_WIND_COOKIE(frame, br_stub_lookup_cbk, cookie, FIRST_CHILD(this),
+                      FIRST_CHILD(this)->fops->lookup, loc, xdata);
+    goto dealloc_dict;
+
+unwind:
+    /* NOTE(review): 0x1 looks like the marker left in ->local by
+     * BR_STUB_VER_NOT_ACTIVE_THEN_GOTO - confirm against its definition */
+    if (frame->local == (void *)0x1)
+        frame->local = NULL;
+    STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+dealloc_dict:
+    if (xref)
+        dict_unref(xdata);
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* stat */
+int
+br_stub_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    br_stub_private_t *priv = this->private;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+
+    /*
+     * The bad-object check is made only for regular files and only
+     * while versioning is enabled; when it reports a failure, the
+     * op_ret/op_errno it was handed are unwound.
+     */
+    if (priv->do_versioning && IA_ISREG(loc->inode->ia_type) &&
+        br_stub_check_bad_object(this, loc->inode, &op_ret, &op_errno)) {
+        STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, NULL, NULL);
+        return 0;
+    }
+
+    STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+                    loc, xdata);
+    return 0;
+}
+
+/* fstat */
+int
+br_stub_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+    br_stub_private_t *priv = this->private;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+
+    /*
+     * The bad-object check is made only for regular files and only
+     * while versioning is enabled; when it reports a failure, the
+     * op_ret/op_errno it was handed are unwound.
+     */
+    if (priv->do_versioning && IA_ISREG(fd->inode->ia_type) &&
+        br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno)) {
+        STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, NULL, NULL);
+        return 0;
+    }
+
+    STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+                    fd, xdata);
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* unlink() */
+
+/**
+ * Callback for unlink.
+ *
+ * After a successful unlink of a regular file whose inode context marks
+ * it as a bad object, br_stub_del() is called with the inode's gfid (per
+ * the comments below, this removes the quarantine directory entry). The
+ * unlink result is always propagated unchanged; failures in this cleanup
+ * never fail the fop.
+ */
+int
+br_stub_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+    br_stub_local_t *local = NULL;
+    inode_t *inode = NULL;
+    uint64_t ctx_addr = 0;
+    br_stub_inode_ctx_t *ctx = NULL;
+    int32_t ret = -1;
+    br_stub_private_t *priv = NULL;
+    gf_boolean_t ver_enabled = _gf_false;
+
+    /* ver_enabled becomes true when the call path left anything in
+     * frame->local; a bare 0x1 sentinel is cleared back to NULL here. */
+    BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+    priv = this->private;
+    /* Skip all bookkeeping if versioning is off now or was off in the
+     * call path; 'local' is still NULL on this jump. */
+    BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), unwind);
+
+    /* Take ownership of the local so it is released below, after unwind. */
+    local = frame->local;
+    frame->local = NULL;
+
+    if (op_ret < 0)
+        goto unwind;
+
+    if (!local) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_NULL_LOCAL, NULL);
+        goto unwind;
+    }
+    inode = local->u.context.inode;
+    if (!IA_ISREG(inode->ia_type))
+        goto unwind;
+
+    ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
+    if (ret) {
+        /**
+         * If the inode is bad AND context is not there, then there
+         * is a possibility of the gfid of the object being listed
+         * in the quarantine directory and will be shown in the
+         * bad objects list. So continuing with the fop with a
+         * warning log. The entry from the quarantine directory
+         * has to be removed manually. Its not a good idea to fail
+         * the fop, as the object has already been deleted.
+         */
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+                "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
+        goto unwind;
+    }
+
+    ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+    LOCK(&inode->lock);
+    {
+        /**
+         * Ignoring the return value of br_stub_del ().
+         * There is not much that can be done if unlinking
+         * of the entry in the quarantine directory fails.
+         * The failure is logged.
+         */
+        if (__br_stub_is_bad_object(ctx))
+            (void)br_stub_del(this, inode->gfid);
+    }
+    UNLOCK(&inode->lock);
+
+unwind:
+    STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+                        xdata);
+    /* NOTE(review): 'local' may be NULL here; both helpers are presumably
+     * NULL-tolerant - confirm against their definitions. */
+    br_stub_cleanup_local(local);
+    br_stub_dealloc_local(local);
+    return 0;
+}
+
+/**
+ * unlink: remember the target inode in a frame-local so that
+ * br_stub_unlink_cbk() can inspect it once the unlink has completed.
+ *
+ * With versioning disabled the call is wound straight through with no
+ * local attached. If the local cannot be allocated, the fop is unwound
+ * with -1/ENOMEM after clearing the 0x1 sentinel from frame->local.
+ */
+int
+br_stub_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+               dict_t *xdata)
+{
+    br_stub_private_t *priv = this->private;
+    br_stub_local_t *local = NULL;
+
+    BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+    local = br_stub_alloc_local(this);
+    if (!local) {
+        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_ALLOC_MEM_FAILED,
+                "local path=%s", loc->path, "gfid=%s",
+                uuid_utoa(loc->inode->gfid), NULL);
+
+        /* never hand the sentinel to code that treats local as a pointer */
+        if (frame->local == (void *)0x1)
+            frame->local = NULL;
+        STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+    }
+
+    br_stub_fill_local(local, NULL, NULL, loc->inode, loc->inode->gfid,
+                       BR_STUB_NO_VERSIONING, 0);
+    frame->local = local;
+
+wind:
+    STACK_WIND(frame, br_stub_unlink_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->unlink, loc, flag, xdata);
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* forget() */
+
+/**
+ * forget callback: detach this translator's context value from @inode
+ * and free it. Nothing is freed when the removed value is zero.
+ * Always returns 0.
+ */
+int
+br_stub_forget(xlator_t *this, inode_t *inode)
+{
+    uint64_t stored = 0;
+    br_stub_inode_ctx_t *ictx = NULL;
+
+    inode_ctx_del(inode, this, &stored);
+    if (stored) {
+        ictx = (br_stub_inode_ctx_t *)(long)stored;
+        GF_FREE(ictx);
+    }
+
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/**
+ * Completion callback that ignores the result and only destroys the call
+ * stack the frame belongs to. Used by br_stub_send_ipc_fop() for the
+ * frame it creates itself.
+ */
+int32_t
+br_stub_noop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+             int32_t op_errno, dict_t *xdata)
+{
+    STACK_DESTROY(frame->root);
+    return 0;
+}
+
+/**
+ * Send a CHANGELOG_OP_TYPE_BR_RELEASE event for @fd's inode down the
+ * graph as an ipc fop targeted at GF_IPC_TARGET_CHANGELOG.
+ *
+ * The event (gfid, @releaseversion, @sign_info) is placed in a request
+ * dict under "RELEASE-EVENT". Any failure along the way is logged as a
+ * warning and the notification is dropped; nothing is reported to the
+ * caller. The reply is discarded by br_stub_noop().
+ */
+static void
+br_stub_send_ipc_fop(xlator_t *this, fd_t *fd, unsigned long releaseversion,
+                     int sign_info)
+{
+    dict_t *event_dict = NULL;
+    call_frame_t *frame = NULL;
+    changelog_event_t ev = {
+        0,
+    };
+
+    ev.ev_type = CHANGELOG_OP_TYPE_BR_RELEASE;
+    ev.u.releasebr.version = releaseversion;
+    ev.u.releasebr.sign_info = sign_info;
+    gf_uuid_copy(ev.u.releasebr.gfid, fd->inode->gfid);
+
+    event_dict = dict_new();
+    if (!event_dict) {
+        gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_DICT_ALLOC_FAILED,
+                NULL);
+        return;
+    }
+
+    if (dict_set_static_bin(event_dict, "RELEASE-EVENT", &ev,
+                            CHANGELOG_EV_SIZE)) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED, NULL);
+    } else {
+        frame = create_frame(this, this->ctx->pool);
+        if (!frame) {
+            gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_CREATE_FRAME_FAILED,
+                    NULL);
+        } else {
+            STACK_WIND(frame, br_stub_noop, FIRST_CHILD(this),
+                       FIRST_CHILD(this)->fops->ipc, GF_IPC_TARGET_CHANGELOG,
+                       event_dict);
+        }
+    }
+
+    /* drop the reference taken by dict_new() on every path past it */
+    dict_unref(event_dict);
+}
+
+/**
+ * Sign-state machine of an inode (kept in ctx->info_sign).
+ *
+ * States:
+ *   BR_SIGN_NORMAL      - the default state of the inode
+ *   BR_SIGN_REOPEN_WAIT - a release has been sent and is waiting for reopen
+ *   BR_SIGN_QUICK       - reopen has happened; this release should trigger
+ *                         sign
+ *
+ * Transitions implemented here, keyed on @fop:
+ *   GF_FOP_FSETXATTR
+ *       state := BR_SIGN_QUICK (whatever it was before); returns
+ *       BR_SIGN_QUICK.
+ *   GF_FOP_RELEASE (state is asserted not to be BR_SIGN_REOPEN_WAIT)
+ *       BR_SIGN_NORMAL -> BR_SIGN_REOPEN_WAIT; returns BR_SIGN_REOPEN_WAIT.
+ *       anything else  -> BR_SIGN_NORMAL; returns the state held before
+ *                         the reset.
+ *   any other fop
+ *       state untouched; returns BR_SIGN_INVALID.
+ *
+ * @fd is not used at present.
+ */
+br_sign_state_t
+__br_stub_inode_sign_state(br_stub_inode_ctx_t *ctx, glusterfs_fop_t fop,
+                           fd_t *fd)
+{
+    br_sign_state_t reported = BR_SIGN_INVALID;
+
+    if (fop == GF_FOP_FSETXATTR) {
+        ctx->info_sign = BR_SIGN_QUICK;
+        reported = BR_SIGN_QUICK;
+    } else if (fop == GF_FOP_RELEASE) {
+        GF_ASSERT(ctx->info_sign != BR_SIGN_REOPEN_WAIT);
+
+        if (ctx->info_sign == BR_SIGN_NORMAL) {
+            ctx->info_sign = BR_SIGN_REOPEN_WAIT;
+            reported = BR_SIGN_REOPEN_WAIT;
+        } else {
+            /* report the pre-reset state, then fall back to NORMAL */
+            reported = ctx->info_sign;
+            ctx->info_sign = BR_SIGN_NORMAL;
+        }
+    }
+
+    return reported;
+}
+
+/**
+ * release callback for file fds.
+ *
+ * Under inode->lock: unlinks this fd's br_stub_fd_t from the inode's fd
+ * list, asks __br_stub_can_trigger_release() whether a release event is
+ * due, and if so advances the sign state machine. Outside the lock the
+ * event is sent via br_stub_send_ipc_fop(). Finally the fd context is
+ * removed and freed. Always returns 0.
+ */
+int32_t
+br_stub_release(xlator_t *this, fd_t *fd)
+{
+    int32_t ret = 0;
+    int32_t flags = 0; /* never modified; only printed in the debug log */
+    inode_t *inode = NULL;
+    unsigned long releaseversion = 0;
+    br_stub_inode_ctx_t *ctx = NULL;
+    uint64_t tmp = 0;
+    br_stub_fd_t *br_stub_fd = NULL;
+    int32_t signinfo = 0;
+
+    inode = fd->inode;
+
+    LOCK(&inode->lock);
+    {
+        /* no inode context: nothing to notify, ret stays 0 */
+        ctx = __br_stub_get_ongoing_version_ctx(this, inode, NULL);
+        if (ctx == NULL)
+            goto unblock;
+        br_stub_fd = br_stub_fd_ctx_get(this, fd);
+        if (br_stub_fd) {
+            list_del_init(&br_stub_fd->list);
+        }
+
+        /* ret != 0 means "send a release event"; releaseversion is
+         * filled in network byte order by the helper */
+        ret = __br_stub_can_trigger_release(inode, ctx, &releaseversion);
+        if (!ret)
+            goto unblock;
+
+        signinfo = __br_stub_inode_sign_state(ctx, GF_FOP_RELEASE, fd);
+        signinfo = htonl(signinfo);
+
+        /* inode back to initial state: mark dirty */
+        if (ctx->info_sign == BR_SIGN_NORMAL) {
+            __br_stub_mark_inode_dirty(ctx);
+            __br_stub_unset_inode_modified(ctx);
+        }
+    }
+unblock:
+    UNLOCK(&inode->lock);
+
+    if (ret) {
+        gf_msg_debug(this->name, 0,
+                     "releaseversion: %lu | flags: %d "
+                     "| signinfo: %d",
+                     (unsigned long)ntohl(releaseversion), flags,
+                     ntohl(signinfo));
+        br_stub_send_ipc_fop(this, fd, releaseversion, signinfo);
+    }
+
+    /* the result of fd_ctx_del() is not checked; tmp stays 0 (and
+     * GF_FREE gets NULL) unless fd_ctx_del() stored a value */
+    ret = fd_ctx_del(fd, this, &tmp);
+    br_stub_fd = (br_stub_fd_t *)(long)tmp;
+
+    GF_FREE(br_stub_fd);
+
+    return 0;
+}
+
+/**
+ * releasedir callback: remove this translator's context from a directory
+ * fd, close the DIR handle stored in it (if any) and free the context.
+ *
+ * A failing closedir is logged as an error but otherwise ignored.
+ * Returns 0 in all cases, including when the fd carries no context.
+ */
+int32_t
+br_stub_releasedir(xlator_t *this, fd_t *fd)
+{
+    uint64_t stored = 0;
+    br_stub_fd_t *dir_ctx = NULL;
+
+    if (fd_ctx_del(fd, this, &stored) < 0)
+        return 0;
+
+    dir_ctx = (br_stub_fd_t *)(long)stored;
+
+    if (dir_ctx->bad_object.dir && sys_closedir(dir_ctx->bad_object.dir))
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL,
+                "error=%s", strerror(errno), NULL);
+
+    GF_FREE(dir_ctx);
+    return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* ictxmerge */
+
+/**
+ * ictxmerge callback: move the br_stub_fd_t tracked on @inode's context
+ * over to the fd list of @linked_inode's context.
+ *
+ * Nothing is done when either inode has no context, or when @inode's fd
+ * list is empty. The list is expected to hold exactly one entry and that
+ * entry is expected to belong to @fd; both expectations are asserted.
+ * The move is performed with linked_inode->lock held.
+ */
+void
+br_stub_ictxmerge(xlator_t *this, fd_t *fd, inode_t *inode,
+                  inode_t *linked_inode)
+{
+    int32_t ret = 0;
+    uint64_t ctxaddr = 0;
+    uint64_t lctxaddr = 0;
+    br_stub_inode_ctx_t *ctx = NULL;
+    br_stub_inode_ctx_t *lctx = NULL;
+    br_stub_fd_t *br_stub_fd = NULL;
+
+    ret = br_stub_get_inode_ctx(this, inode, &ctxaddr);
+    if (ret < 0)
+        goto done;
+    ctx = (br_stub_inode_ctx_t *)(uintptr_t)ctxaddr;
+
+    LOCK(&linked_inode->lock);
+    {
+        ret = __br_stub_get_inode_ctx(this, linked_inode, &lctxaddr);
+        if (ret < 0)
+            goto unblock;
+        lctx = (br_stub_inode_ctx_t *)(uintptr_t)lctxaddr;
+
+        /*
+         * list_first_entry() never yields NULL: on an empty list it
+         * hands back a pointer computed from the list head itself, and
+         * moving that "entry" would splice the head into the other
+         * list. Test for emptiness explicitly instead of NULL-checking
+         * the result.
+         */
+        if (list_empty(&ctx->fd_list))
+            goto unblock;
+
+        GF_ASSERT(list_is_singular(&ctx->fd_list));
+        br_stub_fd = list_first_entry(&ctx->fd_list, br_stub_fd_t, list);
+        GF_ASSERT(br_stub_fd->fd == fd);
+        list_move_tail(&br_stub_fd->list, &lctx->fd_list);
+    }
+unblock:
+    UNLOCK(&linked_inode->lock);
+
+done:
+    return;
+}
+
+/** }}} */
+
+/*
+ * File operations this translator intercepts. Operations not listed are
+ * left unset in this table (presumably filled with pass-through defaults
+ * by the xlator framework - confirm).
+ */
+struct xlator_fops fops = {
+    .lookup = br_stub_lookup,
+    .stat = br_stub_stat,
+    .fstat = br_stub_fstat,
+    .open = br_stub_open,
+    .create = br_stub_create,
+    .readdirp = br_stub_readdirp,
+    .getxattr = br_stub_getxattr,
+    .fgetxattr = br_stub_fgetxattr,
+    .fsetxattr = br_stub_fsetxattr,
+    .writev = br_stub_writev,
+    .truncate = br_stub_truncate,
+    .ftruncate = br_stub_ftruncate,
+    .mknod = br_stub_mknod,
+    .readv = br_stub_readv,
+    .removexattr = br_stub_removexattr,
+    .fremovexattr = br_stub_fremovexattr,
+    .setxattr = br_stub_setxattr,
+    .opendir = br_stub_opendir,
+    .readdir = br_stub_readdir,
+    .unlink = br_stub_unlink,
+};
+
+/*
+ * Lifecycle callbacks.
+ *
+ * releasedir must be registered: br_stub_releasedir() is the only place
+ * where the context attached to a directory fd is removed, its DIR
+ * handle closed and the context freed. Without the hook those resources
+ * are never released.
+ */
+struct xlator_cbks cbks = {
+    .forget = br_stub_forget,
+    .release = br_stub_release,
+    .releasedir = br_stub_releasedir,
+    .ictxmerge = br_stub_ictxmerge,
+};
+
+/*
+ * Volume options understood by the stub:
+ *   "bitrot" - boolean, default "off"; enable/disable bitrot stub.
+ *   "export" - path, default "{{ brick.path }}"; brick path for versioning.
+ * The table is terminated by an entry whose key is NULL.
+ */
+struct volume_options options[] = {
+    {.key = {"bitrot"},
+     .type = GF_OPTION_TYPE_BOOL,
+     .default_value = "off",
+     .op_version = {GD_OP_VERSION_3_7_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE,
+     .tags = {"bitrot"},
+     .description = "enable/disable bitrot stub"},
+    {.key = {"export"},
+     .type = GF_OPTION_TYPE_PATH,
+     .op_version = {GD_OP_VERSION_3_7_0},
+     .tags = {"bitrot"},
+     .description = "brick path for versioning",
+     .default_value = "{{ brick.path }}"},
+    {.key = {NULL}},
+};
+
+/*
+ * Translator descriptor: ties together the entry points (init, fini,
+ * notify, reconfigure, mem_acct_init), the fops/cbks tables and the
+ * option table defined in this file under the identifier "bitrot-stub".
+ */
+xlator_api_t xlator_api = {
+    .init = init,
+    .fini = fini,
+    .notify = notify,
+    .reconfigure = reconfigure,
+    .mem_acct_init = mem_acct_init,
+    .op_version = {1}, /* Present from the initial version */
+    .fops = &fops,
+    .cbks = &cbks,
+    .options = options,
+    .identifier = "bitrot-stub",
+    .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
new file mode 100644
index 00000000000..edd79a77e4f
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
@@ -0,0 +1,515 @@
+/*
+  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+#ifndef __BIT_ROT_STUB_H__
+#define __BIT_ROT_STUB_H__
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
+#include "bit-rot-stub-mem-types.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
+#include "bit-rot-common.h"
+#include "bit-rot-stub-messages.h"
+#include "glusterfs3-xdr.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+
+/* 1 MiB; going by the name, the stack size for the bad-object thread
+ * (the user of this macro is not visible in this header). */
+#define BAD_OBJECT_THREAD_STACK_SIZE ((size_t)(1024 * 1024))
+/* 64 KiB; going by the name, a buffer size for dump strings - confirm. */
+#define BR_STUB_DUMP_STR_SIZE 65536
+
+/* Path buffer sizes with head-room beyond PATH_MAX (1 KiB and 2 KiB). */
+#define BR_PATH_MAX_EXTRA (PATH_MAX + 1024)
+#define BR_PATH_MAX_PLUS (PATH_MAX + 2048)
+
+/*
+ * The quarantine directory was originally created under a misspelled
+ * name ("quanrantine"). The correct spelling is used from now on; the
+ * old name is kept as a macro, presumably so that directories created
+ * with it can still be found - confirm against the users of
+ * OLD_BR_STUB_QUARANTINE_DIR.
+ */
+#define OLD_BR_STUB_QUARANTINE_DIR GF_HIDDEN_PATH "/quanrantine"
+#define BR_STUB_QUARANTINE_DIR GF_HIDDEN_PATH "/quarantine"
+
+/*
+ * Do not reference frame->local in a cbk unless it has been initialized.
+ * The value 0x1 stored in frame->local is a sentinel that carries the
+ * "versioning was active" flag from the call path to the cbk path.
+ *
+ * BR_STUB_VER_NOT_ACTIVE_THEN_GOTO: when priv->do_versioning is set,
+ * plant the sentinel in frame->local; otherwise jump to @label.
+ * Arguments are parenthesized so any pointer expression can be passed.
+ */
+#define BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, label)                   \
+    do {                                                                       \
+        if ((priv)->do_versioning)                                             \
+            (frame)->local = (void *)0x1;                                      \
+        else                                                                   \
+            goto label;                                                        \
+    } while (0)
+
+/*
+ * Jump to @label when versioning is disabled or @cond holds. @cond is
+ * parenthesized so that a compound expression cannot re-associate with
+ * the surrounding "||".
+ */
+#define BR_STUB_VER_COND_GOTO(priv, cond, label)                               \
+    do {                                                                       \
+        if (!(priv)->do_versioning || (cond))                                  \
+            goto label;                                                        \
+    } while (0)
+
+/*
+ * Set @flag to _gf_true when frame->local is non-NULL (sentinel or real
+ * local); @flag is left untouched otherwise. A bare 0x1 sentinel is then
+ * reset to NULL so later code never treats it as a pointer.
+ */
+#define BR_STUB_VER_ENABLED_IN_CALLPATH(frame, flag)                           \
+    do {                                                                       \
+        if ((frame)->local)                                                    \
+            (flag) = _gf_true;                                                 \
+        if ((frame)->local == (void *)0x1)                                     \
+            (frame)->local = NULL;                                             \
+    } while (0)
+
+/* Clear frame->local when it only holds the 0x1 sentinel value. */
+#define BR_STUB_RESET_LOCAL_NULL(frame)                                        \
+    do {                                                                       \
+        if ((frame)->local == (void *)0x1)                                     \
+            (frame)->local = NULL;                                             \
+    } while (0)
+
+/* Function type for a callback taking (frame, cookie, this, op_ret,
+ * op_errno, xdata) - the same parameter list as br_stub_noop(). */
+typedef int(br_stub_version_cbk)(call_frame_t *, void *, xlator_t *, int32_t,
+                                 int32_t, dict_t *);
+
+/* Per-inode state kept by the stub in the inode context. */
+typedef struct br_stub_inode_ctx {
+    int need_writeback;           /* does the inode need
+                                        a writeback to disk? */
+                                  /* bitmask of I_DIRTY / I_MODIFIED */
+    unsigned long currentversion; /* ongoing version */
+
+    int info_sign; /* sign state; see __br_stub_inode_sign_state() */
+    struct list_head fd_list; /* list of open fds or fds participating in
+                                 write operations */
+    gf_boolean_t bad_object; /* set by __br_stub_mark_object_bad() */
+} br_stub_inode_ctx_t;
+
+/* Per-fd state kept by the stub in the fd context. */
+typedef struct br_stub_fd {
+    fd_t *fd;              /* the fd this context belongs to */
+    struct list_head list; /* link in br_stub_inode_ctx_t.fd_list */
+    struct bad_object_dir {
+        DIR *dir; /* closed with sys_closedir() in br_stub_releasedir() */
+        off_t dir_eof; /* presumably the end-of-directory offset - confirm */
+    } bad_object;
+} br_stub_fd_t;
+
+/* Bits stored in br_stub_inode_ctx_t.need_writeback. */
+#define I_DIRTY (1 << 0) /* inode needs writeback */
+#define I_MODIFIED (1 << 1) /* toggled by the *_inode_modified helpers */
+#define WRITEBACK_DURABLE 1 /* writeback is durable */
+
+/**
+ * Frame-local state carried from a fop to its callback.
+ *
+ * This could just have been a plain struct without unions and all,
+ * but we may need additional things in the future.
+ */
+typedef struct br_stub_local {
+    call_stub_t *fopstub; /* stub for original fop */
+
+    int versioningtype; /* not much used atm */
+
+    union {
+        struct br_stub_ctx {
+            fd_t *fd;
+            uuid_t gfid;
+            inode_t *inode; /* read back in br_stub_unlink_cbk() */
+            unsigned long version;
+        } context;
+    } u;
+} br_stub_local_t;
+
+/* Versioning-type flags; BR_STUB_NO_VERSIONING is what br_stub_unlink()
+ * passes to br_stub_fill_local(). */
+#define BR_STUB_NO_VERSIONING (1 << 0)
+#define BR_STUB_INCREMENTAL_VERSIONING (1 << 1)
+
+/* Translator-wide private state (this->private). */
+typedef struct br_stub_private {
+    gf_boolean_t do_versioning; /* checked by the BR_STUB_VER_* macros */
+
+    uint32_t boot[2];
+    char export[PATH_MAX]; /* presumably the "export" option value - confirm */
+
+    pthread_mutex_t lock;
+    pthread_cond_t cond;
+
+    struct list_head squeue; /* ordered signing queue */
+    pthread_t signth;
+    /* thread, lock, condition and queue grouped for bad-object work */
+    struct bad_objects_container {
+        pthread_t thread;
+        pthread_mutex_t bad_lock;
+        pthread_cond_t bad_cond;
+        struct list_head bad_queue;
+    } container;
+    struct mem_pool *local_pool;
+
+    char stub_basepath[BR_PATH_MAX_EXTRA];
+
+    uuid_t bad_object_dir_gfid;
+} br_stub_private_t;
+
+/*
+ * fd-context helpers; declared here, defined elsewhere. The "__" variants
+ * presumably expect the caller to hold the relevant lock, mirroring the
+ * __br_stub_get_inode_ctx()/br_stub_get_inode_ctx() pair in this header
+ * - confirm against the definitions.
+ */
+br_stub_fd_t *
+br_stub_fd_new(void);
+
+int
+__br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd);
+
+br_stub_fd_t *
+__br_stub_fd_ctx_get(xlator_t *this, fd_t *fd);
+
+br_stub_fd_t *
+br_stub_fd_ctx_get(xlator_t *this, fd_t *fd);
+
+int32_t
+br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd);
+
+/* Return the bad_object flag stored in @ctx. */
+static inline gf_boolean_t
+__br_stub_is_bad_object(br_stub_inode_ctx_t *ctx)
+{
+    return ctx->bad_object;
+}
+
+/* Set the bad_object flag in @ctx. */
+static inline void
+__br_stub_mark_object_bad(br_stub_inode_ctx_t *ctx)
+{
+    ctx->bad_object = _gf_true;
+}
+
+/* inode writeback helpers */
+/* Set the I_DIRTY bit in ctx->need_writeback. */
+static inline void
+__br_stub_mark_inode_dirty(br_stub_inode_ctx_t *ctx)
+{
+    ctx->need_writeback |= I_DIRTY;
+}
+
+/* Clear the I_DIRTY bit in ctx->need_writeback. */
+static inline void
+__br_stub_mark_inode_synced(br_stub_inode_ctx_t *ctx)
+{
+    ctx->need_writeback &= ~I_DIRTY;
+}
+
+/* Return non-zero (the I_DIRTY bit itself) when the inode is dirty. */
+static inline int
+__br_stub_is_inode_dirty(br_stub_inode_ctx_t *ctx)
+{
+    return (ctx->need_writeback & I_DIRTY);
+}
+
+/* inode modification markers */
+/* Set the I_MODIFIED bit in ctx->need_writeback. */
+static inline void
+__br_stub_set_inode_modified(br_stub_inode_ctx_t *ctx)
+{
+    ctx->need_writeback |= I_MODIFIED;
+}
+
+/* Clear the I_MODIFIED bit in ctx->need_writeback. */
+static inline void
+__br_stub_unset_inode_modified(br_stub_inode_ctx_t *ctx)
+{
+    ctx->need_writeback &= ~I_MODIFIED;
+}
+
+/* Return non-zero (the I_MODIFIED bit itself) when the inode is modified. */
+static inline int
+__br_stub_is_inode_modified(br_stub_inode_ctx_t *ctx)
+{
+    return (ctx->need_writeback & I_MODIFIED);
+}
+
+/**
+ * Allocate a br_stub_fd_t for @fd and attach it as this translator's fd
+ * context.
+ *
+ * @fd_ctx: on success receives the new context; left untouched on failure.
+ *
+ * Returns 0 on success, -1 when the allocation fails, or the non-zero
+ * result of br_stub_fd_ctx_set(). In the last case the new context is
+ * freed here: the caller never receives the pointer, so it would
+ * otherwise leak.
+ */
+static inline int
+br_stub_require_release_call(xlator_t *this, fd_t *fd, br_stub_fd_t **fd_ctx)
+{
+    int32_t ret = 0;
+    br_stub_fd_t *br_stub_fd = NULL;
+
+    br_stub_fd = br_stub_fd_new();
+    if (!br_stub_fd)
+        return -1;
+
+    br_stub_fd->fd = fd;
+    INIT_LIST_HEAD(&br_stub_fd->list);
+
+    ret = br_stub_fd_ctx_set(this, fd, br_stub_fd);
+    if (ret) {
+        gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED,
+                NULL);
+        GF_FREE(br_stub_fd);
+        return ret;
+    }
+
+    *fd_ctx = br_stub_fd;
+    return 0;
+}
+
+/* get/set inode context helpers */
+
+/* Thin wrapper around __inode_ctx_get(); takes no lock itself (the
+ * locking variant is br_stub_get_inode_ctx()). */
+static inline int
+__br_stub_get_inode_ctx(xlator_t *this, inode_t *inode, uint64_t *ctx)
+{
+    return __inode_ctx_get(inode, this, ctx);
+}
+
+/* Fetch the stub's inode context value into @ctx while holding
+ * inode->lock; returns whatever __br_stub_get_inode_ctx() returns. */
+static inline int
+br_stub_get_inode_ctx(xlator_t *this, inode_t *inode, uint64_t *ctx)
+{
+    int rc = -1;
+
+    LOCK(&inode->lock);
+    rc = __br_stub_get_inode_ctx(this, inode, ctx);
+    UNLOCK(&inode->lock);
+
+    return rc;
+}
+
+/* Store @ctx (as an integer-encoded pointer) in the inode context of
+ * @inode; returns the result of inode_ctx_set(). */
+static inline int
+br_stub_set_inode_ctx(xlator_t *this, inode_t *inode, br_stub_inode_ctx_t *ctx)
+{
+    uint64_t ctx_addr = (uint64_t)(uintptr_t)ctx;
+    return inode_ctx_set(inode, this, &ctx_addr);
+}
+
+/* version get/set helpers */
+
+/* Return the version following the current one (currentversion + 1);
+ * @ctx itself is not modified. */
+static inline unsigned long
+__br_stub_writeback_version(br_stub_inode_ctx_t *ctx)
+{
+    return (ctx->currentversion + 1);
+}
+
+/* Advance ctx->currentversion to @version. The version only ever moves
+ * forward: an equal or older @version is rejected with a warning. */
+static inline void
+__br_stub_set_ongoing_version(br_stub_inode_ctx_t *ctx, unsigned long version)
+{
+    if (version > ctx->currentversion) {
+        ctx->currentversion = version;
+        return;
+    }
+
+    gf_smsg("bit-rot-stub", GF_LOG_WARNING, 0, BRS_MSG_CHANGE_VERSION_FAILED,
+            "current version=%lu", ctx->currentversion, "new version=%lu",
+            version, NULL);
+}
+
+/**
+ * Decide whether a release notification may be sent for this inode.
+ *
+ * Returns 1 only when all of the following hold: the inode carries the
+ * I_MODIFIED mark, its fd list is empty, and its sign state is not
+ * BR_SIGN_REOPEN_WAIT. In that case the current version is stored in
+ * *@version (when non-NULL) after conversion with htonl(). Returns 0
+ * otherwise and leaves *@version alone.
+ *
+ * A modified inode is expected to have its I_DIRTY bit clear by this
+ * point; that expectation is asserted. @inode is not used.
+ */
+static inline int
+__br_stub_can_trigger_release(inode_t *inode, br_stub_inode_ctx_t *ctx,
+                              unsigned long *version)
+{
+    if (!__br_stub_is_inode_modified(ctx))
+        return 0;
+    if (!list_empty(&ctx->fd_list))
+        return 0;
+    if (ctx->info_sign == BR_SIGN_REOPEN_WAIT)
+        return 0;
+
+    GF_ASSERT(__br_stub_is_inode_dirty(ctx) == 0);
+
+    if (version)
+        *version = htonl(ctx->currentversion);
+
+    return 1;
+}
+
+/* Copy the inode's current version into *@version under inode->lock.
+ * Returns the result of __inode_ctx_get(); when that is negative,
+ * *@version is not written. */
+static inline int32_t
+br_stub_get_ongoing_version(xlator_t *this, inode_t *inode,
+                            unsigned long *version)
+{
+    int32_t rc = 0;
+    uint64_t raw_ctx = 0;
+    br_stub_inode_ctx_t *ictx = NULL;
+
+    LOCK(&inode->lock);
+    {
+        rc = __inode_ctx_get(inode, this, &raw_ctx);
+        if (rc >= 0) {
+            ictx = (br_stub_inode_ctx_t *)(long)raw_ctx;
+            *version = ictx->currentversion;
+        }
+    }
+    UNLOCK(&inode->lock);
+
+    return rc;
+}
+
+/**
+ * Return the inode's stub context and, when @version is non-NULL, copy
+ * the current version into it. Returns NULL if no context can be fetched.
+ *
+ * inode->lock should be held before invoking this, as the context
+ * *needs* to stay valid in the caller.
+ */
+static inline br_stub_inode_ctx_t *
+__br_stub_get_ongoing_version_ctx(xlator_t *this, inode_t *inode,
+                                  unsigned long *version)
+{
+    uint64_t raw_ctx = 0;
+    br_stub_inode_ctx_t *ictx = NULL;
+
+    if (__inode_ctx_get(inode, this, &raw_ctx) < 0)
+        return NULL;
+
+    ictx = (br_stub_inode_ctx_t *)(long)raw_ctx;
+    if (version)
+        *version = ictx->currentversion;
+
+    return ictx;
+}
+
+/*
+ * Filter for xattr fetch: returns 1 when @name begins with the
+ * current-version key or the signing-version key (a prefix comparison
+ * over the key's length), 0 otherwise, including for a NULL @name.
+ */
+static inline int
+br_stub_is_internal_xattr(const char *name)
+{
+    if (!name)
+        return 0;
+
+    if (strncmp(name, BITROT_CURRENT_VERSION_KEY,
+                SLEN(BITROT_CURRENT_VERSION_KEY)) == 0)
+        return 1;
+
+    if (strncmp(name, BITROT_SIGNING_VERSION_KEY,
+                SLEN(BITROT_SIGNING_VERSION_KEY)) == 0)
+        return 1;
+
+    return 0;
+}
+
+/*
+ * Strip the stub's versioning keys (current version, signing version,
+ * signing xattr size) from @xattr. A NULL dict is ignored.
+ *
+ * When a file is corrupted, the bad-object key should stay in the dict
+ * while version/signature information should not. The caller therefore
+ * decides through @remove_bad_marker whether the bad-object key is
+ * removed as well.
+ */
+static inline void
+br_stub_remove_vxattrs(dict_t *xattr, gf_boolean_t remove_bad_marker)
+{
+    if (!xattr)
+        return;
+
+    if (remove_bad_marker)
+        dict_del(xattr, BITROT_OBJECT_BAD_KEY);
+
+    dict_del(xattr, BITROT_CURRENT_VERSION_KEY);
+    dict_del(xattr, BITROT_SIGNING_VERSION_KEY);
+    dict_del(xattr, BITROT_SIGNING_XATTR_SIZE_KEY);
+}
+
+/**
+ * Report whether the inode context marks the object as bad.
+ *
+ * Return values:
+ *    0 => as per the inode context the object is not bad
+ *   -1 => the inode context itself could not be fetched (logged)
+ *   -2 => as per the inode context the object is bad
+ *
+ * Both negative values mean the calling fop is failed and the error is
+ * returned upwards. If more errors need to be distinguished in future,
+ * they can be turned into an enum.
+ */
+static inline int
+br_stub_is_bad_object(xlator_t *this, inode_t *inode)
+{
+    uint64_t raw_ctx = 0;
+    br_stub_inode_ctx_t *ictx = NULL;
+    int verdict = 0;
+
+    if (br_stub_get_inode_ctx(this, inode, &raw_ctx)) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+                "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
+        return -1;
+    }
+
+    ictx = (br_stub_inode_ctx_t *)(long)raw_ctx;
+
+    LOCK(&inode->lock);
+    {
+        if (__br_stub_is_bad_object(ictx))
+            verdict = -2;
+    }
+    UNLOCK(&inode->lock);
+
+    return verdict;
+}
+
+/* Flag the object as bad in its inode context, under inode->lock.
+ * Returns 0 on success, or the non-zero result of
+ * br_stub_get_inode_ctx() (after logging) when no context is available. */
+static inline int32_t
+br_stub_mark_object_bad(xlator_t *this, inode_t *inode)
+{
+    uint64_t raw_ctx = 0;
+    br_stub_inode_ctx_t *ictx = NULL;
+    int32_t rc = br_stub_get_inode_ctx(this, inode, &raw_ctx);
+
+    if (rc) {
+        gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+                "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
+        return rc;
+    }
+
+    ictx = (br_stub_inode_ctx_t *)(long)raw_ctx;
+
+    LOCK(&inode->lock);
+    __br_stub_mark_object_bad(ictx);
+    UNLOCK(&inode->lock);
+
+    return rc;
+}
+
+/**
+ * Add GLUSTERFS_BAD_INODE = 1 to @xdata when the inode is known to be a
+ * bad object (br_stub_is_bad_object() == -2). Any other outcome of that
+ * check leaves @xdata alone and yields 0.
+ *
+ * dict_set may fail; its result is handed back so the caller can decide
+ * what to do about it.
+ */
+static inline int32_t
+br_stub_mark_xdata_bad_object(xlator_t *this, inode_t *inode, dict_t *xdata)
+{
+    if (br_stub_is_bad_object(this, inode) != -2)
+        return 0;
+
+    return dict_set_int32(xdata, GLUSTERFS_BAD_INODE, 1);
+}
+
+/*
+ * Functions declared here and defined outside this header. Each one is
+ * declared exactly once (br_stub_dir_create used to be listed twice).
+ */
+int32_t
+br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx);
+
+br_sign_state_t
+__br_stub_inode_sign_state(br_stub_inode_ctx_t *ctx, glusterfs_fop_t fop,
+                           fd_t *fd);
+
+int
+br_stub_dir_create(xlator_t *this, br_stub_private_t *priv);
+
+int
+br_stub_add(xlator_t *this, uuid_t gfid);
+
+int32_t
+br_stub_create_stub_gfid(xlator_t *this, char *stub_gfid_path, uuid_t gfid);
+
+call_stub_t *
+__br_stub_dequeue(struct list_head *callstubs);
+
+void
+__br_stub_enqueue(struct list_head *callstubs, call_stub_t *stub);
+
+void
+br_stub_worker_enqueue(xlator_t *this, call_stub_t *stub);
+
+void *
+br_stub_worker(void *data);
+
+int32_t
+br_stub_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       dict_t *xattr_req);
+
+int32_t
+br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                        size_t size, off_t off, dict_t *xdata);
+
+int
+br_stub_del(xlator_t *this, uuid_t gfid);
+
+int
+br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
+                         dict_t **dict);
+
+void
+br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry,
+                         dict_t *dict);
+
+int
+br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode,
+                         uuid_t gfid, char **path);
+
+#endif /* __BIT_ROT_STUB_H__ */