summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libglusterfs/src/gf-dirent.c39
-rw-r--r--libglusterfs/src/gf-dirent.h2
-rw-r--r--libglusterfs/src/list.h14
-rw-r--r--libglusterfs/src/syncop.c17
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c565
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.h9
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c146
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h40
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h3
9 files changed, 760 insertions, 75 deletions
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
index b5f395afc36..99c0eb6441d 100644
--- a/libglusterfs/src/gf-dirent.c
+++ b/libglusterfs/src/gf-dirent.c
@@ -171,6 +171,20 @@ gf_dirent_for_name (const char *name)
return gf_dirent;
}
+void
+gf_dirent_entry_free (gf_dirent_t *entry)
+{
+ if (!entry)
+ return;
+
+ if (entry->dict)
+ dict_unref (entry->dict);
+ if (entry->inode)
+ inode_unref (entry->inode);
+
+ list_del (&entry->list);
+ GF_FREE (entry);
+}
void
gf_dirent_free (gf_dirent_t *entries)
@@ -185,16 +199,27 @@ gf_dirent_free (gf_dirent_t *entries)
return;
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if (entry->dict)
- dict_unref (entry->dict);
- if (entry->inode)
- inode_unref (entry->inode);
-
- list_del (&entry->list);
- GF_FREE (entry);
+ gf_dirent_entry_free (entry);
}
}
+gf_dirent_t *
+entry_copy (gf_dirent_t *source)
+{
+ gf_dirent_t *sink = NULL;
+
+ sink = gf_dirent_for_name (source->d_name);
+
+ sink->d_off = source->d_off;
+ sink->d_ino = source->d_ino;
+ sink->d_type = source->d_type;
+ sink->d_stat = source->d_stat;
+
+ if (source->inode)
+ sink->inode = inode_ref (source->inode);
+ return sink;
+}
+
void
gf_link_inode_from_dirent (xlator_t *this, inode_t *parent, gf_dirent_t *entry)
{
diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h
index 07c605f82b0..faeaf411941 100644
--- a/libglusterfs/src/gf-dirent.h
+++ b/libglusterfs/src/gf-dirent.h
@@ -61,6 +61,8 @@ struct _gf_dirent_t {
#define DT_ISDIR(mode) (mode == DT_DIR)
gf_dirent_t *gf_dirent_for_name (const char *name);
+gf_dirent_t *entry_copy (gf_dirent_t *source);
+void gf_dirent_entry_free (gf_dirent_t *entry);
void gf_dirent_free (gf_dirent_t *entries);
int gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent,
gf_dirent_t *entries);
diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h
index 875594136a2..b8f9a6eebd8 100644
--- a/libglusterfs/src/list.h
+++ b/libglusterfs/src/list.h
@@ -214,6 +214,20 @@ static inline void list_replace_init(struct list_head *old,
INIT_LIST_HEAD(old);
}
+/**
+ * list_rotate_left - rotate the list to the left
+ * @head: the head of the list
+ */
+static inline void list_rotate_left (struct list_head *head)
+{
+ struct list_head *first;
+
+ if (!list_empty (head)) {
+ first = head->next;
+ list_move_tail (first, head);
+ }
+}
+
#define list_entry(ptr, type, member) \
((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index cb08b03d44b..81eae5a9105 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -1217,23 +1217,6 @@ syncop_lookup (xlator_t *subvol, loc_t *loc, struct iatt *iatt,
return args.op_ret;
}
-static gf_dirent_t *
-entry_copy (gf_dirent_t *source)
-{
- gf_dirent_t *sink = NULL;
-
- sink = gf_dirent_for_name (source->d_name);
-
- sink->d_off = source->d_off;
- sink->d_ino = source->d_ino;
- sink->d_type = source->d_type;
- sink->d_stat = source->d_stat;
-
- if (source->inode)
- sink->inode = inode_ref (source->inode);
- return sink;
-}
-
int32_t
syncop_readdirp_cbk (call_frame_t *frame,
void *cookie,
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index e0581a40df0..8a80052f250 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -13,17 +13,35 @@
#include "config.h"
#endif
+#include <math.h>
#include <ctype.h>
#include <sys/uio.h>
#include "glusterfs.h"
-#include "xlator.h"
#include "logging.h"
+#include "common-utils.h"
-#include "bit-rot.h"
#include "bit-rot-scrub.h"
#include <pthread.h>
+struct br_scrubbers {
+ pthread_t scrubthread;
+
+ struct list_head list;
+};
+
+struct br_fsscan_entry {
+ void *data;
+
+ loc_t parent;
+
+ gf_dirent_t *entry;
+
+ struct br_scanfs *fsscan; /* backpointer to subvolume scanner */
+
+ struct list_head list;
+};
+
/**
* fetch signature extended attribute from an object's fd.
* NOTE: On success @xattr is not unref'd as @sign points
@@ -246,8 +264,7 @@ bitd_compare_ckum (xlator_t *this,
* signs with SHA256).
*/
int
-bitd_start_scrub (xlator_t *subvol,
- gf_dirent_t *entry, loc_t *parent, void *data)
+br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)
{
int32_t ret = -1;
fd_t *fd = NULL;
@@ -256,17 +273,22 @@ bitd_start_scrub (xlator_t *subvol,
struct iatt parent_buf = {0, };
pid_t pid = 0;
br_child_t *child = NULL;
- xlator_t *this = NULL;
unsigned char *md = NULL;
inode_t *linked_inode = NULL;
br_isignature_out_t *sign = NULL;
unsigned long signedversion = 0;
+ gf_dirent_t *entry = NULL;
+ loc_t *parent = NULL;
- GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out);
- GF_VALIDATE_OR_GOTO ("bit-rot", data, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out);
- child = data;
- this = child->this;
+ entry = fsentry->entry;
+ parent = &fsentry->parent;
+ child = fsentry->data;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", entry, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", parent, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", child, out);
pid = GF_CLIENT_PID_SCRUB;
@@ -366,29 +388,532 @@ bitd_start_scrub (xlator_t *subvol,
return ret;
}
-#define BR_SCRUB_THROTTLE_COUNT 30
-#define BR_SCRUB_THROTTLE_ZZZ 60
+static void
+wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan)
+{
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+
+ pthread_mutex_lock (&fsscan->waitlock);
+ {
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_replace_init (&fsscan->queued, &fsscan->ready);
+
+ /* wake up scrubbers */
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ while (fsscan->entries != 0)
+ pthread_cond_wait
+ (&fsscan->waitcond, &fsscan->waitlock);
+ }
+ pthread_mutex_unlock (&fsscan->waitlock);
+}
+
+static inline void
+_br_fsscan_inc_entry_count (struct br_scanfs *fsscan)
+{
+ fsscan->entries++;
+}
+
+static inline void
+_br_fsscan_dec_entry_count (struct br_scanfs *fsscan)
+{
+ if (--fsscan->entries == 0) {
+ pthread_mutex_lock (&fsscan->waitlock);
+ {
+ pthread_cond_signal (&fsscan->waitcond);
+ }
+ pthread_mutex_unlock (&fsscan->waitlock);
+ }
+}
+
+static void
+_br_fsscan_collect_entry (struct br_scanfs *fsscan,
+ struct br_fsscan_entry *fsentry)
+{
+ list_add_tail (&fsentry->list, &fsscan->queued);
+ _br_fsscan_inc_entry_count (fsscan);
+}
+
+#define NR_ENTRIES (1<<7) /* ..bulk scrubbing */
+
+int
+br_fsscanner_handle_entry (xlator_t *subvol,
+ gf_dirent_t *entry, loc_t *parent, void *data)
+{
+ int32_t ret = -1;
+ int scrub = 0;
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return);
+ GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return);
+
+ child = data;
+ this = child->this;
+ fsscan = &child->fsscan;
+
+ fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t);
+ if (!fsentry)
+ goto error_return;
+
+ {
+ fsentry->data = data;
+ fsentry->fsscan = &child->fsscan;
+
+ /* copy parent loc */
+ ret = loc_copy (&fsentry->parent, parent);
+ if (ret)
+ goto dealloc;
+
+ /* copy child entry */
+ fsentry->entry = entry_copy (entry);
+ if (!fsentry->entry)
+ goto locwipe;
+
+ INIT_LIST_HEAD (&fsentry->list);
+ }
+
+ LOCK (&fsscan->entrylock);
+ {
+ _br_fsscan_collect_entry (fsscan, fsentry);
+
+ /**
+ * need not be a equality check as entries may be pushed
+ * back onto the scanned queue when thread(s) are cleaned.
+ */
+ if (fsscan->entries >= NR_ENTRIES)
+ scrub = 1;
+ }
+ UNLOCK (&fsscan->entrylock);
+
+ if (scrub)
+ wait_for_scrubbing (this, fsscan);
+
+ return 0;
+
+ locwipe:
+ loc_wipe (&fsentry->parent);
+ dealloc:
+ GF_FREE (fsentry);
+ error_return:
+ return -1;
+}
+
void *
-br_scrubber (void *arg)
+br_fsscanner (void *arg)
{
- loc_t loc = {0,};
- xlator_t *this = NULL;
- br_child_t *child = NULL;
+ loc_t loc = {0,};
+ xlator_t *this = NULL;
+ br_child_t *child = NULL;
+ struct br_scanfs *fsscan = NULL;
child = arg;
this = child->this;
+ fsscan = &child->fsscan;
THIS = this;
loc.inode = child->table->root;
while (1) {
- (void) syncop_ftw_throttle
- (child->xl, &loc,
- GF_CLIENT_PID_SCRUB, child, bitd_start_scrub,
- BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ);
+ (void) syncop_ftw (child->xl, &loc,
+ GF_CLIENT_PID_SCRUB,
+ child, br_fsscanner_handle_entry);
+ if (!list_empty (&fsscan->queued))
+ wait_for_scrubbing (this, fsscan);
+ }
+
+ return NULL;
+}
+
+#define BR_SCRUB_THREAD_SCALE_LAZY 0
+#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4
+#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0
+
+#ifndef M_E
+#define M_E 2.718
+#endif
+
+/**
+ * This is just a simple exponential scale to a fixed value selected
+ * per throttle config. We probably need to be more smart and select
+ * the scale based on the number of processor cores too.
+ */
+static unsigned int
+br_scrubber_calc_scale (xlator_t *this,
+ br_private_t *priv, scrub_throttle_t throttle)
+{
+ unsigned int scale = 0;
+
+ switch (throttle) {
+ case BR_SCRUB_THROTTLE_VOID:
+ scale = 0;
+ break;
+ case BR_SCRUB_THROTTLE_LAZY:
+ scale = priv->child_count *
+ pow (M_E, BR_SCRUB_THREAD_SCALE_LAZY);
+ break;
+ case BR_SCRUB_THROTTLE_NORMAL:
+ scale = priv->child_count *
+ pow (M_E, BR_SCRUB_THREAD_SCALE_NORMAL);
+ break;
+ case BR_SCRUB_THROTTLE_AGGRESSIVE:
+ scale = priv->child_count *
+ pow (M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unknown throttle %d", throttle);
+ }
+
+ return scale;
+
+}
+
+static void
+br_scrubber_cleanup_handler (void *arg)
+{
+ struct br_scrubber *fsscrub = arg;
+ pthread_mutex_unlock (&fsscrub->mutex);
+}
+
+static inline br_child_t *
+_br_scrubber_get_next_child (struct br_scrubber *fsscrub)
+{
+ br_child_t *child = NULL;
+
+ child = list_first_entry (&fsscrub->scrublist, br_child_t, list);
+ list_rotate_left (&fsscrub->scrublist);
+
+ return child;
+}
+
+static inline void
+_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry)
+{
+ struct br_scanfs *fsscan = &child->fsscan;
+
+ if (list_empty (&fsscan->ready))
+ return;
+ *fsentry = list_first_entry
+ (&fsscan->ready, struct br_fsscan_entry, list);
+ list_del_init (&(*fsentry)->list);
+}
+
+static inline void
+_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ br_child_t *child = NULL;
+ br_child_t *firstchild = NULL;
+
+ while (1) {
+ if (list_empty (&fsscrub->scrublist))
+ pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+
+ firstchild = NULL;
+ for (child = _br_scrubber_get_next_child (fsscrub);
+ child != firstchild;
+ child = _br_scrubber_get_next_child (fsscrub)) {
+
+ if (!firstchild)
+ firstchild = child;
+
+ _br_scrubber_get_entry (child, fsentry);
+ if (*fsentry)
+ break;
+ }
+
+ if (*fsentry)
+ break;
+
+ /* nothing to work on.. wait till available */
+ pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+ }
+}
+
+static void
+br_scrubber_pick_entry (struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub);
+
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ *fsentry = NULL;
+ _br_scrubber_find_scrubbable_entry (fsscrub, fsentry);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ pthread_cleanup_pop (0);
+}
- sleep (BR_SCRUB_THROTTLE_ZZZ);
+struct br_scrub_entry {
+ gf_boolean_t scrubbed;
+ struct br_fsscan_entry *fsentry;
+};
+
+/**
+ * We need to be a bit careful here. These thread(s) are prone to cancellations
+ * when threads are scaled down (depending on the thottling value configured)
+ * and pausing scrub. A thread can get cancelled while it's waiting for entries
+ * in the ->pending queue or when an object is undergoing scrubbing.
+ */
+static void
+br_scrubber_entry_handle (void *arg)
+{
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrub_entry *sentry = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ sentry = arg;
+
+ fsentry = sentry->fsentry;
+ fsscan = fsentry->fsscan;
+
+ LOCK (&fsscan->entrylock);
+ {
+ if (sentry->scrubbed) {
+ _br_fsscan_dec_entry_count (fsscan);
+
+ /* cleanup ->entry */
+ fsentry->data = NULL;
+ fsentry->fsscan = NULL;
+ loc_wipe (&fsentry->parent);
+ gf_dirent_entry_free (fsentry->entry);
+
+ GF_FREE (sentry->fsentry);
+ } else {
+ /* (re)queue the entry again for scrub */
+ _br_fsscan_collect_entry (fsscan, sentry->fsentry);
+ }
+ }
+ UNLOCK (&fsscan->entrylock);
+}
+
+static void
+br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+ struct br_scrub_entry sentry = {0, };
+
+ sentry.scrubbed = 0;
+ sentry.fsentry = fsentry;
+
+ pthread_cleanup_push (br_scrubber_entry_handle, &sentry);
+ {
+ (void) br_scrubber_scrub_begin (this, fsentry);
+ sentry.scrubbed = 1;
+ }
+ pthread_cleanup_pop (1);
+}
+
+void *br_scrubber_proc (void *arg)
+{
+ xlator_t *this = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ fsscrub = arg;
+ THIS = this = fsscrub->this;
+
+ while (1) {
+ br_scrubber_pick_entry (fsscrub, &fsentry);
+ br_scrubber_scrub_entry (this, fsentry);
+ sleep (1);
}
return NULL;
}
+
+static int32_t
+br_scrubber_scale_up (xlator_t *this,
+ struct br_scrubber *fsscrub,
+ unsigned int v1, unsigned int v2)
+{
+ int i = 0;
+ int32_t ret = -1;
+ int diff = 0;
+ struct br_scrubbers *scrub = NULL;
+
+ diff = (int)(v2 - v1);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "Scaling up scrubbers [%d => %d]", v1, v2);
+
+ for (i = 0; i < diff; i++) {
+ scrub = GF_CALLOC (diff, sizeof (*scrub),
+ gf_br_mt_br_scrubber_t);
+ if (!scrub)
+ break;
+
+ INIT_LIST_HEAD (&scrub->list);
+ ret = gf_thread_create (&scrub->scrubthread,
+ NULL, br_scrubber_proc, fsscrub);
+ if (ret)
+ break;
+
+ fsscrub->nr_scrubbers++;
+ list_add_tail (&scrub->list, &fsscrub->scrubbers);
+ }
+
+ if ((i != diff) && !scrub)
+ goto error_return;
+
+ if (i != diff) /* degraded scaling.. */
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not fully scale up to %d scrubber(s). Spawned "
+ "%d/%d [total scrubber(s): %d]", v2, i, diff, (v1 + i));
+
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+static int32_t
+br_scrubber_scale_down (xlator_t *this,
+ struct br_scrubber *fsscrub,
+ unsigned int v1, unsigned int v2)
+{
+ int i = 0;
+ int diff = 0;
+ int32_t ret = -1;
+ struct br_scrubbers *scrub = NULL;
+
+ diff = (int)(v1 - v2);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "Scaling down scrubbers [%d => %d]", v1, v2);
+
+ for (i = 0 ; i < diff; i++) {
+ scrub = list_first_entry
+ (&fsscrub->scrubbers, struct br_scrubbers, list);
+
+ list_del_init (&scrub->list);
+ ret = gf_thread_cleanup_xint (scrub->scrubthread);
+ if (ret)
+ break;
+ GF_FREE (scrub);
+
+ fsscrub->nr_scrubbers--;
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not fully scale down to %d scrubber(s). "
+ "Terminated %d/%d [total scrubber(s): %d]",
+ v1, i, diff, (v2 - i));
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int32_t
+br_scrubber_configure (xlator_t *this, br_private_t *priv,
+ struct br_scrubber *fsscrub, scrub_throttle_t nthrottle)
+{
+ int32_t ret = 0;
+ unsigned int v1 = 0;
+ unsigned int v2 = 0;
+
+ v1 = fsscrub->nr_scrubbers;
+ v2 = br_scrubber_calc_scale (this, priv, nthrottle);
+
+ if (v1 == v2)
+ return 0;
+
+ if (v1 > v2)
+ ret = br_scrubber_scale_down (this, fsscrub, v1, v2);
+ else
+ ret = br_scrubber_scale_up (this, fsscrub, v1, v2);
+
+ return ret;
+}
+
+/* TODO: token buket spec */
+static int32_t
+br_scrubber_handle_throttle (xlator_t *this,
+ br_private_t *priv, dict_t *options)
+{
+ int32_t ret = 0;
+ char *tmp = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID;
+
+ fsscrub = &priv->fsscrub;
+
+ if (options)
+ GF_OPTION_RECONF ("scrub-throttle",
+ tmp, options, str, error_return);
+ else
+ GF_OPTION_INIT ("scrub-throttle", tmp, str, error_return);
+
+ if (strcasecmp (tmp, "lazy") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_LAZY;
+ else if (strcasecmp (tmp, "normal") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_NORMAL;
+ else if (strcasecmp (tmp, "aggressive") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE;
+ else
+ goto error_return;
+
+ /* on failure old throttling value is preserved */
+ ret = br_scrubber_configure (this, priv, fsscrub, nthrottle);
+ if (ret)
+ goto error_return;
+
+ fsscrub->throttle = nthrottle;
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+/* TODO: pause/resume, frequency */
+int32_t
+br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
+{
+ int32_t ret = 0;
+
+ ret = br_scrubber_handle_throttle (this, priv, options);
+ if (ret)
+ goto error_return;
+
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+int32_t
+br_scrubber_init (xlator_t *this, br_private_t *priv)
+{
+ struct br_scrubber *fsscrub = NULL;
+
+ priv->tbf = br_tbf_init (NULL, 0);
+ if (!priv->tbf)
+ return -1;
+
+ fsscrub = &priv->fsscrub;
+
+ fsscrub->this = this;
+ fsscrub->throttle = BR_SCRUB_THROTTLE_VOID;
+
+ pthread_mutex_init (&fsscrub->mutex, NULL);
+ pthread_cond_init (&fsscrub->cond, NULL);
+
+ fsscrub->nr_scrubbers = 0;
+ INIT_LIST_HEAD (&fsscrub->scrubbers);
+ INIT_LIST_HEAD (&fsscrub->scrublist);
+
+ return 0;
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
index daec9ad8196..4f00020d66a 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
@@ -11,6 +11,13 @@
#ifndef __BIT_ROT__SCRUB_H__
#define __BIT_ROT_SCRUB_H__
-void *br_scrubber (void *);
+#include "xlator.h"
+#include "bit-rot.h"
+
+void *br_fsscanner (void *);
+
+int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *);
+
+int32_t br_scrubber_init (xlator_t *, br_private_t *);
#endif /* __BIT_ROT_SCRUB_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 880b16edfa8..eea81aec53a 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -29,15 +29,6 @@
#define BR_HASH_CALC_READ_SIZE (128 * 1024)
-br_tbf_opspec_t opthrottle[] = {
- {
- .op = BR_TBF_OP_HASH,
- .rate = BR_HASH_CALC_READ_SIZE,
- .maxlimit = (2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE),
- },
- /** TODO: throttle getdents(), read() request(s) */
-};
-
static int
br_find_child_index (xlator_t *this, xlator_t *child)
{
@@ -1066,6 +1057,7 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
child->threadrunning = 1;
/* it's OK to continue, "old" objects would be signed when modified */
+ list_del_init (&child->list);
return 0;
dealloc:
@@ -1078,14 +1070,45 @@ static inline int32_t
br_enact_scrubber (xlator_t *this, br_child_t *child)
{
int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+
+ fsscan = &child->fsscan;
+ fsscrub = &priv->fsscrub;
+
+ LOCK_INIT (&fsscan->entrylock);
+ pthread_mutex_init (&fsscan->waitlock, NULL);
+ pthread_cond_init (&fsscan->waitcond, NULL);
- ret = gf_thread_create (&child->thread, NULL, br_scrubber, child);
+ fsscan->entries = 0;
+ INIT_LIST_HEAD (&fsscan->queued);
+ INIT_LIST_HEAD (&fsscan->ready);
+
+ ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);
if (ret != 0) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "failed to spawn scrubber");
+ gf_log (this->name, GF_LOG_ALERT, "failed to spawn bitrot "
+ "scrubber daemon [Brick: %s]", child->brick_path);
+ goto error_return;
}
- return ret;
+ /**
+ * Everything has been setup.. add this subvolume to scrubbers
+ * list.
+ */
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_move (&child->list, &fsscrub->scrublist);
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ return 0;
+
+ error_return:
+ return -1;
}
/**
@@ -1202,8 +1225,7 @@ br_handle_events (void *arg)
"failed to connect to the "
"child (subvolume: %s)",
child->xl->name);
- else
- list_del_init (&child->list);
+
}
}
@@ -1379,16 +1401,72 @@ br_init_signer (xlator_t *this, br_private_t *priv)
return -1;
}
-int32_t
-br_init_rate_limiter (br_private_t *priv)
+/**
+ * For signer, only rate limit CPU usage (during hash calculation) when
+ * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full
+ * throttle.
+ */
+static int32_t
+br_rate_limit_signer (xlator_t *this, int child_count, int numbricks)
{
- br_tbf_opspec_t *spec = opthrottle;
- priv->tbf = br_tbf_init (spec, sizeof (opthrottle)
- / sizeof (br_tbf_opspec_t));
+ br_private_t *priv = NULL;
+ br_tbf_opspec_t spec = {0,};
+
+ priv = this->private;
+
+ spec.op = BR_TBF_OP_HASH;
+ spec.rate = 0;
+ spec.maxlimit = 0;
+
+#ifdef BR_RATE_LIMIT_SIGNER
+
+ double contribution = 0;
+ contribution = ((double)1 - ((double)child_count / (double)numbricks));
+ if (contribution == 0)
+ contribution = 1;
+ spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
+ spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE;
+
+#endif
+
+ if (!spec.rate)
+ gf_log (this->name,
+ GF_LOG_INFO, "[Rate Limit Info] \"FULL THROTTLE\"");
+ else
+ gf_log (this->name, GF_LOG_INFO,
+ "[Rate Limit Info] \"tokens/sec (rate): %lu, "
+ "maxlimit: %lu\"", spec.rate, spec.maxlimit);
+ priv->tbf = br_tbf_init (&spec, 1);
return priv->tbf ? 0 : -1;
}
+static int32_t
+br_signer_init (xlator_t *this, br_private_t *priv)
+{
+ int32_t ret = 0;
+ int numbricks = 0;
+
+ GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, error_return);
+ GF_OPTION_INIT ("brick-count", numbricks, int32, error_return);
+
+ ret = br_rate_limit_signer (this, priv->child_count, numbricks);
+ if (ret)
+ goto error_return;
+
+ ret = br_init_signer (this, priv);
+ if (ret)
+ goto cleanup_tbf;
+
+ return 0;
+
+ cleanup_tbf:
+ /* cleanup TBF */
+ error_return:
+ return -1;
+
+}
+
int32_t
init (xlator_t *this)
{
@@ -1410,7 +1488,6 @@ init (xlator_t *this)
}
GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out);
- GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, out);
priv->child_count = xlator_subvolume_count (this);
priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children),
@@ -1443,18 +1520,19 @@ init (xlator_t *this)
INIT_LIST_HEAD (&priv->children[i].list);
INIT_LIST_HEAD (&priv->bricks);
- ret = br_init_rate_limiter (priv);
- if (ret)
- goto cleanup_mutex;
-
this->private = priv;
if (!priv->iamscrubber) {
- ret = br_init_signer (this, priv);
- if (ret)
- goto cleanup_tbf;
+ ret = br_signer_init (this, priv);
+ } else {
+ ret = br_scrubber_init (this, priv);
+ if (!ret)
+ ret = br_scrubber_handle_options (this, priv, NULL);
}
+ if (ret)
+ goto cleanup_mutex;
+
ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);
if (ret != 0) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1469,7 +1547,6 @@ init (xlator_t *this)
return 0;
}
- cleanup_tbf:
cleanup_mutex:
(void) pthread_cond_destroy (&priv->cond);
(void) pthread_mutex_destroy (&priv->lock);
@@ -1505,6 +1582,17 @@ fini (xlator_t *this)
return;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ br_private_t *priv = this->private;
+
+ if (!priv->iamscrubber)
+ return 0;
+
+ return br_scrubber_handle_options (this, priv, options);
+}
+
struct xlator_fops fops;
struct xlator_cbks cbks;
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
index 5b641801916..6f21a6985ba 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -38,8 +38,26 @@
*/
#define BR_WORKERS 4
+typedef enum scrub_throttle {
+ BR_SCRUB_THROTTLE_VOID = -1,
+ BR_SCRUB_THROTTLE_LAZY = 0,
+ BR_SCRUB_THROTTLE_NORMAL = 1,
+ BR_SCRUB_THROTTLE_AGGRESSIVE = 2,
+} scrub_throttle_t;
+
#define signature_size(hl) (sizeof (br_isignature_t) + hl + 1)
+struct br_scanfs {
+ gf_lock_t entrylock;
+
+ pthread_mutex_t waitlock;
+ pthread_cond_t waitcond;
+
+ unsigned int entries;
+ struct list_head queued;
+ struct list_head ready;
+};
+
struct br_child {
char child_up; /* Indicates whether this child is
up or not */
@@ -53,12 +71,14 @@ struct br_child {
xlator_t *this; /* Bit rot xlator */
pthread_t thread; /* initial crawler for unsigned
- object(s) */
+ object(s) or scrub crawler */
int threadrunning; /* active thread */
struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */
struct timeval tv;
+
+ struct br_scanfs fsscan; /* per subvolume FS scanner */
};
typedef struct br_child br_child_t;
@@ -72,6 +92,23 @@ struct br_obj_n_workers {
signing each object */
};
+struct br_scrubber {
+ xlator_t *this;
+
+ scrub_throttle_t throttle;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ unsigned int nr_scrubbers;
+ struct list_head scrubbers;
+
+ /*
+ * list of "rotatable" subvolume(s) undergoing scrubbing
+ */
+ struct list_head scrublist;
+};
+
typedef struct br_obj_n_workers br_obj_n_workers_t;
struct br_private {
@@ -100,6 +137,7 @@ struct br_private {
br_tbf_t *tbf; /* token bucket filter */
gf_boolean_t iamscrubber; /* function as a fs scrubber */
+ struct br_scrubber fsscrub; /* scrubbers for this subvolume */
};
typedef struct br_private br_private_t;
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
index bb4030493db..46271407219 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -25,6 +25,9 @@ enum br_mem_types {
gf_br_mt_br_tbf_t,
gf_br_mt_br_tbf_bucket_t,
gf_br_mt_br_tbf_throttle_t,
+ gf_br_mt_br_tbf_opspec_t,
+ gf_br_mt_br_scrubber_t,
+ gf_br_mt_br_fsscan_entry_t,
gf_br_stub_mt_end
};