summaryrefslogtreecommitdiffstats
path: root/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
diff options
context:
space:
mode:
authorVenky Shankar <vshankar@redhat.com>2015-04-27 21:34:34 +0530
committerVijay Bellur <vbellur@redhat.com>2015-05-07 22:51:41 -0700
commit9ba8963999bca431ec14a25961a163810cfe1e5b (patch)
tree783f5a29b7cfc63331a88a1ec5d222a7a4c2d57e /xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
parent4ccd70b323d4cb929b7b7a88e592fc98fab06198 (diff)
features/bitrot: Throttle filesystem scrubber
This patch introduces a multithreaded filesystem scrubber based on the throttling option configured for a particular volume. The implementation "logically" breaks scanning and scrubbing, with the number of scrubber threads auto-configured depending upon the throttle configuration. Scanning (crawling) is left single threaded (per brick) with entries scrubbed in bulk. On reaching this "bulk" watermark, the scanner waits until entries are scrubbed. Bricks for a particular volume have a set of thread(s) assigned for scrubbing, with entries for each brick scrubbed in a round robin fashion to avoid scrub "stalls" when a brick (out of N bricks) is under active scrubbing. This mechanism helps us implement "pause/resume" with ease: all one needs to do is clean up the scrubber threads and let the main scanner thread "wait" until scrubbing is resumed (where the scrubber thread(s) are spawned again), therefore continuing where we left off (unless we restart the daemons, where the crawl initiates from the root directory again, but I guess that's OK). [ NOTE: Throttling is optional for the signer daemon, without which it runs full throttle. However, passing "-DBR_RATE_LIMIT_SIGNER" predefined in CFLAGS enables CPU throttling (during checksum calculation) thereby avoiding high CPU usage. ] Subsequent patches would introduce CPU throttling during hash calculation for the scrubber. Change-Id: I5701dd6cd4dff27ca3144ac5e3798a2216b39d4f BUG: 1207020 Signed-off-by: Venky Shankar <vshankar@redhat.com> Reviewed-on: http://review.gluster.org/10511 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot-scrub.c')
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c565
1 files changed, 545 insertions, 20 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index e0581a40df0..8a80052f250 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -13,17 +13,35 @@
#include "config.h"
#endif
+#include <math.h>
#include <ctype.h>
#include <sys/uio.h>
#include "glusterfs.h"
-#include "xlator.h"
#include "logging.h"
+#include "common-utils.h"
-#include "bit-rot.h"
#include "bit-rot-scrub.h"
#include <pthread.h>
+/* Bookkeeping for one scrubber worker thread. */
+struct br_scrubbers {
+ pthread_t scrubthread; /* handle filled by gf_thread_create(), used to tear the thread down */
+
+ struct list_head list; /* linkage on the scrubber's ->scrubbers list */
+};
+
+/* One directory entry collected by the scanner and waiting to be scrubbed. */
+struct br_fsscan_entry {
+ void *data; /* opaque crawl argument; read back as a br_child_t when scrubbing */
+
+ loc_t parent; /* private copy (loc_copy) of the parent directory's loc */
+
+ gf_dirent_t *entry; /* private copy (entry_copy) of the directory entry */
+
+ struct br_scanfs *fsscan; /* backpointer to subvolume scanner */
+
+ struct list_head list; /* linkage on the scanner's queued/ready lists */
+};
+
/**
* fetch signature extended attribute from an object's fd.
* NOTE: On success @xattr is not unref'd as @sign points
@@ -246,8 +264,7 @@ bitd_compare_ckum (xlator_t *this,
* signs with SHA256).
*/
int
-bitd_start_scrub (xlator_t *subvol,
- gf_dirent_t *entry, loc_t *parent, void *data)
+br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)
{
int32_t ret = -1;
fd_t *fd = NULL;
@@ -256,17 +273,22 @@ bitd_start_scrub (xlator_t *subvol,
struct iatt parent_buf = {0, };
pid_t pid = 0;
br_child_t *child = NULL;
- xlator_t *this = NULL;
unsigned char *md = NULL;
inode_t *linked_inode = NULL;
br_isignature_out_t *sign = NULL;
unsigned long signedversion = 0;
+ gf_dirent_t *entry = NULL;
+ loc_t *parent = NULL;
- GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out);
- GF_VALIDATE_OR_GOTO ("bit-rot", data, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out);
- child = data;
- this = child->this;
+ entry = fsentry->entry;
+ parent = &fsentry->parent;
+ child = fsentry->data;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", entry, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", parent, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", child, out);
pid = GF_CLIENT_PID_SCRUB;
@@ -366,29 +388,532 @@ bitd_start_scrub (xlator_t *subvol,
return ret;
}
-#define BR_SCRUB_THROTTLE_COUNT 30
-#define BR_SCRUB_THROTTLE_ZZZ 60
+/**
+ * Hand the entries collected so far to the scrubber threads and block
+ * the calling (scanner) thread until fsscan->entries drops to zero.
+ *
+ * Lock order used here: fsscan->waitlock first, then fsscrub->mutex.
+ */
+static void
+wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan)
+{
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+
+ /* waitlock is held across the hand-off so a completion signal cannot slip in before we start waiting */
+ pthread_mutex_lock (&fsscan->waitlock);
+ {
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ /* presumably moves everything on ->queued over to ->ready and re-initialises ->queued -- confirm against list.h */
+ list_replace_init (&fsscan->queued, &fsscan->ready);
+
+ /* wake up scrubbers */
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ /* re-check the counter after every wakeup; waitcond is signalled when the count reaches zero */
+ while (fsscan->entries != 0)
+ pthread_cond_wait
+ (&fsscan->waitcond, &fsscan->waitlock);
+ }
+ pthread_mutex_unlock (&fsscan->waitlock);
+}
+
+/**
+ * Account for one more entry outstanding on this scanner. Callers in
+ * this file invoke it with fsscan->entrylock held.
+ */
+static inline void
+_br_fsscan_inc_entry_count (struct br_scanfs *fsscan)
+{
+        fsscan->entries += 1;
+}
+
+/**
+ * Account for one entry being done. When the count reaches zero the
+ * thread blocked on fsscan->waitcond is signalled.
+ */
+static inline void
+_br_fsscan_dec_entry_count (struct br_scanfs *fsscan)
+{
+        fsscan->entries--;
+        if (fsscan->entries != 0)
+                return;
+
+        /* last outstanding entry: wake whoever waits on waitcond */
+        pthread_mutex_lock (&fsscan->waitlock);
+        pthread_cond_signal (&fsscan->waitcond);
+        pthread_mutex_unlock (&fsscan->waitlock);
+}
+
+/**
+ * Count @fsentry as outstanding and append it to the scanner's
+ * ->queued list.
+ */
+static void
+_br_fsscan_collect_entry (struct br_scanfs *fsscan,
+                          struct br_fsscan_entry *fsentry)
+{
+        _br_fsscan_inc_entry_count (fsscan);
+        list_add_tail (&fsentry->list, &fsscan->queued);
+}
+
+/* entry count at which the scanner stops and waits for the scrubbers */
+#define NR_ENTRIES (1<<7) /* ..bulk scrubbing */
+
+/**
+ * Crawl callback: take private copies of @parent and @entry, queue them
+ * on the child's scanner and, once NR_ENTRIES or more are outstanding,
+ * block until the scrubbers have drained them.
+ *
+ * @subvol  only validated as non-NULL here
+ * @entry   directory entry to queue (copied)
+ * @parent  loc of the containing directory (copied)
+ * @data    the br_child_t this crawl belongs to
+ *
+ * Returns 0 on success, -1 on validation or allocation/copy failure.
+ */
+int
+br_fsscanner_handle_entry (xlator_t *subvol,
+                           gf_dirent_t *entry, loc_t *parent, void *data)
+{
+        int                     batch_full = 0;
+        br_child_t             *child      = NULL;
+        xlator_t               *this       = NULL;
+        struct br_scanfs       *fsscan     = NULL;
+        struct br_fsscan_entry *fsentry    = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return);
+        GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return);
+
+        child  = data;
+        this   = child->this;
+        fsscan = &child->fsscan;
+
+        fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t);
+        if (fsentry == NULL)
+                goto error_return;
+
+        fsentry->data   = data;
+        fsentry->fsscan = fsscan;
+
+        /* the queued entry carries its own copies of parent and entry */
+        if (loc_copy (&fsentry->parent, parent) != 0)
+                goto dealloc;
+
+        fsentry->entry = entry_copy (entry);
+        if (fsentry->entry == NULL)
+                goto locwipe;
+
+        INIT_LIST_HEAD (&fsentry->list);
+
+        LOCK (&fsscan->entrylock);
+        {
+                _br_fsscan_collect_entry (fsscan, fsentry);
+
+                /**
+                 * need not be an equality check as entries may be pushed
+                 * back onto the scanned queue when thread(s) are cleaned.
+                 */
+                batch_full = (fsscan->entries >= NR_ENTRIES);
+        }
+        UNLOCK (&fsscan->entrylock);
+
+        if (batch_full)
+                wait_for_scrubbing (this, fsscan);
+
+        return 0;
+
+ locwipe:
+        loc_wipe (&fsentry->parent);
+ dealloc:
+        GF_FREE (fsentry);
+ error_return:
+        return -1;
+}
+
void *
-br_scrubber (void *arg)
+br_fsscanner (void *arg)
{
- loc_t loc = {0,};
- xlator_t *this = NULL;
- br_child_t *child = NULL;
+ loc_t loc = {0,};
+ xlator_t *this = NULL;
+ br_child_t *child = NULL;
+ struct br_scanfs *fsscan = NULL;
child = arg;
this = child->this;
+ fsscan = &child->fsscan;
THIS = this;
loc.inode = child->table->root;
while (1) {
- (void) syncop_ftw_throttle
- (child->xl, &loc,
- GF_CLIENT_PID_SCRUB, child, bitd_start_scrub,
- BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ);
+ (void) syncop_ftw (child->xl, &loc,
+ GF_CLIENT_PID_SCRUB,
+ child, br_fsscanner_handle_entry);
+ if (!list_empty (&fsscan->queued))
+ wait_for_scrubbing (this, fsscan);
+ }
+
+ return NULL;
+}
+
+/* exponents applied to e for each throttle level */
+#define BR_SCRUB_THREAD_SCALE_LAZY 0
+#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4
+#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0
+
+#ifndef M_E
+#define M_E 2.718
+#endif
+
+/**
+ * Number of scrubber threads wanted for @throttle: the child count
+ * multiplied by e raised to a fixed, per-level exponent, truncated to
+ * an unsigned integer. BR_SCRUB_THROTTLE_VOID and unknown values (the
+ * latter are logged) yield 0.
+ *
+ * The scale is deliberately simple; it does not take the number of
+ * processor cores into account.
+ */
+static unsigned int
+br_scrubber_calc_scale (xlator_t *this,
+                        br_private_t *priv, scrub_throttle_t throttle)
+{
+        double       exponent = 0;
+        unsigned int scale    = 0;
+
+        switch (throttle) {
+        case BR_SCRUB_THROTTLE_VOID:
+                return 0;
+        case BR_SCRUB_THROTTLE_LAZY:
+                exponent = BR_SCRUB_THREAD_SCALE_LAZY;
+                break;
+        case BR_SCRUB_THROTTLE_NORMAL:
+                exponent = BR_SCRUB_THREAD_SCALE_NORMAL;
+                break;
+        case BR_SCRUB_THROTTLE_AGGRESSIVE:
+                exponent = BR_SCRUB_THREAD_SCALE_AGGRESSIVE;
+                break;
+        default:
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Unknown throttle %d", throttle);
+                return 0;
+        }
+
+        scale = priv->child_count * pow (M_E, exponent);
+
+        return scale;
+}
+
+/**
+ * Thread cleanup handler: release the scrubber mutex. @arg is the
+ * struct br_scrubber whose ->mutex is to be unlocked.
+ */
+static void
+br_scrubber_cleanup_handler (void *arg)
+{
+        pthread_mutex_unlock (&((struct br_scrubber *) arg)->mutex);
+}
+
+/**
+ * Return the child at the head of ->scrublist and rotate the list so
+ * that the following call yields the next child.
+ */
+static inline br_child_t *
+_br_scrubber_get_next_child (struct br_scrubber *fsscrub)
+{
+        br_child_t *head = list_first_entry (&fsscrub->scrublist,
+                                             br_child_t, list);
+
+        list_rotate_left (&fsscrub->scrublist);
+        return head;
+}
+
+/**
+ * Detach the first entry from @child's ->ready list and store it in
+ * *@fsentry. *@fsentry is left untouched when the list is empty.
+ */
+static inline void
+_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry)
+{
+        struct br_scanfs *scanner = &child->fsscan;
+
+        if (!list_empty (&scanner->ready)) {
+                *fsentry = list_first_entry (&scanner->ready,
+                                             struct br_fsscan_entry, list);
+                list_del_init (&(*fsentry)->list);
+        }
+}
+
+/**
+ * Block until some child has an entry on its ->ready list, then detach
+ * that entry into *@fsentry. Children are visited round robin, one full
+ * pass per wakeup.
+ *
+ * Must be called with fsscrub->mutex held; the mutex is released while
+ * waiting on fsscrub->cond and held again on return. *@fsentry must be
+ * NULL on entry.
+ */
+static inline void
+_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub,
+                                    struct br_fsscan_entry **fsentry)
+{
+        br_child_t *child      = NULL;
+        br_child_t *firstchild = NULL;
+
+        while (1) {
+                /*
+                 * A loop rather than a single check: a condition wait may
+                 * return spuriously, and taking the first element of an
+                 * empty list would yield a bogus child pointer.
+                 */
+                while (list_empty (&fsscrub->scrublist))
+                        pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+
+                /* one pass over the children; stop on wrapping around */
+                firstchild = NULL;
+                for (child = _br_scrubber_get_next_child (fsscrub);
+                     child != firstchild;
+                     child = _br_scrubber_get_next_child (fsscrub)) {
+
+                        if (!firstchild)
+                                firstchild = child;
+
+                        _br_scrubber_get_entry (child, fsentry);
+                        if (*fsentry)
+                                break;
+                }
+
+                if (*fsentry)
+                        break;
+
+                /* nothing to work on.. wait till available */
+                pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+        }
+}
+
+/**
+ * Fetch the next entry to scrub into *@fsentry, blocking until one is
+ * available. A cleanup handler that unlocks fsscrub->mutex is installed
+ * for the duration of the call.
+ */
+static void
+br_scrubber_pick_entry (struct br_scrubber *fsscrub,
+                        struct br_fsscan_entry **fsentry)
+{
+        *fsentry = NULL;
+
+        pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub);
+
+        pthread_mutex_lock (&fsscrub->mutex);
+        _br_scrubber_find_scrubbable_entry (fsscrub, fsentry);
+        pthread_mutex_unlock (&fsscrub->mutex);
+
+        /* normal exit: the mutex is already released, do not run the handler */
+        pthread_cleanup_pop (0);
+}
- sleep (BR_SCRUB_THROTTLE_ZZZ);
+/* Argument handed to the per-entry cleanup handler. */
+struct br_scrub_entry {
+ gf_boolean_t scrubbed; /* set once scrubbing of ->fsentry has returned */
+ struct br_fsscan_entry *fsentry; /* the entry being scrubbed */
+};
+
+/**
+ * We need to be a bit careful here. These thread(s) are prone to cancellations
+ * when threads are scaled down (depending on the throttling value configured)
+ * and pausing scrub. A thread can get cancelled while it's waiting for entries
+ * in the ->pending queue or when an object is undergoing scrubbing.
+ *
+ * Cleanup handler for one entry (@arg is a struct br_scrub_entry). If the
+ * entry was scrubbed it is uncounted and freed; otherwise it is put back on
+ * the scanner's ->queued list. Runs under fsscan->entrylock.
+ */
+static void
+br_scrubber_entry_handle (void *arg)
+{
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrub_entry *sentry = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ sentry = arg;
+
+ fsentry = sentry->fsentry;
+ fsscan = fsentry->fsscan;
+
+ LOCK (&fsscan->entrylock);
+ {
+ if (sentry->scrubbed) {
+ /* may signal the waiting scanner if this was the last outstanding entry */
+ _br_fsscan_dec_entry_count (fsscan);
+
+ /* cleanup ->entry */
+ fsentry->data = NULL;
+ fsentry->fsscan = NULL;
+ loc_wipe (&fsentry->parent);
+ gf_dirent_entry_free (fsentry->entry);
+
+ GF_FREE (sentry->fsentry);
+ } else {
+ /* (re)queue the entry again for scrub */
+ /* NOTE(review): the entry was already counted when first collected and picking it off ->ready does not uncount it, so this looks like it counts the entry a second time -- confirm */
+ _br_fsscan_collect_entry (fsscan, sentry->fsentry);
+ }
+ }
+ UNLOCK (&fsscan->entrylock);
+}
+
+/**
+ * Scrub one entry. br_scrubber_entry_handle() is installed as cleanup
+ * handler around the scrub and always runs: with ->scrubbed set when
+ * the scrub call returned, with it clear when the thread went away
+ * before that.
+ */
+static void
+br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+        struct br_scrub_entry sentry = {0, };
+
+        sentry.fsentry  = fsentry;
+        sentry.scrubbed = 0;
+
+        pthread_cleanup_push (br_scrubber_entry_handle, &sentry);
+
+        /* the scrub result is intentionally ignored */
+        (void) br_scrubber_scrub_begin (this, fsentry);
+        sentry.scrubbed = 1;
+
+        /* non-zero: run the handler on the normal path as well */
+        pthread_cleanup_pop (1);
+}
+
+void *br_scrubber_proc (void *arg)
+{
+ xlator_t *this = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ fsscrub = arg;
+ THIS = this = fsscrub->this;
+
+ while (1) {
+ br_scrubber_pick_entry (fsscrub, &fsentry);
+ br_scrubber_scrub_entry (this, fsentry);
+ sleep (1);
}
return NULL;
}
+
+/**
+ * Spawn (@v2 - @v1) additional scrubber threads.
+ *
+ * @v1  current number of scrubbers
+ * @v2  wanted number of scrubbers (caller guarantees @v2 > @v1)
+ *
+ * Returns -1 when a bookkeeping allocation fails. A thread creation
+ * failure is treated as degraded scaling: a warning is logged, the
+ * threads spawned so far are kept and 0 is returned.
+ */
+static int32_t
+br_scrubber_scale_up (xlator_t *this,
+                      struct br_scrubber *fsscrub,
+                      unsigned int v1, unsigned int v2)
+{
+        int                  i     = 0;
+        int                  diff  = 0;
+        int                  nomem = 0;
+        struct br_scrubbers *scrub = NULL;
+
+        diff = (int)(v2 - v1);
+
+        gf_log (this->name, GF_LOG_INFO,
+                "Scaling up scrubbers [%u => %u]", v1, v2);
+
+        for (i = 0; i < diff; i++) {
+                /* one bookkeeping object per thread */
+                scrub = GF_CALLOC (1, sizeof (*scrub),
+                                   gf_br_mt_br_scrubber_t);
+                if (!scrub) {
+                        nomem = 1;
+                        break;
+                }
+
+                INIT_LIST_HEAD (&scrub->list);
+                if (gf_thread_create (&scrub->scrubthread,
+                                      NULL, br_scrubber_proc, fsscrub)) {
+                        /* never linked anywhere: release it here */
+                        GF_FREE (scrub);
+                        break;
+                }
+
+                fsscrub->nr_scrubbers++;
+                list_add_tail (&scrub->list, &fsscrub->scrubbers);
+        }
+
+        if (nomem)
+                return -1;
+
+        if (i != diff) /* degraded scaling.. */
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not fully scale up to %u scrubber(s). Spawned "
+                        "%d/%d [total scrubber(s): %u]", v2, i, diff, (v1 + i));
+
+        return 0;
+}
+
+/**
+ * Terminate (@v1 - @v2) scrubber threads, taken from the head of the
+ * ->scrubbers list.
+ *
+ * @v1  current number of scrubbers
+ * @v2  wanted number of scrubbers (caller guarantees @v1 > @v2)
+ *
+ * A scrubber is unlinked, freed and uncounted only after its thread
+ * has been cleaned up, so the list and ->nr_scrubbers stay in step
+ * when termination fails part way. Failure to terminate a thread is
+ * logged as a warning; the function returns 0 in every case.
+ */
+static int32_t
+br_scrubber_scale_down (xlator_t *this,
+                        struct br_scrubber *fsscrub,
+                        unsigned int v1, unsigned int v2)
+{
+        int                  i     = 0;
+        int                  diff  = 0;
+        int32_t              ret   = 0;
+        struct br_scrubbers *scrub = NULL;
+
+        diff = (int)(v1 - v2);
+
+        gf_log (this->name, GF_LOG_INFO,
+                "Scaling down scrubbers [%u => %u]", v1, v2);
+
+        for (i = 0 ; i < diff; i++) {
+                scrub = list_first_entry
+                        (&fsscrub->scrubbers, struct br_scrubbers, list);
+
+                ret = gf_thread_cleanup_xint (scrub->scrubthread);
+                if (ret)
+                        break; /* still linked and counted */
+
+                list_del_init (&scrub->list);
+                GF_FREE (scrub);
+
+                fsscrub->nr_scrubbers--;
+        }
+
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not fully scale down to %u scrubber(s). "
+                        "Terminated %d/%d [total scrubber(s): %u]",
+                        v2, i, diff, (v1 - i));
+                ret = 0;
+        }
+
+        return ret;
+}
+
+/**
+ * Bring the number of scrubber threads in line with what @nthrottle
+ * calls for, scaling up or down as needed. Returns 0 when nothing has
+ * to change, otherwise the result of the scaling routine.
+ */
+static int32_t
+br_scrubber_configure (xlator_t *this, br_private_t *priv,
+                       struct br_scrubber *fsscrub, scrub_throttle_t nthrottle)
+{
+        unsigned int current = fsscrub->nr_scrubbers;
+        unsigned int wanted  = br_scrubber_calc_scale (this, priv, nthrottle);
+
+        if (current == wanted)
+                return 0;
+
+        return (current > wanted)
+                ? br_scrubber_scale_down (this, fsscrub, current, wanted)
+                : br_scrubber_scale_up (this, fsscrub, current, wanted);
+}
+
+/* TODO: token bucket spec */
+/**
+ * Read the "scrub-throttle" option (from @options on reconfigure, from
+ * the init-time options when @options is NULL), map it to a throttle
+ * level and resize the scrubber thread pool accordingly.
+ *
+ * Returns 0 on success; -1 when the option cannot be read, its value
+ * is not one of lazy/normal/aggressive, or resizing fails. The stored
+ * throttle value is only updated on success.
+ */
+static int32_t
+br_scrubber_handle_throttle (xlator_t *this,
+                             br_private_t *priv, dict_t *options)
+{
+        char               *tmp       = NULL;
+        struct br_scrubber *fsscrub   = &priv->fsscrub;
+        scrub_throttle_t    nthrottle = BR_SCRUB_THROTTLE_VOID;
+
+        if (options) {
+                GF_OPTION_RECONF ("scrub-throttle",
+                                  tmp, options, str, error_return);
+        } else {
+                GF_OPTION_INIT ("scrub-throttle", tmp, str, error_return);
+        }
+
+        if (!strcasecmp (tmp, "lazy")) {
+                nthrottle = BR_SCRUB_THROTTLE_LAZY;
+        } else if (!strcasecmp (tmp, "normal")) {
+                nthrottle = BR_SCRUB_THROTTLE_NORMAL;
+        } else if (!strcasecmp (tmp, "aggressive")) {
+                nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE;
+        } else {
+                goto error_return;
+        }
+
+        /* on failure old throttling value is preserved */
+        if (br_scrubber_configure (this, priv, fsscrub, nthrottle))
+                goto error_return;
+
+        fsscrub->throttle = nthrottle;
+        return 0;
+
+ error_return:
+        return -1;
+}
+
+/* TODO: pause/resume, frequency */
+/**
+ * Apply scrubber options; currently only the throttle setting.
+ * Returns 0 on success, -1 on failure.
+ */
+int32_t
+br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
+{
+        if (br_scrubber_handle_throttle (this, priv, options) != 0)
+                return -1;
+
+        return 0;
+}
+
+/**
+ * Set up scrubber state in @priv: create priv->tbf and initialise the
+ * embedded struct br_scrubber (mutex, condition variable, empty lists,
+ * zero threads, throttle BR_SCRUB_THROTTLE_VOID).
+ *
+ * Returns 0 on success, -1 when br_tbf_init() fails.
+ */
+int32_t
+br_scrubber_init (xlator_t *this, br_private_t *priv)
+{
+        struct br_scrubber *fsscrub = &priv->fsscrub;
+
+        priv->tbf = br_tbf_init (NULL, 0);
+        if (priv->tbf == NULL)
+                return -1;
+
+        fsscrub->this         = this;
+        fsscrub->throttle     = BR_SCRUB_THROTTLE_VOID;
+        fsscrub->nr_scrubbers = 0;
+
+        pthread_mutex_init (&fsscrub->mutex, NULL);
+        pthread_cond_init (&fsscrub->cond, NULL);
+
+        INIT_LIST_HEAD (&fsscrub->scrubbers);
+        INIT_LIST_HEAD (&fsscrub->scrublist);
+
+        return 0;
+}