diff options
-rw-r--r-- | libglusterfs/src/gf-dirent.c | 39 | ||||
-rw-r--r-- | libglusterfs/src/gf-dirent.h | 2 | ||||
-rw-r--r-- | libglusterfs/src/list.h | 14 | ||||
-rw-r--r-- | libglusterfs/src/syncop.c | 17 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 565 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub.h | 9 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 146 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.h | 40 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h | 3 |
9 files changed, 760 insertions, 75 deletions
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c index b5f395afc36..99c0eb6441d 100644 --- a/libglusterfs/src/gf-dirent.c +++ b/libglusterfs/src/gf-dirent.c @@ -171,6 +171,20 @@ gf_dirent_for_name (const char *name) return gf_dirent; } +void +gf_dirent_entry_free (gf_dirent_t *entry) +{ + if (!entry) + return; + + if (entry->dict) + dict_unref (entry->dict); + if (entry->inode) + inode_unref (entry->inode); + + list_del (&entry->list); + GF_FREE (entry); +} void gf_dirent_free (gf_dirent_t *entries) @@ -185,16 +199,27 @@ gf_dirent_free (gf_dirent_t *entries) return; list_for_each_entry_safe (entry, tmp, &entries->list, list) { - if (entry->dict) - dict_unref (entry->dict); - if (entry->inode) - inode_unref (entry->inode); - - list_del (&entry->list); - GF_FREE (entry); + gf_dirent_entry_free (entry); } } +gf_dirent_t * +entry_copy (gf_dirent_t *source) +{ + gf_dirent_t *sink = NULL; + + sink = gf_dirent_for_name (source->d_name); + + sink->d_off = source->d_off; + sink->d_ino = source->d_ino; + sink->d_type = source->d_type; + sink->d_stat = source->d_stat; + + if (source->inode) + sink->inode = inode_ref (source->inode); + return sink; +} + void gf_link_inode_from_dirent (xlator_t *this, inode_t *parent, gf_dirent_t *entry) { diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h index 07c605f82b0..faeaf411941 100644 --- a/libglusterfs/src/gf-dirent.h +++ b/libglusterfs/src/gf-dirent.h @@ -61,6 +61,8 @@ struct _gf_dirent_t { #define DT_ISDIR(mode) (mode == DT_DIR) gf_dirent_t *gf_dirent_for_name (const char *name); +gf_dirent_t *entry_copy (gf_dirent_t *source); +void gf_dirent_entry_free (gf_dirent_t *entry); void gf_dirent_free (gf_dirent_t *entries); int gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent, gf_dirent_t *entries); diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h index 875594136a2..b8f9a6eebd8 100644 --- a/libglusterfs/src/list.h +++ b/libglusterfs/src/list.h @@ -214,6 +214,20 @@ static inline void list_replace_init(struct list_head *old, INIT_LIST_HEAD(old); } +/** + * list_rotate_left - rotate the list to the left + * @head: the head of the list + */ +static inline void list_rotate_left (struct list_head *head) +{ + struct list_head *first; + + if (!list_empty (head)) { + first = head->next; + list_move_tail (first, head); + } +} + #define list_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index cb08b03d44b..81eae5a9105 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -1217,23 +1217,6 @@ syncop_lookup (xlator_t *subvol, loc_t *loc, struct iatt *iatt, return args.op_ret; } -static gf_dirent_t * -entry_copy (gf_dirent_t *source) -{ - gf_dirent_t *sink = NULL; - - sink = gf_dirent_for_name (source->d_name); - - sink->d_off = source->d_off; - sink->d_ino = source->d_ino; - sink->d_type = source->d_type; - sink->d_stat = source->d_stat; - - if (source->inode) - sink->inode = inode_ref (source->inode); - return sink; -} - int32_t syncop_readdirp_cbk (call_frame_t *frame, void *cookie, diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index e0581a40df0..8a80052f250 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -13,17 +13,35 @@ #include "config.h" #endif +#include <math.h> #include <ctype.h> #include <sys/uio.h> #include "glusterfs.h" -#include "xlator.h" #include "logging.h" +#include "common-utils.h" -#include "bit-rot.h" #include "bit-rot-scrub.h" #include <pthread.h> +struct br_scrubbers { + pthread_t scrubthread; + + struct list_head list; +}; + +struct br_fsscan_entry { + void *data; + + loc_t parent; + + gf_dirent_t *entry; + + struct br_scanfs *fsscan; /* backpointer to subvolume scanner */ + + struct list_head list; +}; + /** * fetch signature extended attribute from an object's fd. * NOTE: On success @xattr is not unref'd as @sign points @@ -246,8 +264,7 @@ bitd_compare_ckum (xlator_t *this, * signs with SHA256). */ int -bitd_start_scrub (xlator_t *subvol, - gf_dirent_t *entry, loc_t *parent, void *data) +br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry) { int32_t ret = -1; fd_t *fd = NULL; @@ -256,17 +273,22 @@ bitd_start_scrub (xlator_t *subvol, struct iatt parent_buf = {0, }; pid_t pid = 0; br_child_t *child = NULL; - xlator_t *this = NULL; unsigned char *md = NULL; inode_t *linked_inode = NULL; br_isignature_out_t *sign = NULL; unsigned long signedversion = 0; + gf_dirent_t *entry = NULL; + loc_t *parent = NULL; - GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out); - GF_VALIDATE_OR_GOTO ("bit-rot", data, out); + GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out); - child = data; - this = child->this; + entry = fsentry->entry; + parent = &fsentry->parent; + child = fsentry->data; + + GF_VALIDATE_OR_GOTO ("bit-rot", entry, out); + GF_VALIDATE_OR_GOTO ("bit-rot", parent, out); + GF_VALIDATE_OR_GOTO ("bit-rot", child, out); pid = GF_CLIENT_PID_SCRUB; @@ -366,29 +388,532 @@ bitd_start_scrub (xlator_t *subvol, return ret; } -#define BR_SCRUB_THROTTLE_COUNT 30 -#define BR_SCRUB_THROTTLE_ZZZ 60 +static void +wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) +{ + br_private_t *priv = NULL; + struct br_scrubber *fsscrub = NULL; + + priv = this->private; + fsscrub = &priv->fsscrub; + + pthread_mutex_lock (&fsscan->waitlock); + { + pthread_mutex_lock (&fsscrub->mutex); + { + list_replace_init (&fsscan->queued, &fsscan->ready); + + /* wake up scrubbers */ + pthread_cond_broadcast (&fsscrub->cond); + } + pthread_mutex_unlock (&fsscrub->mutex); + + while (fsscan->entries != 0) + pthread_cond_wait + (&fsscan->waitcond, &fsscan->waitlock); + } + pthread_mutex_unlock (&fsscan->waitlock); +} + +static inline void +_br_fsscan_inc_entry_count (struct br_scanfs *fsscan) +{ + fsscan->entries++; +} + +static inline void +_br_fsscan_dec_entry_count (struct br_scanfs *fsscan) +{ + if (--fsscan->entries == 0) { + pthread_mutex_lock (&fsscan->waitlock); + { + pthread_cond_signal (&fsscan->waitcond); + } + pthread_mutex_unlock (&fsscan->waitlock); + } +} + +static void +_br_fsscan_collect_entry (struct br_scanfs *fsscan, + struct br_fsscan_entry *fsentry) +{ + list_add_tail (&fsentry->list, &fsscan->queued); + _br_fsscan_inc_entry_count (fsscan); +} + +#define NR_ENTRIES (1<<7) /* ..bulk scrubbing */ + +int +br_fsscanner_handle_entry (xlator_t *subvol, + gf_dirent_t *entry, loc_t *parent, void *data) +{ + int32_t ret = -1; + int scrub = 0; + br_child_t *child = NULL; + xlator_t *this = NULL; + struct br_scanfs *fsscan = NULL; + struct br_fsscan_entry *fsentry = NULL; + + GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return); + GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return); + + child = data; + this = child->this; + fsscan = &child->fsscan; + + fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t); + if (!fsentry) + goto error_return; + + { + fsentry->data = data; + fsentry->fsscan = &child->fsscan; + + /* copy parent loc */ + ret = loc_copy (&fsentry->parent, parent); + if (ret) + goto dealloc; + + /* copy child entry */ + fsentry->entry = entry_copy (entry); + if (!fsentry->entry) + goto locwipe; + + INIT_LIST_HEAD (&fsentry->list); + } + + LOCK (&fsscan->entrylock); + { + _br_fsscan_collect_entry (fsscan, fsentry); + + /** + * need not be a equality check as entries may be pushed + * back onto the scanned queue when thread(s) are cleaned. + */ + if (fsscan->entries >= NR_ENTRIES) + scrub = 1; + } + UNLOCK (&fsscan->entrylock); + + if (scrub) + wait_for_scrubbing (this, fsscan); + + return 0; + + locwipe: + loc_wipe (&fsentry->parent); + dealloc: + GF_FREE (fsentry); + error_return: + return -1; +} + void * -br_scrubber (void *arg) +br_fsscanner (void *arg) { - loc_t loc = {0,}; - xlator_t *this = NULL; - br_child_t *child = NULL; + loc_t loc = {0,}; + xlator_t *this = NULL; + br_child_t *child = NULL; + struct br_scanfs *fsscan = NULL; child = arg; this = child->this; + fsscan = &child->fsscan; THIS = this; loc.inode = child->table->root; while (1) { - (void) syncop_ftw_throttle - (child->xl, &loc, - GF_CLIENT_PID_SCRUB, child, bitd_start_scrub, - BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ); + (void) syncop_ftw (child->xl, &loc, + GF_CLIENT_PID_SCRUB, + child, br_fsscanner_handle_entry); + if (!list_empty (&fsscan->queued)) + wait_for_scrubbing (this, fsscan); + } + + return NULL; +} + +#define BR_SCRUB_THREAD_SCALE_LAZY 0 +#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4 +#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0 + +#ifndef M_E +#define M_E 2.718 +#endif + +/** + * This is just a simple exponential scale to a fixed value selected + * per throttle config. We probably need to be more smart and select + * the scale based on the number of processor cores too. + */ +static unsigned int +br_scrubber_calc_scale (xlator_t *this, + br_private_t *priv, scrub_throttle_t throttle) +{ + unsigned int scale = 0; + + switch (throttle) { + case BR_SCRUB_THROTTLE_VOID: + scale = 0; + break; + case BR_SCRUB_THROTTLE_LAZY: + scale = priv->child_count * + pow (M_E, BR_SCRUB_THREAD_SCALE_LAZY); + break; + case BR_SCRUB_THROTTLE_NORMAL: + scale = priv->child_count * + pow (M_E, BR_SCRUB_THREAD_SCALE_NORMAL); + break; + case BR_SCRUB_THROTTLE_AGGRESSIVE: + scale = priv->child_count * + pow (M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE); + break; + default: + gf_log (this->name, GF_LOG_ERROR, + "Unknown throttle %d", throttle); + } + + return scale; + +} + +static void +br_scrubber_cleanup_handler (void *arg) +{ + struct br_scrubber *fsscrub = arg; + pthread_mutex_unlock (&fsscrub->mutex); +} + +static inline br_child_t * +_br_scrubber_get_next_child (struct br_scrubber *fsscrub) +{ + br_child_t *child = NULL; + + child = list_first_entry (&fsscrub->scrublist, br_child_t, list); + list_rotate_left (&fsscrub->scrublist); + + return child; +} + +static inline void +_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry) +{ + struct br_scanfs *fsscan = &child->fsscan; + + if (list_empty (&fsscan->ready)) + return; + *fsentry = list_first_entry + (&fsscan->ready, struct br_fsscan_entry, list); + list_del_init (&(*fsentry)->list); +} + +static inline void +_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub, + struct br_fsscan_entry **fsentry) +{ + br_child_t *child = NULL; + br_child_t *firstchild = NULL; + + while (1) { + if (list_empty (&fsscrub->scrublist)) + pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex); + + firstchild = NULL; + for (child = _br_scrubber_get_next_child (fsscrub); + child != firstchild; + child = _br_scrubber_get_next_child (fsscrub)) { + + if (!firstchild) + firstchild = child; + + _br_scrubber_get_entry (child, fsentry); + if (*fsentry) + break; + } + + if (*fsentry) + break; + + /* nothing to work on.. wait till available */ + pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex); + } +} + +static void +br_scrubber_pick_entry (struct br_scrubber *fsscrub, + struct br_fsscan_entry **fsentry) +{ + pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub); + + pthread_mutex_lock (&fsscrub->mutex); + { + *fsentry = NULL; + _br_scrubber_find_scrubbable_entry (fsscrub, fsentry); + } + pthread_mutex_unlock (&fsscrub->mutex); + + pthread_cleanup_pop (0); +} - sleep (BR_SCRUB_THROTTLE_ZZZ); +struct br_scrub_entry { + gf_boolean_t scrubbed; + struct br_fsscan_entry *fsentry; +}; + +/** + * We need to be a bit careful here. These thread(s) are prone to cancellations + * when threads are scaled down (depending on the thottling value configured) + * and pausing scrub. A thread can get cancelled while it's waiting for entries + * in the ->pending queue or when an object is undergoing scrubbing. + */ +static void +br_scrubber_entry_handle (void *arg) +{ + struct br_scanfs *fsscan = NULL; + struct br_scrub_entry *sentry = NULL; + struct br_fsscan_entry *fsentry = NULL; + + sentry = arg; + + fsentry = sentry->fsentry; + fsscan = fsentry->fsscan; + + LOCK (&fsscan->entrylock); + { + if (sentry->scrubbed) { + _br_fsscan_dec_entry_count (fsscan); + + /* cleanup ->entry */ + fsentry->data = NULL; + fsentry->fsscan = NULL; + loc_wipe (&fsentry->parent); + gf_dirent_entry_free (fsentry->entry); + + GF_FREE (sentry->fsentry); + } else { + /* (re)queue the entry again for scrub */ + _br_fsscan_collect_entry (fsscan, sentry->fsentry); + } + } + UNLOCK (&fsscan->entrylock); +} + +static void +br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry) +{ + struct br_scrub_entry sentry = {0, }; + + sentry.scrubbed = 0; + sentry.fsentry = fsentry; + + pthread_cleanup_push (br_scrubber_entry_handle, &sentry); + { + (void) br_scrubber_scrub_begin (this, fsentry); + sentry.scrubbed = 1; + } + pthread_cleanup_pop (1); +} + +void *br_scrubber_proc (void *arg) +{ + xlator_t *this = NULL; + struct br_scrubber *fsscrub = NULL; + struct br_fsscan_entry *fsentry = NULL; + + fsscrub = arg; + THIS = this = fsscrub->this; + + while (1) { + br_scrubber_pick_entry (fsscrub, &fsentry); + br_scrubber_scrub_entry (this, fsentry); + sleep (1); } return NULL; } + +static int32_t +br_scrubber_scale_up (xlator_t *this, + struct br_scrubber *fsscrub, + unsigned int v1, unsigned int v2) +{ + int i = 0; + int32_t ret = -1; + int diff = 0; + struct br_scrubbers *scrub = NULL; + + diff = (int)(v2 - v1); + + gf_log (this->name, GF_LOG_INFO, + "Scaling up scrubbers [%d => %d]", v1, v2); + + for (i = 0; i < diff; i++) { + scrub = GF_CALLOC (diff, sizeof (*scrub), + gf_br_mt_br_scrubber_t); + if (!scrub) + break; + + INIT_LIST_HEAD (&scrub->list); + ret = gf_thread_create (&scrub->scrubthread, + NULL, br_scrubber_proc, fsscrub); + if (ret) + break; + + fsscrub->nr_scrubbers++; + list_add_tail (&scrub->list, &fsscrub->scrubbers); + } + + if ((i != diff) && !scrub) + goto error_return; + + if (i != diff) /* degraded scaling.. */ + gf_log (this->name, GF_LOG_WARNING, + "Could not fully scale up to %d scrubber(s). Spawned " + "%d/%d [total scrubber(s): %d]", v2, i, diff, (v1 + i)); + + return 0; + + error_return: + return -1; +} + +static int32_t +br_scrubber_scale_down (xlator_t *this, + struct br_scrubber *fsscrub, + unsigned int v1, unsigned int v2) +{ + int i = 0; + int diff = 0; + int32_t ret = -1; + struct br_scrubbers *scrub = NULL; + + diff = (int)(v1 - v2); + + gf_log (this->name, GF_LOG_INFO, + "Scaling down scrubbers [%d => %d]", v1, v2); + + for (i = 0 ; i < diff; i++) { + scrub = list_first_entry + (&fsscrub->scrubbers, struct br_scrubbers, list); + + list_del_init (&scrub->list); + ret = gf_thread_cleanup_xint (scrub->scrubthread); + if (ret) + break; + GF_FREE (scrub); + + fsscrub->nr_scrubbers--; + } + + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Could not fully scale down to %d scrubber(s). " + "Terminated %d/%d [total scrubber(s): %d]", + v1, i, diff, (v2 - i)); + ret = 0; + } + + return ret; +} + +static int32_t +br_scrubber_configure (xlator_t *this, br_private_t *priv, + struct br_scrubber *fsscrub, scrub_throttle_t nthrottle) +{ + int32_t ret = 0; + unsigned int v1 = 0; + unsigned int v2 = 0; + + v1 = fsscrub->nr_scrubbers; + v2 = br_scrubber_calc_scale (this, priv, nthrottle); + + if (v1 == v2) + return 0; + + if (v1 > v2) + ret = br_scrubber_scale_down (this, fsscrub, v1, v2); + else + ret = br_scrubber_scale_up (this, fsscrub, v1, v2); + + return ret; +} + +/* TODO: token buket spec */ +static int32_t +br_scrubber_handle_throttle (xlator_t *this, + br_private_t *priv, dict_t *options) +{ + int32_t ret = 0; + char *tmp = NULL; + struct br_scrubber *fsscrub = NULL; + scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID; + + fsscrub = &priv->fsscrub; + + if (options) + GF_OPTION_RECONF ("scrub-throttle", + tmp, options, str, error_return); + else + GF_OPTION_INIT ("scrub-throttle", tmp, str, error_return); + + if (strcasecmp (tmp, "lazy") == 0) + nthrottle = BR_SCRUB_THROTTLE_LAZY; + else if (strcasecmp (tmp, "normal") == 0) + nthrottle = BR_SCRUB_THROTTLE_NORMAL; + else if (strcasecmp (tmp, "aggressive") == 0) + nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE; + else + goto error_return; + + /* on failure old throttling value is preserved */ + ret = br_scrubber_configure (this, priv, fsscrub, nthrottle); + if (ret) + goto error_return; + + fsscrub->throttle = nthrottle; + return 0; + + error_return: + return -1; +} + +/* TODO: pause/resume, frequency */ +int32_t +br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options) +{ + int32_t ret = 0; + + ret = br_scrubber_handle_throttle (this, priv, options); + if (ret) + goto error_return; + + return 0; + + error_return: + return -1; +} + +int32_t +br_scrubber_init (xlator_t *this, br_private_t *priv) +{ + struct br_scrubber *fsscrub = NULL; + + priv->tbf = br_tbf_init (NULL, 0); + if (!priv->tbf) + return -1; + + fsscrub = &priv->fsscrub; + + fsscrub->this = this; + fsscrub->throttle = BR_SCRUB_THROTTLE_VOID; + + pthread_mutex_init (&fsscrub->mutex, NULL); + pthread_cond_init (&fsscrub->cond, NULL); + + fsscrub->nr_scrubbers = 0; + INIT_LIST_HEAD (&fsscrub->scrubbers); + INIT_LIST_HEAD (&fsscrub->scrublist); + + return 0; +} diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h index daec9ad8196..4f00020d66a 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h @@ -11,6 +11,13 @@ #ifndef __BIT_ROT__SCRUB_H__ #define __BIT_ROT_SCRUB_H__ -void *br_scrubber (void *); +#include "xlator.h" +#include "bit-rot.h" + +void *br_fsscanner (void *); + +int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *); + +int32_t br_scrubber_init (xlator_t *, br_private_t *); #endif /* __BIT_ROT_SCRUB_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 880b16edfa8..eea81aec53a 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -29,15 +29,6 @@ #define BR_HASH_CALC_READ_SIZE (128 * 1024) -br_tbf_opspec_t opthrottle[] = { - { - .op = BR_TBF_OP_HASH, - .rate = BR_HASH_CALC_READ_SIZE, - .maxlimit = (2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE), - }, - /** TODO: throttle getdents(), read() request(s) */ -}; - static int br_find_child_index (xlator_t *this, xlator_t *child) { @@ -1066,6 +1057,7 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) child->threadrunning = 1; /* it's OK to continue, "old" objects would be signed when modified */ + list_del_init (&child->list); return 0; dealloc: @@ -1078,14 +1070,45 @@ static inline int32_t br_enact_scrubber (xlator_t *this, br_child_t *child) { int32_t ret = 0; + br_private_t *priv = NULL; + struct br_scanfs *fsscan = NULL; + struct br_scrubber *fsscrub = NULL; + + priv = this->private; + + fsscan = &child->fsscan; + fsscrub = &priv->fsscrub; + + LOCK_INIT (&fsscan->entrylock); + pthread_mutex_init (&fsscan->waitlock, NULL); + pthread_cond_init (&fsscan->waitcond, NULL); - ret = gf_thread_create (&child->thread, NULL, br_scrubber, child); + fsscan->entries = 0; + INIT_LIST_HEAD (&fsscan->queued); + INIT_LIST_HEAD (&fsscan->ready); + + ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child); if (ret != 0) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, "failed to spawn scrubber"); + gf_log (this->name, GF_LOG_ALERT, "failed to spawn bitrot " + "scrubber daemon [Brick: %s]", child->brick_path); + goto error_return; } - return ret; + /** + * Everything has been setup.. add this subvolume to scrubbers + * list. + */ + pthread_mutex_lock (&fsscrub->mutex); + { + list_move (&child->list, &fsscrub->scrublist); + pthread_cond_broadcast (&fsscrub->cond); + } + pthread_mutex_unlock (&fsscrub->mutex); + + return 0; + + error_return: + return -1; } /** @@ -1202,8 +1225,7 @@ br_handle_events (void *arg) "failed to connect to the " "child (subvolume: %s)", child->xl->name); - else - list_del_init (&child->list); + } } @@ -1379,16 +1401,72 @@ br_init_signer (xlator_t *this, br_private_t *priv) return -1; } -int32_t -br_init_rate_limiter (br_private_t *priv) +/** + * For signer, only rate limit CPU usage (during hash calculation) when + * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full + * throttle. + */ +static int32_t +br_rate_limit_signer (xlator_t *this, int child_count, int numbricks) { - br_tbf_opspec_t *spec = opthrottle; - priv->tbf = br_tbf_init (spec, sizeof (opthrottle) - / sizeof (br_tbf_opspec_t)); + br_private_t *priv = NULL; + br_tbf_opspec_t spec = {0,}; + + priv = this->private; + + spec.op = BR_TBF_OP_HASH; + spec.rate = 0; + spec.maxlimit = 0; + +#ifdef BR_RATE_LIMIT_SIGNER + + double contribution = 0; + contribution = ((double)1 - ((double)child_count / (double)numbricks)); + if (contribution == 0) + contribution = 1; + spec.rate = BR_HASH_CALC_READ_SIZE * contribution; + spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE; + +#endif + + if (!spec.rate) + gf_log (this->name, + GF_LOG_INFO, "[Rate Limit Info] \"FULL THROTTLE\""); + else + gf_log (this->name, GF_LOG_INFO, + "[Rate Limit Info] \"tokens/sec (rate): %lu, " + "maxlimit: %lu\"", spec.rate, spec.maxlimit); + priv->tbf = br_tbf_init (&spec, 1); return priv->tbf ? 0 : -1; } +static int32_t +br_signer_init (xlator_t *this, br_private_t *priv) +{ + int32_t ret = 0; + int numbricks = 0; + + GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, error_return); + GF_OPTION_INIT ("brick-count", numbricks, int32, error_return); + + ret = br_rate_limit_signer (this, priv->child_count, numbricks); + if (ret) + goto error_return; + + ret = br_init_signer (this, priv); + if (ret) + goto cleanup_tbf; + + return 0; + + cleanup_tbf: + /* cleanup TBF */ + error_return: + return -1; + +} + int32_t init (xlator_t *this) { @@ -1410,7 +1488,6 @@ init (xlator_t *this) } GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out); - GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, out); priv->child_count = xlator_subvolume_count (this); priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children), @@ -1443,18 +1520,19 @@ init (xlator_t *this) INIT_LIST_HEAD (&priv->children[i].list); INIT_LIST_HEAD (&priv->bricks); - ret = br_init_rate_limiter (priv); - if (ret) - goto cleanup_mutex; - this->private = priv; if (!priv->iamscrubber) { - ret = br_init_signer (this, priv); - if (ret) - goto cleanup_tbf; + ret = br_signer_init (this, priv); + } else { + ret = br_scrubber_init (this, priv); + if (!ret) + ret = br_scrubber_handle_options (this, priv, NULL); } + if (ret) + goto cleanup_mutex; + ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this); if (ret != 0) { gf_log (this->name, GF_LOG_ERROR, @@ -1469,7 +1547,6 @@ init (xlator_t *this) return 0; } - cleanup_tbf: cleanup_mutex: (void) pthread_cond_destroy (&priv->cond); (void) pthread_mutex_destroy (&priv->lock); @@ -1505,6 +1582,17 @@ fini (xlator_t *this) return; } +int +reconfigure (xlator_t *this, dict_t *options) +{ + br_private_t *priv = this->private; + + if (!priv->iamscrubber) + return 0; + + return br_scrubber_handle_options (this, priv, options); +} + struct xlator_fops fops; struct xlator_cbks cbks; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index 5b641801916..6f21a6985ba 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -38,8 +38,26 @@ */ #define BR_WORKERS 4 +typedef enum scrub_throttle { + BR_SCRUB_THROTTLE_VOID = -1, + BR_SCRUB_THROTTLE_LAZY = 0, + BR_SCRUB_THROTTLE_NORMAL = 1, + BR_SCRUB_THROTTLE_AGGRESSIVE = 2, +} scrub_throttle_t; + #define signature_size(hl) (sizeof (br_isignature_t) + hl + 1) +struct br_scanfs { + gf_lock_t entrylock; + + pthread_mutex_t waitlock; + pthread_cond_t waitcond; + + unsigned int entries; + struct list_head queued; + struct list_head ready; +}; + struct br_child { char child_up; /* Indicates whether this child is up or not */ @@ -53,12 +71,14 @@ struct br_child { xlator_t *this; /* Bit rot xlator */ pthread_t thread; /* initial crawler for unsigned - object(s) */ + object(s) or scrub crawler */ int threadrunning; /* active thread */ struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */ struct timeval tv; + + struct br_scanfs fsscan; /* per subvolume FS scanner */ }; typedef struct br_child br_child_t; @@ -72,6 +92,23 @@ struct br_obj_n_workers { signing each object */ }; +struct br_scrubber { + xlator_t *this; + + scrub_throttle_t throttle; + + pthread_mutex_t mutex; + pthread_cond_t cond; + + unsigned int nr_scrubbers; + struct list_head scrubbers; + + /* + * list of "rotatable" subvolume(s) undergoing scrubbing + */ + struct list_head scrublist; +}; + typedef struct br_obj_n_workers br_obj_n_workers_t; struct br_private { @@ -100,6 +137,7 @@ struct br_private { br_tbf_t *tbf; /* token bucket filter */ gf_boolean_t iamscrubber; /* function as a fs scrubber */ + struct br_scrubber fsscrub; /* scrubbers for this subvolume */ }; typedef struct br_private br_private_t; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h index bb4030493db..46271407219 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h @@ -25,6 +25,9 @@ enum br_mem_types { gf_br_mt_br_tbf_t, gf_br_mt_br_tbf_bucket_t, gf_br_mt_br_tbf_throttle_t, + gf_br_mt_br_tbf_opspec_t, + gf_br_mt_br_scrubber_t, + gf_br_mt_br_fsscan_entry_t, gf_br_stub_mt_end }; |