Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot-scrub.c')
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 565
1 file changed, 545 insertions(+), 20 deletions(-)
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index e0581a40df0..8a80052f250 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -13,17 +13,35 @@
 #include "config.h"
 #endif
 
+#include <math.h>
 #include <ctype.h>
 #include <sys/uio.h>
 
 #include "glusterfs.h"
-#include "xlator.h"
 #include "logging.h"
+#include "common-utils.h"
 
-#include "bit-rot.h"
 #include "bit-rot-scrub.h"
 #include <pthread.h>
 
+struct br_scrubbers {
+        pthread_t scrubthread;
+
+        struct list_head list;
+};
+
+struct br_fsscan_entry {
+        void *data;
+
+        loc_t parent;
+
+        gf_dirent_t *entry;
+
+        struct br_scanfs *fsscan;  /* backpointer to subvolume scanner */
+
+        struct list_head list;
+};
+
 /**
  * fetch signature extended attribute from an object's fd.
  * NOTE: On success @xattr is not unref'd as @sign points
@@ -246,8 +264,7 @@ bitd_compare_ckum (xlator_t *this,
  * signs with SHA256).
  */
 int
-bitd_start_scrub (xlator_t *subvol,
-                  gf_dirent_t *entry, loc_t *parent, void *data)
+br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)
 {
         int32_t ret = -1;
         fd_t *fd = NULL;
@@ -256,17 +273,22 @@ bitd_start_scrub (xlator_t *subvol,
         struct iatt parent_buf = {0, };
         pid_t pid = 0;
         br_child_t *child = NULL;
-        xlator_t *this = NULL;
         unsigned char *md = NULL;
         inode_t *linked_inode = NULL;
         br_isignature_out_t *sign = NULL;
         unsigned long signedversion = 0;
+        gf_dirent_t *entry = NULL;
+        loc_t *parent = NULL;
 
-        GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out);
-        GF_VALIDATE_OR_GOTO ("bit-rot", data, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out);
 
-        child = data;
-        this = child->this;
+        entry = fsentry->entry;
+        parent = &fsentry->parent;
+        child = fsentry->data;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", entry, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", parent, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", child, out);
 
         pid = GF_CLIENT_PID_SCRUB;
 
@@ -366,29 +388,532 @@ bitd_start_scrub (xlator_t *subvol,
         return ret;
 }
 
-#define BR_SCRUB_THROTTLE_COUNT 30
-#define BR_SCRUB_THROTTLE_ZZZ   60
+static void
+wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan)
+{
+        br_private_t *priv = NULL;
+        struct br_scrubber *fsscrub = NULL;
+
+        priv = this->private;
+        fsscrub = &priv->fsscrub;
+
+        pthread_mutex_lock (&fsscan->waitlock);
+        {
+                pthread_mutex_lock (&fsscrub->mutex);
+                {
+                        list_replace_init (&fsscan->queued, &fsscan->ready);
+
+                        /* wake up scrubbers */
+                        pthread_cond_broadcast (&fsscrub->cond);
+                }
+                pthread_mutex_unlock (&fsscrub->mutex);
+
+                while (fsscan->entries != 0)
+                        pthread_cond_wait
+                                (&fsscan->waitcond, &fsscan->waitlock);
+        }
+        pthread_mutex_unlock (&fsscan->waitlock);
+}
+
+static inline void
+_br_fsscan_inc_entry_count (struct br_scanfs *fsscan)
+{
+        fsscan->entries++;
+}
+
+static inline void
+_br_fsscan_dec_entry_count (struct br_scanfs *fsscan)
+{
+        if (--fsscan->entries == 0) {
+                pthread_mutex_lock (&fsscan->waitlock);
+                {
+                        pthread_cond_signal (&fsscan->waitcond);
+                }
+                pthread_mutex_unlock (&fsscan->waitlock);
+        }
+}
+
+static void
+_br_fsscan_collect_entry (struct br_scanfs *fsscan,
+                          struct br_fsscan_entry *fsentry)
+{
+        list_add_tail (&fsentry->list, &fsscan->queued);
+        _br_fsscan_inc_entry_count (fsscan);
+}
+
+#define NR_ENTRIES (1<<7)  /* ..bulk scrubbing */
+
+int
+br_fsscanner_handle_entry (xlator_t *subvol,
+                           gf_dirent_t *entry, loc_t *parent, void *data)
+{
+        int32_t ret = -1;
+        int scrub = 0;
+        br_child_t *child = NULL;
+        xlator_t *this = NULL;
+        struct br_scanfs *fsscan = NULL;
+        struct br_fsscan_entry *fsentry = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return);
+        GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return);
+
+        child = data;
+        this = child->this;
+        fsscan = &child->fsscan;
+
+        fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t);
+        if (!fsentry)
+                goto error_return;
+
+        {
+                fsentry->data = data;
+                fsentry->fsscan = &child->fsscan;
+
+                /* copy parent loc */
+                ret = loc_copy (&fsentry->parent, parent);
+                if (ret)
+                        goto dealloc;
+
+                /* copy child entry */
+                fsentry->entry = entry_copy (entry);
+                if (!fsentry->entry)
+                        goto locwipe;
+
+                INIT_LIST_HEAD (&fsentry->list);
+        }
+
+        LOCK (&fsscan->entrylock);
+        {
+                _br_fsscan_collect_entry (fsscan, fsentry);
+
+                /**
+                 * need not be an equality check as entries may be pushed
+                 * back onto the scanned queue when thread(s) are cleaned.
+                 */
+                if (fsscan->entries >= NR_ENTRIES)
+                        scrub = 1;
+        }
+        UNLOCK (&fsscan->entrylock);
+
+        if (scrub)
+                wait_for_scrubbing (this, fsscan);
+
+        return 0;
+
+ locwipe:
+        loc_wipe (&fsentry->parent);
+ dealloc:
+        GF_FREE (fsentry);
+ error_return:
+        return -1;
+}
+
 void *
-br_scrubber (void *arg)
+br_fsscanner (void *arg)
 {
-        loc_t loc = {0,};
-        xlator_t *this = NULL;
-        br_child_t *child = NULL;
+        loc_t       loc   = {0,};
+        xlator_t   *this  = NULL;
+        br_child_t *child = NULL;
+        struct br_scanfs *fsscan = NULL;
 
         child = arg;
         this = child->this;
+        fsscan = &child->fsscan;
 
         THIS = this;
         loc.inode = child->table->root;
 
         while (1) {
-                (void) syncop_ftw_throttle
-                        (child->xl, &loc,
-                         GF_CLIENT_PID_SCRUB, child, bitd_start_scrub,
-                         BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ);
+                (void) syncop_ftw (child->xl, &loc,
+                                   GF_CLIENT_PID_SCRUB,
+                                   child, br_fsscanner_handle_entry);
+                if (!list_empty (&fsscan->queued))
+                        wait_for_scrubbing (this, fsscan);
+        }
+
+        return NULL;
+}
+
+#define BR_SCRUB_THREAD_SCALE_LAZY       0
+#define BR_SCRUB_THREAD_SCALE_NORMAL     0.4
+#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0
+
+#ifndef M_E
+#define M_E 2.718
+#endif
+
+/**
+ * This is just a simple exponential scale to a fixed value selected
+ * per throttle config. We probably need to be smarter and select the
+ * scale based on the number of processor cores too.
+ */
+static unsigned int
+br_scrubber_calc_scale (xlator_t *this,
+                        br_private_t *priv, scrub_throttle_t throttle)
+{
+        unsigned int scale = 0;
+
+        switch (throttle) {
+        case BR_SCRUB_THROTTLE_VOID:
+                scale = 0;
+                break;
+        case BR_SCRUB_THROTTLE_LAZY:
+                scale = priv->child_count *
+                        pow (M_E, BR_SCRUB_THREAD_SCALE_LAZY);
+                break;
+        case BR_SCRUB_THROTTLE_NORMAL:
+                scale = priv->child_count *
+                        pow (M_E, BR_SCRUB_THREAD_SCALE_NORMAL);
+                break;
+        case BR_SCRUB_THROTTLE_AGGRESSIVE:
+                scale = priv->child_count *
+                        pow (M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE);
+                break;
+        default:
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Unknown throttle %d", throttle);
+        }
+
+        return scale;
+}
+
+static void
+br_scrubber_cleanup_handler (void *arg)
+{
+        struct br_scrubber *fsscrub = arg;
+        pthread_mutex_unlock (&fsscrub->mutex);
+}
+
+static inline br_child_t *
+_br_scrubber_get_next_child (struct br_scrubber *fsscrub)
+{
+        br_child_t *child = NULL;
+
+        child = list_first_entry (&fsscrub->scrublist, br_child_t, list);
+        list_rotate_left (&fsscrub->scrublist);
+
+        return child;
+}
+
+static inline void
+_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry)
+{
+        struct br_scanfs *fsscan = &child->fsscan;
+
+        if (list_empty (&fsscan->ready))
+                return;
+        *fsentry = list_first_entry
+                        (&fsscan->ready, struct br_fsscan_entry, list);
+        list_del_init (&(*fsentry)->list);
+}
+
+static inline void
+_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub,
+                                    struct br_fsscan_entry **fsentry)
+{
+        br_child_t *child = NULL;
+        br_child_t *firstchild = NULL;
+
+        while (1) {
+                if (list_empty (&fsscrub->scrublist))
+                        pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+
+                firstchild = NULL;
+                for (child = _br_scrubber_get_next_child (fsscrub);
+                     child != firstchild;
+                     child = _br_scrubber_get_next_child (fsscrub)) {
+
+                        if (!firstchild)
+                                firstchild = child;
+
+                        _br_scrubber_get_entry (child, fsentry);
+                        if (*fsentry)
+                                break;
+                }
+
+                if (*fsentry)
+                        break;
+
+                /* nothing to work on.. wait till available */
+                pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+        }
+}
+
+static void
+br_scrubber_pick_entry (struct br_scrubber *fsscrub,
+                        struct br_fsscan_entry **fsentry)
+{
+        pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub);
+
+        pthread_mutex_lock (&fsscrub->mutex);
+        {
+                *fsentry = NULL;
+                _br_scrubber_find_scrubbable_entry (fsscrub, fsentry);
+        }
+        pthread_mutex_unlock (&fsscrub->mutex);
+
+        pthread_cleanup_pop (0);
+}
 
-                sleep (BR_SCRUB_THROTTLE_ZZZ);
+struct br_scrub_entry {
+        gf_boolean_t scrubbed;
+        struct br_fsscan_entry *fsentry;
+};
+
+/**
+ * We need to be a bit careful here. These thread(s) are prone to
+ * cancellations when threads are scaled down (depending on the throttling
+ * value configured) or when scrubbing is paused. A thread can get cancelled
+ * while it's waiting for entries in the ->pending queue or when an object
+ * is undergoing scrubbing.
+ */
+static void
+br_scrubber_entry_handle (void *arg)
+{
+        struct br_scanfs *fsscan = NULL;
+        struct br_scrub_entry *sentry = NULL;
+        struct br_fsscan_entry *fsentry = NULL;
+
+        sentry = arg;
+
+        fsentry = sentry->fsentry;
+        fsscan = fsentry->fsscan;
+
+        LOCK (&fsscan->entrylock);
+        {
+                if (sentry->scrubbed) {
+                        _br_fsscan_dec_entry_count (fsscan);
+
+                        /* cleanup ->entry */
+                        fsentry->data = NULL;
+                        fsentry->fsscan = NULL;
+                        loc_wipe (&fsentry->parent);
+                        gf_dirent_entry_free (fsentry->entry);
+
+                        GF_FREE (sentry->fsentry);
+                } else {
+                        /* (re)queue the entry again for scrub */
+                        _br_fsscan_collect_entry (fsscan, sentry->fsentry);
+                }
+        }
+        UNLOCK (&fsscan->entrylock);
+}
+
+static void
+br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+        struct br_scrub_entry sentry = {0, };
+
+        sentry.scrubbed = 0;
+        sentry.fsentry = fsentry;
+
+        pthread_cleanup_push (br_scrubber_entry_handle, &sentry);
+        {
+                (void) br_scrubber_scrub_begin (this, fsentry);
+                sentry.scrubbed = 1;
+        }
+        pthread_cleanup_pop (1);
+}
+
+void *
+br_scrubber_proc (void *arg)
+{
+        xlator_t *this = NULL;
+        struct br_scrubber *fsscrub = NULL;
+        struct br_fsscan_entry *fsentry = NULL;
+
+        fsscrub = arg;
+        THIS = this = fsscrub->this;
+
+        while (1) {
+                br_scrubber_pick_entry (fsscrub, &fsentry);
+                br_scrubber_scrub_entry (this, fsentry);
+                sleep (1);
         }
 
         return NULL;
 }
+
+static int32_t
+br_scrubber_scale_up (xlator_t *this,
+                      struct br_scrubber *fsscrub,
+                      unsigned int v1, unsigned int v2)
+{
+        int i = 0;
+        int32_t ret = -1;
+        int diff = 0;
+        struct br_scrubbers *scrub = NULL;
+
+        diff = (int)(v2 - v1);
+
+        gf_log (this->name, GF_LOG_INFO,
+                "Scaling up scrubbers [%d => %d]", v1, v2);
+
+        for (i = 0; i < diff; i++) {
+                scrub = GF_CALLOC (1, sizeof (*scrub),
+                                   gf_br_mt_br_scrubber_t);
+                if (!scrub)
+                        break;
+
+                INIT_LIST_HEAD (&scrub->list);
+                ret = gf_thread_create (&scrub->scrubthread,
+                                        NULL, br_scrubber_proc, fsscrub);
+                if (ret)
+                        break;
+
+                fsscrub->nr_scrubbers++;
+                list_add_tail (&scrub->list, &fsscrub->scrubbers);
+        }
+
+        if ((i != diff) && !scrub)
+                goto error_return;
+
+        if (i != diff) /* degraded scaling.. */
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not fully scale up to %d scrubber(s). Spawned "
+                        "%d/%d [total scrubber(s): %d]", v2, i, diff, (v1 + i));
+
+        return 0;
+
+ error_return:
+        return -1;
+}
+
+static int32_t
+br_scrubber_scale_down (xlator_t *this,
+                        struct br_scrubber *fsscrub,
+                        unsigned int v1, unsigned int v2)
+{
+        int i = 0;
+        int diff = 0;
+        int32_t ret = -1;
+        struct br_scrubbers *scrub = NULL;
+
+        diff = (int)(v1 - v2);
+
+        gf_log (this->name, GF_LOG_INFO,
+                "Scaling down scrubbers [%d => %d]", v1, v2);
+
+        for (i = 0; i < diff; i++) {
+                scrub = list_first_entry
+                        (&fsscrub->scrubbers, struct br_scrubbers, list);
+
+                list_del_init (&scrub->list);
+                ret = gf_thread_cleanup_xint (scrub->scrubthread);
+                if (ret)
+                        break;
+                GF_FREE (scrub);
+
+                fsscrub->nr_scrubbers--;
+        }
+
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not fully scale down to %d scrubber(s). "
+                        "Terminated %d/%d [total scrubber(s): %d]",
+                        v2, i, diff, (v1 - i));
+                ret = 0;
+        }
+
+        return ret;
+}
+
+static int32_t
+br_scrubber_configure (xlator_t *this, br_private_t *priv,
+                       struct br_scrubber *fsscrub, scrub_throttle_t nthrottle)
+{
+        int32_t ret = 0;
+        unsigned int v1 = 0;
+        unsigned int v2 = 0;
+
+        v1 = fsscrub->nr_scrubbers;
+        v2 = br_scrubber_calc_scale (this, priv, nthrottle);
+
+        if (v1 == v2)
+                return 0;
+
+        if (v1 > v2)
+                ret = br_scrubber_scale_down (this, fsscrub, v1, v2);
+        else
+                ret = br_scrubber_scale_up (this, fsscrub, v1, v2);
+
+        return ret;
+}
+
+/* TODO: token bucket spec */
+static int32_t
+br_scrubber_handle_throttle (xlator_t *this,
+                             br_private_t *priv, dict_t *options)
+{
+        int32_t ret = 0;
+        char *tmp = NULL;
+        struct br_scrubber *fsscrub = NULL;
+        scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID;
+
+        fsscrub = &priv->fsscrub;
+
+        if (options)
+                GF_OPTION_RECONF ("scrub-throttle",
+                                  tmp, options, str, error_return);
+        else
+                GF_OPTION_INIT ("scrub-throttle", tmp, str, error_return);
+
+        if (strcasecmp (tmp, "lazy") == 0)
+                nthrottle = BR_SCRUB_THROTTLE_LAZY;
+        else if (strcasecmp (tmp, "normal") == 0)
+                nthrottle = BR_SCRUB_THROTTLE_NORMAL;
+        else if (strcasecmp (tmp, "aggressive") == 0)
+                nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE;
+        else
+                goto error_return;
+
+        /* on failure old throttling value is preserved */
+        ret = br_scrubber_configure (this, priv, fsscrub, nthrottle);
+        if (ret)
+                goto error_return;
+
+        fsscrub->throttle = nthrottle;
+        return 0;
+
+ error_return:
+        return -1;
+}
+
+/* TODO: pause/resume, frequency */
+int32_t
+br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
+{
+        int32_t ret = 0;
+
+        ret = br_scrubber_handle_throttle (this, priv, options);
+        if (ret)
+                goto error_return;
+
+        return 0;
+
+ error_return:
+        return -1;
+}
+
+int32_t
+br_scrubber_init (xlator_t *this, br_private_t *priv)
+{
+        struct br_scrubber *fsscrub = NULL;
+
+        priv->tbf = br_tbf_init (NULL, 0);
+        if (!priv->tbf)
+                return -1;
+
+        fsscrub = &priv->fsscrub;
+
+        fsscrub->this = this;
+        fsscrub->throttle = BR_SCRUB_THROTTLE_VOID;
+
+        pthread_mutex_init (&fsscrub->mutex, NULL);
+        pthread_cond_init (&fsscrub->cond, NULL);
+
+        fsscrub->nr_scrubbers = 0;
+        INIT_LIST_HEAD (&fsscrub->scrubbers);
+        INIT_LIST_HEAD (&fsscrub->scrublist);
+
+        return 0;
+}