diff options
Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot.c')
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 242 |
1 files changed, 184 insertions, 58 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 6234dd83864..c1a508a57a6 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -22,6 +22,7 @@ #include "compat-errno.h" #include "bit-rot.h" +#include "bit-rot-scrub.h" #include <pthread.h> static int @@ -146,6 +147,38 @@ br_prepare_signature (const unsigned char *sign, return signature; } +gf_boolean_t +bitd_is_bad_file (xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd) +{ + int32_t ret = -1; + dict_t *xattr = NULL; + inode_t *inode = NULL; + gf_boolean_t bad_file = _gf_false; + + GF_VALIDATE_OR_GOTO ("bit-rot", this, out); + + inode = (loc) ? loc->inode : ((fd) ? fd->inode : NULL); + + if (fd) + ret = syncop_fgetxattr (child->xl, fd, &xattr, + "trusted.glusterfs.bad-file", NULL); + else if (loc) + ret = syncop_getxattr (child->xl, loc, &xattr, + "trusted.glusterfs.bad-file", NULL); + + if (!ret) { + gf_log (this->name, GF_LOG_ERROR, "[GFID: %s] is marked " + "corrupted", uuid_utoa (inode->gfid)); + bad_file = _gf_true; + } + + if (xattr) + dict_unref (xattr); + +out: + return bad_file; +} + /** * Do a lookup on the gfid present within the object. 
*/ @@ -222,6 +255,7 @@ br_object_open (xlator_t *this, ret = syncop_open (object->child->xl, &loc, O_RDONLY, fd); if (ret) { + br_log_object (this, "open", inode->gfid, -ret); fd_unref (fd); fd = NULL; } else { @@ -284,8 +318,8 @@ br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child, } int32_t -br_object_checksum (unsigned char *md, - br_object_t *object, fd_t *fd, struct iatt *iatt) +br_calculate_obj_checksum (unsigned char *md, + br_child_t *child, fd_t *fd, struct iatt *iatt) { int32_t ret = -1; off_t offset = 0; @@ -294,16 +328,16 @@ br_object_checksum (unsigned char *md, SHA256_CTX sha256; - GF_VALIDATE_OR_GOTO ("bit-rot", object, out); + GF_VALIDATE_OR_GOTO ("bit-rot", child, out); GF_VALIDATE_OR_GOTO ("bit-rot", iatt, out); GF_VALIDATE_OR_GOTO ("bit-rot", fd, out); - this = object->this; + this = child->this; SHA256_Init (&sha256); while (1) { - ret = br_object_read_block_and_sign (this, fd, object->child, + ret = br_object_read_block_and_sign (this, fd, child, offset, block, &sha256); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "reading block with " @@ -326,6 +360,13 @@ br_object_checksum (unsigned char *md, } static inline int32_t +br_object_checksum (unsigned char *md, + br_object_t *object, fd_t *fd, struct iatt *iatt) +{ + return br_calculate_obj_checksum (md, object->child, fd, iatt); +} + +static inline int32_t br_object_read_sign (inode_t *linked_inode, fd_t *fd, br_object_t *object, struct iatt *iatt) { @@ -396,7 +437,8 @@ br_object_read_sign (inode_t *linked_inode, fd_t *fd, br_object_t *object, static inline int br_object_sign_softerror (int32_t op_errno) { - return ((op_errno == ENOENT) || (op_errno = ESTALE)); + return ((op_errno == ENOENT) || (op_errno == ESTALE) + || (op_errno == ENODATA)); } void @@ -459,8 +501,6 @@ static inline int32_t br_sign_object (br_object_t *object) * we have an open file descriptor on the object. from here on, * do not be generous to file operation errors. 
*/ - - /* change this to DEBUG log level later */ gf_log (this->name, GF_LOG_DEBUG, "Signing object [%s]", uuid_utoa (linked_inode->gfid)); @@ -878,6 +918,9 @@ bitd_oneshot_crawl (xlator_t *subvol, * if there are any fds present for that inode) and handle properly. */ + if (bitd_is_bad_file (this, child, &loc, NULL)) + goto unref_inode; + ret = syncop_getxattr (child->xl, &loc, &xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, NULL); if (ret < 0) { @@ -993,11 +1036,26 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) return -1; } +static inline int32_t +br_enact_scrubber (xlator_t *this, br_child_t *child) +{ + int32_t ret = 0; + + ret = gf_thread_create (&child->thread, NULL, br_scrubber, child); + if (ret != 0) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "failed to spawn scrubber"); + } + + return ret; +} + /** * This routine fetches various attributes associated with a child which * is basically a subvolume. Attributes include brick path and the stub * birth time. This is done by performing a lookup on the root followed - * by getxattr() on a virtual key. + * by getxattr() on a virtual key. Depending on the configuration, the + * process either acts as a signer or a scrubber. 
*/ static inline int32_t br_brick_connect (xlator_t *this, br_child_t *child) @@ -1008,12 +1066,15 @@ br_brick_connect (xlator_t *this, br_child_t *child) struct iatt parent = {0, }; br_stub_init_t *stub = NULL; dict_t *xattr = NULL; + br_private_t *priv = NULL; int op_errno = 0; GF_VALIDATE_OR_GOTO ("bit-rot", this, out); GF_VALIDATE_OR_GOTO (this->name, child, out); GF_VALIDATE_OR_GOTO (this->name, this->private, out); + priv = this->private; + loc.inode = inode_ref (child->table->root); uuid_copy (loc.gfid, loc.inode->gfid); loc.path = gf_strdup ("/"); @@ -1049,7 +1110,10 @@ br_brick_connect (xlator_t *this, br_child_t *child) child->tv.tv_sec = ntohl (stub->timebuf[0]); child->tv.tv_usec = ntohl (stub->timebuf[0]); - ret = br_enact_signer (this, child, stub); + if (priv->iamscrubber) + ret = br_enact_scrubber (this, child); + else + ret = br_enact_signer (this, child, stub); free_dict: dict_unref (xattr); @@ -1208,6 +1272,78 @@ out: return 0; } +/** + * Signer lifecycle helpers: br_init_signer() initializes signer specific structures and spawns worker threads; br_fini_signer() tears them down. 
+ */ + +static inline void +br_fini_signer (xlator_t *this, br_private_t *priv) +{ + int i = 0; + + for (; i < BR_WORKERS; i++) { + (void) gf_thread_cleanup_xint (priv->obj_queue->workers[i]); + } + + pthread_cond_destroy (&priv->object_cond); + gf_tw_cleanup_timers (priv->timer_wheel); +} + +static inline int32_t +br_init_signer (xlator_t *this, br_private_t *priv) +{ + int i = 0; + int32_t ret = -1; + + /* initialize gfchangelog xlator context */ + ret = gf_changelog_init (this); + if (ret) + goto out; + + priv->timer_wheel = gf_tw_init_timers (); + if (!priv->timer_wheel) { + gf_log (this->name, GF_LOG_ERROR, + "failed to initialize the timer wheel"); + goto out; + } + + pthread_cond_init (&priv->object_cond, NULL); + + priv->obj_queue = GF_CALLOC (1, sizeof (*priv->obj_queue), + gf_br_mt_br_ob_n_wk_t); + if (!priv->obj_queue) + goto cleanup_timer; + INIT_LIST_HEAD (&priv->obj_queue->objects); + + for (i = 0; i < BR_WORKERS; i++) { + ret = gf_thread_create (&priv->obj_queue->workers[i], NULL, + br_process_object, this); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "thread creation failed (%s)", strerror (-ret)); + ret = -1; + goto cleanup_threads; + } + } + + return 0; + + cleanup_threads: + for (i--; i >= 0; i--) { + (void) gf_thread_cleanup_xint (priv->obj_queue->workers[i]); + } + + GF_FREE (priv->obj_queue); + + cleanup_timer: + /* that's explicit */ + pthread_cond_destroy (&priv->object_cond); + gf_tw_cleanup_timers (priv->timer_wheel); + + out: + return -1; +} + int32_t init (xlator_t *this) { @@ -1228,18 +1364,14 @@ init (xlator_t *this) goto out; } - /* initialize gfchangelog xlator context */ - ret = gf_changelog_init (this); - if (ret) - goto out; - + GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out); GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, out); priv->child_count = xlator_subvolume_count (this); priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children), gf_br_mt_br_child_t); if (!priv->children) - goto 
out; + goto free_priv; trav = this->children; while (trav) { @@ -1252,7 +1384,7 @@ init (xlator_t *this) gf_log (this->name, GF_LOG_ERROR, "failed to allocate mem-pool for timer"); errno = ENOMEM; - goto out; + goto free_children; } i++; @@ -1268,55 +1400,41 @@ init (xlator_t *this) this->private = priv; - ret = gf_thread_create (&priv->thread, NULL, br_handle_events, - this); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "thread creation failed (%s)", strerror (errno)); - goto out; - } - - priv->timer_wheel = gf_tw_init_timers (); - if (!priv->timer_wheel) { - gf_log (this->name, GF_LOG_ERROR, "failed to initialize the " - "timer wheel"); - goto out; + if (!priv->iamscrubber) { + ret = br_init_signer (this, priv); + if (ret) + goto cleanup_mutex; } - pthread_cond_init (&priv->object_cond, NULL); - priv->obj_queue = GF_CALLOC (1, sizeof (*priv->obj_queue), - gf_br_mt_br_ob_n_wk_t); - if (!priv->obj_queue) { - gf_log (this->name, GF_LOG_ERROR, "memory allocation failed"); - goto out; + ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "thread creation failed (%s)", strerror (-ret)); + ret = -1; } - INIT_LIST_HEAD (&priv->obj_queue->objects); - - for (i = 0; i < BR_WORKERS; i++) { - gf_thread_create (&priv->obj_queue->workers[i], NULL, - br_process_object, this); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "thread creation failed (%s)", - strerror (errno)); - goto out; - } + if (!ret) { + gf_log (this->name, GF_LOG_INFO, + "bit-rot xlator loaded in \"%s\" mode", + (priv->iamscrubber) ? 
"SCRUBBER" : "SIGNER"); + return 0; } - ret = 0; - -out: - if (ret) { - if (priv->children) - GF_FREE (priv->children); - if (priv->timer_wheel) - gf_tw_cleanup_timers (priv->timer_wheel); - GF_FREE (priv); + cleanup_mutex: + (void) pthread_cond_destroy (&priv->cond); + (void) pthread_mutex_destroy (&priv->lock); + free_children: + for (i = 0; i < priv->child_count; i++) { + if (priv->children[i].timer_pool) + mem_pool_destroy (priv->children[i].timer_pool); } - gf_log (this->name, GF_LOG_DEBUG, "bit-rot xlator loaded"); - return ret; + GF_FREE (priv->children); + free_priv: + GF_FREE (priv); + out: + this->private = NULL; + return -1; } void @@ -1327,9 +1445,12 @@ fini (xlator_t *this) if (!priv) return; + if (!priv->iamscrubber) + br_fini_signer (this, priv); br_free_children (this); if (priv->timer_wheel) gf_tw_cleanup_timers (priv->timer_wheel); + this->private = NULL; GF_FREE (priv); @@ -1347,5 +1468,10 @@ struct volume_options options[] = { .description = "default time duration for which an object waits " "before it is signed", }, + { .key = {"scrubber"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "option to run as a scrubber", + }, { .key = {NULL} }, }; |