diff options
Diffstat (limited to 'xlators/features')
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 247 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 13 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-common.h | 6 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub.c | 27 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub.h | 1 |
5 files changed, 212 insertions, 82 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index 1712aa1529b..d2b179027a2 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -24,23 +24,26 @@ #include "bit-rot-scrub.h" #include <pthread.h> +/** + * fetch signature extended attribute from an object's fd. + * NOTE: On success @xattr is not unref'd as @sign points + * to the dictionary value. + */ static inline int32_t -bitd_fetch_signature (xlator_t *this, - br_child_t *child, fd_t *fd, br_isignature_out_t *sign) +bitd_fetch_signature (xlator_t *this, br_child_t *child, + fd_t *fd, dict_t **xattr, br_isignature_out_t **sign) { - int32_t ret = -1; - dict_t *xattr = NULL; - br_isignature_out_t *sigptr = NULL; + int32_t ret = -1; - ret = syncop_fgetxattr (child->xl, fd, &xattr, + ret = syncop_fgetxattr (child->xl, fd, xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, NULL); if (ret < 0) { - br_log_object (this, "getxattr", fd->inode->gfid, -ret); + br_log_object (this, "fgetxattr", fd->inode->gfid, -ret); goto out; } - ret = dict_get_ptr (xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, - (void **)&sigptr); + ret = dict_get_ptr + (*xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **) sign); if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to extract signature info [GFID: %s]", @@ -48,67 +51,141 @@ bitd_fetch_signature (xlator_t *this, goto unref_dict; } - ret = 0; - (void) memcpy (sign, sigptr, sizeof (br_isignature_out_t)); + return 0; unref_dict: - dict_unref (xattr); + dict_unref (*xattr); out: - return ret; + return -1; } -static inline int32_t +/** + * POST COMPUTE CHECK + * + * Checks to be performed before verifying calculated signature + * Object is skipped if: + * - has stale signature + * - mismatches versions caches in pre-compute check + */ + +int32_t bitd_scrub_post_compute_check (xlator_t *this, br_child_t *child, - br_isignature_out_t *sign, fd_t *fd) + fd_t *fd, unsigned long version, + br_isignature_out_t **signature) { - int32_t ret = 0; + int32_t ret = 0; + size_t signlen = 0; + dict_t *xattr = NULL; + br_isignature_out_t *signptr = NULL; - ret = bitd_fetch_signature (this, child, fd, sign); - if (ret) + ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); + if (ret < 0) goto out; - if (sign->stale) + + /** + * Either the object got dirtied during the time the signature was + * calculated OR the version we saved during pre-compute check does + * not match now, implying that the object got dirtied and signed in + * between scrubs pre & post compute checks (checksum window). + * + * The log entry looks pretty ugly, but helps in debugging.. + */ + if (signptr->stale || (signptr->version != version)) { + gf_log (this->name, GF_LOG_DEBUG, + "<STAGE: POST> Object [GFID: %s] either has a stale " + "signature OR underwent signing during checksumming " + "{Stale: %d | Version: %lu,%lu}", + uuid_utoa (fd->inode->gfid), (signptr->stale) ? 1 : 0, + version, signptr->version); ret = -1; + goto unref_dict; + } + + signlen = signptr->signaturelen; + *signature = GF_CALLOC (1, sizeof (br_isignature_out_t) + signlen, + gf_common_mt_char); + (void) memcpy (*signature, signptr, + sizeof (br_isignature_out_t) + signlen); + + unref_dict: + dict_unref (xattr); out: return ret; } static inline int32_t -bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child, fd_t *fd) +bitd_signature_staleness (xlator_t *this, + br_child_t *child, fd_t *fd, + int *stale, unsigned long *version) { int32_t ret = -1; - br_isignature_out_t sign = {0,}; + dict_t *xattr = NULL; + br_isignature_out_t *signptr = NULL; - /* if the object is already marked bad, don't bother checking */ - if (bitd_is_bad_file (this, child, NULL, fd)) + ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); + if (ret < 0) goto out; - /* else, check for signature staleness */ - ret = bitd_fetch_signature (this, child, fd, &sign); - if (ret) - goto out; - if (sign.stale) { - ret = -1; + /** + * save verison for validation in post compute stage + * c.f. bitd_scrub_post_compute_check() + */ + *stale = signptr->stale ? 1 : 0; + *version = signptr->version; + + dict_unref (xattr); + + out: + return ret; +} + +/** + * PRE COMPUTE CHECK + * + * Checks to be performed before initiating object signature calculation. + * An object is skipped if: + * - it's already marked corrupted + * - has stale signature + */ +int32_t +bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child, + fd_t *fd, unsigned long *version) +{ + int stale = 0; + int32_t ret = -1; + + if (bitd_is_bad_file (this, child, NULL, fd)) { + gf_log (this->name, GF_LOG_WARNING, + "Object [GFID: %s] is marked corrupted, skipping..", + uuid_utoa (fd->inode->gfid)); goto out; } - ret = 0; + ret = bitd_signature_staleness (this, child, fd, &stale, version); + if (!ret && stale) { + gf_log (this->name, GF_LOG_DEBUG, + "<STAGE: PRE> Object [GFID: %s] has stale signature", + uuid_utoa (fd->inode->gfid)); + ret = -1; + } out: return ret; } -static inline int +/* static inline int */ +int bitd_compare_ckum (xlator_t *this, br_isignature_out_t *sign, unsigned char *md, inode_t *linked_inode, gf_dirent_t *entry, fd_t *fd, br_child_t *child) { int ret = -1; - dict_t xattr = {0,}; + dict_t *xattr = NULL; GF_VALIDATE_OR_GOTO ("bit-rot", this, out); GF_VALIDATE_OR_GOTO (this->name, sign, out); @@ -118,52 +195,70 @@ bitd_compare_ckum (xlator_t *this, GF_VALIDATE_OR_GOTO (this->name, md, out); GF_VALIDATE_OR_GOTO (this->name, entry, out); - if (strncmp (sign->signature, (char *)md, strlen (sign->signature))) { - gf_log (this->name, GF_LOG_WARNING, "checksums does not match " - "for the entry %s (gfid: %s)", entry->d_name, - uuid_utoa (linked_inode->gfid)); - ret = dict_set_int32 (&xattr, "trusted.glusterfs.bad-file", - _gf_true); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "dict-set for " - "bad-file (entry: %s, gfid: %s) failed", - entry->d_name, uuid_utoa (linked_inode->gfid)); - goto out; - } - - ret = syncop_fsetxattr (child->xl, fd, &xattr, 0); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "setxattr to mark " - "the file %s (gfid: %s) as bad failed", - entry->d_name, uuid_utoa (linked_inode->gfid)); - goto out; - } + if (strncmp + (sign->signature, (char *) md, strlen (sign->signature)) == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Entry %s [GFID: %s] matches calculated checksum", + entry->d_name, uuid_utoa (linked_inode->gfid)); + return 0; } -out: + gf_log (this->name, GF_LOG_WARNING, + "Object checksumsum mismatch: %s [GFID: %s]", + entry->d_name, uuid_utoa (linked_inode->gfid)); + + /* Perform bad-file marking */ + xattr = dict_new (); + if (!xattr) { + ret = -1; + goto out; + } + + ret = dict_set_int32 (xattr, "trusted.glusterfs.bad-file", _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting bad-file marker for %s [GFID: %s]", + entry->d_name, uuid_utoa (linked_inode->gfid)); + goto dictfree; + } + + gf_log (this->name, GF_LOG_INFO, "Marking %s [GFID: %s] as corrupted..", + entry->d_name, uuid_utoa (linked_inode->gfid)); + ret = syncop_fsetxattr (child->xl, fd, xattr, 0); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Error marking object %s [GFID: %s] as corrupted", + entry->d_name, uuid_utoa (linked_inode->gfid)); + + dictfree: + dict_unref (xattr); + out: return ret; } /** - * This is the scrubber. As of now there's a mix of fd and inode - * operations. Better to move them to fd based to be clean and - * avoid code cluttering. + * "The Scrubber" + * + * Perform signature validation for a given object with the assumption + * that the signature is SHA256 (because signer as of now _always_ + * signs with SHA256). */ int bitd_start_scrub (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, void *data) { - int32_t ret = -1; - fd_t *fd = NULL; - loc_t loc = {0, }; - struct iatt iatt = {0, }; - struct iatt parent_buf = {0, }; - pid_t pid = 0; - br_child_t *child = NULL; - xlator_t *this = NULL; - unsigned char *md = NULL; - inode_t *linked_inode = NULL; - br_isignature_out_t sign = {0,}; + int32_t ret = -1; + fd_t *fd = NULL; + loc_t loc = {0, }; + struct iatt iatt = {0, }; + struct iatt parent_buf = {0, }; + pid_t pid = 0; + br_child_t *child = NULL; + xlator_t *this = NULL; + unsigned char *md = NULL; + inode_t *linked_inode = NULL; + br_isignature_out_t *sign = NULL; + unsigned long signedversion = 0; GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out); GF_VALIDATE_OR_GOTO ("bit-rot", data, out); @@ -189,6 +284,9 @@ bitd_start_scrub (xlator_t *subvol, if (linked_inode) inode_lookup (linked_inode); + gf_log (this->name, GF_LOG_DEBUG, "Scrubbing object %s [GFID: %s]", + entry->d_name, uuid_utoa (linked_inode->gfid)); + if (iatt.ia_type != IA_IFREG) { gf_log (this->name, GF_LOG_DEBUG, "%s is not a regular " "file", entry->d_name); @@ -221,7 +319,7 @@ bitd_start_scrub (xlator_t *subvol, * - presence of bad object * - signature staleness */ - ret = bitd_scrub_pre_compute_check (this, child, fd); + ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion); if (ret) goto unrefd; /* skip this object */ @@ -243,13 +341,16 @@ bitd_start_scrub (xlator_t *subvol, * perform post compute checks as an object's signature may have * become stale while scrubber calculated checksum. */ - ret = bitd_scrub_post_compute_check (this, child, &sign, fd); + ret = bitd_scrub_post_compute_check (this, child, + fd, signedversion, &sign); if (ret) goto free_md; - ret = bitd_compare_ckum (this, &sign, md, + ret = bitd_compare_ckum (this, sign, md, linked_inode, entry, fd, child); + GF_FREE (sign); /* alloced on post-compute */ + /** fd_unref() takes care of closing fd.. like syncop_close() */ free_md: @@ -263,8 +364,8 @@ bitd_start_scrub (xlator_t *subvol, return ret; } -#define BR_SCRUB_THROTTLE_COUNT 10 -#define BR_SCRUB_THROTTLE_ZZZ 100 +#define BR_SCRUB_THROTTLE_COUNT 30 +#define BR_SCRUB_THROTTLE_ZZZ 60 void * br_scrubber (void *arg) { @@ -284,7 +385,7 @@ br_scrubber (void *arg) GF_CLIENT_PID_SCRUB, child, bitd_start_scrub, BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ); - sleep (BR_SCRUB_THROTTLE_ZZZ * BR_SCRUB_THROTTLE_COUNT); + sleep (BR_SCRUB_THROTTLE_ZZZ); } return NULL; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index d808c0dc69c..6a4de70bfbb 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -139,8 +139,14 @@ br_prepare_signature (const unsigned char *sign, if (!signature) return NULL; + /* object version */ signature->signedversion = object->signedversion; + + /* signature length & type */ + signature->signaturelen = hashlen; signature->signaturetype = hashtype; + + /* signature itself */ memcpy (signature->signature, (char *)sign, hashlen); signature->signature[hashlen+1] = '\0'; @@ -167,7 +173,7 @@ bitd_is_bad_file (xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd) "trusted.glusterfs.bad-file", NULL); if (!ret) { - gf_log (this->name, GF_LOG_ERROR, "[GFID: %s] is marked " + gf_log (this->name, GF_LOG_DEBUG, "[GFID: %s] is marked " "corrupted", uuid_utoa (inode->gfid)); bad_file = _gf_true; } @@ -918,8 +924,11 @@ bitd_oneshot_crawl (xlator_t *subvol, * if there are any fds present for that inode) and handle properly. */ - if (bitd_is_bad_file (this, child, &loc, NULL)) + if (bitd_is_bad_file (this, child, &loc, NULL)) { + gf_log (this->name, GF_LOG_WARNING, + "Entry [%s] is marked corrupted.. skipping.", loc.path); goto unref_inode; + } ret = syncop_getxattr (child->xl, &loc, &xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, NULL); diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h index fdf23142768..699323170d3 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-common.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h @@ -76,6 +76,7 @@ typedef struct br_isignature_in { unsigned long signedversion; /* version against which the object was signed */ + size_t signaturelen; /* signature length */ char signature[0]; /* object signature */ } br_isignature_t; @@ -86,11 +87,14 @@ typedef struct br_isignature_in { typedef struct br_isignature_out { char stale; /* stale signature? */ + unsigned long version; /* current signed version */ + uint32_t time[2]; /* time when the object got dirtied */ int8_t signaturetype; /* hash type */ - char signature[0]; /* signature (hash) */ + size_t signaturelen; /* signature length */ + char signature[0]; /* signature (hash) */ } br_isignature_out_t; typedef struct br_stub_init { diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index f8c8f59c1f8..0db500659b5 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -566,7 +566,7 @@ br_stub_prepare_signature (xlator_t *this, dict_t *dict, if (!br_is_signature_type_valid (sign->signaturetype)) goto error_return; - signaturelen = strlen (sign->signature); + signaturelen = sign->signaturelen; ret = br_stub_alloc_versions (NULL, &sbuf, signaturelen); if (ret) goto error_return; @@ -657,8 +657,8 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { int32_t ret = 0; - ssize_t totallen = 0; - ssize_t signaturelen = 0; + size_t totallen = 0; + size_t signaturelen = 0; br_version_t *obuf = NULL; br_signature_t *sbuf = NULL; br_isignature_out_t *sign = NULL; @@ -681,8 +681,21 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, || (status == BR_VXATTR_STATUS_UNSIGNED)) goto delkeys; - signaturelen = strlen (sbuf->signature); - totallen = signaturelen + sizeof (br_isignature_out_t); + /** + * okay.. we have enough information to satisfy the request, + * namely: version and signing extended attribute. what's + * pending is the signature length -- that's figured out + * indirectly via the size of the _whole_ xattr and the + * on-disk signing xattr header size. + */ + op_errno = EINVAL; + ret = dict_get_uint32 (xattr, BITROT_SIGNING_XATTR_SIZE_KEY, + (uint32_t *)&signaturelen); + if (ret) + goto delkeys; + + signaturelen -= sizeof (br_signature_t); + totallen = sizeof (br_isignature_out_t) + signaturelen; op_errno = ENOMEM; sign = GF_CALLOC (1, totallen, gf_br_stub_mt_signature_t); @@ -692,10 +705,12 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, sign->time[0] = obuf->timebuf[0]; sign->time[1] = obuf->timebuf[1]; - /* Object's dirty state */ + /* Object's dirty state & current signed version */ + sign->version = sbuf->signedversion; sign->stale = (obuf->ongoingversion != sbuf->signedversion) ? 1 : 0; /* Object's signature */ + sign->signaturelen = signaturelen; sign->signaturetype = sbuf->signaturetype; (void) memcpy (sign->signature, sbuf->signature, signaturelen); diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h index d565112b1ad..5db0e321c20 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h @@ -263,6 +263,7 @@ br_stub_remove_vxattrs (dict_t *xattr) if (xattr) { dict_del (xattr, BITROT_CURRENT_VERSION_KEY); dict_del (xattr, BITROT_SIGNING_VERSION_KEY); + dict_del (xattr, BITROT_SIGNING_XATTR_SIZE_KEY); } } |