summaryrefslogtreecommitdiffstats
path: root/xlators/features
diff options
context:
space:
mode:
authorVenky Shankar <vshankar@redhat.com>2015-03-31 15:48:18 +0530
committerVijay Bellur <vbellur@redhat.com>2015-04-08 07:01:32 +0000
commitf0cd1d73c63001740cd7691a77df7631c9b8e8dc (patch)
tree336a8ee694dc71a2fde6927d8a40682bef6368ef /xlators/features
parent7fb55dbdabf73f9169b0f3021a42fa120d64b373 (diff)
bitrot/scrub: Scrubber fixes
This patch fixes a handful of problem with scrubber which are detailed below. Scrubber used to skip objects for verification due to missing fd iterface to fetch versioning extended attributes. Similar to the inode interface, an fd based interface in POSIX is now introduced. Moreover, this patch also fixes potential false reporting by scrubber due to: An object gets dirtied and signed when scrubber is busy calculatingobject checksum. This is fixed by caching the signed version when an object is first inspected for stalenes, i.e., during pre-compute stage. This version is used to verify checksum in the post-compute stage when the signatures are compared for possible corruption. Side effect of _not_ sending signature length during signing resulted in "truncated" signature to be set for an object. Now, at the time of signing, the signature length is sent and is used in place of invoking strlen() to get signature length (which could have possible 00s). The signature length itself is not persisted in the signature xattr, but is calculated on-the-fly by substracting the xattr length by the "structure" header size. Some of the log entries are made more meaningful (as and aid for debugging). Change-Id: I938bee5aea6688d5d99eb2640053613af86d6269 BUG: 1207624 Signed-off-by: Venky Shankar <vshankar@redhat.com> Reviewed-on: http://review.gluster.org/10118 Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/features')
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c247
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c13
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-common.h6
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c27
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.h1
5 files changed, 212 insertions, 82 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index 1712aa1529b..d2b179027a2 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -24,23 +24,26 @@
#include "bit-rot-scrub.h"
#include <pthread.h>
+/**
+ * fetch signature extended attribute from an object's fd.
+ * NOTE: On success @xattr is not unref'd as @sign points
+ * to the dictionary value.
+ */
static inline int32_t
-bitd_fetch_signature (xlator_t *this,
- br_child_t *child, fd_t *fd, br_isignature_out_t *sign)
+bitd_fetch_signature (xlator_t *this, br_child_t *child,
+ fd_t *fd, dict_t **xattr, br_isignature_out_t **sign)
{
- int32_t ret = -1;
- dict_t *xattr = NULL;
- br_isignature_out_t *sigptr = NULL;
+ int32_t ret = -1;
- ret = syncop_fgetxattr (child->xl, fd, &xattr,
+ ret = syncop_fgetxattr (child->xl, fd, xattr,
GLUSTERFS_GET_OBJECT_SIGNATURE, NULL);
if (ret < 0) {
- br_log_object (this, "getxattr", fd->inode->gfid, -ret);
+ br_log_object (this, "fgetxattr", fd->inode->gfid, -ret);
goto out;
}
- ret = dict_get_ptr (xattr, GLUSTERFS_GET_OBJECT_SIGNATURE,
- (void **)&sigptr);
+ ret = dict_get_ptr
+ (*xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **) sign);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"failed to extract signature info [GFID: %s]",
@@ -48,67 +51,141 @@ bitd_fetch_signature (xlator_t *this,
goto unref_dict;
}
- ret = 0;
- (void) memcpy (sign, sigptr, sizeof (br_isignature_out_t));
+ return 0;
unref_dict:
- dict_unref (xattr);
+ dict_unref (*xattr);
out:
- return ret;
+ return -1;
}
-static inline int32_t
+/**
+ * POST COMPUTE CHECK
+ *
+ * Checks to be performed before verifying calculated signature
+ * Object is skipped if:
+ * - has stale signature
+ * - mismatches versions caches in pre-compute check
+ */
+
+int32_t
bitd_scrub_post_compute_check (xlator_t *this,
br_child_t *child,
- br_isignature_out_t *sign, fd_t *fd)
+ fd_t *fd, unsigned long version,
+ br_isignature_out_t **signature)
{
- int32_t ret = 0;
+ int32_t ret = 0;
+ size_t signlen = 0;
+ dict_t *xattr = NULL;
+ br_isignature_out_t *signptr = NULL;
- ret = bitd_fetch_signature (this, child, fd, sign);
- if (ret)
+ ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr);
+ if (ret < 0)
goto out;
- if (sign->stale)
+
+ /**
+ * Either the object got dirtied during the time the signature was
+ * calculated OR the version we saved during pre-compute check does
+ * not match now, implying that the object got dirtied and signed in
+ * between scrubs pre & post compute checks (checksum window).
+ *
+ * The log entry looks pretty ugly, but helps in debugging..
+ */
+ if (signptr->stale || (signptr->version != version)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "<STAGE: POST> Object [GFID: %s] either has a stale "
+ "signature OR underwent signing during checksumming "
+ "{Stale: %d | Version: %lu,%lu}",
+ uuid_utoa (fd->inode->gfid), (signptr->stale) ? 1 : 0,
+ version, signptr->version);
ret = -1;
+ goto unref_dict;
+ }
+
+ signlen = signptr->signaturelen;
+ *signature = GF_CALLOC (1, sizeof (br_isignature_out_t) + signlen,
+ gf_common_mt_char);
+ (void) memcpy (*signature, signptr,
+ sizeof (br_isignature_out_t) + signlen);
+
+ unref_dict:
+ dict_unref (xattr);
out:
return ret;
}
static inline int32_t
-bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child, fd_t *fd)
+bitd_signature_staleness (xlator_t *this,
+ br_child_t *child, fd_t *fd,
+ int *stale, unsigned long *version)
{
int32_t ret = -1;
- br_isignature_out_t sign = {0,};
+ dict_t *xattr = NULL;
+ br_isignature_out_t *signptr = NULL;
- /* if the object is already marked bad, don't bother checking */
- if (bitd_is_bad_file (this, child, NULL, fd))
+ ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr);
+ if (ret < 0)
goto out;
- /* else, check for signature staleness */
- ret = bitd_fetch_signature (this, child, fd, &sign);
- if (ret)
- goto out;
- if (sign.stale) {
- ret = -1;
+ /**
+ * save verison for validation in post compute stage
+ * c.f. bitd_scrub_post_compute_check()
+ */
+ *stale = signptr->stale ? 1 : 0;
+ *version = signptr->version;
+
+ dict_unref (xattr);
+
+ out:
+ return ret;
+}
+
+/**
+ * PRE COMPUTE CHECK
+ *
+ * Checks to be performed before initiating object signature calculation.
+ * An object is skipped if:
+ * - it's already marked corrupted
+ * - has stale signature
+ */
+int32_t
+bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child,
+ fd_t *fd, unsigned long *version)
+{
+ int stale = 0;
+ int32_t ret = -1;
+
+ if (bitd_is_bad_file (this, child, NULL, fd)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Object [GFID: %s] is marked corrupted, skipping..",
+ uuid_utoa (fd->inode->gfid));
goto out;
}
- ret = 0;
+ ret = bitd_signature_staleness (this, child, fd, &stale, version);
+ if (!ret && stale) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "<STAGE: PRE> Object [GFID: %s] has stale signature",
+ uuid_utoa (fd->inode->gfid));
+ ret = -1;
+ }
out:
return ret;
}
-static inline int
+/* static inline int */
+int
bitd_compare_ckum (xlator_t *this,
br_isignature_out_t *sign,
unsigned char *md, inode_t *linked_inode,
gf_dirent_t *entry, fd_t *fd, br_child_t *child)
{
int ret = -1;
- dict_t xattr = {0,};
+ dict_t *xattr = NULL;
GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
GF_VALIDATE_OR_GOTO (this->name, sign, out);
@@ -118,52 +195,70 @@ bitd_compare_ckum (xlator_t *this,
GF_VALIDATE_OR_GOTO (this->name, md, out);
GF_VALIDATE_OR_GOTO (this->name, entry, out);
- if (strncmp (sign->signature, (char *)md, strlen (sign->signature))) {
- gf_log (this->name, GF_LOG_WARNING, "checksums does not match "
- "for the entry %s (gfid: %s)", entry->d_name,
- uuid_utoa (linked_inode->gfid));
- ret = dict_set_int32 (&xattr, "trusted.glusterfs.bad-file",
- _gf_true);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "dict-set for "
- "bad-file (entry: %s, gfid: %s) failed",
- entry->d_name, uuid_utoa (linked_inode->gfid));
- goto out;
- }
-
- ret = syncop_fsetxattr (child->xl, fd, &xattr, 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "setxattr to mark "
- "the file %s (gfid: %s) as bad failed",
- entry->d_name, uuid_utoa (linked_inode->gfid));
- goto out;
- }
+ if (strncmp
+ (sign->signature, (char *) md, strlen (sign->signature)) == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Entry %s [GFID: %s] matches calculated checksum",
+ entry->d_name, uuid_utoa (linked_inode->gfid));
+ return 0;
}
-out:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Object checksumsum mismatch: %s [GFID: %s]",
+ entry->d_name, uuid_utoa (linked_inode->gfid));
+
+ /* Perform bad-file marking */
+ xattr = dict_new ();
+ if (!xattr) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (xattr, "trusted.glusterfs.bad-file", _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting bad-file marker for %s [GFID: %s]",
+ entry->d_name, uuid_utoa (linked_inode->gfid));
+ goto dictfree;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Marking %s [GFID: %s] as corrupted..",
+ entry->d_name, uuid_utoa (linked_inode->gfid));
+ ret = syncop_fsetxattr (child->xl, fd, xattr, 0);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error marking object %s [GFID: %s] as corrupted",
+ entry->d_name, uuid_utoa (linked_inode->gfid));
+
+ dictfree:
+ dict_unref (xattr);
+ out:
return ret;
}
/**
- * This is the scrubber. As of now there's a mix of fd and inode
- * operations. Better to move them to fd based to be clean and
- * avoid code cluttering.
+ * "The Scrubber"
+ *
+ * Perform signature validation for a given object with the assumption
+ * that the signature is SHA256 (because signer as of now _always_
+ * signs with SHA256).
*/
int
bitd_start_scrub (xlator_t *subvol,
gf_dirent_t *entry, loc_t *parent, void *data)
{
- int32_t ret = -1;
- fd_t *fd = NULL;
- loc_t loc = {0, };
- struct iatt iatt = {0, };
- struct iatt parent_buf = {0, };
- pid_t pid = 0;
- br_child_t *child = NULL;
- xlator_t *this = NULL;
- unsigned char *md = NULL;
- inode_t *linked_inode = NULL;
- br_isignature_out_t sign = {0,};
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ struct iatt parent_buf = {0, };
+ pid_t pid = 0;
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ unsigned char *md = NULL;
+ inode_t *linked_inode = NULL;
+ br_isignature_out_t *sign = NULL;
+ unsigned long signedversion = 0;
GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out);
GF_VALIDATE_OR_GOTO ("bit-rot", data, out);
@@ -189,6 +284,9 @@ bitd_start_scrub (xlator_t *subvol,
if (linked_inode)
inode_lookup (linked_inode);
+ gf_log (this->name, GF_LOG_DEBUG, "Scrubbing object %s [GFID: %s]",
+ entry->d_name, uuid_utoa (linked_inode->gfid));
+
if (iatt.ia_type != IA_IFREG) {
gf_log (this->name, GF_LOG_DEBUG, "%s is not a regular "
"file", entry->d_name);
@@ -221,7 +319,7 @@ bitd_start_scrub (xlator_t *subvol,
* - presence of bad object
* - signature staleness
*/
- ret = bitd_scrub_pre_compute_check (this, child, fd);
+ ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion);
if (ret)
goto unrefd; /* skip this object */
@@ -243,13 +341,16 @@ bitd_start_scrub (xlator_t *subvol,
* perform post compute checks as an object's signature may have
* become stale while scrubber calculated checksum.
*/
- ret = bitd_scrub_post_compute_check (this, child, &sign, fd);
+ ret = bitd_scrub_post_compute_check (this, child,
+ fd, signedversion, &sign);
if (ret)
goto free_md;
- ret = bitd_compare_ckum (this, &sign, md,
+ ret = bitd_compare_ckum (this, sign, md,
linked_inode, entry, fd, child);
+ GF_FREE (sign); /* alloced on post-compute */
+
/** fd_unref() takes care of closing fd.. like syncop_close() */
free_md:
@@ -263,8 +364,8 @@ bitd_start_scrub (xlator_t *subvol,
return ret;
}
-#define BR_SCRUB_THROTTLE_COUNT 10
-#define BR_SCRUB_THROTTLE_ZZZ 100
+#define BR_SCRUB_THROTTLE_COUNT 30
+#define BR_SCRUB_THROTTLE_ZZZ 60
void *
br_scrubber (void *arg)
{
@@ -284,7 +385,7 @@ br_scrubber (void *arg)
GF_CLIENT_PID_SCRUB, child, bitd_start_scrub,
BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ);
- sleep (BR_SCRUB_THROTTLE_ZZZ * BR_SCRUB_THROTTLE_COUNT);
+ sleep (BR_SCRUB_THROTTLE_ZZZ);
}
return NULL;
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index d808c0dc69c..6a4de70bfbb 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -139,8 +139,14 @@ br_prepare_signature (const unsigned char *sign,
if (!signature)
return NULL;
+ /* object version */
signature->signedversion = object->signedversion;
+
+ /* signature length & type */
+ signature->signaturelen = hashlen;
signature->signaturetype = hashtype;
+
+ /* signature itself */
memcpy (signature->signature, (char *)sign, hashlen);
signature->signature[hashlen+1] = '\0';
@@ -167,7 +173,7 @@ bitd_is_bad_file (xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd)
"trusted.glusterfs.bad-file", NULL);
if (!ret) {
- gf_log (this->name, GF_LOG_ERROR, "[GFID: %s] is marked "
+ gf_log (this->name, GF_LOG_DEBUG, "[GFID: %s] is marked "
"corrupted", uuid_utoa (inode->gfid));
bad_file = _gf_true;
}
@@ -918,8 +924,11 @@ bitd_oneshot_crawl (xlator_t *subvol,
* if there are any fds present for that inode) and handle properly.
*/
- if (bitd_is_bad_file (this, child, &loc, NULL))
+ if (bitd_is_bad_file (this, child, &loc, NULL)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Entry [%s] is marked corrupted.. skipping.", loc.path);
goto unref_inode;
+ }
ret = syncop_getxattr (child->xl, &loc, &xattr,
GLUSTERFS_GET_OBJECT_SIGNATURE, NULL);
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h
index fdf23142768..699323170d3 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-common.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h
@@ -76,6 +76,7 @@ typedef struct br_isignature_in {
unsigned long signedversion; /* version against which the
object was signed */
+ size_t signaturelen; /* signature length */
char signature[0]; /* object signature */
} br_isignature_t;
@@ -86,11 +87,14 @@ typedef struct br_isignature_in {
typedef struct br_isignature_out {
char stale; /* stale signature? */
+ unsigned long version; /* current signed version */
+
uint32_t time[2]; /* time when the object
got dirtied */
int8_t signaturetype; /* hash type */
- char signature[0]; /* signature (hash) */
+ size_t signaturelen; /* signature length */
+ char signature[0]; /* signature (hash) */
} br_isignature_out_t;
typedef struct br_stub_init {
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index f8c8f59c1f8..0db500659b5 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -566,7 +566,7 @@ br_stub_prepare_signature (xlator_t *this, dict_t *dict,
if (!br_is_signature_type_valid (sign->signaturetype))
goto error_return;
- signaturelen = strlen (sign->signature);
+ signaturelen = sign->signaturelen;
ret = br_stub_alloc_versions (NULL, &sbuf, signaturelen);
if (ret)
goto error_return;
@@ -657,8 +657,8 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
int32_t ret = 0;
- ssize_t totallen = 0;
- ssize_t signaturelen = 0;
+ size_t totallen = 0;
+ size_t signaturelen = 0;
br_version_t *obuf = NULL;
br_signature_t *sbuf = NULL;
br_isignature_out_t *sign = NULL;
@@ -681,8 +681,21 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|| (status == BR_VXATTR_STATUS_UNSIGNED))
goto delkeys;
- signaturelen = strlen (sbuf->signature);
- totallen = signaturelen + sizeof (br_isignature_out_t);
+ /**
+ * okay.. we have enough information to satisfy the request,
+ * namely: version and signing extended attribute. what's
+ * pending is the signature length -- that's figured out
+ * indirectly via the size of the _whole_ xattr and the
+ * on-disk signing xattr header size.
+ */
+ op_errno = EINVAL;
+ ret = dict_get_uint32 (xattr, BITROT_SIGNING_XATTR_SIZE_KEY,
+ (uint32_t *)&signaturelen);
+ if (ret)
+ goto delkeys;
+
+ signaturelen -= sizeof (br_signature_t);
+ totallen = sizeof (br_isignature_out_t) + signaturelen;
op_errno = ENOMEM;
sign = GF_CALLOC (1, totallen, gf_br_stub_mt_signature_t);
@@ -692,10 +705,12 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
sign->time[0] = obuf->timebuf[0];
sign->time[1] = obuf->timebuf[1];
- /* Object's dirty state */
+ /* Object's dirty state & current signed version */
+ sign->version = sbuf->signedversion;
sign->stale = (obuf->ongoingversion != sbuf->signedversion) ? 1 : 0;
/* Object's signature */
+ sign->signaturelen = signaturelen;
sign->signaturetype = sbuf->signaturetype;
(void) memcpy (sign->signature, sbuf->signature, signaturelen);
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
index d565112b1ad..5db0e321c20 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
@@ -263,6 +263,7 @@ br_stub_remove_vxattrs (dict_t *xattr)
if (xattr) {
dict_del (xattr, BITROT_CURRENT_VERSION_KEY);
dict_del (xattr, BITROT_SIGNING_VERSION_KEY);
+ dict_del (xattr, BITROT_SIGNING_XATTR_SIZE_KEY);
}
}