diff options
author | Xiaofei Du <xiaofeidu@fb.com> | 2016-08-18 10:05:32 -0700 |
---|---|---|
committer | Shreyas Siravara <sshreyas@fb.com> | 2016-12-08 08:27:24 -0800 |
commit | 1508cde8e6022ae40a63cfaf94a7ee387e188dcf (patch) | |
tree | 2398a7545d802298dc0ec3812b214aeb3d8c666a /libglusterfs/src | |
parent | 0e75c988f02b4fe36a99d34e5039d61c0ab1b64f (diff) |
dict_t: make dict_t a real dictionary
Summary:
- During testing, I found dict_t actually always has a hash_size
of 1. So basically it's not a dictionary. It's list. This diff fixed that problem.
A bug in dictionary is also fixed here. SuperFastHash generates
uint32_t, but it was assigned to a int, which could suffer from
overflow. Previously the hash_size is always 1, so the bug was not triggered.
Under new hash_size, it's easy to trigger that bug.
- For existing GlusterFS codebase, dict_new need to be calling dict_t
*get_new_dict_full (uint32_t size_hint) to use the new logic. An
estimated number of items you are going to insert into the dictionary
is the input for that function.
- This is a port of D3736252 to 3.8
Test Plan: Prove tests
Reviewed By: kvigor
Signed-off-by: Shreyas Siravara <sshreyas@fb.com>
Change-Id: Ic760eabed82e58881076acbaa46a295dc23e0634
Reviewed-on: http://review.gluster.org/16061
Tested-by: Shreyas Siravara <sshreyas@fb.com>
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Kevin Vigor <kvigor@fb.com>
Diffstat (limited to 'libglusterfs/src')
-rw-r--r-- | libglusterfs/src/dict.c | 95 | ||||
-rw-r--r-- | libglusterfs/src/dict.h | 9 |
2 files changed, 82 insertions, 22 deletions
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c index 25ddff0d8c4..6a61e641e19 100644 --- a/libglusterfs/src/dict.c +++ b/libglusterfs/src/dict.c @@ -27,6 +27,45 @@ #include "statedump.h" #include "libglusterfs-messages.h" +/* this goes with the bucket_size lookup table below */ +#define NUM_DISTINCT_SIZES_32_BIT 32 + +/* this bucket_size lookup table is borrowed from GNU libstdc++ */ +static const uint32_t bucket_sizes[NUM_DISTINCT_SIZES_32_BIT] = { + /* 0 */ 5ul, + /* 1 */ 11ul, + /* 2 */ 23ul, + /* 3 */ 47ul, + /* 4 */ 97ul, + /* 5 */ 199ul, + /* 6 */ 409ul, + /* 7 */ 823ul, + /* 8 */ 1741ul, + /* 9 */ 3469ul, + /* 10 */ 6949ul, + /* 11 */ 14033ul, + /* 12 */ 28411ul, + /* 13 */ 57557ul, + /* 14 */ 116731ul, + /* 15 */ 236897ul, + /* 16 */ 480881ul, + /* 17 */ 976369ul, + /* 18 */ 1982627ul, + /* 19 */ 4026031ul, + /* 20 */ 8175383ul, + /* 21 */ 16601593ul, + /* 22 */ 33712729ul, + /* 23 */ 68460391ul, + /* 24 */ 139022417ul, + /* 25 */ 282312799ul, + /* 26 */ 573292817ul, + /* 27 */ 1164186217ul, + /* 28 */ 2364114217ul, + /* 29 */ 4294967291ul, + /* 30 */ 4294967291ul, + /* 31 */ 4294967291ul, +}; + struct dict_cmp { dict_t *dict; gf_boolean_t (*value_ignore) (char *k); @@ -47,7 +86,7 @@ get_new_data () } dict_t * -get_new_dict_full (int size_hint) +get_new_dict_full (uint32_t size_hint) { dict_t *dict = mem_get0 (THIS->ctx->dict_pool); @@ -67,17 +106,8 @@ get_new_dict_full (int size_hint) dict->members = &dict->members_internal; } else { - /* - * We actually need to allocate space for size_hint *pointers* - * but we actually allocate space for one *structure*. Since - * a data_pair_t consists of five pointers, we're wasting four - * pointers' worth for N=1, and will overrun what we allocated - * for N>5. If anybody ever starts using size_hint, we'll need - * to fix this. - */ - GF_ASSERT (size_hint <= - (sizeof(data_pair_t) / sizeof(data_pair_t *))); - dict->members = mem_get0 (THIS->ctx->dict_pair_pool); + dict->members = GF_CALLOC (size_hint, sizeof (data_pair_t *), + gf_common_mt_data_pair_t); if (!dict->members) { mem_put (dict); return NULL; @@ -108,6 +138,35 @@ dict_new (void) return dict; } +dict_t * +dict_new_by_size (uint32_t num) +{ + int32_t highest_bit = 0; + uint32_t bucket_size = 0; + dict_t *dict = NULL; + + if (num == 0) + goto out; + +#ifdef _GNU_SOURCE + highest_bit = 32 - __builtin_clz (num); +#else + while (num != 0) { + highest_bit++; + num >>= 1; + } +#endif + + bucket_size = bucket_sizes[highest_bit - 1]; + dict = get_new_dict_full (bucket_size); + + if (dict) + dict_ref (dict); + +out: + return dict; +} + int32_t is_data_equal (data_t *one, data_t *two) @@ -268,7 +327,7 @@ err_out: static data_pair_t * dict_lookup_common (dict_t *this, char *key) { - int hashval = 0; + uint32_t hashval = 0; if (!this || !key) { gf_msg_callingfn ("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, @@ -279,7 +338,7 @@ dict_lookup_common (dict_t *this, char *key) /* If the divisor is 1, the modulo is always 0, * in such case avoid hash calculation. */ - if (this->hash_size != 1) + if (this->hash_size > 1) hashval = SuperFastHash (key, strlen (key)) % this->hash_size; data_pair_t *pair; @@ -319,7 +378,7 @@ dict_lookup (dict_t *this, char *key, data_t **data) static int32_t dict_set_lk (dict_t *this, char *key, data_t *value, gf_boolean_t replace) { - int hashval = 0; + uint32_t hashval = 0; data_pair_t *pair; char key_free = 0; int tmp = 0; @@ -336,7 +395,7 @@ dict_set_lk (dict_t *this, char *key, data_t *value, gf_boolean_t replace) /* If the divisor is 1, the modulo is always 0, * in such case avoid hash calculation. */ - if (this->hash_size != 1) { + if (this->hash_size > 1) { tmp = SuperFastHash (key, strlen (key)); hashval = (tmp % this->hash_size); } @@ -478,7 +537,7 @@ dict_get (dict_t *this, char *key) void dict_del (dict_t *this, char *key) { - int hashval = 0; + uint32_t hashval = 0; if (!this || !key) { gf_msg_callingfn ("dict", GF_LOG_WARNING, EINVAL, @@ -491,7 +550,7 @@ dict_del (dict_t *this, char *key) /* If the divisor is 1, the modulo is always 0, * in such case avoid hash calculation. */ - if (this->hash_size != 1) + if (this->hash_size > 1) hashval = SuperFastHash (key, strlen (key)) % this->hash_size; data_pair_t *pair = this->members[hashval]; diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h index c5b82677e2e..1f6c1a0eae9 100644 --- a/libglusterfs/src/dict.h +++ b/libglusterfs/src/dict.h @@ -79,9 +79,9 @@ struct _data_pair { struct _dict { unsigned char is_static:1; - int32_t hash_size; - int32_t count; - int32_t refcount; + uint32_t hash_size; + uint32_t count; + uint32_t refcount; data_pair_t **members; data_pair_t *members_list; char *extra_free; @@ -156,7 +156,7 @@ void *data_to_ptr (data_t *data); data_t *get_new_data (); data_t * data_copy (data_t *old); -dict_t *get_new_dict_full (int size_hint); +dict_t *get_new_dict_full (uint32_t size_hint); dict_t *get_new_dict (); int dict_foreach (dict_t *this, @@ -196,6 +196,7 @@ int dict_keys_join (void *value, int size, dict_t *dict, /* CLEANED UP FUNCTIONS DECLARATIONS */ GF_MUST_CHECK dict_t *dict_new (void); +GF_MUST_CHECK dict_t *dict_new_by_size (uint32_t num); dict_t *dict_copy_with_ref (dict_t *this, dict_t *new); GF_MUST_CHECK int dict_reset (dict_t *dict); |