summaryrefslogtreecommitdiffstats
path: root/libglusterfs/src
diff options
context:
space:
mode:
authorXiaofei Du <xiaofeidu@fb.com>2016-08-18 10:05:32 -0700
committerShreyas Siravara <sshreyas@fb.com>2016-12-08 08:27:24 -0800
commit1508cde8e6022ae40a63cfaf94a7ee387e188dcf (patch)
tree2398a7545d802298dc0ec3812b214aeb3d8c666a /libglusterfs/src
parent0e75c988f02b4fe36a99d34e5039d61c0ab1b64f (diff)
dict_t: make dict_t a real dictionary
Summary: - During testing, I found dict_t actually always has a hash_size of 1. So basically it's not a dictionary. It's list. This diff fixed that problem. A bug in dictionary is also fixed here. SuperFastHash generates uint32_t, but it was assigned to a int, which could suffer from overflow. Previously the hash_size is always 1, so the bug was not triggered. Under new hash_size, it's easy to trigger that bug. - For existing GlusterFS codebase, dict_new need to be calling dict_t *get_new_dict_full (uint32_t size_hint) to use the new logic. An estimated number of items you are going to insert into the dictionary is the input for that function. - This is a port of D3736252 to 3.8 Test Plan: Prove tests Reviewed By: kvigor Signed-off-by: Shreyas Siravara <sshreyas@fb.com> Change-Id: Ic760eabed82e58881076acbaa46a295dc23e0634 Reviewed-on: http://review.gluster.org/16061 Tested-by: Shreyas Siravara <sshreyas@fb.com> Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Kevin Vigor <kvigor@fb.com>
Diffstat (limited to 'libglusterfs/src')
-rw-r--r--libglusterfs/src/dict.c95
-rw-r--r--libglusterfs/src/dict.h9
2 files changed, 82 insertions, 22 deletions
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index 25ddff0d8c4..6a61e641e19 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -27,6 +27,45 @@
#include "statedump.h"
#include "libglusterfs-messages.h"
+/* this goes with the bucket_size lookup table below */
+#define NUM_DISTINCT_SIZES_32_BIT 32
+
+/* this bucket_size lookup table is borrowed from GNU libstdc++ */
+static const uint32_t bucket_sizes[NUM_DISTINCT_SIZES_32_BIT] = {
+ /* 0 */ 5ul,
+ /* 1 */ 11ul,
+ /* 2 */ 23ul,
+ /* 3 */ 47ul,
+ /* 4 */ 97ul,
+ /* 5 */ 199ul,
+ /* 6 */ 409ul,
+ /* 7 */ 823ul,
+ /* 8 */ 1741ul,
+ /* 9 */ 3469ul,
+ /* 10 */ 6949ul,
+ /* 11 */ 14033ul,
+ /* 12 */ 28411ul,
+ /* 13 */ 57557ul,
+ /* 14 */ 116731ul,
+ /* 15 */ 236897ul,
+ /* 16 */ 480881ul,
+ /* 17 */ 976369ul,
+ /* 18 */ 1982627ul,
+ /* 19 */ 4026031ul,
+ /* 20 */ 8175383ul,
+ /* 21 */ 16601593ul,
+ /* 22 */ 33712729ul,
+ /* 23 */ 68460391ul,
+ /* 24 */ 139022417ul,
+ /* 25 */ 282312799ul,
+ /* 26 */ 573292817ul,
+ /* 27 */ 1164186217ul,
+ /* 28 */ 2364114217ul,
+ /* 29 */ 4294967291ul,
+ /* 30 */ 4294967291ul,
+ /* 31 */ 4294967291ul,
+};
+
struct dict_cmp {
dict_t *dict;
gf_boolean_t (*value_ignore) (char *k);
@@ -47,7 +86,7 @@ get_new_data ()
}
dict_t *
-get_new_dict_full (int size_hint)
+get_new_dict_full (uint32_t size_hint)
{
dict_t *dict = mem_get0 (THIS->ctx->dict_pool);
@@ -67,17 +106,8 @@ get_new_dict_full (int size_hint)
dict->members = &dict->members_internal;
}
else {
- /*
- * We actually need to allocate space for size_hint *pointers*
- * but we actually allocate space for one *structure*. Since
- * a data_pair_t consists of five pointers, we're wasting four
- * pointers' worth for N=1, and will overrun what we allocated
- * for N>5. If anybody ever starts using size_hint, we'll need
- * to fix this.
- */
- GF_ASSERT (size_hint <=
- (sizeof(data_pair_t) / sizeof(data_pair_t *)));
- dict->members = mem_get0 (THIS->ctx->dict_pair_pool);
+ dict->members = GF_CALLOC (size_hint, sizeof (data_pair_t *),
+ gf_common_mt_data_pair_t);
if (!dict->members) {
mem_put (dict);
return NULL;
@@ -108,6 +138,35 @@ dict_new (void)
return dict;
}
+dict_t *
+dict_new_by_size (uint32_t num)
+{
+ int32_t highest_bit = 0;
+ uint32_t bucket_size = 0;
+ dict_t *dict = NULL;
+
+ if (num == 0)
+ goto out;
+
+#ifdef _GNU_SOURCE
+ highest_bit = 32 - __builtin_clz (num);
+#else
+ while (num != 0) {
+ highest_bit++;
+ num >>= 1;
+ }
+#endif
+
+ bucket_size = bucket_sizes[highest_bit - 1];
+ dict = get_new_dict_full (bucket_size);
+
+ if (dict)
+ dict_ref (dict);
+
+out:
+ return dict;
+}
+
int32_t
is_data_equal (data_t *one,
data_t *two)
@@ -268,7 +327,7 @@ err_out:
static data_pair_t *
dict_lookup_common (dict_t *this, char *key)
{
- int hashval = 0;
+ uint32_t hashval = 0;
if (!this || !key) {
gf_msg_callingfn ("dict", GF_LOG_WARNING, EINVAL,
LG_MSG_INVALID_ARG,
@@ -279,7 +338,7 @@ dict_lookup_common (dict_t *this, char *key)
/* If the divisor is 1, the modulo is always 0,
* in such case avoid hash calculation.
*/
- if (this->hash_size != 1)
+ if (this->hash_size > 1)
hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
data_pair_t *pair;
@@ -319,7 +378,7 @@ dict_lookup (dict_t *this, char *key, data_t **data)
static int32_t
dict_set_lk (dict_t *this, char *key, data_t *value, gf_boolean_t replace)
{
- int hashval = 0;
+ uint32_t hashval = 0;
data_pair_t *pair;
char key_free = 0;
int tmp = 0;
@@ -336,7 +395,7 @@ dict_set_lk (dict_t *this, char *key, data_t *value, gf_boolean_t replace)
/* If the divisor is 1, the modulo is always 0,
* in such case avoid hash calculation.
*/
- if (this->hash_size != 1) {
+ if (this->hash_size > 1) {
tmp = SuperFastHash (key, strlen (key));
hashval = (tmp % this->hash_size);
}
@@ -478,7 +537,7 @@ dict_get (dict_t *this, char *key)
void
dict_del (dict_t *this, char *key)
{
- int hashval = 0;
+ uint32_t hashval = 0;
if (!this || !key) {
gf_msg_callingfn ("dict", GF_LOG_WARNING, EINVAL,
@@ -491,7 +550,7 @@ dict_del (dict_t *this, char *key)
/* If the divisor is 1, the modulo is always 0,
* in such case avoid hash calculation.
*/
- if (this->hash_size != 1)
+ if (this->hash_size > 1)
hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
data_pair_t *pair = this->members[hashval];
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h
index c5b82677e2e..1f6c1a0eae9 100644
--- a/libglusterfs/src/dict.h
+++ b/libglusterfs/src/dict.h
@@ -79,9 +79,9 @@ struct _data_pair {
struct _dict {
unsigned char is_static:1;
- int32_t hash_size;
- int32_t count;
- int32_t refcount;
+ uint32_t hash_size;
+ uint32_t count;
+ uint32_t refcount;
data_pair_t **members;
data_pair_t *members_list;
char *extra_free;
@@ -156,7 +156,7 @@ void *data_to_ptr (data_t *data);
data_t *get_new_data ();
data_t * data_copy (data_t *old);
-dict_t *get_new_dict_full (int size_hint);
+dict_t *get_new_dict_full (uint32_t size_hint);
dict_t *get_new_dict ();
int dict_foreach (dict_t *this,
@@ -196,6 +196,7 @@ int dict_keys_join (void *value, int size, dict_t *dict,
/* CLEANED UP FUNCTIONS DECLARATIONS */
GF_MUST_CHECK dict_t *dict_new (void);
+GF_MUST_CHECK dict_t *dict_new_by_size (uint32_t num);
dict_t *dict_copy_with_ref (dict_t *this, dict_t *new);
GF_MUST_CHECK int dict_reset (dict_t *dict);