...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package hashing
18
19 import (
20 "math/rand"
21 "testing"
22
23 "github.com/stretchr/testify/assert"
24 )
25
// MakeDistinctIntegers builds a set containing exactly nvals distinct
// pseudo-random ints (no entries when nvals <= 0). The generator is created
// from the fixed seed 42, so every call with the same nvals yields the same
// set.
func MakeDistinctIntegers(nvals int) map[int]bool {
	var (
		rng  = rand.New(rand.NewSource(42))
		seen = map[int]bool{}
	)
	for {
		if len(seen) >= nvals {
			return seen
		}
		// A repeated draw just overwrites an existing key, so the set only
		// grows when a previously unseen value comes up.
		seen[rng.Int()] = true
	}
}
34
// MakeSequentialIntegers builds a set holding the consecutive integers
// 0, 1, ..., nvals-1. The set is empty when nvals <= 0.
func MakeSequentialIntegers(nvals int) map[int]bool {
	set := map[int]bool{}
	// The set always holds 0..len(set)-1, so the next value to insert is
	// simply its current size.
	for len(set) < nvals {
		set[len(set)] = true
	}
	return set
}
42
// MakeDistinctStrings builds a set of exactly nvals distinct pseudo-random
// strings (no entries when nvals <= 0). Each string is 0 to 23 bytes long
// and every byte lies in the inclusive range '0'..'z'. The generator is
// created from the fixed seed 42, so the result is reproducible.
func MakeDistinctStrings(nvals int) map[string]bool {
	const (
		lo   = '0'
		hi   = 'z'
		span = int(hi - lo + 1) // number of admissible byte values
	)

	rng := rand.New(rand.NewSource(42))
	out := map[string]bool{}

	for len(out) < nvals {
		// Draw the length first, then one byte per position; a duplicate
		// string leaves the set size unchanged and the loop draws again.
		buf := make([]byte, rng.Intn(24))
		for i := 0; i < len(buf); i++ {
			buf[i] = byte(rng.Intn(span) + int(lo))
		}
		out[string(buf)] = true
	}
	return out
}
59
60 func TestHashingQualityInt(t *testing.T) {
61 const nvalues = 10000
62
63 tests := []struct {
64 name string
65 values map[int]bool
66 quality float64
67 }{
68 {"distinct", MakeDistinctIntegers(nvalues), 0.96},
69 {"sequential", MakeSequentialIntegers(nvalues), 0.96},
70 }
71
72 for _, tt := range tests {
73 t.Run(tt.name, func(t *testing.T) {
74 hashes := make(map[uint64]bool)
75 for k := range tt.values {
76 hashes[hashInt(uint64(k), 0)] = true
77 hashes[hashInt(uint64(k), 1)] = true
78 }
79 assert.GreaterOrEqual(t, float64(len(hashes)), tt.quality*float64(2*len(tt.values)))
80 })
81 }
82 }
83
84 func TestHashingBoundsStrings(t *testing.T) {
85 sizes := []int{1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17, 18, 19, 20, 21}
86 for _, s := range sizes {
87 str := make([]byte, s)
88 for idx := range str {
89 str[idx] = uint8(idx)
90 }
91
92 h := Hash(str, 1)
93 diff := 0
94 for i := 0; i < 120; i++ {
95 str[len(str)-1] = uint8(i)
96 if Hash(str, 1) != h {
97 diff++
98 }
99 }
100 assert.GreaterOrEqual(t, diff, 118)
101 }
102 }
103
104 func TestHashingQualityString(t *testing.T) {
105 const nvalues = 10000
106 values := MakeDistinctStrings(nvalues)
107
108 hashes := make(map[uint64]bool)
109 for k := range values {
110 hashes[hashString(k, 0)] = true
111 hashes[hashString(k, 1)] = true
112 }
113 assert.GreaterOrEqual(t, float64(len(hashes)), 0.96*float64(2*len(values)))
114 }
115
View as plain text