...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package z
22
23 import (
24 "bytes"
25 "encoding/json"
26 "log"
27 "math"
28 "unsafe"
29 )
30
31
32 var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128}
33
34 func getSize(ui64 uint64) (size uint64, exponent uint64) {
35 if ui64 < uint64(512) {
36 ui64 = uint64(512)
37 }
38 size = uint64(1)
39 for size < ui64 {
40 size <<= 1
41 exponent++
42 }
43 return size, exponent
44 }
45
46 func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) {
47 size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2)
48 locs := math.Ceil(float64(0.69314718056) * size / numEntries)
49 return uint64(size), uint64(locs)
50 }
51
52
53 func NewBloomFilter(params ...float64) (bloomfilter *Bloom) {
54 var entries, locs uint64
55 if len(params) == 2 {
56 if params[1] < 1 {
57 entries, locs = calcSizeByWrongPositives(params[0], params[1])
58 } else {
59 entries, locs = uint64(params[0]), uint64(params[1])
60 }
61 } else {
62 log.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" +
63 " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," +
64 " float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))")
65 }
66 size, exponent := getSize(entries)
67 bloomfilter = &Bloom{
68 sizeExp: exponent,
69 size: size - 1,
70 setLocs: locs,
71 shift: 64 - exponent,
72 }
73 bloomfilter.Size(size)
74 return bloomfilter
75 }
76
77
78 type Bloom struct {
79 bitset []uint64
80 ElemNum uint64
81 sizeExp uint64
82 size uint64
83 setLocs uint64
84 shift uint64
85 }
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101 func (bl *Bloom) Add(hash uint64) {
102 h := hash >> bl.shift
103 l := hash << bl.shift >> bl.shift
104 for i := uint64(0); i < bl.setLocs; i++ {
105 bl.Set((h + i*l) & bl.size)
106 bl.ElemNum++
107 }
108 }
109
110
111
112 func (bl Bloom) Has(hash uint64) bool {
113 h := hash >> bl.shift
114 l := hash << bl.shift >> bl.shift
115 for i := uint64(0); i < bl.setLocs; i++ {
116 if !bl.IsSet((h + i*l) & bl.size) {
117 return false
118 }
119 }
120 return true
121 }
122
123
124
125
126 func (bl *Bloom) AddIfNotHas(hash uint64) bool {
127 if bl.Has(hash) {
128 return false
129 }
130 bl.Add(hash)
131 return true
132 }
133
134
135 func (bl *Bloom) Size(sz uint64) {
136 bl.bitset = make([]uint64, sz>>6)
137 }
138
139
140 func (bl *Bloom) Clear() {
141 for i := range bl.bitset {
142 bl.bitset[i] = 0
143 }
144 }
145
146
147 func (bl *Bloom) Set(idx uint64) {
148 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
149 *(*uint8)(ptr) |= mask[idx%8]
150 }
151
152
153 func (bl *Bloom) IsSet(idx uint64) bool {
154 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
155 r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1
156 return r == 1
157 }
158
159
160
161 type bloomJSONImExport struct {
162 FilterSet []byte
163 SetLocs uint64
164 }
165
166
167
168 func newWithBoolset(bs *[]byte, locs uint64) *Bloom {
169 bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs))
170 for i, b := range *bs {
171 *(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b
172 }
173 return bloomfilter
174 }
175
176
177
178 func JSONUnmarshal(dbData []byte) (*Bloom, error) {
179 bloomImEx := bloomJSONImExport{}
180 if err := json.Unmarshal(dbData, &bloomImEx); err != nil {
181 return nil, err
182 }
183 buf := bytes.NewBuffer(bloomImEx.FilterSet)
184 bs := buf.Bytes()
185 bf := newWithBoolset(&bs, bloomImEx.SetLocs)
186 return bf, nil
187 }
188
189
190 func (bl Bloom) JSONMarshal() []byte {
191 bloomImEx := bloomJSONImExport{}
192 bloomImEx.SetLocs = bl.setLocs
193 bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
194 for i := range bloomImEx.FilterSet {
195 bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) +
196 uintptr(i)))
197 }
198 data, err := json.Marshal(bloomImEx)
199 if err != nil {
200 log.Fatal("json.Marshal failed: ", err)
201 }
202 return data
203 }
204
View as plain text