1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package bitutils
18
19 import (
20 "encoding/binary"
21 "math/bits"
22
23 "github.com/apache/arrow/go/v15/arrow/bitutil"
24 "github.com/apache/arrow/go/v15/internal/utils"
25 )
26
27
28 func IsMultipleOf64(v int64) bool { return v&63 == 0 }
29
30
31
32
33 func LeastSignificantBitMask(index int64) uint64 {
34 return (uint64(1) << index) - 1
35 }
36
37
38
39 type SetBitRun struct {
40 Pos int64
41 Length int64
42 }
43
44
45
46 func (s SetBitRun) AtEnd() bool {
47 return s.Length == 0
48 }
49
50
51 func (s SetBitRun) Equal(rhs SetBitRun) bool {
52 return s.Pos == rhs.Pos && s.Length == rhs.Length
53 }
54
55
56
57
58 type SetBitRunReader interface {
59
60 NextRun() SetBitRun
61
62
63 Reset([]byte, int64, int64)
64
65
66
67 VisitSetBitRuns(visitFn VisitFn) error
68 }
69
70 type baseSetBitRunReader struct {
71 bitmap []byte
72 pos int64
73 length int64
74 remaining int64
75 curWord uint64
76 curNumBits int32
77 reversed bool
78
79 firstBit uint64
80 }
81
82
83
84 func NewSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader {
85 return newBaseSetBitRunReader(validBits, startOffset, numValues, false)
86 }
87
88
89
90
91
92
93 func NewReverseSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader {
94 return newBaseSetBitRunReader(validBits, startOffset, numValues, true)
95 }
96
97 func newBaseSetBitRunReader(bitmap []byte, startOffset, length int64, reverse bool) *baseSetBitRunReader {
98 ret := &baseSetBitRunReader{reversed: reverse}
99 ret.Reset(bitmap, startOffset, length)
100 return ret
101 }
102
103 func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) {
104 br.bitmap = bitmap
105 br.length = length
106 br.remaining = length
107 br.curNumBits = 0
108 br.curWord = 0
109
110 if !br.reversed {
111 br.pos = startOffset / 8
112 br.firstBit = 1
113
114 bitOffset := int8(startOffset % 8)
115 if length > 0 && bitOffset != 0 {
116 br.curNumBits = int32(utils.Min(int(length), int(8-bitOffset)))
117 br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits))
118 }
119 return
120 }
121
122 br.pos = (startOffset + length) / 8
123 br.firstBit = uint64(0x8000000000000000)
124 endBitOffset := int8((startOffset + length) % 8)
125 if length > 0 && endBitOffset != 0 {
126 br.pos++
127 br.curNumBits = int32(utils.Min(int(length), int(endBitOffset)))
128 br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits))
129 }
130 }
131
132 func (br *baseSetBitRunReader) consumeBits(word uint64, nbits int32) uint64 {
133 if br.reversed {
134 return word << nbits
135 }
136 return word >> nbits
137 }
138
139 func (br *baseSetBitRunReader) countFirstZeros(word uint64) int32 {
140 if br.reversed {
141 return int32(bits.LeadingZeros64(word))
142 }
143 return int32(bits.TrailingZeros64(word))
144 }
145
146 func (br *baseSetBitRunReader) loadPartial(bitOffset int8, numBits int64) uint64 {
147 var word [8]byte
148 nbytes := bitutil.BytesForBits(numBits)
149 if br.reversed {
150 br.pos -= nbytes
151 copy(word[8-nbytes:], br.bitmap[br.pos:br.pos+nbytes])
152 return (binary.LittleEndian.Uint64(word[:]) << bitOffset) &^ LeastSignificantBitMask(64-numBits)
153 }
154
155 copy(word[:], br.bitmap[br.pos:br.pos+nbytes])
156 br.pos += nbytes
157 return (binary.LittleEndian.Uint64(word[:]) >> bitOffset) & LeastSignificantBitMask(numBits)
158 }
159
160 func (br *baseSetBitRunReader) findCurrentRun() SetBitRun {
161 nzeros := br.countFirstZeros(br.curWord)
162 if nzeros >= br.curNumBits {
163 br.remaining -= int64(br.curNumBits)
164 br.curWord = 0
165 br.curNumBits = 0
166 return SetBitRun{0, 0}
167 }
168
169 br.curWord = br.consumeBits(br.curWord, nzeros)
170 br.curNumBits -= nzeros
171 br.remaining -= int64(nzeros)
172 pos := br.position()
173
174 numOnes := br.countFirstZeros(^br.curWord)
175 br.curWord = br.consumeBits(br.curWord, numOnes)
176 br.curNumBits -= numOnes
177 br.remaining -= int64(numOnes)
178 return SetBitRun{pos, int64(numOnes)}
179 }
180
181 func (br *baseSetBitRunReader) position() int64 {
182 if br.reversed {
183 return br.remaining
184 }
185 return br.length - br.remaining
186 }
187
188 func (br *baseSetBitRunReader) adjustRun(run SetBitRun) SetBitRun {
189 if br.reversed {
190 run.Pos -= run.Length
191 }
192 return run
193 }
194
195 func (br *baseSetBitRunReader) loadFull() (ret uint64) {
196 if br.reversed {
197 br.pos -= 8
198 }
199 ret = binary.LittleEndian.Uint64(br.bitmap[br.pos : br.pos+8])
200 if !br.reversed {
201 br.pos += 8
202 }
203 return
204 }
205
206 func (br *baseSetBitRunReader) skipNextZeros() {
207 for br.remaining >= 64 {
208 br.curWord = br.loadFull()
209 nzeros := br.countFirstZeros(br.curWord)
210 if nzeros < 64 {
211 br.curWord = br.consumeBits(br.curWord, nzeros)
212 br.curNumBits = 64 - nzeros
213 br.remaining -= int64(nzeros)
214 return
215 }
216 br.remaining -= 64
217 }
218
219 if br.remaining > 0 {
220 br.curWord = br.loadPartial(0, br.remaining)
221 br.curNumBits = int32(br.remaining)
222 nzeros := int32(utils.Min(int(br.curNumBits), int(br.countFirstZeros(br.curWord))))
223 br.curWord = br.consumeBits(br.curWord, nzeros)
224 br.curNumBits -= nzeros
225 br.remaining -= int64(nzeros)
226 }
227 }
228
229 func (br *baseSetBitRunReader) countNextOnes() int64 {
230 var length int64
231 if ^br.curWord != 0 {
232 numOnes := br.countFirstZeros(^br.curWord)
233 br.remaining -= int64(numOnes)
234 br.curWord = br.consumeBits(br.curWord, numOnes)
235 br.curNumBits -= numOnes
236 if br.curNumBits != 0 {
237 return int64(numOnes)
238 }
239 length = int64(numOnes)
240 } else {
241 br.remaining -= 64
242 br.curNumBits = 0
243 length = 64
244 }
245
246 for br.remaining >= 64 {
247 br.curWord = br.loadFull()
248 numOnes := br.countFirstZeros(^br.curWord)
249 length += int64(numOnes)
250 br.remaining -= int64(numOnes)
251 if numOnes < 64 {
252 br.curWord = br.consumeBits(br.curWord, numOnes)
253 br.curNumBits = 64 - numOnes
254 return length
255 }
256 }
257
258 if br.remaining > 0 {
259 br.curWord = br.loadPartial(0, br.remaining)
260 br.curNumBits = int32(br.remaining)
261 numOnes := br.countFirstZeros(^br.curWord)
262 br.curWord = br.consumeBits(br.curWord, numOnes)
263 br.curNumBits -= numOnes
264 br.remaining -= int64(numOnes)
265 length += int64(numOnes)
266 }
267 return length
268 }
269
270 func (br *baseSetBitRunReader) NextRun() SetBitRun {
271 var (
272 pos int64 = 0
273 length int64 = 0
274 )
275
276 if br.curNumBits != 0 {
277 run := br.findCurrentRun()
278 if run.Length != 0 && br.curNumBits != 0 {
279 return br.adjustRun(run)
280 }
281 pos = run.Pos
282 length = run.Length
283 }
284
285 if length == 0 {
286
287
288 br.skipNextZeros()
289 if br.remaining == 0 {
290 return SetBitRun{0, 0}
291 }
292 pos = br.position()
293 } else if br.curNumBits == 0 {
294 if br.remaining >= 64 {
295 br.curWord = br.loadFull()
296 br.curNumBits = 64
297 } else if br.remaining > 0 {
298 br.curWord = br.loadPartial(0, br.remaining)
299 br.curNumBits = int32(br.remaining)
300 } else {
301 return br.adjustRun(SetBitRun{pos, length})
302 }
303 if (br.curWord & br.firstBit) == 0 {
304 return br.adjustRun(SetBitRun{pos, length})
305 }
306 }
307
308 length += br.countNextOnes()
309 return br.adjustRun(SetBitRun{pos, length})
310 }
311
312
313 type VisitFn func(pos int64, length int64) error
314
315 func (br *baseSetBitRunReader) VisitSetBitRuns(visitFn VisitFn) error {
316 for {
317 run := br.NextRun()
318 if run.Length == 0 {
319 break
320 }
321
322 if err := visitFn(run.Pos, run.Length); err != nil {
323 return err
324 }
325 }
326 return nil
327 }
328
329
330 func VisitSetBitRuns(bitmap []byte, bitmapOffset int64, length int64, visitFn VisitFn) error {
331 if bitmap == nil {
332 return visitFn(0, length)
333 }
334 rdr := NewSetBitRunReader(bitmap, bitmapOffset, length)
335 for {
336 run := rdr.NextRun()
337 if run.Length == 0 {
338 break
339 }
340
341 if err := visitFn(run.Pos, run.Length); err != nil {
342 return err
343 }
344 }
345 return nil
346 }
347
348 func VisitSetBitRunsNoErr(bitmap []byte, bitmapOffset int64, length int64, visitFn func(pos, length int64)) {
349 if bitmap == nil {
350 visitFn(0, length)
351 return
352 }
353 rdr := NewSetBitRunReader(bitmap, bitmapOffset, length)
354 for {
355 run := rdr.NextRun()
356 if run.Length == 0 {
357 break
358 }
359 visitFn(run.Pos, run.Length)
360 }
361 }
362
View as plain text