1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package bitutils
18
19 import (
20 "math"
21 "math/bits"
22 "unsafe"
23
24 "github.com/apache/arrow/go/v15/arrow/bitutil"
25 "github.com/apache/arrow/go/v15/internal/utils"
26 )
27
28 func loadWord(byt []byte) uint64 {
29 return utils.ToLEUint64(*(*uint64)(unsafe.Pointer(&byt[0])))
30 }
31
32 func shiftWord(current, next uint64, shift int64) uint64 {
33 if shift == 0 {
34 return current
35 }
36 return (current >> shift) | (next << (64 - shift))
37 }
38
39
40
41
42 type BitBlockCount struct {
43 Len int16
44 Popcnt int16
45 }
46
47
48 func (b BitBlockCount) NoneSet() bool {
49 return b.Popcnt == 0
50 }
51
52
53 func (b BitBlockCount) AllSet() bool {
54 return b.Len == b.Popcnt
55 }
56
57
58
59 type BitBlockCounter struct {
60 bitmap []byte
61 bitsRemaining int64
62 bitOffset int8
63 }
64
65 const (
66 wordBits int64 = 64
67 fourWordsBits int64 = wordBits * 4
68 )
69
70
71
72 func NewBitBlockCounter(bitmap []byte, startOffset, nbits int64) *BitBlockCounter {
73 return &BitBlockCounter{
74 bitmap: bitmap[startOffset/8:],
75 bitsRemaining: nbits,
76 bitOffset: int8(startOffset % 8),
77 }
78 }
79
80
81
82 func (b *BitBlockCounter) getBlockSlow(blockSize int64) BitBlockCount {
83 runlen := int16(utils.Min(b.bitsRemaining, blockSize))
84 popcnt := int16(bitutil.CountSetBits(b.bitmap, int(b.bitOffset), int(runlen)))
85 b.bitsRemaining -= int64(runlen)
86 b.bitmap = b.bitmap[runlen/8:]
87 return BitBlockCount{runlen, popcnt}
88 }
89
90
91
92
93
94
95 func (b *BitBlockCounter) NextFourWords() BitBlockCount {
96 if b.bitsRemaining == 0 {
97 return BitBlockCount{0, 0}
98 }
99
100 totalPopcnt := 0
101 if b.bitOffset == 0 {
102
103
104 if b.bitsRemaining < fourWordsBits {
105 return b.getBlockSlow(fourWordsBits)
106 }
107 totalPopcnt += bits.OnesCount64(loadWord(b.bitmap))
108 totalPopcnt += bits.OnesCount64(loadWord(b.bitmap[8:]))
109 totalPopcnt += bits.OnesCount64(loadWord(b.bitmap[16:]))
110 totalPopcnt += bits.OnesCount64(loadWord(b.bitmap[24:]))
111 } else {
112
113
114 if b.bitsRemaining < 5*fourWordsBits-int64(b.bitOffset) {
115 return b.getBlockSlow(fourWordsBits)
116 }
117
118 current := loadWord(b.bitmap)
119 next := loadWord(b.bitmap[8:])
120 totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset)))
121
122 current = next
123 next = loadWord(b.bitmap[16:])
124 totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset)))
125
126 current = next
127 next = loadWord(b.bitmap[24:])
128 totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset)))
129
130 current = next
131 next = loadWord(b.bitmap[32:])
132 totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset)))
133 }
134 b.bitmap = b.bitmap[bitutil.BytesForBits(fourWordsBits):]
135 b.bitsRemaining -= fourWordsBits
136 return BitBlockCount{256, int16(totalPopcnt)}
137 }
138
139
140
141
142
143
144 func (b *BitBlockCounter) NextWord() BitBlockCount {
145 if b.bitsRemaining == 0 {
146 return BitBlockCount{0, 0}
147 }
148 popcnt := 0
149 if b.bitOffset == 0 {
150 if b.bitsRemaining < wordBits {
151 return b.getBlockSlow(wordBits)
152 }
153 popcnt = bits.OnesCount64(loadWord(b.bitmap))
154 } else {
155
156
157 if b.bitsRemaining < (2*wordBits - int64(b.bitOffset)) {
158 return b.getBlockSlow(wordBits)
159 }
160 popcnt = bits.OnesCount64(shiftWord(loadWord(b.bitmap), loadWord(b.bitmap[8:]), int64(b.bitOffset)))
161 }
162 b.bitmap = b.bitmap[wordBits/8:]
163 b.bitsRemaining -= wordBits
164 return BitBlockCount{64, int16(popcnt)}
165 }
166
167
168
169
170 type OptionalBitBlockCounter struct {
171 hasBitmap bool
172 pos int64
173 len int64
174 counter *BitBlockCounter
175 }
176
177
178
179
180 func NewOptionalBitBlockCounter(bitmap []byte, offset, length int64) *OptionalBitBlockCounter {
181 var counter *BitBlockCounter
182 if bitmap != nil {
183 counter = NewBitBlockCounter(bitmap, offset, length)
184 }
185 return &OptionalBitBlockCounter{
186 hasBitmap: bitmap != nil,
187 pos: 0,
188 len: length,
189 counter: counter,
190 }
191 }
192
193
194
195
196 func (obc *OptionalBitBlockCounter) NextBlock() BitBlockCount {
197 const maxBlockSize = math.MaxInt16
198 if obc.hasBitmap {
199 block := obc.counter.NextWord()
200 obc.pos += int64(block.Len)
201 return block
202 }
203
204 blockSize := int16(utils.Min(maxBlockSize, obc.len-obc.pos))
205 obc.pos += int64(blockSize)
206
207 return BitBlockCount{blockSize, blockSize}
208 }
209
210
211
212 func (obc *OptionalBitBlockCounter) NextWord() BitBlockCount {
213 const wordsize = 64
214 if obc.hasBitmap {
215 block := obc.counter.NextWord()
216 obc.pos += int64(block.Len)
217 return block
218 }
219 blockSize := int16(utils.Min(wordsize, obc.len-obc.pos))
220 obc.pos += int64(blockSize)
221
222 return BitBlockCount{blockSize, blockSize}
223 }
224
225
226
227
228
229
230 func VisitBitBlocks(bitmap []byte, offset, length int64, visitValid func(pos int64), visitInvalid func()) {
231 counter := NewOptionalBitBlockCounter(bitmap, offset, length)
232 pos := int64(0)
233 for pos < length {
234 block := counter.NextBlock()
235 if block.AllSet() {
236 for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
237 visitValid(pos)
238 }
239 } else if block.NoneSet() {
240 for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
241 visitInvalid()
242 }
243 } else {
244 for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
245 if bitutil.BitIsSet(bitmap, int(offset+pos)) {
246 visitValid(pos)
247 } else {
248 visitInvalid()
249 }
250 }
251 }
252 }
253 }
254
255
256
257
258
259
260 func VisitBitBlocksShort(bitmap []byte, offset, length int64, visitValid func(pos int64) error, visitInvalid func() error) error {
261 counter := NewOptionalBitBlockCounter(bitmap, offset, length)
262 pos := int64(0)
263 for pos < length {
264 block := counter.NextBlock()
265 if block.AllSet() {
266 for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
267 if err := visitValid(pos); err != nil {
268 return err
269 }
270 }
271 } else if block.NoneSet() {
272 for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
273 if err := visitInvalid(); err != nil {
274 return err
275 }
276 }
277 } else {
278 for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
279 if bitutil.BitIsSet(bitmap, int(offset+pos)) {
280 if err := visitValid(pos); err != nil {
281 return err
282 }
283 } else {
284 if err := visitInvalid(); err != nil {
285 return err
286 }
287 }
288 }
289 }
290 }
291 return nil
292 }
293
294 func VisitTwoBitBlocks(leftBitmap, rightBitmap []byte, leftOffset, rightOffset int64, len int64, visitValid func(pos int64), visitNull func()) {
295 if leftBitmap == nil || rightBitmap == nil {
296
297 if leftBitmap == nil {
298 VisitBitBlocks(rightBitmap, rightOffset, len, visitValid, visitNull)
299 } else {
300 VisitBitBlocks(leftBitmap, leftOffset, len, visitValid, visitNull)
301 }
302 return
303 }
304
305 bitCounter := NewBinaryBitBlockCounter(leftBitmap, rightBitmap, leftOffset, rightOffset, len)
306 var pos int64
307 for pos < len {
308 block := bitCounter.NextAndWord()
309 if block.AllSet() {
310 for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
311 visitValid(pos)
312 }
313 } else if block.NoneSet() {
314 for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
315 visitNull()
316 }
317 } else {
318 for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
319 if bitutil.BitIsSet(leftBitmap, int(leftOffset+pos)) && bitutil.BitIsSet(rightBitmap, int(rightOffset+pos)) {
320 visitValid(pos)
321 } else {
322 visitNull()
323 }
324 }
325 }
326 }
327 }
328
329 type bitOp struct {
330 bit func(bool, bool) bool
331 word func(uint64, uint64) uint64
332 }
333
334 var (
335 bitBlockAnd = bitOp{
336 bit: func(a, b bool) bool { return a && b },
337 word: func(a, b uint64) uint64 { return a & b },
338 }
339 bitBlockAndNot = bitOp{
340 bit: func(a, b bool) bool { return a && !b },
341 word: func(a, b uint64) uint64 { return a &^ b },
342 }
343 bitBlockOr = bitOp{
344 bit: func(a, b bool) bool { return a || b },
345 word: func(a, b uint64) uint64 { return a | b },
346 }
347 bitBlockOrNot = bitOp{
348 bit: func(a, b bool) bool { return a || !b },
349 word: func(a, b uint64) uint64 { return a | ^b },
350 }
351 )
352
353
354
355
356
357 type BinaryBitBlockCounter struct {
358 left []byte
359 right []byte
360 bitsRemaining int64
361 leftOffset, rightOffset int64
362
363 bitsRequiredForWords int64
364 }
365
366
367
368
369 func NewBinaryBitBlockCounter(left, right []byte, leftOffset, rightOffset int64, length int64) *BinaryBitBlockCounter {
370 ret := &BinaryBitBlockCounter{
371 left: left[leftOffset/8:],
372 right: right[rightOffset/8:],
373 leftOffset: leftOffset % 8,
374 rightOffset: rightOffset % 8,
375 bitsRemaining: length,
376 }
377
378 leftBitsReq := int64(64)
379 if ret.leftOffset != 0 {
380 leftBitsReq = 64 + (64 - ret.leftOffset)
381 }
382 rightBitsReq := int64(64)
383 if ret.rightOffset != 0 {
384 rightBitsReq = 64 + (64 - ret.rightOffset)
385 }
386
387 if leftBitsReq > rightBitsReq {
388 ret.bitsRequiredForWords = leftBitsReq
389 } else {
390 ret.bitsRequiredForWords = rightBitsReq
391 }
392
393 return ret
394 }
395
396
397
398
399
400
401 func (b *BinaryBitBlockCounter) NextAndWord() BitBlockCount { return b.nextWord(bitBlockAnd) }
402
403
404 func (b *BinaryBitBlockCounter) NextAndNotWord() BitBlockCount { return b.nextWord(bitBlockAndNot) }
405
406
407 func (b *BinaryBitBlockCounter) NextOrWord() BitBlockCount { return b.nextWord(bitBlockOr) }
408
409
410 func (b *BinaryBitBlockCounter) NextOrNotWord() BitBlockCount { return b.nextWord(bitBlockOrNot) }
411
412 func (b *BinaryBitBlockCounter) nextWord(op bitOp) BitBlockCount {
413 if b.bitsRemaining == 0 {
414 return BitBlockCount{}
415 }
416
417
418
419 if b.bitsRemaining < b.bitsRequiredForWords {
420 runLength := int16(b.bitsRemaining)
421 if runLength > int16(wordBits) {
422 runLength = int16(wordBits)
423 }
424
425 var popcount int16
426 for i := int16(0); i < runLength; i++ {
427 if op.bit(bitutil.BitIsSet(b.left, int(b.leftOffset)+int(i)),
428 bitutil.BitIsSet(b.right, int(b.rightOffset)+int(i))) {
429 popcount++
430 }
431 }
432
433
434 b.left = b.left[runLength/8:]
435 b.right = b.right[runLength/8:]
436 b.bitsRemaining -= int64(runLength)
437 return BitBlockCount{Len: runLength, Popcnt: popcount}
438 }
439
440 var popcount int
441 if b.leftOffset == 0 && b.rightOffset == 0 {
442 popcount = bits.OnesCount64(op.word(loadWord(b.left), loadWord(b.right)))
443 } else {
444 leftWord := shiftWord(loadWord(b.left), loadWord(b.left[8:]), b.leftOffset)
445 rightWord := shiftWord(loadWord(b.right), loadWord(b.right[8:]), b.rightOffset)
446 popcount = bits.OnesCount64(op.word(leftWord, rightWord))
447 }
448 b.left = b.left[wordBits/8:]
449 b.right = b.right[wordBits/8:]
450 b.bitsRemaining -= wordBits
451 return BitBlockCount{Len: int16(wordBits), Popcnt: int16(popcount)}
452 }
453
View as plain text