1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package hashing
20
21 import (
22 "math"
23
24 "github.com/apache/arrow/go/v15/arrow"
25 "github.com/apache/arrow/go/v15/arrow/bitutil"
26 "github.com/apache/arrow/go/v15/internal/utils"
27 )
28
// payloadInt8 is the data held in one hash-table slot: the memoized
// value and the dense index it was assigned in insertion order.
type payloadInt8 struct {
	val     int8
	memoIdx int32
}

// entryInt8 is a single open-addressing slot: the (fixed-up) hash of the
// value plus its payload. A hash equal to sentinel marks an empty slot.
type entryInt8 struct {
	h       uint64
	payload payloadInt8
}

// Valid reports whether this slot is occupied (its hash differs from the
// empty-slot sentinel).
func (e entryInt8) Valid() bool { return e.h != sentinel }

// Int8HashTable is an open-addressing hash table of int8 values. The
// capacity is always a power of two so capMask (cap-1) reduces a hash to
// a slot index with a single bitwise AND.
type Int8HashTable struct {
	cap     uint64
	capMask uint64
	size    uint64

	entries []entryInt8
}

// NewInt8HashTable returns a table able to hold at least cap values; the
// actual capacity is rounded up to a power of two, with a minimum of 32.
func NewInt8HashTable(cap uint64) *Int8HashTable {
	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	ret := &Int8HashTable{cap: initCap, capMask: initCap - 1, size: 0}
	ret.entries = make([]entryInt8, initCap)
	return ret
}

// Reset drops all entries and re-allocates storage for the requested
// capacity (power-of-two rounded, minimum 32).
func (h *Int8HashTable) Reset(cap uint64) {
	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	h.capMask = h.cap - 1
	h.size = 0
	h.entries = make([]entryInt8, h.cap)
}

// CopyValues copies every stored value into out at its memo index; out
// must be large enough to hold all values.
func (h *Int8HashTable) CopyValues(out []int8) {
	h.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values whose memo index is >= start into
// out, shifted down by start.
func (h *Int8HashTable) CopyValuesSubset(start int, out []int8) {
	h.VisitEntries(func(e *entryInt8) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			out[idx] = e.payload.val
		}
	})
}

// WriteOut writes all values into out, reinterpreting out as []int8.
func (h *Int8HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}

// WriteOutSubset writes the values with memo index >= start into out
// (reinterpreted as []int8), shifted down by start. Single-byte values
// need no endianness conversion.
func (h *Int8HashTable) WriteOutSubset(start int, out []byte) {
	data := arrow.Int8Traits.CastFromBytes(out)
	h.VisitEntries(func(e *entryInt8) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			data[idx] = e.payload.val
		}
	})
}

// needUpsize reports whether size has reached the load-factor threshold.
func (h *Int8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }

// fixHash remaps the reserved sentinel hash to an arbitrary non-sentinel
// value (42) so occupied slots can never be mistaken for empty ones.
func (Int8HashTable) fixHash(v uint64) uint64 {
	if v == sentinel {
		return 42
	}
	return v
}

// Lookup finds the slot for hash v whose value satisfies cmp. On a match
// it returns the entry and true; otherwise it returns the empty slot
// where the value would be inserted, and false.
func (h *Int8HashTable) Lookup(v uint64, cmp func(int8) bool) (*entryInt8, bool) {
	idx, ok := h.lookup(v, h.capMask, cmp)
	return &h.entries[idx], ok
}

// lookup probes the table for hash v, returning the index of the
// matching slot (true) or of the first empty slot (false). Collisions
// are resolved by perturbing the index with high bits of the hash, the
// same scheme CPython's dict uses, so all slots are eventually visited.
func (h *Int8HashTable) lookup(v uint64, szMask uint64, cmp func(int8) bool) (uint64, bool) {
	const perturbShift uint8 = 5

	var (
		idx     uint64
		perturb uint64
		e       *entryInt8
	)

	v = h.fixHash(v)
	idx = v & szMask
	perturb = (v >> uint64(perturbShift)) + 1

	for {
		e = &h.entries[idx]
		if e.h == v && cmp(e.payload.val) {
			return idx, true
		}

		if e.h == sentinel {
			return idx, false
		}

		// perturbation logic inspired by CPython's set/dict: fold in
		// more of the hash each step to break collision cycles
		idx = (idx + perturb) & szMask
		perturb = (perturb >> uint64(perturbShift)) + 1
	}
}

// upsize rehashes every valid entry into freshly allocated storage of
// newcap slots (newcap must be a power of two).
func (h *Int8HashTable) upsize(newcap uint64) error {
	newMask := newcap - 1

	oldEntries := h.entries
	h.entries = make([]entryInt8, newcap)
	for _, e := range oldEntries {
		if e.Valid() {
			// cmp always false: we only need the empty slot for e.h
			idx, _ := h.lookup(e.h, newMask, func(int8) bool { return false })
			h.entries[idx] = e
		}
	}
	h.cap = newcap
	h.capMask = newMask
	return nil
}

// Insert stores val (with hash v and memo index memoIdx) into slot e,
// which must come from a failed Lookup, growing the table afterwards if
// the load factor is reached. e must not be reused after Insert since a
// grow re-allocates the entry storage.
func (h *Int8HashTable) Insert(e *entryInt8, v uint64, val int8, memoIdx int32) error {
	e.h = h.fixHash(v)
	e.payload.val = val
	e.payload.memoIdx = memoIdx
	h.size++

	if h.needUpsize() {
		h.upsize(h.cap * uint64(loadFactor) * 2)
	}
	return nil
}

// VisitEntries calls visit once per valid (occupied) entry. The pointer
// passed to visit refers to a loop copy, so mutations through it are not
// reflected in the table.
func (h *Int8HashTable) VisitEntries(visit func(*entryInt8)) {
	for _, e := range h.entries {
		if e.Valid() {
			visit(&e)
		}
	}
}

// Int8MemoTable memoizes int8 values: each distinct value is assigned a
// dense index in first-insertion order, with optional tracking of a
// single null at nullIdx.
type Int8MemoTable struct {
	tbl     *Int8HashTable
	nullIdx int32
}

// NewInt8MemoTable returns a memo table pre-sized for num values.
func NewInt8MemoTable(num int64) *Int8MemoTable {
	return &Int8MemoTable{tbl: NewInt8HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for int8.
func (Int8MemoTable) TypeTraits() TypeTraits {
	return arrow.Int8Traits
}

// Reset clears the memo table, forgetting all values and any null.
func (s *Int8MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of distinct memoized values, counting the
// null if one has been inserted.
func (s *Int8MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index assigned to null and whether a null
// has been inserted.
func (s *Int8MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, assigning the next
// available index if none exists yet; found reports whether it already
// existed.
func (s *Int8MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out,
// which must be a []int8.
func (s *Int8MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values with memo index >= start into out
// (a []int8), shifted down by start.
func (s *Int8MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]int8))
}

// WriteOut writes the values into out in native byte order.
func (s *Int8MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Int8Traits.CastFromBytes(out))
}

// WriteOutSubset writes the values with memo index >= start into out in
// native byte order.
func (s *Int8MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Int8Traits.CastFromBytes(out))
}

// WriteOutLE writes the values into out in little-endian order (for
// 1-byte values this is identical to native order).
func (s *Int8MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the values with memo index >= start into out
// in little-endian order.
func (s *Int8MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index of val (an int8) and whether it was found.
func (s *Int8MemoTable) Get(val interface{}) (int, bool) {
	// hash the value, then confirm candidates by direct comparison
	h := hashInt(uint64(val.(int8)), 0)
	if e, ok := s.tbl.Lookup(h, func(v int8) bool { return val.(int8) == v }); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index of val (an int8), inserting it at
// the next index if absent; found reports whether it already existed.
func (s *Int8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	h := hashInt(uint64(val.(int8)), 0)
	e, ok := s.tbl.Lookup(h, func(v int8) bool {
		return val.(int8) == v
	})

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		// Size() (not tbl.size) so an inserted null keeps its index
		idx = s.Size()
		s.tbl.Insert(e, h, val.(int8), int32(idx))
	}
	return
}

// GetOrInsertBytes is not supported for numeric memo tables.
func (s *Int8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
305
// payloadUint8 is the data held in one hash-table slot: the memoized
// value and the dense index it was assigned in insertion order.
type payloadUint8 struct {
	val     uint8
	memoIdx int32
}

// entryUint8 is a single open-addressing slot: the (fixed-up) hash of the
// value plus its payload. A hash equal to sentinel marks an empty slot.
type entryUint8 struct {
	h       uint64
	payload payloadUint8
}

// Valid reports whether this slot is occupied (its hash differs from the
// empty-slot sentinel).
func (e entryUint8) Valid() bool { return e.h != sentinel }

// Uint8HashTable is an open-addressing hash table of uint8 values. The
// capacity is always a power of two so capMask (cap-1) reduces a hash to
// a slot index with a single bitwise AND.
type Uint8HashTable struct {
	cap     uint64
	capMask uint64
	size    uint64

	entries []entryUint8
}

// NewUint8HashTable returns a table able to hold at least cap values; the
// actual capacity is rounded up to a power of two, with a minimum of 32.
func NewUint8HashTable(cap uint64) *Uint8HashTable {
	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	ret := &Uint8HashTable{cap: initCap, capMask: initCap - 1, size: 0}
	ret.entries = make([]entryUint8, initCap)
	return ret
}

// Reset drops all entries and re-allocates storage for the requested
// capacity (power-of-two rounded, minimum 32).
func (h *Uint8HashTable) Reset(cap uint64) {
	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	h.capMask = h.cap - 1
	h.size = 0
	h.entries = make([]entryUint8, h.cap)
}

// CopyValues copies every stored value into out at its memo index; out
// must be large enough to hold all values.
func (h *Uint8HashTable) CopyValues(out []uint8) {
	h.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values whose memo index is >= start into
// out, shifted down by start.
func (h *Uint8HashTable) CopyValuesSubset(start int, out []uint8) {
	h.VisitEntries(func(e *entryUint8) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			out[idx] = e.payload.val
		}
	})
}

// WriteOut writes all values into out, reinterpreting out as []uint8.
func (h *Uint8HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}

// WriteOutSubset writes the values with memo index >= start into out
// (reinterpreted as []uint8), shifted down by start. Single-byte values
// need no endianness conversion.
func (h *Uint8HashTable) WriteOutSubset(start int, out []byte) {
	data := arrow.Uint8Traits.CastFromBytes(out)
	h.VisitEntries(func(e *entryUint8) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			data[idx] = e.payload.val
		}
	})
}

// needUpsize reports whether size has reached the load-factor threshold.
func (h *Uint8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }

// fixHash remaps the reserved sentinel hash to an arbitrary non-sentinel
// value (42) so occupied slots can never be mistaken for empty ones.
func (Uint8HashTable) fixHash(v uint64) uint64 {
	if v == sentinel {
		return 42
	}
	return v
}

// Lookup finds the slot for hash v whose value satisfies cmp. On a match
// it returns the entry and true; otherwise it returns the empty slot
// where the value would be inserted, and false.
func (h *Uint8HashTable) Lookup(v uint64, cmp func(uint8) bool) (*entryUint8, bool) {
	idx, ok := h.lookup(v, h.capMask, cmp)
	return &h.entries[idx], ok
}

// lookup probes the table for hash v, returning the index of the
// matching slot (true) or of the first empty slot (false). Collisions
// are resolved by perturbing the index with high bits of the hash, the
// same scheme CPython's dict uses, so all slots are eventually visited.
func (h *Uint8HashTable) lookup(v uint64, szMask uint64, cmp func(uint8) bool) (uint64, bool) {
	const perturbShift uint8 = 5

	var (
		idx     uint64
		perturb uint64
		e       *entryUint8
	)

	v = h.fixHash(v)
	idx = v & szMask
	perturb = (v >> uint64(perturbShift)) + 1

	for {
		e = &h.entries[idx]
		if e.h == v && cmp(e.payload.val) {
			return idx, true
		}

		if e.h == sentinel {
			return idx, false
		}

		// perturbation logic inspired by CPython's set/dict: fold in
		// more of the hash each step to break collision cycles
		idx = (idx + perturb) & szMask
		perturb = (perturb >> uint64(perturbShift)) + 1
	}
}

// upsize rehashes every valid entry into freshly allocated storage of
// newcap slots (newcap must be a power of two).
func (h *Uint8HashTable) upsize(newcap uint64) error {
	newMask := newcap - 1

	oldEntries := h.entries
	h.entries = make([]entryUint8, newcap)
	for _, e := range oldEntries {
		if e.Valid() {
			// cmp always false: we only need the empty slot for e.h
			idx, _ := h.lookup(e.h, newMask, func(uint8) bool { return false })
			h.entries[idx] = e
		}
	}
	h.cap = newcap
	h.capMask = newMask
	return nil
}

// Insert stores val (with hash v and memo index memoIdx) into slot e,
// which must come from a failed Lookup, growing the table afterwards if
// the load factor is reached. e must not be reused after Insert since a
// grow re-allocates the entry storage.
func (h *Uint8HashTable) Insert(e *entryUint8, v uint64, val uint8, memoIdx int32) error {
	e.h = h.fixHash(v)
	e.payload.val = val
	e.payload.memoIdx = memoIdx
	h.size++

	if h.needUpsize() {
		h.upsize(h.cap * uint64(loadFactor) * 2)
	}
	return nil
}

// VisitEntries calls visit once per valid (occupied) entry. The pointer
// passed to visit refers to a loop copy, so mutations through it are not
// reflected in the table.
func (h *Uint8HashTable) VisitEntries(visit func(*entryUint8)) {
	for _, e := range h.entries {
		if e.Valid() {
			visit(&e)
		}
	}
}

// Uint8MemoTable memoizes uint8 values: each distinct value is assigned
// a dense index in first-insertion order, with optional tracking of a
// single null at nullIdx.
type Uint8MemoTable struct {
	tbl     *Uint8HashTable
	nullIdx int32
}

// NewUint8MemoTable returns a memo table pre-sized for num values.
func NewUint8MemoTable(num int64) *Uint8MemoTable {
	return &Uint8MemoTable{tbl: NewUint8HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for uint8.
func (Uint8MemoTable) TypeTraits() TypeTraits {
	return arrow.Uint8Traits
}

// Reset clears the memo table, forgetting all values and any null.
func (s *Uint8MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of distinct memoized values, counting the
// null if one has been inserted.
func (s *Uint8MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index assigned to null and whether a null
// has been inserted.
func (s *Uint8MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, assigning the next
// available index if none exists yet; found reports whether it already
// existed.
func (s *Uint8MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out,
// which must be a []uint8.
func (s *Uint8MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values with memo index >= start into out
// (a []uint8), shifted down by start.
func (s *Uint8MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]uint8))
}

// WriteOut writes the values into out in native byte order.
func (s *Uint8MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Uint8Traits.CastFromBytes(out))
}

// WriteOutSubset writes the values with memo index >= start into out in
// native byte order.
func (s *Uint8MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Uint8Traits.CastFromBytes(out))
}

// WriteOutLE writes the values into out in little-endian order (for
// 1-byte values this is identical to native order).
func (s *Uint8MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the values with memo index >= start into out
// in little-endian order.
func (s *Uint8MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index of val (a uint8) and whether it was found.
func (s *Uint8MemoTable) Get(val interface{}) (int, bool) {
	// hash the value, then confirm candidates by direct comparison
	h := hashInt(uint64(val.(uint8)), 0)
	if e, ok := s.tbl.Lookup(h, func(v uint8) bool { return val.(uint8) == v }); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index of val (a uint8), inserting it at
// the next index if absent; found reports whether it already existed.
func (s *Uint8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	h := hashInt(uint64(val.(uint8)), 0)
	e, ok := s.tbl.Lookup(h, func(v uint8) bool {
		return val.(uint8) == v
	})

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		// Size() (not tbl.size) so an inserted null keeps its index
		idx = s.Size()
		s.tbl.Insert(e, h, val.(uint8), int32(idx))
	}
	return
}

// GetOrInsertBytes is not supported for numeric memo tables.
func (s *Uint8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
582
// payloadInt16 is the data held in one hash-table slot: the memoized
// value and the dense index it was assigned in insertion order.
type payloadInt16 struct {
	val     int16
	memoIdx int32
}

// entryInt16 is a single open-addressing slot: the (fixed-up) hash of the
// value plus its payload. A hash equal to sentinel marks an empty slot.
type entryInt16 struct {
	h       uint64
	payload payloadInt16
}

// Valid reports whether this slot is occupied (its hash differs from the
// empty-slot sentinel).
func (e entryInt16) Valid() bool { return e.h != sentinel }

// Int16HashTable is an open-addressing hash table of int16 values. The
// capacity is always a power of two so capMask (cap-1) reduces a hash to
// a slot index with a single bitwise AND.
type Int16HashTable struct {
	cap     uint64
	capMask uint64
	size    uint64

	entries []entryInt16
}

// NewInt16HashTable returns a table able to hold at least cap values; the
// actual capacity is rounded up to a power of two, with a minimum of 32.
func NewInt16HashTable(cap uint64) *Int16HashTable {
	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	ret := &Int16HashTable{cap: initCap, capMask: initCap - 1, size: 0}
	ret.entries = make([]entryInt16, initCap)
	return ret
}

// Reset drops all entries and re-allocates storage for the requested
// capacity (power-of-two rounded, minimum 32).
func (h *Int16HashTable) Reset(cap uint64) {
	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	h.capMask = h.cap - 1
	h.size = 0
	h.entries = make([]entryInt16, h.cap)
}

// CopyValues copies every stored value into out at its memo index; out
// must be large enough to hold all values.
func (h *Int16HashTable) CopyValues(out []int16) {
	h.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values whose memo index is >= start into
// out, shifted down by start.
func (h *Int16HashTable) CopyValuesSubset(start int, out []int16) {
	h.VisitEntries(func(e *entryInt16) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			out[idx] = e.payload.val
		}
	})
}

// WriteOut writes all values into out in little-endian byte order.
func (h *Int16HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}

// WriteOutSubset writes the values with memo index >= start into out
// (reinterpreted as []int16), shifted down by start, converting each
// value to little-endian.
func (h *Int16HashTable) WriteOutSubset(start int, out []byte) {
	data := arrow.Int16Traits.CastFromBytes(out)
	h.VisitEntries(func(e *entryInt16) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			data[idx] = utils.ToLEInt16(e.payload.val)
		}
	})
}

// needUpsize reports whether size has reached the load-factor threshold.
func (h *Int16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }

// fixHash remaps the reserved sentinel hash to an arbitrary non-sentinel
// value (42) so occupied slots can never be mistaken for empty ones.
func (Int16HashTable) fixHash(v uint64) uint64 {
	if v == sentinel {
		return 42
	}
	return v
}

// Lookup finds the slot for hash v whose value satisfies cmp. On a match
// it returns the entry and true; otherwise it returns the empty slot
// where the value would be inserted, and false.
func (h *Int16HashTable) Lookup(v uint64, cmp func(int16) bool) (*entryInt16, bool) {
	idx, ok := h.lookup(v, h.capMask, cmp)
	return &h.entries[idx], ok
}

// lookup probes the table for hash v, returning the index of the
// matching slot (true) or of the first empty slot (false). Collisions
// are resolved by perturbing the index with high bits of the hash, the
// same scheme CPython's dict uses, so all slots are eventually visited.
func (h *Int16HashTable) lookup(v uint64, szMask uint64, cmp func(int16) bool) (uint64, bool) {
	const perturbShift uint8 = 5

	var (
		idx     uint64
		perturb uint64
		e       *entryInt16
	)

	v = h.fixHash(v)
	idx = v & szMask
	perturb = (v >> uint64(perturbShift)) + 1

	for {
		e = &h.entries[idx]
		if e.h == v && cmp(e.payload.val) {
			return idx, true
		}

		if e.h == sentinel {
			return idx, false
		}

		// perturbation logic inspired by CPython's set/dict: fold in
		// more of the hash each step to break collision cycles
		idx = (idx + perturb) & szMask
		perturb = (perturb >> uint64(perturbShift)) + 1
	}
}

// upsize rehashes every valid entry into freshly allocated storage of
// newcap slots (newcap must be a power of two).
func (h *Int16HashTable) upsize(newcap uint64) error {
	newMask := newcap - 1

	oldEntries := h.entries
	h.entries = make([]entryInt16, newcap)
	for _, e := range oldEntries {
		if e.Valid() {
			// cmp always false: we only need the empty slot for e.h
			idx, _ := h.lookup(e.h, newMask, func(int16) bool { return false })
			h.entries[idx] = e
		}
	}
	h.cap = newcap
	h.capMask = newMask
	return nil
}

// Insert stores val (with hash v and memo index memoIdx) into slot e,
// which must come from a failed Lookup, growing the table afterwards if
// the load factor is reached. e must not be reused after Insert since a
// grow re-allocates the entry storage.
func (h *Int16HashTable) Insert(e *entryInt16, v uint64, val int16, memoIdx int32) error {
	e.h = h.fixHash(v)
	e.payload.val = val
	e.payload.memoIdx = memoIdx
	h.size++

	if h.needUpsize() {
		h.upsize(h.cap * uint64(loadFactor) * 2)
	}
	return nil
}

// VisitEntries calls visit once per valid (occupied) entry. The pointer
// passed to visit refers to a loop copy, so mutations through it are not
// reflected in the table.
func (h *Int16HashTable) VisitEntries(visit func(*entryInt16)) {
	for _, e := range h.entries {
		if e.Valid() {
			visit(&e)
		}
	}
}

// Int16MemoTable memoizes int16 values: each distinct value is assigned
// a dense index in first-insertion order, with optional tracking of a
// single null at nullIdx.
type Int16MemoTable struct {
	tbl     *Int16HashTable
	nullIdx int32
}

// NewInt16MemoTable returns a memo table pre-sized for num values.
func NewInt16MemoTable(num int64) *Int16MemoTable {
	return &Int16MemoTable{tbl: NewInt16HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for int16.
func (Int16MemoTable) TypeTraits() TypeTraits {
	return arrow.Int16Traits
}

// Reset clears the memo table, forgetting all values and any null.
func (s *Int16MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of distinct memoized values, counting the
// null if one has been inserted.
func (s *Int16MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index assigned to null and whether a null
// has been inserted.
func (s *Int16MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, assigning the next
// available index if none exists yet; found reports whether it already
// existed.
func (s *Int16MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out,
// which must be a []int16.
func (s *Int16MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values with memo index >= start into out
// (a []int16), shifted down by start.
func (s *Int16MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]int16))
}

// WriteOut writes the values into out in native byte order.
func (s *Int16MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Int16Traits.CastFromBytes(out))
}

// WriteOutSubset writes the values with memo index >= start into out in
// native byte order.
func (s *Int16MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Int16Traits.CastFromBytes(out))
}

// WriteOutLE writes the values into out in little-endian byte order.
func (s *Int16MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the values with memo index >= start into out
// in little-endian byte order.
func (s *Int16MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index of val (an int16) and whether it was found.
func (s *Int16MemoTable) Get(val interface{}) (int, bool) {
	// hash the value, then confirm candidates by direct comparison
	h := hashInt(uint64(val.(int16)), 0)
	if e, ok := s.tbl.Lookup(h, func(v int16) bool { return val.(int16) == v }); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index of val (an int16), inserting it at
// the next index if absent; found reports whether it already existed.
func (s *Int16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	h := hashInt(uint64(val.(int16)), 0)
	e, ok := s.tbl.Lookup(h, func(v int16) bool {
		return val.(int16) == v
	})

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		// Size() (not tbl.size) so an inserted null keeps its index
		idx = s.Size()
		s.tbl.Insert(e, h, val.(int16), int32(idx))
	}
	return
}

// GetOrInsertBytes is not supported for numeric memo tables.
func (s *Int16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
859
// payloadUint16 is the data held in one hash-table slot: the memoized
// value and the dense index it was assigned in insertion order.
type payloadUint16 struct {
	val     uint16
	memoIdx int32
}

// entryUint16 is a single open-addressing slot: the (fixed-up) hash of
// the value plus its payload. A hash equal to sentinel marks an empty
// slot.
type entryUint16 struct {
	h       uint64
	payload payloadUint16
}

// Valid reports whether this slot is occupied (its hash differs from the
// empty-slot sentinel).
func (e entryUint16) Valid() bool { return e.h != sentinel }

// Uint16HashTable is an open-addressing hash table of uint16 values. The
// capacity is always a power of two so capMask (cap-1) reduces a hash to
// a slot index with a single bitwise AND.
type Uint16HashTable struct {
	cap     uint64
	capMask uint64
	size    uint64

	entries []entryUint16
}

// NewUint16HashTable returns a table able to hold at least cap values;
// the actual capacity is rounded up to a power of two, minimum 32.
func NewUint16HashTable(cap uint64) *Uint16HashTable {
	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	ret := &Uint16HashTable{cap: initCap, capMask: initCap - 1, size: 0}
	ret.entries = make([]entryUint16, initCap)
	return ret
}

// Reset drops all entries and re-allocates storage for the requested
// capacity (power-of-two rounded, minimum 32).
func (h *Uint16HashTable) Reset(cap uint64) {
	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	h.capMask = h.cap - 1
	h.size = 0
	h.entries = make([]entryUint16, h.cap)
}

// CopyValues copies every stored value into out at its memo index; out
// must be large enough to hold all values.
func (h *Uint16HashTable) CopyValues(out []uint16) {
	h.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values whose memo index is >= start into
// out, shifted down by start.
func (h *Uint16HashTable) CopyValuesSubset(start int, out []uint16) {
	h.VisitEntries(func(e *entryUint16) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			out[idx] = e.payload.val
		}
	})
}

// WriteOut writes all values into out in little-endian byte order.
func (h *Uint16HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}

// WriteOutSubset writes the values with memo index >= start into out
// (reinterpreted as []uint16), shifted down by start, converting each
// value to little-endian.
func (h *Uint16HashTable) WriteOutSubset(start int, out []byte) {
	data := arrow.Uint16Traits.CastFromBytes(out)
	h.VisitEntries(func(e *entryUint16) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			data[idx] = utils.ToLEUint16(e.payload.val)
		}
	})
}

// needUpsize reports whether size has reached the load-factor threshold.
func (h *Uint16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }

// fixHash remaps the reserved sentinel hash to an arbitrary non-sentinel
// value (42) so occupied slots can never be mistaken for empty ones.
func (Uint16HashTable) fixHash(v uint64) uint64 {
	if v == sentinel {
		return 42
	}
	return v
}

// Lookup finds the slot for hash v whose value satisfies cmp. On a match
// it returns the entry and true; otherwise it returns the empty slot
// where the value would be inserted, and false.
func (h *Uint16HashTable) Lookup(v uint64, cmp func(uint16) bool) (*entryUint16, bool) {
	idx, ok := h.lookup(v, h.capMask, cmp)
	return &h.entries[idx], ok
}

// lookup probes the table for hash v, returning the index of the
// matching slot (true) or of the first empty slot (false). Collisions
// are resolved by perturbing the index with high bits of the hash, the
// same scheme CPython's dict uses, so all slots are eventually visited.
func (h *Uint16HashTable) lookup(v uint64, szMask uint64, cmp func(uint16) bool) (uint64, bool) {
	const perturbShift uint8 = 5

	var (
		idx     uint64
		perturb uint64
		e       *entryUint16
	)

	v = h.fixHash(v)
	idx = v & szMask
	perturb = (v >> uint64(perturbShift)) + 1

	for {
		e = &h.entries[idx]
		if e.h == v && cmp(e.payload.val) {
			return idx, true
		}

		if e.h == sentinel {
			return idx, false
		}

		// perturbation logic inspired by CPython's set/dict: fold in
		// more of the hash each step to break collision cycles
		idx = (idx + perturb) & szMask
		perturb = (perturb >> uint64(perturbShift)) + 1
	}
}

// upsize rehashes every valid entry into freshly allocated storage of
// newcap slots (newcap must be a power of two).
func (h *Uint16HashTable) upsize(newcap uint64) error {
	newMask := newcap - 1

	oldEntries := h.entries
	h.entries = make([]entryUint16, newcap)
	for _, e := range oldEntries {
		if e.Valid() {
			// cmp always false: we only need the empty slot for e.h
			idx, _ := h.lookup(e.h, newMask, func(uint16) bool { return false })
			h.entries[idx] = e
		}
	}
	h.cap = newcap
	h.capMask = newMask
	return nil
}

// Insert stores val (with hash v and memo index memoIdx) into slot e,
// which must come from a failed Lookup, growing the table afterwards if
// the load factor is reached. e must not be reused after Insert since a
// grow re-allocates the entry storage.
func (h *Uint16HashTable) Insert(e *entryUint16, v uint64, val uint16, memoIdx int32) error {
	e.h = h.fixHash(v)
	e.payload.val = val
	e.payload.memoIdx = memoIdx
	h.size++

	if h.needUpsize() {
		h.upsize(h.cap * uint64(loadFactor) * 2)
	}
	return nil
}

// VisitEntries calls visit once per valid (occupied) entry. The pointer
// passed to visit refers to a loop copy, so mutations through it are not
// reflected in the table.
func (h *Uint16HashTable) VisitEntries(visit func(*entryUint16)) {
	for _, e := range h.entries {
		if e.Valid() {
			visit(&e)
		}
	}
}

// Uint16MemoTable memoizes uint16 values: each distinct value is
// assigned a dense index in first-insertion order, with optional
// tracking of a single null at nullIdx.
type Uint16MemoTable struct {
	tbl     *Uint16HashTable
	nullIdx int32
}

// NewUint16MemoTable returns a memo table pre-sized for num values.
func NewUint16MemoTable(num int64) *Uint16MemoTable {
	return &Uint16MemoTable{tbl: NewUint16HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for uint16.
func (Uint16MemoTable) TypeTraits() TypeTraits {
	return arrow.Uint16Traits
}

// Reset clears the memo table, forgetting all values and any null.
func (s *Uint16MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of distinct memoized values, counting the
// null if one has been inserted.
func (s *Uint16MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index assigned to null and whether a null
// has been inserted.
func (s *Uint16MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, assigning the next
// available index if none exists yet; found reports whether it already
// existed.
func (s *Uint16MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out,
// which must be a []uint16.
func (s *Uint16MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values with memo index >= start into out
// (a []uint16), shifted down by start.
func (s *Uint16MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]uint16))
}

// WriteOut writes the values into out in native byte order.
func (s *Uint16MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Uint16Traits.CastFromBytes(out))
}

// WriteOutSubset writes the values with memo index >= start into out in
// native byte order.
func (s *Uint16MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Uint16Traits.CastFromBytes(out))
}

// WriteOutLE writes the values into out in little-endian byte order.
func (s *Uint16MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the values with memo index >= start into out
// in little-endian byte order.
func (s *Uint16MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index of val (a uint16) and whether it was found.
func (s *Uint16MemoTable) Get(val interface{}) (int, bool) {
	// hash the value, then confirm candidates by direct comparison
	h := hashInt(uint64(val.(uint16)), 0)
	if e, ok := s.tbl.Lookup(h, func(v uint16) bool { return val.(uint16) == v }); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index of val (a uint16), inserting it at
// the next index if absent; found reports whether it already existed.
func (s *Uint16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	h := hashInt(uint64(val.(uint16)), 0)
	e, ok := s.tbl.Lookup(h, func(v uint16) bool {
		return val.(uint16) == v
	})

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		// Size() (not tbl.size) so an inserted null keeps its index
		idx = s.Size()
		s.tbl.Insert(e, h, val.(uint16), int32(idx))
	}
	return
}

// GetOrInsertBytes is not supported for numeric memo tables.
func (s *Uint16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
1136
// payloadInt32 is the data held in one hash-table slot: the memoized
// value and the dense index it was assigned in insertion order.
type payloadInt32 struct {
	val     int32
	memoIdx int32
}

// entryInt32 is a single open-addressing slot: the (fixed-up) hash of the
// value plus its payload. A hash equal to sentinel marks an empty slot.
type entryInt32 struct {
	h       uint64
	payload payloadInt32
}

// Valid reports whether this slot is occupied (its hash differs from the
// empty-slot sentinel).
func (e entryInt32) Valid() bool { return e.h != sentinel }

// Int32HashTable is an open-addressing hash table of int32 values. The
// capacity is always a power of two so capMask (cap-1) reduces a hash to
// a slot index with a single bitwise AND.
type Int32HashTable struct {
	cap     uint64
	capMask uint64
	size    uint64

	entries []entryInt32
}

// NewInt32HashTable returns a table able to hold at least cap values; the
// actual capacity is rounded up to a power of two, with a minimum of 32.
func NewInt32HashTable(cap uint64) *Int32HashTable {
	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	ret := &Int32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
	ret.entries = make([]entryInt32, initCap)
	return ret
}

// Reset drops all entries and re-allocates storage for the requested
// capacity (power-of-two rounded, minimum 32).
func (h *Int32HashTable) Reset(cap uint64) {
	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	h.capMask = h.cap - 1
	h.size = 0
	h.entries = make([]entryInt32, h.cap)
}

// CopyValues copies every stored value into out at its memo index; out
// must be large enough to hold all values.
func (h *Int32HashTable) CopyValues(out []int32) {
	h.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values whose memo index is >= start into
// out, shifted down by start.
func (h *Int32HashTable) CopyValuesSubset(start int, out []int32) {
	h.VisitEntries(func(e *entryInt32) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			out[idx] = e.payload.val
		}
	})
}

// WriteOut writes all values into out in little-endian byte order.
func (h *Int32HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}

// WriteOutSubset writes the values with memo index >= start into out
// (reinterpreted as []int32), shifted down by start, converting each
// value to little-endian.
func (h *Int32HashTable) WriteOutSubset(start int, out []byte) {
	data := arrow.Int32Traits.CastFromBytes(out)
	h.VisitEntries(func(e *entryInt32) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			data[idx] = utils.ToLEInt32(e.payload.val)
		}
	})
}

// needUpsize reports whether size has reached the load-factor threshold.
func (h *Int32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }

// fixHash remaps the reserved sentinel hash to an arbitrary non-sentinel
// value (42) so occupied slots can never be mistaken for empty ones.
func (Int32HashTable) fixHash(v uint64) uint64 {
	if v == sentinel {
		return 42
	}
	return v
}

// Lookup finds the slot for hash v whose value satisfies cmp. On a match
// it returns the entry and true; otherwise it returns the empty slot
// where the value would be inserted, and false.
func (h *Int32HashTable) Lookup(v uint64, cmp func(int32) bool) (*entryInt32, bool) {
	idx, ok := h.lookup(v, h.capMask, cmp)
	return &h.entries[idx], ok
}

// lookup probes the table for hash v, returning the index of the
// matching slot (true) or of the first empty slot (false). Collisions
// are resolved by perturbing the index with high bits of the hash, the
// same scheme CPython's dict uses, so all slots are eventually visited.
func (h *Int32HashTable) lookup(v uint64, szMask uint64, cmp func(int32) bool) (uint64, bool) {
	const perturbShift uint8 = 5

	var (
		idx     uint64
		perturb uint64
		e       *entryInt32
	)

	v = h.fixHash(v)
	idx = v & szMask
	perturb = (v >> uint64(perturbShift)) + 1

	for {
		e = &h.entries[idx]
		if e.h == v && cmp(e.payload.val) {
			return idx, true
		}

		if e.h == sentinel {
			return idx, false
		}

		// perturbation logic inspired by CPython's set/dict: fold in
		// more of the hash each step to break collision cycles
		idx = (idx + perturb) & szMask
		perturb = (perturb >> uint64(perturbShift)) + 1
	}
}

// upsize rehashes every valid entry into freshly allocated storage of
// newcap slots (newcap must be a power of two).
func (h *Int32HashTable) upsize(newcap uint64) error {
	newMask := newcap - 1

	oldEntries := h.entries
	h.entries = make([]entryInt32, newcap)
	for _, e := range oldEntries {
		if e.Valid() {
			// cmp always false: we only need the empty slot for e.h
			idx, _ := h.lookup(e.h, newMask, func(int32) bool { return false })
			h.entries[idx] = e
		}
	}
	h.cap = newcap
	h.capMask = newMask
	return nil
}

// Insert stores val (with hash v and memo index memoIdx) into slot e,
// which must come from a failed Lookup, growing the table afterwards if
// the load factor is reached. e must not be reused after Insert since a
// grow re-allocates the entry storage.
func (h *Int32HashTable) Insert(e *entryInt32, v uint64, val int32, memoIdx int32) error {
	e.h = h.fixHash(v)
	e.payload.val = val
	e.payload.memoIdx = memoIdx
	h.size++

	if h.needUpsize() {
		h.upsize(h.cap * uint64(loadFactor) * 2)
	}
	return nil
}

// VisitEntries calls visit once per valid (occupied) entry. The pointer
// passed to visit refers to a loop copy, so mutations through it are not
// reflected in the table.
func (h *Int32HashTable) VisitEntries(visit func(*entryInt32)) {
	for _, e := range h.entries {
		if e.Valid() {
			visit(&e)
		}
	}
}

// Int32MemoTable memoizes int32 values: each distinct value is assigned
// a dense index in first-insertion order, with optional tracking of a
// single null at nullIdx.
type Int32MemoTable struct {
	tbl     *Int32HashTable
	nullIdx int32
}

// NewInt32MemoTable returns a memo table pre-sized for num values.
func NewInt32MemoTable(num int64) *Int32MemoTable {
	return &Int32MemoTable{tbl: NewInt32HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for int32.
func (Int32MemoTable) TypeTraits() TypeTraits {
	return arrow.Int32Traits
}

// Reset clears the memo table, forgetting all values and any null.
func (s *Int32MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of distinct memoized values, counting the
// null if one has been inserted.
func (s *Int32MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index assigned to null and whether a null
// has been inserted.
func (s *Int32MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, assigning the next
// available index if none exists yet; found reports whether it already
// existed.
func (s *Int32MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out,
// which must be a []int32.
func (s *Int32MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values with memo index >= start into out
// (a []int32), shifted down by start.
func (s *Int32MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]int32))
}

// WriteOut writes the values into out in native byte order.
func (s *Int32MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Int32Traits.CastFromBytes(out))
}

// WriteOutSubset writes the values with memo index >= start into out in
// native byte order.
func (s *Int32MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Int32Traits.CastFromBytes(out))
}

// WriteOutLE writes the values into out in little-endian byte order.
func (s *Int32MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the values with memo index >= start into out
// in little-endian byte order.
func (s *Int32MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index of val (an int32) and whether it was found.
func (s *Int32MemoTable) Get(val interface{}) (int, bool) {
	// hash the value, then confirm candidates by direct comparison
	h := hashInt(uint64(val.(int32)), 0)
	if e, ok := s.tbl.Lookup(h, func(v int32) bool { return val.(int32) == v }); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index of val (an int32), inserting it at
// the next index if absent; found reports whether it already existed.
func (s *Int32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	h := hashInt(uint64(val.(int32)), 0)
	e, ok := s.tbl.Lookup(h, func(v int32) bool {
		return val.(int32) == v
	})

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		// Size() (not tbl.size) so an inserted null keeps its index
		idx = s.Size()
		s.tbl.Insert(e, h, val.(int32), int32(idx))
	}
	return
}

// GetOrInsertBytes is not supported for numeric memo tables.
func (s *Int32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
1413
1414 type payloadInt64 struct {
1415 val int64
1416 memoIdx int32
1417 }
1418
1419 type entryInt64 struct {
1420 h uint64
1421 payload payloadInt64
1422 }
1423
1424 func (e entryInt64) Valid() bool { return e.h != sentinel }
1425
1426
1427
1428
1429 type Int64HashTable struct {
1430 cap uint64
1431 capMask uint64
1432 size uint64
1433
1434 entries []entryInt64
1435 }
1436
1437
1438
1439 func NewInt64HashTable(cap uint64) *Int64HashTable {
1440 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
1441 ret := &Int64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
1442 ret.entries = make([]entryInt64, initCap)
1443 return ret
1444 }
1445
1446
1447
1448
1449 func (h *Int64HashTable) Reset(cap uint64) {
1450 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
1451 h.capMask = h.cap - 1
1452 h.size = 0
1453 h.entries = make([]entryInt64, h.cap)
1454 }
1455
1456
1457
1458 func (h *Int64HashTable) CopyValues(out []int64) {
1459 h.CopyValuesSubset(0, out)
1460 }
1461
1462
1463
1464 func (h *Int64HashTable) CopyValuesSubset(start int, out []int64) {
1465 h.VisitEntries(func(e *entryInt64) {
1466 idx := e.payload.memoIdx - int32(start)
1467 if idx >= 0 {
1468 out[idx] = e.payload.val
1469 }
1470 })
1471 }
1472
1473 func (h *Int64HashTable) WriteOut(out []byte) {
1474 h.WriteOutSubset(0, out)
1475 }
1476
1477 func (h *Int64HashTable) WriteOutSubset(start int, out []byte) {
1478 data := arrow.Int64Traits.CastFromBytes(out)
1479 h.VisitEntries(func(e *entryInt64) {
1480 idx := e.payload.memoIdx - int32(start)
1481 if idx >= 0 {
1482 data[idx] = utils.ToLEInt64(e.payload.val)
1483 }
1484 })
1485 }
1486
1487 func (h *Int64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
1488
1489 func (Int64HashTable) fixHash(v uint64) uint64 {
1490 if v == sentinel {
1491 return 42
1492 }
1493 return v
1494 }
1495
1496
1497
1498
1499 func (h *Int64HashTable) Lookup(v uint64, cmp func(int64) bool) (*entryInt64, bool) {
1500 idx, ok := h.lookup(v, h.capMask, cmp)
1501 return &h.entries[idx], ok
1502 }
1503
1504 func (h *Int64HashTable) lookup(v uint64, szMask uint64, cmp func(int64) bool) (uint64, bool) {
1505 const perturbShift uint8 = 5
1506
1507 var (
1508 idx uint64
1509 perturb uint64
1510 e *entryInt64
1511 )
1512
1513 v = h.fixHash(v)
1514 idx = v & szMask
1515 perturb = (v >> uint64(perturbShift)) + 1
1516
1517 for {
1518 e = &h.entries[idx]
1519 if e.h == v && cmp(e.payload.val) {
1520 return idx, true
1521 }
1522
1523 if e.h == sentinel {
1524 return idx, false
1525 }
1526
1527
1528
1529
1530 idx = (idx + perturb) & szMask
1531 perturb = (perturb >> uint64(perturbShift)) + 1
1532 }
1533 }
1534
1535 func (h *Int64HashTable) upsize(newcap uint64) error {
1536 newMask := newcap - 1
1537
1538 oldEntries := h.entries
1539 h.entries = make([]entryInt64, newcap)
1540 for _, e := range oldEntries {
1541 if e.Valid() {
1542 idx, _ := h.lookup(e.h, newMask, func(int64) bool { return false })
1543 h.entries[idx] = e
1544 }
1545 }
1546 h.cap = newcap
1547 h.capMask = newMask
1548 return nil
1549 }
1550
1551
1552
1553 func (h *Int64HashTable) Insert(e *entryInt64, v uint64, val int64, memoIdx int32) error {
1554 e.h = h.fixHash(v)
1555 e.payload.val = val
1556 e.payload.memoIdx = memoIdx
1557 h.size++
1558
1559 if h.needUpsize() {
1560 h.upsize(h.cap * uint64(loadFactor) * 2)
1561 }
1562 return nil
1563 }
1564
1565
1566
1567 func (h *Int64HashTable) VisitEntries(visit func(*entryInt64)) {
1568 for _, e := range h.entries {
1569 if e.Valid() {
1570 visit(&e)
1571 }
1572 }
1573 }
1574
1575
1576
1577
// Int64MemoTable memoizes distinct int64 values, assigning each new value
// the next sequential memo index. A null value, if any, is tracked
// separately from the hash table.
type Int64MemoTable struct {
	tbl     *Int64HashTable
	nullIdx int32 // memo index of the null value, or KeyNotFound
}

// NewInt64MemoTable constructs a memo table sized for num distinct values.
func NewInt64MemoTable(num int64) *Int64MemoTable {
	return &Int64MemoTable{tbl: NewInt64HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for int64.
func (Int64MemoTable) TypeTraits() TypeTraits {
	return arrow.Int64Traits
}

// Reset clears all memoized values, including null.
func (s *Int64MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of memoized values, counting null if present.
func (s *Int64MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index of the null value and whether one has
// been inserted.
func (s *Int64MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, inserting it with the
// next available index if absent. found reports whether null already
// existed.
func (s *Int64MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out
// (which must be a []int64).
func (s *Int64MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the memoized values with index >= start into
// out (which must be a []int64).
func (s *Int64MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]int64))
}

// WriteOut writes the memoized values into out in native byte order.
func (s *Int64MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Int64Traits.CastFromBytes(out))
}

// WriteOutSubset writes the memoized values with index >= start into out
// in native byte order.
func (s *Int64MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Int64Traits.CastFromBytes(out))
}

// WriteOutLE writes the memoized values into out as little-endian bytes.
func (s *Int64MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the memoized values with index >= start into
// out as little-endian bytes.
func (s *Int64MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index for val (which must be an int64) and
// whether it was found.
func (s *Int64MemoTable) Get(val interface{}) (int, bool) {

	h := hashInt(uint64(val.(int64)), 0)
	if e, ok := s.tbl.Lookup(h, func(v int64) bool { return val.(int64) == v }); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index for val (which must be an int64),
// inserting it with the next available index if absent. found reports
// whether the value already existed.
func (s *Int64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	h := hashInt(uint64(val.(int64)), 0)
	e, ok := s.tbl.Lookup(h, func(v int64) bool {
		return val.(int64) == v
	})

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		idx = s.Size()
		s.tbl.Insert(e, h, val.(int64), int32(idx))
	}
	return
}

// GetOrInsertBytes is not implemented for numeric memo tables.
func (s *Int64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
// payloadUint32 pairs a stored value with its memoized index.
type payloadUint32 struct {
	val     uint32
	memoIdx int32
}

// entryUint32 is a single hash-table slot: the (fixed) hash plus payload.
type entryUint32 struct {
	h       uint64
	payload payloadUint32
}

// Valid reports whether the entry is occupied; a sentinel hash marks an empty slot.
func (e entryUint32) Valid() bool { return e.h != sentinel }

// Uint32HashTable is an open-addressing hash table storing uint32 values
// together with the memo index assigned at insertion time.
type Uint32HashTable struct {
	cap     uint64 // total number of slots (always a power of two)
	capMask uint64 // cap - 1, used to mask hashes into slot indices
	size    uint64 // number of occupied slots

	entries []entryUint32
}

// NewUint32HashTable creates a new hash table with at least the requested
// capacity, rounded up to the next power of two (minimum 32) so that
// masking can be used instead of modulo.
func NewUint32HashTable(cap uint64) *Uint32HashTable {
	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	ret := &Uint32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
	ret.entries = make([]entryUint32, initCap)
	return ret
}

// Reset drops all stored entries and reinitializes the table with the
// given capacity (rounded up to a power of two, minimum 32).
func (h *Uint32HashTable) Reset(cap uint64) {
	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	h.capMask = h.cap - 1
	h.size = 0
	h.entries = make([]entryUint32, h.cap)
}

// CopyValues copies all stored values into out, ordered by memo index.
func (h *Uint32HashTable) CopyValues(out []uint32) {
	h.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values whose memo index is >= start into
// out, placing each value at position memoIdx - start.
func (h *Uint32HashTable) CopyValuesSubset(start int, out []uint32) {
	h.VisitEntries(func(e *entryUint32) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			out[idx] = e.payload.val
		}
	})
}

// WriteOut serializes all stored values into out as little-endian bytes.
func (h *Uint32HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}

// WriteOutSubset serializes the values whose memo index is >= start into
// out as little-endian bytes.
func (h *Uint32HashTable) WriteOutSubset(start int, out []byte) {
	data := arrow.Uint32Traits.CastFromBytes(out)
	h.VisitEntries(func(e *entryUint32) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			data[idx] = utils.ToLEUint32(e.payload.val)
		}
	})
}

// needUpsize reports whether the table's load factor has been exceeded.
func (h *Uint32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }

// fixHash remaps the reserved sentinel hash value (which marks empty
// slots) to an arbitrary non-sentinel value.
func (Uint32HashTable) fixHash(v uint64) uint64 {
	if v == sentinel {
		return 42
	}
	return v
}

// Lookup finds the entry for hash v whose value satisfies cmp. The bool
// reports whether a match was found; when false, the returned entry is
// the empty slot where a new value should be inserted.
func (h *Uint32HashTable) Lookup(v uint64, cmp func(uint32) bool) (*entryUint32, bool) {
	idx, ok := h.lookup(v, h.capMask, cmp)
	return &h.entries[idx], ok
}

// lookup probes for hash v and returns the slot index plus whether that
// slot holds a matching value.
func (h *Uint32HashTable) lookup(v uint64, szMask uint64, cmp func(uint32) bool) (uint64, bool) {
	const perturbShift uint8 = 5

	var (
		idx     uint64
		perturb uint64
		e       *entryUint32
	)

	v = h.fixHash(v)
	idx = v & szMask
	perturb = (v >> uint64(perturbShift)) + 1

	for {
		e = &h.entries[idx]
		if e.h == v && cmp(e.payload.val) {
			return idx, true
		}

		if e.h == sentinel {
			return idx, false
		}

		// Perturbation mixes the upper hash bits into the probe sequence
		// so repeated collisions eventually visit every slot.
		idx = (idx + perturb) & szMask
		perturb = (perturb >> uint64(perturbShift)) + 1
	}
}

// upsize rehashes every valid entry into a fresh slot slice of newcap
// slots; newcap must be a power of two.
func (h *Uint32HashTable) upsize(newcap uint64) error {
	newMask := newcap - 1

	oldEntries := h.entries
	h.entries = make([]entryUint32, newcap)
	for _, e := range oldEntries {
		if e.Valid() {
			// cmp always returns false: we only need the first empty slot.
			idx, _ := h.lookup(e.h, newMask, func(uint32) bool { return false })
			h.entries[idx] = e
		}
	}
	h.cap = newcap
	h.capMask = newMask
	return nil
}

// Insert stores val and memoIdx into entry e (as obtained from Lookup),
// growing the table when the load factor is exceeded.
func (h *Uint32HashTable) Insert(e *entryUint32, v uint64, val uint32, memoIdx int32) error {
	e.h = h.fixHash(v)
	e.payload.val = val
	e.payload.memoIdx = memoIdx
	h.size++

	if h.needUpsize() {
		h.upsize(h.cap * uint64(loadFactor) * 2)
	}
	return nil
}

// VisitEntries calls visit for every occupied entry. The pointer passed
// to visit refers to a loop-local copy, so mutations through it do not
// change the table.
func (h *Uint32HashTable) VisitEntries(visit func(*entryUint32)) {
	for _, e := range h.entries {
		if e.Valid() {
			visit(&e)
		}
	}
}
// Uint32MemoTable memoizes distinct uint32 values, assigning each new
// value the next sequential memo index. A null value, if any, is tracked
// separately from the hash table.
type Uint32MemoTable struct {
	tbl     *Uint32HashTable
	nullIdx int32 // memo index of the null value, or KeyNotFound
}

// NewUint32MemoTable constructs a memo table sized for num distinct values.
func NewUint32MemoTable(num int64) *Uint32MemoTable {
	return &Uint32MemoTable{tbl: NewUint32HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for uint32.
func (Uint32MemoTable) TypeTraits() TypeTraits {
	return arrow.Uint32Traits
}

// Reset clears all memoized values, including null.
func (s *Uint32MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of memoized values, counting null if present.
func (s *Uint32MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index of the null value and whether one has
// been inserted.
func (s *Uint32MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, inserting it with the
// next available index if absent. found reports whether null already
// existed.
func (s *Uint32MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out
// (which must be a []uint32).
func (s *Uint32MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the memoized values with index >= start into
// out (which must be a []uint32).
func (s *Uint32MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]uint32))
}

// WriteOut writes the memoized values into out in native byte order.
func (s *Uint32MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Uint32Traits.CastFromBytes(out))
}

// WriteOutSubset writes the memoized values with index >= start into out
// in native byte order.
func (s *Uint32MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Uint32Traits.CastFromBytes(out))
}

// WriteOutLE writes the memoized values into out as little-endian bytes.
func (s *Uint32MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the memoized values with index >= start into
// out as little-endian bytes.
func (s *Uint32MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index for val (which must be a uint32) and
// whether it was found.
func (s *Uint32MemoTable) Get(val interface{}) (int, bool) {

	h := hashInt(uint64(val.(uint32)), 0)
	if e, ok := s.tbl.Lookup(h, func(v uint32) bool { return val.(uint32) == v }); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index for val (which must be a uint32),
// inserting it with the next available index if absent. found reports
// whether the value already existed.
func (s *Uint32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	h := hashInt(uint64(val.(uint32)), 0)
	e, ok := s.tbl.Lookup(h, func(v uint32) bool {
		return val.(uint32) == v
	})

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		idx = s.Size()
		s.tbl.Insert(e, h, val.(uint32), int32(idx))
	}
	return
}

// GetOrInsertBytes is not implemented for numeric memo tables.
func (s *Uint32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
// payloadUint64 pairs a stored value with its memoized index.
type payloadUint64 struct {
	val     uint64
	memoIdx int32
}

// entryUint64 is a single hash-table slot: the (fixed) hash plus payload.
type entryUint64 struct {
	h       uint64
	payload payloadUint64
}

// Valid reports whether the entry is occupied; a sentinel hash marks an empty slot.
func (e entryUint64) Valid() bool { return e.h != sentinel }

// Uint64HashTable is an open-addressing hash table storing uint64 values
// together with the memo index assigned at insertion time.
type Uint64HashTable struct {
	cap     uint64 // total number of slots (always a power of two)
	capMask uint64 // cap - 1, used to mask hashes into slot indices
	size    uint64 // number of occupied slots

	entries []entryUint64
}

// NewUint64HashTable creates a new hash table with at least the requested
// capacity, rounded up to the next power of two (minimum 32) so that
// masking can be used instead of modulo.
func NewUint64HashTable(cap uint64) *Uint64HashTable {
	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	ret := &Uint64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
	ret.entries = make([]entryUint64, initCap)
	return ret
}

// Reset drops all stored entries and reinitializes the table with the
// given capacity (rounded up to a power of two, minimum 32).
func (h *Uint64HashTable) Reset(cap uint64) {
	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	h.capMask = h.cap - 1
	h.size = 0
	h.entries = make([]entryUint64, h.cap)
}

// CopyValues copies all stored values into out, ordered by memo index.
func (h *Uint64HashTable) CopyValues(out []uint64) {
	h.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values whose memo index is >= start into
// out, placing each value at position memoIdx - start.
func (h *Uint64HashTable) CopyValuesSubset(start int, out []uint64) {
	h.VisitEntries(func(e *entryUint64) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			out[idx] = e.payload.val
		}
	})
}

// WriteOut serializes all stored values into out as little-endian bytes.
func (h *Uint64HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}

// WriteOutSubset serializes the values whose memo index is >= start into
// out as little-endian bytes.
func (h *Uint64HashTable) WriteOutSubset(start int, out []byte) {
	data := arrow.Uint64Traits.CastFromBytes(out)
	h.VisitEntries(func(e *entryUint64) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			data[idx] = utils.ToLEUint64(e.payload.val)
		}
	})
}

// needUpsize reports whether the table's load factor has been exceeded.
func (h *Uint64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }

// fixHash remaps the reserved sentinel hash value (which marks empty
// slots) to an arbitrary non-sentinel value.
func (Uint64HashTable) fixHash(v uint64) uint64 {
	if v == sentinel {
		return 42
	}
	return v
}

// Lookup finds the entry for hash v whose value satisfies cmp. The bool
// reports whether a match was found; when false, the returned entry is
// the empty slot where a new value should be inserted.
func (h *Uint64HashTable) Lookup(v uint64, cmp func(uint64) bool) (*entryUint64, bool) {
	idx, ok := h.lookup(v, h.capMask, cmp)
	return &h.entries[idx], ok
}

// lookup probes for hash v and returns the slot index plus whether that
// slot holds a matching value.
func (h *Uint64HashTable) lookup(v uint64, szMask uint64, cmp func(uint64) bool) (uint64, bool) {
	const perturbShift uint8 = 5

	var (
		idx     uint64
		perturb uint64
		e       *entryUint64
	)

	v = h.fixHash(v)
	idx = v & szMask
	perturb = (v >> uint64(perturbShift)) + 1

	for {
		e = &h.entries[idx]
		if e.h == v && cmp(e.payload.val) {
			return idx, true
		}

		if e.h == sentinel {
			return idx, false
		}

		// Perturbation mixes the upper hash bits into the probe sequence
		// so repeated collisions eventually visit every slot.
		idx = (idx + perturb) & szMask
		perturb = (perturb >> uint64(perturbShift)) + 1
	}
}

// upsize rehashes every valid entry into a fresh slot slice of newcap
// slots; newcap must be a power of two.
func (h *Uint64HashTable) upsize(newcap uint64) error {
	newMask := newcap - 1

	oldEntries := h.entries
	h.entries = make([]entryUint64, newcap)
	for _, e := range oldEntries {
		if e.Valid() {
			// cmp always returns false: we only need the first empty slot.
			idx, _ := h.lookup(e.h, newMask, func(uint64) bool { return false })
			h.entries[idx] = e
		}
	}
	h.cap = newcap
	h.capMask = newMask
	return nil
}

// Insert stores val and memoIdx into entry e (as obtained from Lookup),
// growing the table when the load factor is exceeded.
func (h *Uint64HashTable) Insert(e *entryUint64, v uint64, val uint64, memoIdx int32) error {
	e.h = h.fixHash(v)
	e.payload.val = val
	e.payload.memoIdx = memoIdx
	h.size++

	if h.needUpsize() {
		h.upsize(h.cap * uint64(loadFactor) * 2)
	}
	return nil
}

// VisitEntries calls visit for every occupied entry. The pointer passed
// to visit refers to a loop-local copy, so mutations through it do not
// change the table.
func (h *Uint64HashTable) VisitEntries(visit func(*entryUint64)) {
	for _, e := range h.entries {
		if e.Valid() {
			visit(&e)
		}
	}
}
// Uint64MemoTable memoizes distinct uint64 values, assigning each new
// value the next sequential memo index. A null value, if any, is tracked
// separately from the hash table.
type Uint64MemoTable struct {
	tbl     *Uint64HashTable
	nullIdx int32 // memo index of the null value, or KeyNotFound
}

// NewUint64MemoTable constructs a memo table sized for num distinct values.
func NewUint64MemoTable(num int64) *Uint64MemoTable {
	return &Uint64MemoTable{tbl: NewUint64HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for uint64.
func (Uint64MemoTable) TypeTraits() TypeTraits {
	return arrow.Uint64Traits
}

// Reset clears all memoized values, including null.
func (s *Uint64MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of memoized values, counting null if present.
func (s *Uint64MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index of the null value and whether one has
// been inserted.
func (s *Uint64MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, inserting it with the
// next available index if absent. found reports whether null already
// existed.
func (s *Uint64MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out
// (which must be a []uint64).
func (s *Uint64MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the memoized values with index >= start into
// out (which must be a []uint64).
func (s *Uint64MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]uint64))
}

// WriteOut writes the memoized values into out in native byte order.
func (s *Uint64MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Uint64Traits.CastFromBytes(out))
}

// WriteOutSubset writes the memoized values with index >= start into out
// in native byte order.
func (s *Uint64MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Uint64Traits.CastFromBytes(out))
}

// WriteOutLE writes the memoized values into out as little-endian bytes.
func (s *Uint64MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the memoized values with index >= start into
// out as little-endian bytes.
func (s *Uint64MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index for val (which must be a uint64) and
// whether it was found.
func (s *Uint64MemoTable) Get(val interface{}) (int, bool) {

	h := hashInt(uint64(val.(uint64)), 0)
	if e, ok := s.tbl.Lookup(h, func(v uint64) bool { return val.(uint64) == v }); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index for val (which must be a uint64),
// inserting it with the next available index if absent. found reports
// whether the value already existed.
func (s *Uint64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	h := hashInt(uint64(val.(uint64)), 0)
	e, ok := s.tbl.Lookup(h, func(v uint64) bool {
		return val.(uint64) == v
	})

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		idx = s.Size()
		s.tbl.Insert(e, h, val.(uint64), int32(idx))
	}
	return
}

// GetOrInsertBytes is not implemented for numeric memo tables.
func (s *Uint64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
// payloadFloat32 pairs a stored value with its memoized index.
type payloadFloat32 struct {
	val     float32
	memoIdx int32
}

// entryFloat32 is a single hash-table slot: the (fixed) hash plus payload.
type entryFloat32 struct {
	h       uint64
	payload payloadFloat32
}

// Valid reports whether the entry is occupied; a sentinel hash marks an empty slot.
func (e entryFloat32) Valid() bool { return e.h != sentinel }

// Float32HashTable is an open-addressing hash table storing float32
// values together with the memo index assigned at insertion time.
type Float32HashTable struct {
	cap     uint64 // total number of slots (always a power of two)
	capMask uint64 // cap - 1, used to mask hashes into slot indices
	size    uint64 // number of occupied slots

	entries []entryFloat32
}

// NewFloat32HashTable creates a new hash table with at least the
// requested capacity, rounded up to the next power of two (minimum 32)
// so that masking can be used instead of modulo.
func NewFloat32HashTable(cap uint64) *Float32HashTable {
	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	ret := &Float32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
	ret.entries = make([]entryFloat32, initCap)
	return ret
}

// Reset drops all stored entries and reinitializes the table with the
// given capacity (rounded up to a power of two, minimum 32).
func (h *Float32HashTable) Reset(cap uint64) {
	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	h.capMask = h.cap - 1
	h.size = 0
	h.entries = make([]entryFloat32, h.cap)
}

// CopyValues copies all stored values into out, ordered by memo index.
func (h *Float32HashTable) CopyValues(out []float32) {
	h.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values whose memo index is >= start into
// out, placing each value at position memoIdx - start.
func (h *Float32HashTable) CopyValuesSubset(start int, out []float32) {
	h.VisitEntries(func(e *entryFloat32) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			out[idx] = e.payload.val
		}
	})
}

// WriteOut serializes all stored values into out as little-endian bytes.
func (h *Float32HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}

// WriteOutSubset serializes the values whose memo index is >= start into
// out as little-endian bytes.
func (h *Float32HashTable) WriteOutSubset(start int, out []byte) {
	data := arrow.Float32Traits.CastFromBytes(out)
	h.VisitEntries(func(e *entryFloat32) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			data[idx] = utils.ToLEFloat32(e.payload.val)
		}
	})
}

// needUpsize reports whether the table's load factor has been exceeded.
func (h *Float32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }

// fixHash remaps the reserved sentinel hash value (which marks empty
// slots) to an arbitrary non-sentinel value.
func (Float32HashTable) fixHash(v uint64) uint64 {
	if v == sentinel {
		return 42
	}
	return v
}

// Lookup finds the entry for hash v whose value satisfies cmp. The bool
// reports whether a match was found; when false, the returned entry is
// the empty slot where a new value should be inserted.
func (h *Float32HashTable) Lookup(v uint64, cmp func(float32) bool) (*entryFloat32, bool) {
	idx, ok := h.lookup(v, h.capMask, cmp)
	return &h.entries[idx], ok
}

// lookup probes for hash v and returns the slot index plus whether that
// slot holds a matching value.
func (h *Float32HashTable) lookup(v uint64, szMask uint64, cmp func(float32) bool) (uint64, bool) {
	const perturbShift uint8 = 5

	var (
		idx     uint64
		perturb uint64
		e       *entryFloat32
	)

	v = h.fixHash(v)
	idx = v & szMask
	perturb = (v >> uint64(perturbShift)) + 1

	for {
		e = &h.entries[idx]
		if e.h == v && cmp(e.payload.val) {
			return idx, true
		}

		if e.h == sentinel {
			return idx, false
		}

		// Perturbation mixes the upper hash bits into the probe sequence
		// so repeated collisions eventually visit every slot.
		idx = (idx + perturb) & szMask
		perturb = (perturb >> uint64(perturbShift)) + 1
	}
}

// upsize rehashes every valid entry into a fresh slot slice of newcap
// slots; newcap must be a power of two.
func (h *Float32HashTable) upsize(newcap uint64) error {
	newMask := newcap - 1

	oldEntries := h.entries
	h.entries = make([]entryFloat32, newcap)
	for _, e := range oldEntries {
		if e.Valid() {
			// cmp always returns false: we only need the first empty slot.
			idx, _ := h.lookup(e.h, newMask, func(float32) bool { return false })
			h.entries[idx] = e
		}
	}
	h.cap = newcap
	h.capMask = newMask
	return nil
}

// Insert stores val and memoIdx into entry e (as obtained from Lookup),
// growing the table when the load factor is exceeded.
func (h *Float32HashTable) Insert(e *entryFloat32, v uint64, val float32, memoIdx int32) error {
	e.h = h.fixHash(v)
	e.payload.val = val
	e.payload.memoIdx = memoIdx
	h.size++

	if h.needUpsize() {
		h.upsize(h.cap * uint64(loadFactor) * 2)
	}
	return nil
}

// VisitEntries calls visit for every occupied entry. The pointer passed
// to visit refers to a loop-local copy, so mutations through it do not
// change the table.
func (h *Float32HashTable) VisitEntries(visit func(*entryFloat32)) {
	for _, e := range h.entries {
		if e.Valid() {
			visit(&e)
		}
	}
}
// Float32MemoTable memoizes distinct float32 values, assigning each new
// value the next sequential memo index. All NaN inputs are canonicalized
// to a single NaN entry; a null value, if any, is tracked separately.
type Float32MemoTable struct {
	tbl     *Float32HashTable
	nullIdx int32 // memo index of the null value, or KeyNotFound
}

// NewFloat32MemoTable constructs a memo table sized for num distinct values.
func NewFloat32MemoTable(num int64) *Float32MemoTable {
	return &Float32MemoTable{tbl: NewFloat32HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for float32.
func (Float32MemoTable) TypeTraits() TypeTraits {
	return arrow.Float32Traits
}

// Reset clears all memoized values, including null.
func (s *Float32MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of memoized values, counting null if present.
func (s *Float32MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index of the null value and whether one has
// been inserted.
func (s *Float32MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, inserting it with the
// next available index if absent. found reports whether null already
// existed.
func (s *Float32MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out
// (which must be a []float32).
func (s *Float32MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the memoized values with index >= start into
// out (which must be a []float32).
func (s *Float32MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]float32))
}

// WriteOut writes the memoized values into out in native byte order.
func (s *Float32MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Float32Traits.CastFromBytes(out))
}

// WriteOutSubset writes the memoized values with index >= start into out
// in native byte order.
func (s *Float32MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Float32Traits.CastFromBytes(out))
}

// WriteOutLE writes the memoized values into out as little-endian bytes.
func (s *Float32MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the memoized values with index >= start into
// out as little-endian bytes.
func (s *Float32MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index for val (which must be a float32) and
// whether it was found. Any NaN input matches the canonical NaN entry.
func (s *Float32MemoTable) Get(val interface{}) (int, bool) {
	var cmp func(float32) bool

	if math.IsNaN(float64(val.(float32))) {
		cmp = isNan32Cmp
		// NaN != NaN, so hash and compare via one canonical NaN value.
		val = float32(math.NaN())
	} else {
		cmp = func(v float32) bool { return val.(float32) == v }
	}

	h := hashFloat32(val.(float32), 0)
	if e, ok := s.tbl.Lookup(h, cmp); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index for val (which must be a float32),
// inserting it with the next available index if absent. found reports
// whether the value already existed. NaNs are memoized as one entry.
func (s *Float32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	var cmp func(float32) bool

	if math.IsNaN(float64(val.(float32))) {
		cmp = isNan32Cmp
		// NaN != NaN, so hash and compare via one canonical NaN value.
		val = float32(math.NaN())
	} else {
		cmp = func(v float32) bool { return val.(float32) == v }
	}

	h := hashFloat32(val.(float32), 0)
	e, ok := s.tbl.Lookup(h, cmp)

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		idx = s.Size()
		s.tbl.Insert(e, h, val.(float32), int32(idx))
	}
	return
}

// GetOrInsertBytes is not implemented for numeric memo tables.
func (s *Float32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
// payloadFloat64 pairs a stored value with its memoized index.
type payloadFloat64 struct {
	val     float64
	memoIdx int32
}

// entryFloat64 is a single hash-table slot: the (fixed) hash plus payload.
type entryFloat64 struct {
	h       uint64
	payload payloadFloat64
}

// Valid reports whether the entry is occupied; a sentinel hash marks an empty slot.
func (e entryFloat64) Valid() bool { return e.h != sentinel }

// Float64HashTable is an open-addressing hash table storing float64
// values together with the memo index assigned at insertion time.
type Float64HashTable struct {
	cap     uint64 // total number of slots (always a power of two)
	capMask uint64 // cap - 1, used to mask hashes into slot indices
	size    uint64 // number of occupied slots

	entries []entryFloat64
}

// NewFloat64HashTable creates a new hash table with at least the
// requested capacity, rounded up to the next power of two (minimum 32)
// so that masking can be used instead of modulo.
func NewFloat64HashTable(cap uint64) *Float64HashTable {
	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	ret := &Float64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
	ret.entries = make([]entryFloat64, initCap)
	return ret
}

// Reset drops all stored entries and reinitializes the table with the
// given capacity (rounded up to a power of two, minimum 32).
func (h *Float64HashTable) Reset(cap uint64) {
	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
	h.capMask = h.cap - 1
	h.size = 0
	h.entries = make([]entryFloat64, h.cap)
}

// CopyValues copies all stored values into out, ordered by memo index.
func (h *Float64HashTable) CopyValues(out []float64) {
	h.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the values whose memo index is >= start into
// out, placing each value at position memoIdx - start.
func (h *Float64HashTable) CopyValuesSubset(start int, out []float64) {
	h.VisitEntries(func(e *entryFloat64) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			out[idx] = e.payload.val
		}
	})
}

// WriteOut serializes all stored values into out as little-endian bytes.
func (h *Float64HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}

// WriteOutSubset serializes the values whose memo index is >= start into
// out as little-endian bytes.
func (h *Float64HashTable) WriteOutSubset(start int, out []byte) {
	data := arrow.Float64Traits.CastFromBytes(out)
	h.VisitEntries(func(e *entryFloat64) {
		idx := e.payload.memoIdx - int32(start)
		if idx >= 0 {
			data[idx] = utils.ToLEFloat64(e.payload.val)
		}
	})
}

// needUpsize reports whether the table's load factor has been exceeded.
func (h *Float64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }

// fixHash remaps the reserved sentinel hash value (which marks empty
// slots) to an arbitrary non-sentinel value.
func (Float64HashTable) fixHash(v uint64) uint64 {
	if v == sentinel {
		return 42
	}
	return v
}

// Lookup finds the entry for hash v whose value satisfies cmp. The bool
// reports whether a match was found; when false, the returned entry is
// the empty slot where a new value should be inserted.
func (h *Float64HashTable) Lookup(v uint64, cmp func(float64) bool) (*entryFloat64, bool) {
	idx, ok := h.lookup(v, h.capMask, cmp)
	return &h.entries[idx], ok
}

// lookup probes for hash v and returns the slot index plus whether that
// slot holds a matching value.
func (h *Float64HashTable) lookup(v uint64, szMask uint64, cmp func(float64) bool) (uint64, bool) {
	const perturbShift uint8 = 5

	var (
		idx     uint64
		perturb uint64
		e       *entryFloat64
	)

	v = h.fixHash(v)
	idx = v & szMask
	perturb = (v >> uint64(perturbShift)) + 1

	for {
		e = &h.entries[idx]
		if e.h == v && cmp(e.payload.val) {
			return idx, true
		}

		if e.h == sentinel {
			return idx, false
		}

		// Perturbation mixes the upper hash bits into the probe sequence
		// so repeated collisions eventually visit every slot.
		idx = (idx + perturb) & szMask
		perturb = (perturb >> uint64(perturbShift)) + 1
	}
}

// upsize rehashes every valid entry into a fresh slot slice of newcap
// slots; newcap must be a power of two.
func (h *Float64HashTable) upsize(newcap uint64) error {
	newMask := newcap - 1

	oldEntries := h.entries
	h.entries = make([]entryFloat64, newcap)
	for _, e := range oldEntries {
		if e.Valid() {
			// cmp always returns false: we only need the first empty slot.
			idx, _ := h.lookup(e.h, newMask, func(float64) bool { return false })
			h.entries[idx] = e
		}
	}
	h.cap = newcap
	h.capMask = newMask
	return nil
}

// Insert stores val and memoIdx into entry e (as obtained from Lookup),
// growing the table when the load factor is exceeded.
func (h *Float64HashTable) Insert(e *entryFloat64, v uint64, val float64, memoIdx int32) error {
	e.h = h.fixHash(v)
	e.payload.val = val
	e.payload.memoIdx = memoIdx
	h.size++

	if h.needUpsize() {
		h.upsize(h.cap * uint64(loadFactor) * 2)
	}
	return nil
}

// VisitEntries calls visit for every occupied entry. The pointer passed
// to visit refers to a loop-local copy, so mutations through it do not
// change the table.
func (h *Float64HashTable) VisitEntries(visit func(*entryFloat64)) {
	for _, e := range h.entries {
		if e.Valid() {
			visit(&e)
		}
	}
}
// Float64MemoTable memoizes distinct float64 values, assigning each new
// value the next sequential memo index. All NaN inputs are canonicalized
// to a single NaN entry; a null value, if any, is tracked separately.
type Float64MemoTable struct {
	tbl     *Float64HashTable
	nullIdx int32 // memo index of the null value, or KeyNotFound
}

// NewFloat64MemoTable constructs a memo table sized for num distinct values.
func NewFloat64MemoTable(num int64) *Float64MemoTable {
	return &Float64MemoTable{tbl: NewFloat64HashTable(uint64(num)), nullIdx: KeyNotFound}
}

// TypeTraits returns the Arrow type traits for float64.
func (Float64MemoTable) TypeTraits() TypeTraits {
	return arrow.Float64Traits
}

// Reset clears all memoized values, including null.
func (s *Float64MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}

// Size returns the number of memoized values, counting null if present.
func (s *Float64MemoTable) Size() int {
	sz := int(s.tbl.size)
	if _, ok := s.GetNull(); ok {
		sz++
	}
	return sz
}

// GetNull returns the memo index of the null value and whether one has
// been inserted.
func (s *Float64MemoTable) GetNull() (int, bool) {
	return int(s.nullIdx), s.nullIdx != KeyNotFound
}

// GetOrInsertNull returns the memo index for null, inserting it with the
// next available index if absent. found reports whether null already
// existed.
func (s *Float64MemoTable) GetOrInsertNull() (idx int, found bool) {
	idx, found = s.GetNull()
	if !found {
		idx = s.Size()
		s.nullIdx = int32(idx)
	}
	return
}

// CopyValues copies the memoized values, in memo-index order, into out
// (which must be a []float64).
func (s *Float64MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}

// CopyValuesSubset copies the memoized values with index >= start into
// out (which must be a []float64).
func (s *Float64MemoTable) CopyValuesSubset(start int, out interface{}) {
	s.tbl.CopyValuesSubset(start, out.([]float64))
}

// WriteOut writes the memoized values into out in native byte order.
func (s *Float64MemoTable) WriteOut(out []byte) {
	s.tbl.CopyValues(arrow.Float64Traits.CastFromBytes(out))
}

// WriteOutSubset writes the memoized values with index >= start into out
// in native byte order.
func (s *Float64MemoTable) WriteOutSubset(start int, out []byte) {
	s.tbl.CopyValuesSubset(start, arrow.Float64Traits.CastFromBytes(out))
}

// WriteOutLE writes the memoized values into out as little-endian bytes.
func (s *Float64MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}

// WriteOutSubsetLE writes the memoized values with index >= start into
// out as little-endian bytes.
func (s *Float64MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}

// Get returns the memo index for val (which must be a float64) and
// whether it was found. Any NaN input matches the canonical NaN entry.
func (s *Float64MemoTable) Get(val interface{}) (int, bool) {
	var cmp func(float64) bool
	if math.IsNaN(val.(float64)) {
		cmp = math.IsNaN
		// NaN != NaN, so hash and compare via one canonical NaN value.
		val = math.NaN()
	} else {
		cmp = func(v float64) bool { return val.(float64) == v }
	}

	h := hashFloat64(val.(float64), 0)
	if e, ok := s.tbl.Lookup(h, cmp); ok {
		return int(e.payload.memoIdx), ok
	}
	return KeyNotFound, false
}

// GetOrInsert returns the memo index for val (which must be a float64),
// inserting it with the next available index if absent. found reports
// whether the value already existed. NaNs are memoized as one entry.
func (s *Float64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {

	var cmp func(float64) bool
	if math.IsNaN(val.(float64)) {
		cmp = math.IsNaN
		// NaN != NaN, so hash and compare via one canonical NaN value.
		val = math.NaN()
	} else {
		cmp = func(v float64) bool { return val.(float64) == v }
	}

	h := hashFloat64(val.(float64), 0)
	e, ok := s.tbl.Lookup(h, cmp)

	if ok {
		idx = int(e.payload.memoIdx)
		found = true
	} else {
		idx = s.Size()
		s.tbl.Insert(e, h, val.(float64), int32(idx))
	}
	return
}

// GetOrInsertBytes is not implemented for numeric memo tables.
func (s *Float64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}