1
2
3
4
5 package zstd
6
7 import "fmt"
8
9 const (
10 betterLongTableBits = 19
11 betterLongTableSize = 1 << betterLongTableBits
12 betterLongLen = 8
13
14
15
16
17
18 betterShortTableBits = 13
19 betterShortTableSize = 1 << betterShortTableBits
20 betterShortLen = 5
21
22 betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits)
23 betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt
24
25 betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits)
26 betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt
27 )
28
29 type prevEntry struct {
30 offset int32
31 prev int32
32 }
33
34
35
36
37
38
39
40 type betterFastEncoder struct {
41 fastBase
42 table [betterShortTableSize]tableEntry
43 longTable [betterLongTableSize]prevEntry
44 }
45
46 type betterFastEncoderDict struct {
47 betterFastEncoder
48 dictTable []tableEntry
49 dictLongTable []prevEntry
50 shortTableShardDirty [betterShortTableShardCnt]bool
51 longTableShardDirty [betterLongTableShardCnt]bool
52 allDirty bool
53 }
54
55
56 func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
57 const (
58
59
60 inputMargin = 8 + 2
61 minNonLiteralBlockSize = 16
62 )
63
64
65 for e.cur >= e.bufferReset-int32(len(e.hist)) {
66 if len(e.hist) == 0 {
67 e.table = [betterShortTableSize]tableEntry{}
68 e.longTable = [betterLongTableSize]prevEntry{}
69 e.cur = e.maxMatchOff
70 break
71 }
72
73 minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
74 for i := range e.table[:] {
75 v := e.table[i].offset
76 if v < minOff {
77 v = 0
78 } else {
79 v = v - e.cur + e.maxMatchOff
80 }
81 e.table[i].offset = v
82 }
83 for i := range e.longTable[:] {
84 v := e.longTable[i].offset
85 v2 := e.longTable[i].prev
86 if v < minOff {
87 v = 0
88 v2 = 0
89 } else {
90 v = v - e.cur + e.maxMatchOff
91 if v2 < minOff {
92 v2 = 0
93 } else {
94 v2 = v2 - e.cur + e.maxMatchOff
95 }
96 }
97 e.longTable[i] = prevEntry{
98 offset: v,
99 prev: v2,
100 }
101 }
102 e.cur = e.maxMatchOff
103 break
104 }
105
106 s := e.addBlock(src)
107 blk.size = len(src)
108
109
110 if len(src) > zstdMinMatch {
111 ml := matchLen(src[1:], src)
112 if ml == len(src)-1 {
113 blk.literals = append(blk.literals, src[0])
114 blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
115 return
116 }
117 }
118
119 if len(src) < minNonLiteralBlockSize {
120 blk.extraLits = len(src)
121 blk.literals = blk.literals[:len(src)]
122 copy(blk.literals, src)
123 return
124 }
125
126
127 src = e.hist
128 sLimit := int32(len(src)) - inputMargin
129
130
131 const stepSize = 1
132
133 const kSearchStrength = 9
134
135
136 nextEmit := s
137 cv := load6432(src, s)
138
139
140 offset1 := int32(blk.recentOffsets[0])
141 offset2 := int32(blk.recentOffsets[1])
142
143 addLiterals := func(s *seq, until int32) {
144 if until == nextEmit {
145 return
146 }
147 blk.literals = append(blk.literals, src[nextEmit:until]...)
148 s.litLen = uint32(until - nextEmit)
149 }
150 if debugEncoder {
151 println("recent offsets:", blk.recentOffsets)
152 }
153
154 encodeLoop:
155 for {
156 var t int32
157
158 canRepeat := len(blk.sequences) > 2
159 var matched, index0 int32
160
161 for {
162 if debugAsserts && canRepeat && offset1 == 0 {
163 panic("offset0 was 0")
164 }
165
166 nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
167 nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
168 candidateL := e.longTable[nextHashL]
169 candidateS := e.table[nextHashS]
170
171 const repOff = 1
172 repIndex := s - offset1 + repOff
173 off := s + e.cur
174 e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
175 e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
176 index0 = s + 1
177
178 if canRepeat {
179 if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
180
181 var seq seq
182 lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
183
184 seq.matchLen = uint32(lenght - zstdMinMatch)
185
186
187
188 start := s + repOff
189
190
191 startLimit := nextEmit + 1
192
193 tMin := s - e.maxMatchOff
194 if tMin < 0 {
195 tMin = 0
196 }
197 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
198 repIndex--
199 start--
200 seq.matchLen++
201 }
202 addLiterals(&seq, start)
203
204
205 seq.offset = 1
206 if debugSequences {
207 println("repeat sequence", seq, "next s:", s)
208 }
209 blk.sequences = append(blk.sequences, seq)
210
211
212 index0 := s + repOff
213 s += lenght + repOff
214
215 nextEmit = s
216 if s >= sLimit {
217 if debugEncoder {
218 println("repeat ended", s, lenght)
219
220 }
221 break encodeLoop
222 }
223
224 for index0 < s-1 {
225 cv0 := load6432(src, index0)
226 cv1 := cv0 >> 8
227 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
228 off := index0 + e.cur
229 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
230 e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
231 index0 += 2
232 }
233 cv = load6432(src, s)
234 continue
235 }
236 const repOff2 = 1
237
238
239
240
241 if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
242
243 var seq seq
244 lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
245
246 seq.matchLen = uint32(lenght - zstdMinMatch)
247
248
249
250 start := s + repOff2
251
252
253 startLimit := nextEmit + 1
254
255 tMin := s - e.maxMatchOff
256 if tMin < 0 {
257 tMin = 0
258 }
259 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
260 repIndex--
261 start--
262 seq.matchLen++
263 }
264 addLiterals(&seq, start)
265
266
267 seq.offset = 2
268 if debugSequences {
269 println("repeat sequence 2", seq, "next s:", s)
270 }
271 blk.sequences = append(blk.sequences, seq)
272
273 s += lenght + repOff2
274 nextEmit = s
275 if s >= sLimit {
276 if debugEncoder {
277 println("repeat ended", s, lenght)
278
279 }
280 break encodeLoop
281 }
282
283
284 for index0 < s-1 {
285 cv0 := load6432(src, index0)
286 cv1 := cv0 >> 8
287 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
288 off := index0 + e.cur
289 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
290 e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
291 index0 += 2
292 }
293 cv = load6432(src, s)
294
295 offset1, offset2 = offset2, offset1
296 continue
297 }
298 }
299
300 coffsetL := candidateL.offset - e.cur
301 coffsetLP := candidateL.prev - e.cur
302
303
304 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
305
306 matched = e.matchlen(s+8, coffsetL+8, src) + 8
307 t = coffsetL
308 if debugAsserts && s <= t {
309 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
310 }
311 if debugAsserts && s-t > e.maxMatchOff {
312 panic("s - t >e.maxMatchOff")
313 }
314 if debugMatches {
315 println("long match")
316 }
317
318 if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
319
320 prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
321 if prevMatch > matched {
322 matched = prevMatch
323 t = coffsetLP
324 }
325 if debugAsserts && s <= t {
326 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
327 }
328 if debugAsserts && s-t > e.maxMatchOff {
329 panic("s - t >e.maxMatchOff")
330 }
331 if debugMatches {
332 println("long match")
333 }
334 }
335 break
336 }
337
338
339 if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
340
341 matched = e.matchlen(s+8, coffsetLP+8, src) + 8
342 t = coffsetLP
343 if debugAsserts && s <= t {
344 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
345 }
346 if debugAsserts && s-t > e.maxMatchOff {
347 panic("s - t >e.maxMatchOff")
348 }
349 if debugMatches {
350 println("long match")
351 }
352 break
353 }
354
355 coffsetS := candidateS.offset - e.cur
356
357
358 if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
359
360 matched = e.matchlen(s+4, coffsetS+4, src) + 4
361
362
363 const checkAt = 1
364 cv := load6432(src, s+checkAt)
365 nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
366 candidateL = e.longTable[nextHashL]
367 coffsetL = candidateL.offset - e.cur
368
369
370 e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
371 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
372
373 matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
374 if matchedNext > matched {
375 t = coffsetL
376 s += checkAt
377 matched = matchedNext
378 if debugMatches {
379 println("long match (after short)")
380 }
381 break
382 }
383 }
384
385
386 coffsetL = candidateL.prev - e.cur
387 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
388
389 matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
390 if matchedNext > matched {
391 t = coffsetL
392 s += checkAt
393 matched = matchedNext
394 if debugMatches {
395 println("prev long match (after short)")
396 }
397 break
398 }
399 }
400 t = coffsetS
401 if debugAsserts && s <= t {
402 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
403 }
404 if debugAsserts && s-t > e.maxMatchOff {
405 panic("s - t >e.maxMatchOff")
406 }
407 if debugAsserts && t < 0 {
408 panic("t<0")
409 }
410 if debugMatches {
411 println("short match")
412 }
413 break
414 }
415
416
417 s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
418 if s >= sLimit {
419 break encodeLoop
420 }
421 cv = load6432(src, s)
422 }
423
424
425 if s+matched < sLimit {
426
427
428
429
430 const skipBeginning = 3
431
432 nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
433 s2 := s + skipBeginning
434 cv := load3232(src, s2)
435 candidateL := e.longTable[nextHashL]
436 coffsetL := candidateL.offset - e.cur - matched + skipBeginning
437 if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
438
439 matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
440 if matchedNext > matched {
441 t = coffsetL
442 s = s2
443 matched = matchedNext
444 if debugMatches {
445 println("long match at end-of-match")
446 }
447 }
448 }
449
450
451 if true {
452 coffsetL = candidateL.prev - e.cur - matched + skipBeginning
453 if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
454
455 matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
456 if matchedNext > matched {
457 t = coffsetL
458 s = s2
459 matched = matchedNext
460 if debugMatches {
461 println("prev long match at end-of-match")
462 }
463 }
464 }
465 }
466 }
467
468 offset2 = offset1
469 offset1 = s - t
470
471 if debugAsserts && s <= t {
472 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
473 }
474
475 if debugAsserts && canRepeat && int(offset1) > len(src) {
476 panic("invalid offset")
477 }
478
479
480 l := matched
481
482
483 tMin := s - e.maxMatchOff
484 if tMin < 0 {
485 tMin = 0
486 }
487 for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
488 s--
489 t--
490 l++
491 }
492
493
494 var seq seq
495 seq.litLen = uint32(s - nextEmit)
496 seq.matchLen = uint32(l - zstdMinMatch)
497 if seq.litLen > 0 {
498 blk.literals = append(blk.literals, src[nextEmit:s]...)
499 }
500 seq.offset = uint32(s-t) + 3
501 s += l
502 if debugSequences {
503 println("sequence", seq, "next s:", s)
504 }
505 blk.sequences = append(blk.sequences, seq)
506 nextEmit = s
507 if s >= sLimit {
508 break encodeLoop
509 }
510
511
512 off := index0 + e.cur
513 for index0 < s-1 {
514 cv0 := load6432(src, index0)
515 cv1 := cv0 >> 8
516 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
517 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
518 e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
519 index0 += 2
520 off += 2
521 }
522
523 cv = load6432(src, s)
524 if !canRepeat {
525 continue
526 }
527
528
529 for {
530 o2 := s - offset2
531 if load3232(src, o2) != uint32(cv) {
532
533 break
534 }
535
536
537 nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
538 nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
539
540
541
542 l := 4 + e.matchlen(s+4, o2+4, src)
543
544 e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
545 e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
546 seq.matchLen = uint32(l) - zstdMinMatch
547 seq.litLen = 0
548
549
550 seq.offset = 1
551 s += l
552 nextEmit = s
553 if debugSequences {
554 println("sequence", seq, "next s:", s)
555 }
556 blk.sequences = append(blk.sequences, seq)
557
558
559 offset1, offset2 = offset2, offset1
560 if s >= sLimit {
561
562 break encodeLoop
563 }
564 cv = load6432(src, s)
565 }
566 }
567
568 if int(nextEmit) < len(src) {
569 blk.literals = append(blk.literals, src[nextEmit:]...)
570 blk.extraLits = len(src) - int(nextEmit)
571 }
572 blk.recentOffsets[0] = uint32(offset1)
573 blk.recentOffsets[1] = uint32(offset2)
574 if debugEncoder {
575 println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
576 }
577 }
578
579
580
581
582 func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
583 e.ensureHist(len(src))
584 e.Encode(blk, src)
585 }
586
587
588 func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
589 const (
590
591
592 inputMargin = 8 + 2
593 minNonLiteralBlockSize = 16
594 )
595
596
597 for e.cur >= e.bufferReset-int32(len(e.hist)) {
598 if len(e.hist) == 0 {
599 for i := range e.table[:] {
600 e.table[i] = tableEntry{}
601 }
602 for i := range e.longTable[:] {
603 e.longTable[i] = prevEntry{}
604 }
605 e.cur = e.maxMatchOff
606 e.allDirty = true
607 break
608 }
609
610 minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
611 for i := range e.table[:] {
612 v := e.table[i].offset
613 if v < minOff {
614 v = 0
615 } else {
616 v = v - e.cur + e.maxMatchOff
617 }
618 e.table[i].offset = v
619 }
620 for i := range e.longTable[:] {
621 v := e.longTable[i].offset
622 v2 := e.longTable[i].prev
623 if v < minOff {
624 v = 0
625 v2 = 0
626 } else {
627 v = v - e.cur + e.maxMatchOff
628 if v2 < minOff {
629 v2 = 0
630 } else {
631 v2 = v2 - e.cur + e.maxMatchOff
632 }
633 }
634 e.longTable[i] = prevEntry{
635 offset: v,
636 prev: v2,
637 }
638 }
639 e.allDirty = true
640 e.cur = e.maxMatchOff
641 break
642 }
643
644 s := e.addBlock(src)
645 blk.size = len(src)
646 if len(src) < minNonLiteralBlockSize {
647 blk.extraLits = len(src)
648 blk.literals = blk.literals[:len(src)]
649 copy(blk.literals, src)
650 return
651 }
652
653
654 src = e.hist
655 sLimit := int32(len(src)) - inputMargin
656
657
658 const stepSize = 1
659
660 const kSearchStrength = 9
661
662
663 nextEmit := s
664 cv := load6432(src, s)
665
666
667 offset1 := int32(blk.recentOffsets[0])
668 offset2 := int32(blk.recentOffsets[1])
669
670 addLiterals := func(s *seq, until int32) {
671 if until == nextEmit {
672 return
673 }
674 blk.literals = append(blk.literals, src[nextEmit:until]...)
675 s.litLen = uint32(until - nextEmit)
676 }
677 if debugEncoder {
678 println("recent offsets:", blk.recentOffsets)
679 }
680
681 encodeLoop:
682 for {
683 var t int32
684
685 canRepeat := len(blk.sequences) > 2
686 var matched, index0 int32
687
688 for {
689 if debugAsserts && canRepeat && offset1 == 0 {
690 panic("offset0 was 0")
691 }
692
693 nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
694 nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
695 candidateL := e.longTable[nextHashL]
696 candidateS := e.table[nextHashS]
697
698 const repOff = 1
699 repIndex := s - offset1 + repOff
700 off := s + e.cur
701 e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
702 e.markLongShardDirty(nextHashL)
703 e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
704 e.markShortShardDirty(nextHashS)
705 index0 = s + 1
706
707 if canRepeat {
708 if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
709
710 var seq seq
711 lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
712
713 seq.matchLen = uint32(lenght - zstdMinMatch)
714
715
716
717 start := s + repOff
718
719
720 startLimit := nextEmit + 1
721
722 tMin := s - e.maxMatchOff
723 if tMin < 0 {
724 tMin = 0
725 }
726 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
727 repIndex--
728 start--
729 seq.matchLen++
730 }
731 addLiterals(&seq, start)
732
733
734 seq.offset = 1
735 if debugSequences {
736 println("repeat sequence", seq, "next s:", s)
737 }
738 blk.sequences = append(blk.sequences, seq)
739
740
741 s += lenght + repOff
742
743 nextEmit = s
744 if s >= sLimit {
745 if debugEncoder {
746 println("repeat ended", s, lenght)
747
748 }
749 break encodeLoop
750 }
751
752 for index0 < s-1 {
753 cv0 := load6432(src, index0)
754 cv1 := cv0 >> 8
755 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
756 off := index0 + e.cur
757 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
758 e.markLongShardDirty(h0)
759 h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
760 e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
761 e.markShortShardDirty(h1)
762 index0 += 2
763 }
764 cv = load6432(src, s)
765 continue
766 }
767 const repOff2 = 1
768
769
770
771
772 if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
773
774 var seq seq
775 lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
776
777 seq.matchLen = uint32(lenght - zstdMinMatch)
778
779
780
781 start := s + repOff2
782
783
784 startLimit := nextEmit + 1
785
786 tMin := s - e.maxMatchOff
787 if tMin < 0 {
788 tMin = 0
789 }
790 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
791 repIndex--
792 start--
793 seq.matchLen++
794 }
795 addLiterals(&seq, start)
796
797
798 seq.offset = 2
799 if debugSequences {
800 println("repeat sequence 2", seq, "next s:", s)
801 }
802 blk.sequences = append(blk.sequences, seq)
803
804 s += lenght + repOff2
805 nextEmit = s
806 if s >= sLimit {
807 if debugEncoder {
808 println("repeat ended", s, lenght)
809
810 }
811 break encodeLoop
812 }
813
814
815 for index0 < s-1 {
816 cv0 := load6432(src, index0)
817 cv1 := cv0 >> 8
818 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
819 off := index0 + e.cur
820 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
821 e.markLongShardDirty(h0)
822 h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
823 e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
824 e.markShortShardDirty(h1)
825 index0 += 2
826 }
827 cv = load6432(src, s)
828
829 offset1, offset2 = offset2, offset1
830 continue
831 }
832 }
833
834 coffsetL := candidateL.offset - e.cur
835 coffsetLP := candidateL.prev - e.cur
836
837
838 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
839
840 matched = e.matchlen(s+8, coffsetL+8, src) + 8
841 t = coffsetL
842 if debugAsserts && s <= t {
843 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
844 }
845 if debugAsserts && s-t > e.maxMatchOff {
846 panic("s - t >e.maxMatchOff")
847 }
848 if debugMatches {
849 println("long match")
850 }
851
852 if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
853
854 prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
855 if prevMatch > matched {
856 matched = prevMatch
857 t = coffsetLP
858 }
859 if debugAsserts && s <= t {
860 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
861 }
862 if debugAsserts && s-t > e.maxMatchOff {
863 panic("s - t >e.maxMatchOff")
864 }
865 if debugMatches {
866 println("long match")
867 }
868 }
869 break
870 }
871
872
873 if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
874
875 matched = e.matchlen(s+8, coffsetLP+8, src) + 8
876 t = coffsetLP
877 if debugAsserts && s <= t {
878 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
879 }
880 if debugAsserts && s-t > e.maxMatchOff {
881 panic("s - t >e.maxMatchOff")
882 }
883 if debugMatches {
884 println("long match")
885 }
886 break
887 }
888
889 coffsetS := candidateS.offset - e.cur
890
891
892 if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
893
894 matched = e.matchlen(s+4, coffsetS+4, src) + 4
895
896
897 const checkAt = 1
898 cv := load6432(src, s+checkAt)
899 nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
900 candidateL = e.longTable[nextHashL]
901 coffsetL = candidateL.offset - e.cur
902
903
904 e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
905 e.markLongShardDirty(nextHashL)
906 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
907
908 matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
909 if matchedNext > matched {
910 t = coffsetL
911 s += checkAt
912 matched = matchedNext
913 if debugMatches {
914 println("long match (after short)")
915 }
916 break
917 }
918 }
919
920
921 coffsetL = candidateL.prev - e.cur
922 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
923
924 matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
925 if matchedNext > matched {
926 t = coffsetL
927 s += checkAt
928 matched = matchedNext
929 if debugMatches {
930 println("prev long match (after short)")
931 }
932 break
933 }
934 }
935 t = coffsetS
936 if debugAsserts && s <= t {
937 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
938 }
939 if debugAsserts && s-t > e.maxMatchOff {
940 panic("s - t >e.maxMatchOff")
941 }
942 if debugAsserts && t < 0 {
943 panic("t<0")
944 }
945 if debugMatches {
946 println("short match")
947 }
948 break
949 }
950
951
952 s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
953 if s >= sLimit {
954 break encodeLoop
955 }
956 cv = load6432(src, s)
957 }
958
959 if s+matched < sLimit {
960 nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
961 cv := load3232(src, s)
962 candidateL := e.longTable[nextHashL]
963 coffsetL := candidateL.offset - e.cur - matched
964 if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
965
966 matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
967 if matchedNext > matched {
968 t = coffsetL
969 matched = matchedNext
970 if debugMatches {
971 println("long match at end-of-match")
972 }
973 }
974 }
975
976
977 if true {
978 coffsetL = candidateL.prev - e.cur - matched
979 if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
980
981 matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
982 if matchedNext > matched {
983 t = coffsetL
984 matched = matchedNext
985 if debugMatches {
986 println("prev long match at end-of-match")
987 }
988 }
989 }
990 }
991 }
992
993 offset2 = offset1
994 offset1 = s - t
995
996 if debugAsserts && s <= t {
997 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
998 }
999
1000 if debugAsserts && canRepeat && int(offset1) > len(src) {
1001 panic("invalid offset")
1002 }
1003
1004
1005 l := matched
1006
1007
1008 tMin := s - e.maxMatchOff
1009 if tMin < 0 {
1010 tMin = 0
1011 }
1012 for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
1013 s--
1014 t--
1015 l++
1016 }
1017
1018
1019 var seq seq
1020 seq.litLen = uint32(s - nextEmit)
1021 seq.matchLen = uint32(l - zstdMinMatch)
1022 if seq.litLen > 0 {
1023 blk.literals = append(blk.literals, src[nextEmit:s]...)
1024 }
1025 seq.offset = uint32(s-t) + 3
1026 s += l
1027 if debugSequences {
1028 println("sequence", seq, "next s:", s)
1029 }
1030 blk.sequences = append(blk.sequences, seq)
1031 nextEmit = s
1032 if s >= sLimit {
1033 break encodeLoop
1034 }
1035
1036
1037 off := index0 + e.cur
1038 for index0 < s-1 {
1039 cv0 := load6432(src, index0)
1040 cv1 := cv0 >> 8
1041 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
1042 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
1043 e.markLongShardDirty(h0)
1044 h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
1045 e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
1046 e.markShortShardDirty(h1)
1047 index0 += 2
1048 off += 2
1049 }
1050
1051 cv = load6432(src, s)
1052 if !canRepeat {
1053 continue
1054 }
1055
1056
1057 for {
1058 o2 := s - offset2
1059 if load3232(src, o2) != uint32(cv) {
1060
1061 break
1062 }
1063
1064
1065 nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
1066 nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
1067
1068
1069
1070 l := 4 + e.matchlen(s+4, o2+4, src)
1071
1072 e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
1073 e.markLongShardDirty(nextHashL)
1074 e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
1075 e.markShortShardDirty(nextHashS)
1076 seq.matchLen = uint32(l) - zstdMinMatch
1077 seq.litLen = 0
1078
1079
1080 seq.offset = 1
1081 s += l
1082 nextEmit = s
1083 if debugSequences {
1084 println("sequence", seq, "next s:", s)
1085 }
1086 blk.sequences = append(blk.sequences, seq)
1087
1088
1089 offset1, offset2 = offset2, offset1
1090 if s >= sLimit {
1091
1092 break encodeLoop
1093 }
1094 cv = load6432(src, s)
1095 }
1096 }
1097
1098 if int(nextEmit) < len(src) {
1099 blk.literals = append(blk.literals, src[nextEmit:]...)
1100 blk.extraLits = len(src) - int(nextEmit)
1101 }
1102 blk.recentOffsets[0] = uint32(offset1)
1103 blk.recentOffsets[1] = uint32(offset2)
1104 if debugEncoder {
1105 println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
1106 }
1107 }
1108
1109
1110 func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
1111 e.resetBase(d, singleBlock)
1112 if d != nil {
1113 panic("betterFastEncoder: Reset with dict")
1114 }
1115 }
1116
1117
1118 func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
1119 e.resetBase(d, singleBlock)
1120 if d == nil {
1121 return
1122 }
1123
1124 if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
1125 if len(e.dictTable) != len(e.table) {
1126 e.dictTable = make([]tableEntry, len(e.table))
1127 }
1128 end := int32(len(d.content)) - 8 + e.maxMatchOff
1129 for i := e.maxMatchOff; i < end; i += 4 {
1130 const hashLog = betterShortTableBits
1131
1132 cv := load6432(d.content, i-e.maxMatchOff)
1133 nextHash := hashLen(cv, hashLog, betterShortLen)
1134 nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)
1135 nextHash2 := hashLen(cv>>16, hashLog, betterShortLen)
1136 nextHash3 := hashLen(cv>>24, hashLog, betterShortLen)
1137 e.dictTable[nextHash] = tableEntry{
1138 val: uint32(cv),
1139 offset: i,
1140 }
1141 e.dictTable[nextHash1] = tableEntry{
1142 val: uint32(cv >> 8),
1143 offset: i + 1,
1144 }
1145 e.dictTable[nextHash2] = tableEntry{
1146 val: uint32(cv >> 16),
1147 offset: i + 2,
1148 }
1149 e.dictTable[nextHash3] = tableEntry{
1150 val: uint32(cv >> 24),
1151 offset: i + 3,
1152 }
1153 }
1154 e.lastDictID = d.id
1155 e.allDirty = true
1156 }
1157
1158
1159 if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
1160 if len(e.dictLongTable) != len(e.longTable) {
1161 e.dictLongTable = make([]prevEntry, len(e.longTable))
1162 }
1163 if len(d.content) >= 8 {
1164 cv := load6432(d.content, 0)
1165 h := hashLen(cv, betterLongTableBits, betterLongLen)
1166 e.dictLongTable[h] = prevEntry{
1167 offset: e.maxMatchOff,
1168 prev: e.dictLongTable[h].offset,
1169 }
1170
1171 end := int32(len(d.content)) - 8 + e.maxMatchOff
1172 off := 8
1173 for i := e.maxMatchOff + 1; i < end; i++ {
1174 cv = cv>>8 | (uint64(d.content[off]) << 56)
1175 h := hashLen(cv, betterLongTableBits, betterLongLen)
1176 e.dictLongTable[h] = prevEntry{
1177 offset: i,
1178 prev: e.dictLongTable[h].offset,
1179 }
1180 off++
1181 }
1182 }
1183 e.lastDictID = d.id
1184 e.allDirty = true
1185 }
1186
1187
1188 {
1189 dirtyShardCnt := 0
1190 if !e.allDirty {
1191 for i := range e.shortTableShardDirty {
1192 if e.shortTableShardDirty[i] {
1193 dirtyShardCnt++
1194 }
1195 }
1196 }
1197 const shardCnt = betterShortTableShardCnt
1198 const shardSize = betterShortTableShardSize
1199 if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
1200 copy(e.table[:], e.dictTable)
1201 for i := range e.shortTableShardDirty {
1202 e.shortTableShardDirty[i] = false
1203 }
1204 } else {
1205 for i := range e.shortTableShardDirty {
1206 if !e.shortTableShardDirty[i] {
1207 continue
1208 }
1209
1210 copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
1211 e.shortTableShardDirty[i] = false
1212 }
1213 }
1214 }
1215 {
1216 dirtyShardCnt := 0
1217 if !e.allDirty {
1218 for i := range e.shortTableShardDirty {
1219 if e.shortTableShardDirty[i] {
1220 dirtyShardCnt++
1221 }
1222 }
1223 }
1224 const shardCnt = betterLongTableShardCnt
1225 const shardSize = betterLongTableShardSize
1226 if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
1227 copy(e.longTable[:], e.dictLongTable)
1228 for i := range e.longTableShardDirty {
1229 e.longTableShardDirty[i] = false
1230 }
1231 } else {
1232 for i := range e.longTableShardDirty {
1233 if !e.longTableShardDirty[i] {
1234 continue
1235 }
1236
1237 copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
1238 e.longTableShardDirty[i] = false
1239 }
1240 }
1241 }
1242 e.cur = e.maxMatchOff
1243 e.allDirty = false
1244 }
1245
1246 func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
1247 e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
1248 }
1249
1250 func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
1251 e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
1252 }
1253
View as plain text