1
2
3
4
5
6
7
8 package flate
9
10 import (
11 "bufio"
12 "compress/flate"
13 "fmt"
14 "io"
15 "math/bits"
16 "sync"
17 )
18
19 const (
20 maxCodeLen = 16
21 maxCodeLenMask = 15
22
23
24
25 maxNumLit = 286
26 maxNumDist = 30
27 numCodes = 19
28
29 debugDecode = false
30 )
31
32
33 type lengthExtra struct {
34 length, extra uint8
35 }
36
37 var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
38
39 var bitMask32 = [32]uint32{
40 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
41 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
42 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
43 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
44 }
45
46
47 var fixedOnce sync.Once
48 var fixedHuffmanDecoder huffmanDecoder
49
50
51 type CorruptInputError = flate.CorruptInputError
52
53
54 type InternalError string
55
56 func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
57
58
59
60
61 type ReadError = flate.ReadError
62
63
64
65
66 type WriteError = flate.WriteError
67
68
69
70
71 type Resetter interface {
72
73
74 Reset(r io.Reader, dict []byte) error
75 }
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97 const (
98 huffmanChunkBits = 9
99 huffmanNumChunks = 1 << huffmanChunkBits
100 huffmanCountMask = 15
101 huffmanValueShift = 4
102 )
103
104 type huffmanDecoder struct {
105 maxRead int
106 chunks *[huffmanNumChunks]uint16
107 links [][]uint16
108 linkMask uint32
109 }
110
111
112
113
114
115
116 func (h *huffmanDecoder) init(lengths []int) bool {
117
118
119
120 const sanity = false
121
122 if h.chunks == nil {
123 h.chunks = new([huffmanNumChunks]uint16)
124 }
125
126 if h.maxRead != 0 {
127 *h = huffmanDecoder{chunks: h.chunks, links: h.links}
128 }
129
130
131
132 var count [maxCodeLen]int
133 var min, max int
134 for _, n := range lengths {
135 if n == 0 {
136 continue
137 }
138 if min == 0 || n < min {
139 min = n
140 }
141 if n > max {
142 max = n
143 }
144 count[n&maxCodeLenMask]++
145 }
146
147
148
149
150
151
152
153
154 if max == 0 {
155 return true
156 }
157
158 code := 0
159 var nextcode [maxCodeLen]int
160 for i := min; i <= max; i++ {
161 code <<= 1
162 nextcode[i&maxCodeLenMask] = code
163 code += count[i&maxCodeLenMask]
164 }
165
166
167
168
169
170
171 if code != 1<<uint(max) && !(code == 1 && max == 1) {
172 if debugDecode {
173 fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
174 }
175 return false
176 }
177
178 h.maxRead = min
179
180 chunks := h.chunks[:]
181 for i := range chunks {
182 chunks[i] = 0
183 }
184
185 if max > huffmanChunkBits {
186 numLinks := 1 << (uint(max) - huffmanChunkBits)
187 h.linkMask = uint32(numLinks - 1)
188
189
190 link := nextcode[huffmanChunkBits+1] >> 1
191 if cap(h.links) < huffmanNumChunks-link {
192 h.links = make([][]uint16, huffmanNumChunks-link)
193 } else {
194 h.links = h.links[:huffmanNumChunks-link]
195 }
196 for j := uint(link); j < huffmanNumChunks; j++ {
197 reverse := int(bits.Reverse16(uint16(j)))
198 reverse >>= uint(16 - huffmanChunkBits)
199 off := j - uint(link)
200 if sanity && h.chunks[reverse] != 0 {
201 panic("impossible: overwriting existing chunk")
202 }
203 h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
204 if cap(h.links[off]) < numLinks {
205 h.links[off] = make([]uint16, numLinks)
206 } else {
207 h.links[off] = h.links[off][:numLinks]
208 }
209 }
210 } else {
211 h.links = h.links[:0]
212 }
213
214 for i, n := range lengths {
215 if n == 0 {
216 continue
217 }
218 code := nextcode[n]
219 nextcode[n]++
220 chunk := uint16(i<<huffmanValueShift | n)
221 reverse := int(bits.Reverse16(uint16(code)))
222 reverse >>= uint(16 - n)
223 if n <= huffmanChunkBits {
224 for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
225
226
227
228
229
230 if sanity && h.chunks[off] != 0 {
231 panic("impossible: overwriting existing chunk")
232 }
233 h.chunks[off] = chunk
234 }
235 } else {
236 j := reverse & (huffmanNumChunks - 1)
237 if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
238
239
240 panic("impossible: not an indirect chunk")
241 }
242 value := h.chunks[j] >> huffmanValueShift
243 linktab := h.links[value]
244 reverse >>= huffmanChunkBits
245 for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
246 if sanity && linktab[off] != 0 {
247 panic("impossible: overwriting existing chunk")
248 }
249 linktab[off] = chunk
250 }
251 }
252 }
253
254 if sanity {
255
256
257
258 for i, chunk := range h.chunks {
259 if chunk == 0 {
260
261
262
263 if code == 1 && i%2 == 1 {
264 continue
265 }
266 panic("impossible: missing chunk")
267 }
268 }
269 for _, linktab := range h.links {
270 for _, chunk := range linktab {
271 if chunk == 0 {
272 panic("impossible: missing chunk")
273 }
274 }
275 }
276 }
277
278 return true
279 }
280
281
282
283
284 type Reader interface {
285 io.Reader
286 io.ByteReader
287 }
288
289 type step uint8
290
291 const (
292 copyData step = iota + 1
293 nextBlock
294 huffmanBytesBuffer
295 huffmanBytesReader
296 huffmanBufioReader
297 huffmanStringsReader
298 huffmanGenericReader
299 )
300
301
302 type decompressor struct {
303
304 r Reader
305 roffset int64
306
307
308 h1, h2 huffmanDecoder
309
310
311 bits *[maxNumLit + maxNumDist]int
312 codebits *[numCodes]int
313
314
315 dict dictDecoder
316
317
318
319 step step
320 stepState int
321 err error
322 toRead []byte
323 hl, hd *huffmanDecoder
324 copyLen int
325 copyDist int
326
327
328 buf [4]byte
329
330
331 b uint32
332
333 nb uint
334 final bool
335 }
336
337 func (f *decompressor) nextBlock() {
338 for f.nb < 1+2 {
339 if f.err = f.moreBits(); f.err != nil {
340 return
341 }
342 }
343 f.final = f.b&1 == 1
344 f.b >>= 1
345 typ := f.b & 3
346 f.b >>= 2
347 f.nb -= 1 + 2
348 switch typ {
349 case 0:
350 f.dataBlock()
351 if debugDecode {
352 fmt.Println("stored block")
353 }
354 case 1:
355
356 f.hl = &fixedHuffmanDecoder
357 f.hd = nil
358 f.huffmanBlockDecoder()
359 if debugDecode {
360 fmt.Println("predefinied huffman block")
361 }
362 case 2:
363
364 if f.err = f.readHuffman(); f.err != nil {
365 break
366 }
367 f.hl = &f.h1
368 f.hd = &f.h2
369 f.huffmanBlockDecoder()
370 if debugDecode {
371 fmt.Println("dynamic huffman block")
372 }
373 default:
374
375 if debugDecode {
376 fmt.Println("reserved data block encountered")
377 }
378 f.err = CorruptInputError(f.roffset)
379 }
380 }
381
382 func (f *decompressor) Read(b []byte) (int, error) {
383 for {
384 if len(f.toRead) > 0 {
385 n := copy(b, f.toRead)
386 f.toRead = f.toRead[n:]
387 if len(f.toRead) == 0 {
388 return n, f.err
389 }
390 return n, nil
391 }
392 if f.err != nil {
393 return 0, f.err
394 }
395
396 f.doStep()
397
398 if f.err != nil && len(f.toRead) == 0 {
399 f.toRead = f.dict.readFlush()
400 }
401 }
402 }
403
404
405 func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
406 total := int64(0)
407 flushed := false
408 for {
409 if len(f.toRead) > 0 {
410 n, err := w.Write(f.toRead)
411 total += int64(n)
412 if err != nil {
413 f.err = err
414 return total, err
415 }
416 if n != len(f.toRead) {
417 return total, io.ErrShortWrite
418 }
419 f.toRead = f.toRead[:0]
420 }
421 if f.err != nil && flushed {
422 if f.err == io.EOF {
423 return total, nil
424 }
425 return total, f.err
426 }
427 if f.err == nil {
428 f.doStep()
429 }
430 if len(f.toRead) == 0 && f.err != nil && !flushed {
431 f.toRead = f.dict.readFlush()
432 flushed = true
433 }
434 }
435 }
436
437 func (f *decompressor) Close() error {
438 if f.err == io.EOF {
439 return nil
440 }
441 return f.err
442 }
443
444
445
446
447 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
448
449 func (f *decompressor) readHuffman() error {
450
451 for f.nb < 5+5+4 {
452 if err := f.moreBits(); err != nil {
453 return err
454 }
455 }
456 nlit := int(f.b&0x1F) + 257
457 if nlit > maxNumLit {
458 if debugDecode {
459 fmt.Println("nlit > maxNumLit", nlit)
460 }
461 return CorruptInputError(f.roffset)
462 }
463 f.b >>= 5
464 ndist := int(f.b&0x1F) + 1
465 if ndist > maxNumDist {
466 if debugDecode {
467 fmt.Println("ndist > maxNumDist", ndist)
468 }
469 return CorruptInputError(f.roffset)
470 }
471 f.b >>= 5
472 nclen := int(f.b&0xF) + 4
473
474 f.b >>= 4
475 f.nb -= 5 + 5 + 4
476
477
478 for i := 0; i < nclen; i++ {
479 for f.nb < 3 {
480 if err := f.moreBits(); err != nil {
481 return err
482 }
483 }
484 f.codebits[codeOrder[i]] = int(f.b & 0x7)
485 f.b >>= 3
486 f.nb -= 3
487 }
488 for i := nclen; i < len(codeOrder); i++ {
489 f.codebits[codeOrder[i]] = 0
490 }
491 if !f.h1.init(f.codebits[0:]) {
492 if debugDecode {
493 fmt.Println("init codebits failed")
494 }
495 return CorruptInputError(f.roffset)
496 }
497
498
499
500 for i, n := 0, nlit+ndist; i < n; {
501 x, err := f.huffSym(&f.h1)
502 if err != nil {
503 return err
504 }
505 if x < 16 {
506
507 f.bits[i] = x
508 i++
509 continue
510 }
511
512 var rep int
513 var nb uint
514 var b int
515 switch x {
516 default:
517 return InternalError("unexpected length code")
518 case 16:
519 rep = 3
520 nb = 2
521 if i == 0 {
522 if debugDecode {
523 fmt.Println("i==0")
524 }
525 return CorruptInputError(f.roffset)
526 }
527 b = f.bits[i-1]
528 case 17:
529 rep = 3
530 nb = 3
531 b = 0
532 case 18:
533 rep = 11
534 nb = 7
535 b = 0
536 }
537 for f.nb < nb {
538 if err := f.moreBits(); err != nil {
539 if debugDecode {
540 fmt.Println("morebits:", err)
541 }
542 return err
543 }
544 }
545 rep += int(f.b & uint32(1<<(nb®SizeMaskUint32)-1))
546 f.b >>= nb & regSizeMaskUint32
547 f.nb -= nb
548 if i+rep > n {
549 if debugDecode {
550 fmt.Println("i+rep > n", i, rep, n)
551 }
552 return CorruptInputError(f.roffset)
553 }
554 for j := 0; j < rep; j++ {
555 f.bits[i] = b
556 i++
557 }
558 }
559
560 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
561 if debugDecode {
562 fmt.Println("init2 failed")
563 }
564 return CorruptInputError(f.roffset)
565 }
566
567
568
569
570
571 if f.h1.maxRead < f.bits[endBlockMarker] {
572 f.h1.maxRead = f.bits[endBlockMarker]
573 }
574 if !f.final {
575
576
577
578 f.h1.maxRead += 10
579 }
580
581 return nil
582 }
583
584
585 func (f *decompressor) dataBlock() {
586
587
588 left := (f.nb) & 7
589 f.nb -= left
590 f.b >>= left
591
592 offBytes := f.nb >> 3
593
594 f.buf[0] = uint8(f.b)
595 f.buf[1] = uint8(f.b >> 8)
596 f.buf[2] = uint8(f.b >> 16)
597 f.buf[3] = uint8(f.b >> 24)
598
599 f.roffset += int64(offBytes)
600 f.nb, f.b = 0, 0
601
602
603 nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
604 f.roffset += int64(nr)
605 if err != nil {
606 f.err = noEOF(err)
607 return
608 }
609 n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
610 nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
611 if nn != ^n {
612 if debugDecode {
613 ncomp := ^n
614 fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
615 }
616 f.err = CorruptInputError(f.roffset)
617 return
618 }
619
620 if n == 0 {
621 f.toRead = f.dict.readFlush()
622 f.finishBlock()
623 return
624 }
625
626 f.copyLen = int(n)
627 f.copyData()
628 }
629
630
631
632 func (f *decompressor) copyData() {
633 buf := f.dict.writeSlice()
634 if len(buf) > f.copyLen {
635 buf = buf[:f.copyLen]
636 }
637
638 cnt, err := io.ReadFull(f.r, buf)
639 f.roffset += int64(cnt)
640 f.copyLen -= cnt
641 f.dict.writeMark(cnt)
642 if err != nil {
643 f.err = noEOF(err)
644 return
645 }
646
647 if f.dict.availWrite() == 0 || f.copyLen > 0 {
648 f.toRead = f.dict.readFlush()
649 f.step = copyData
650 return
651 }
652 f.finishBlock()
653 }
654
655 func (f *decompressor) finishBlock() {
656 if f.final {
657 if f.dict.availRead() > 0 {
658 f.toRead = f.dict.readFlush()
659 }
660 f.err = io.EOF
661 }
662 f.step = nextBlock
663 }
664
665 func (f *decompressor) doStep() {
666 switch f.step {
667 case copyData:
668 f.copyData()
669 case nextBlock:
670 f.nextBlock()
671 case huffmanBytesBuffer:
672 f.huffmanBytesBuffer()
673 case huffmanBytesReader:
674 f.huffmanBytesReader()
675 case huffmanBufioReader:
676 f.huffmanBufioReader()
677 case huffmanStringsReader:
678 f.huffmanStringsReader()
679 case huffmanGenericReader:
680 f.huffmanGenericReader()
681 default:
682 panic("BUG: unexpected step state")
683 }
684 }
685
686
687 func noEOF(e error) error {
688 if e == io.EOF {
689 return io.ErrUnexpectedEOF
690 }
691 return e
692 }
693
694 func (f *decompressor) moreBits() error {
695 c, err := f.r.ReadByte()
696 if err != nil {
697 return noEOF(err)
698 }
699 f.roffset++
700 f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
701 f.nb += 8
702 return nil
703 }
704
705
706 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
707
708
709
710
711 n := uint(h.maxRead)
712
713
714
715 nb, b := f.nb, f.b
716 for {
717 for nb < n {
718 c, err := f.r.ReadByte()
719 if err != nil {
720 f.b = b
721 f.nb = nb
722 return 0, noEOF(err)
723 }
724 f.roffset++
725 b |= uint32(c) << (nb & regSizeMaskUint32)
726 nb += 8
727 }
728 chunk := h.chunks[b&(huffmanNumChunks-1)]
729 n = uint(chunk & huffmanCountMask)
730 if n > huffmanChunkBits {
731 chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
732 n = uint(chunk & huffmanCountMask)
733 }
734 if n <= nb {
735 if n == 0 {
736 f.b = b
737 f.nb = nb
738 if debugDecode {
739 fmt.Println("huffsym: n==0")
740 }
741 f.err = CorruptInputError(f.roffset)
742 return 0, f.err
743 }
744 f.b = b >> (n & regSizeMaskUint32)
745 f.nb = nb - n
746 return int(chunk >> huffmanValueShift), nil
747 }
748 }
749 }
750
751 func makeReader(r io.Reader) Reader {
752 if rr, ok := r.(Reader); ok {
753 return rr
754 }
755 return bufio.NewReader(r)
756 }
757
758 func fixedHuffmanDecoderInit() {
759 fixedOnce.Do(func() {
760
761 var bits [288]int
762 for i := 0; i < 144; i++ {
763 bits[i] = 8
764 }
765 for i := 144; i < 256; i++ {
766 bits[i] = 9
767 }
768 for i := 256; i < 280; i++ {
769 bits[i] = 7
770 }
771 for i := 280; i < 288; i++ {
772 bits[i] = 8
773 }
774 fixedHuffmanDecoder.init(bits[:])
775 })
776 }
777
778 func (f *decompressor) Reset(r io.Reader, dict []byte) error {
779 *f = decompressor{
780 r: makeReader(r),
781 bits: f.bits,
782 codebits: f.codebits,
783 h1: f.h1,
784 h2: f.h2,
785 dict: f.dict,
786 step: nextBlock,
787 }
788 f.dict.init(maxMatchOffset, dict)
789 return nil
790 }
791
792
793
794
795
796
797
798
799
800 func NewReader(r io.Reader) io.ReadCloser {
801 fixedHuffmanDecoderInit()
802
803 var f decompressor
804 f.r = makeReader(r)
805 f.bits = new([maxNumLit + maxNumDist]int)
806 f.codebits = new([numCodes]int)
807 f.step = nextBlock
808 f.dict.init(maxMatchOffset, nil)
809 return &f
810 }
811
812
813
814
815
816
817
818
819 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
820 fixedHuffmanDecoderInit()
821
822 var f decompressor
823 f.r = makeReader(r)
824 f.bits = new([maxNumLit + maxNumDist]int)
825 f.codebits = new([numCodes]int)
826 f.step = nextBlock
827 f.dict.init(maxMatchOffset, dict)
828 return &f
829 }
830
View as plain text