inflate.go

Documentation: github.com/klauspost/compress/flate

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package flate implements the DEFLATE compressed data format, described in
     6  // RFC 1951.  The gzip and zlib packages implement access to DEFLATE-based file
     7  // formats.
     8  package flate
     9  
    10  import (
    11  	"bufio"
    12  	"compress/flate"
    13  	"fmt"
    14  	"io"
    15  	"math/bits"
    16  	"sync"
    17  )
    18  
    19  const (
    20  	maxCodeLen     = 16 // max length of Huffman code
    21  	maxCodeLenMask = 15 // mask for max length of Huffman code
    22  	// The next three numbers come from the RFC section 3.2.7, with the
    23  	// additional proviso in section 3.2.5 which implies that distance codes
    24  	// 30 and 31 should never occur in compressed data.
    25  	maxNumLit  = 286
    26  	maxNumDist = 30
    27  	numCodes   = 19 // number of codes in Huffman meta-code
    28  
    29  	debugDecode = false
    30  )
    31  
    32  // Value of length - 3 and extra bits.
    33  type lengthExtra struct {
    34  	length, extra uint8
    35  }
    36  
    37  var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
    38  
    39  var bitMask32 = [32]uint32{
    40  	0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
    41  	0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
    42  	0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
    43  	0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
    44  } // up to 32 bits
    45  
    46  // Initialize the fixedHuffmanDecoder only once upon first use.
    47  var fixedOnce sync.Once
    48  var fixedHuffmanDecoder huffmanDecoder
    49  
    50  // A CorruptInputError reports the presence of corrupt input at a given offset.
    51  type CorruptInputError = flate.CorruptInputError
    52  
    53  // An InternalError reports an error in the flate code itself.
    54  type InternalError string
    55  
    56  func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
    57  
    58  // A ReadError reports an error encountered while reading input.
    59  //
    60  // Deprecated: No longer returned.
    61  type ReadError = flate.ReadError
    62  
    63  // A WriteError reports an error encountered while writing output.
    64  //
    65  // Deprecated: No longer returned.
    66  type WriteError = flate.WriteError
    67  
    68  // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
    69  // to switch to a new underlying Reader. This permits reusing a ReadCloser
    70  // instead of allocating a new one.
    71  type Resetter interface {
    72  	// Reset discards any buffered data and resets the Resetter as if it was
    73  	// newly initialized with the given reader.
    74  	Reset(r io.Reader, dict []byte) error
    75  }
    76  
    77  // The data structure for decoding Huffman tables is based on that of
    78  // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
    79  // For codes smaller than the table width, there are multiple entries
    80  // (each combination of trailing bits has the same value). For codes
    81  // larger than the table width, the table contains a link to an overflow
    82  // table. The width of each entry in the link table is the maximum code
    83  // size minus the chunk width.
    84  //
    85  // Note that you can do a lookup in the table even without all bits
    86  // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
    87  // have the property that shorter codes come before longer ones, the
    88  // bit length estimate in the result is a lower bound on the actual
    89  // number of bits.
    90  //
    91  // See the following:
    92  //	http://www.gzip.org/algorithm.txt
    93  
    94  // chunk & 15 is number of bits
    95  // chunk >> 4 is value, including table link
    96  
    97  const (
    98  	huffmanChunkBits  = 9
    99  	huffmanNumChunks  = 1 << huffmanChunkBits
   100  	huffmanCountMask  = 15
   101  	huffmanValueShift = 4
   102  )
   103  
   104  type huffmanDecoder struct {
   105  	maxRead  int                       // the maximum number of bits we can read and not overread
   106  	chunks   *[huffmanNumChunks]uint16 // chunks as described above
   107  	links    [][]uint16                // overflow links
   108  	linkMask uint32                    // mask the width of the link table
   109  }
   110  
   111  // Initialize Huffman decoding tables from array of code lengths.
   112  // Following this function, h is guaranteed to be initialized into a complete
   113  // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
   114  // degenerate case where the tree has only a single symbol with length 1. Empty
   115  // trees are permitted.
   116  func (h *huffmanDecoder) init(lengths []int) bool {
   117  	// Sanity enables additional runtime tests during Huffman
   118  	// table construction. It's intended to be used during
   119  	// development to supplement the currently ad-hoc unit tests.
   120  	const sanity = false
   121  
   122  	if h.chunks == nil {
   123  		h.chunks = new([huffmanNumChunks]uint16)
   124  	}
   125  
   126  	if h.maxRead != 0 {
   127  		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
   128  	}
   129  
   130  	// Count number of codes of each length,
   131  	// compute maxRead and max length.
   132  	var count [maxCodeLen]int
   133  	var min, max int
   134  	for _, n := range lengths {
   135  		if n == 0 {
   136  			continue
   137  		}
   138  		if min == 0 || n < min {
   139  			min = n
   140  		}
   141  		if n > max {
   142  			max = n
   143  		}
   144  		count[n&maxCodeLenMask]++
   145  	}
   146  
   147  	// Empty tree. The decompressor.huffSym function will fail later if the tree
   148  	// is used. Technically, an empty tree is only valid for the HDIST tree and
   149  	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
   150  	// is guaranteed to fail since it will attempt to use the tree to decode the
   151  	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
   152  	// guaranteed to fail later since the compressed data section must be
   153  	// composed of at least one symbol (the end-of-block marker).
   154  	if max == 0 {
   155  		return true
   156  	}
   157  
   158  	code := 0
   159  	var nextcode [maxCodeLen]int
   160  	for i := min; i <= max; i++ {
   161  		code <<= 1
   162  		nextcode[i&maxCodeLenMask] = code
   163  		code += count[i&maxCodeLenMask]
   164  	}
   165  
   166  	// Check that the coding is complete (i.e., that we've
   167  	// assigned all 2-to-the-max possible bit sequences).
   168  	// Exception: To be compatible with zlib, we also need to
   169  	// accept degenerate single-code codings. See also
   170  	// TestDegenerateHuffmanCoding.
   171  	if code != 1<<uint(max) && !(code == 1 && max == 1) {
   172  		if debugDecode {
   173  			fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
   174  		}
   175  		return false
   176  	}
   177  
   178  	h.maxRead = min
   179  
   180  	chunks := h.chunks[:]
   181  	for i := range chunks {
   182  		chunks[i] = 0
   183  	}
   184  
   185  	if max > huffmanChunkBits {
   186  		numLinks := 1 << (uint(max) - huffmanChunkBits)
   187  		h.linkMask = uint32(numLinks - 1)
   188  
   189  		// create link tables
   190  		link := nextcode[huffmanChunkBits+1] >> 1
   191  		if cap(h.links) < huffmanNumChunks-link {
   192  			h.links = make([][]uint16, huffmanNumChunks-link)
   193  		} else {
   194  			h.links = h.links[:huffmanNumChunks-link]
   195  		}
   196  		for j := uint(link); j < huffmanNumChunks; j++ {
   197  			reverse := int(bits.Reverse16(uint16(j)))
   198  			reverse >>= uint(16 - huffmanChunkBits)
   199  			off := j - uint(link)
   200  			if sanity && h.chunks[reverse] != 0 {
   201  				panic("impossible: overwriting existing chunk")
   202  			}
   203  			h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
   204  			if cap(h.links[off]) < numLinks {
   205  				h.links[off] = make([]uint16, numLinks)
   206  			} else {
   207  				h.links[off] = h.links[off][:numLinks]
   208  			}
   209  		}
   210  	} else {
   211  		h.links = h.links[:0]
   212  	}
   213  
   214  	for i, n := range lengths {
   215  		if n == 0 {
   216  			continue
   217  		}
   218  		code := nextcode[n]
   219  		nextcode[n]++
   220  		chunk := uint16(i<<huffmanValueShift | n)
   221  		reverse := int(bits.Reverse16(uint16(code)))
   222  		reverse >>= uint(16 - n)
   223  		if n <= huffmanChunkBits {
   224  			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
   225  				// We should never need to overwrite
   226  				// an existing chunk. Also, 0 is
   227  				// never a valid chunk, because the
   228  				// lower 4 "count" bits should be
   229  				// between 1 and 15.
   230  				if sanity && h.chunks[off] != 0 {
   231  					panic("impossible: overwriting existing chunk")
   232  				}
   233  				h.chunks[off] = chunk
   234  			}
   235  		} else {
   236  			j := reverse & (huffmanNumChunks - 1)
   237  			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
   238  				// Longer codes should have been
   239  				// associated with a link table above.
   240  				panic("impossible: not an indirect chunk")
   241  			}
   242  			value := h.chunks[j] >> huffmanValueShift
   243  			linktab := h.links[value]
   244  			reverse >>= huffmanChunkBits
   245  			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
   246  				if sanity && linktab[off] != 0 {
   247  					panic("impossible: overwriting existing chunk")
   248  				}
   249  				linktab[off] = chunk
   250  			}
   251  		}
   252  	}
   253  
   254  	if sanity {
   255  		// Above we've sanity checked that we never overwrote
   256  		// an existing entry. Here we additionally check that
   257  		// we filled the tables completely.
   258  		for i, chunk := range h.chunks {
   259  			if chunk == 0 {
   260  				// As an exception, in the degenerate
   261  				// single-code case, we allow odd
   262  				// chunks to be missing.
   263  				if code == 1 && i%2 == 1 {
   264  					continue
   265  				}
   266  				panic("impossible: missing chunk")
   267  			}
   268  		}
   269  		for _, linktab := range h.links {
   270  			for _, chunk := range linktab {
   271  				if chunk == 0 {
   272  					panic("impossible: missing chunk")
   273  				}
   274  			}
   275  		}
   276  	}
   277  
   278  	return true
   279  }
   280  
   281  // Reader is the actual read interface needed by NewReader.
   282  // If the passed in io.Reader does not also have ReadByte,
   283  // the NewReader will introduce its own buffering.
   284  type Reader interface {
   285  	io.Reader
   286  	io.ByteReader
   287  }
   288  
   289  type step uint8
   290  
   291  const (
   292  	copyData step = iota + 1
   293  	nextBlock
   294  	huffmanBytesBuffer
   295  	huffmanBytesReader
   296  	huffmanBufioReader
   297  	huffmanStringsReader
   298  	huffmanGenericReader
   299  )
   300  
   301  // Decompress state.
   302  type decompressor struct {
   303  	// Input source.
   304  	r       Reader
   305  	roffset int64
   306  
   307  	// Huffman decoders for literal/length, distance.
   308  	h1, h2 huffmanDecoder
   309  
   310  	// Length arrays used to define Huffman codes.
   311  	bits     *[maxNumLit + maxNumDist]int
   312  	codebits *[numCodes]int
   313  
   314  	// Output history, buffer.
   315  	dict dictDecoder
   316  
   317  	// Next step in the decompression,
   318  	// and decompression state.
   319  	step      step
   320  	stepState int
   321  	err       error
   322  	toRead    []byte
   323  	hl, hd    *huffmanDecoder
   324  	copyLen   int
   325  	copyDist  int
   326  
   327  	// Temporary buffer (avoids repeated allocation).
   328  	buf [4]byte
   329  
   330  	// Input bits, in top of b.
   331  	b uint32
   332  
   333  	nb    uint
   334  	final bool
   335  }
   336  
   337  func (f *decompressor) nextBlock() {
   338  	for f.nb < 1+2 {
   339  		if f.err = f.moreBits(); f.err != nil {
   340  			return
   341  		}
   342  	}
   343  	f.final = f.b&1 == 1
   344  	f.b >>= 1
   345  	typ := f.b & 3
   346  	f.b >>= 2
   347  	f.nb -= 1 + 2
   348  	switch typ {
   349  	case 0:
   350  		f.dataBlock()
   351  		if debugDecode {
   352  			fmt.Println("stored block")
   353  		}
   354  	case 1:
   355  		// compressed, fixed Huffman tables
   356  		f.hl = &fixedHuffmanDecoder
   357  		f.hd = nil
   358  		f.huffmanBlockDecoder()
   359  		if debugDecode {
   360  			fmt.Println("predefinied huffman block")
   361  		}
   362  	case 2:
   363  		// compressed, dynamic Huffman tables
   364  		if f.err = f.readHuffman(); f.err != nil {
   365  			break
   366  		}
   367  		f.hl = &f.h1
   368  		f.hd = &f.h2
   369  		f.huffmanBlockDecoder()
   370  		if debugDecode {
   371  			fmt.Println("dynamic huffman block")
   372  		}
   373  	default:
   374  		// 3 is reserved.
   375  		if debugDecode {
   376  			fmt.Println("reserved data block encountered")
   377  		}
   378  		f.err = CorruptInputError(f.roffset)
   379  	}
   380  }
   381  
   382  func (f *decompressor) Read(b []byte) (int, error) {
   383  	for {
   384  		if len(f.toRead) > 0 {
   385  			n := copy(b, f.toRead)
   386  			f.toRead = f.toRead[n:]
   387  			if len(f.toRead) == 0 {
   388  				return n, f.err
   389  			}
   390  			return n, nil
   391  		}
   392  		if f.err != nil {
   393  			return 0, f.err
   394  		}
   395  
   396  		f.doStep()
   397  
   398  		if f.err != nil && len(f.toRead) == 0 {
   399  			f.toRead = f.dict.readFlush() // Flush what's left in case of error
   400  		}
   401  	}
   402  }
   403  
   404  // WriteTo implements the io.WriteTo interface for io.Copy and friends.
   405  func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
   406  	total := int64(0)
   407  	flushed := false
   408  	for {
   409  		if len(f.toRead) > 0 {
   410  			n, err := w.Write(f.toRead)
   411  			total += int64(n)
   412  			if err != nil {
   413  				f.err = err
   414  				return total, err
   415  			}
   416  			if n != len(f.toRead) {
   417  				return total, io.ErrShortWrite
   418  			}
   419  			f.toRead = f.toRead[:0]
   420  		}
   421  		if f.err != nil && flushed {
   422  			if f.err == io.EOF {
   423  				return total, nil
   424  			}
   425  			return total, f.err
   426  		}
   427  		if f.err == nil {
   428  			f.doStep()
   429  		}
   430  		if len(f.toRead) == 0 && f.err != nil && !flushed {
   431  			f.toRead = f.dict.readFlush() // Flush what's left in case of error
   432  			flushed = true
   433  		}
   434  	}
   435  }
   436  
   437  func (f *decompressor) Close() error {
   438  	if f.err == io.EOF {
   439  		return nil
   440  	}
   441  	return f.err
   442  }
   443  
   444  // RFC 1951 section 3.2.7.
   445  // Compression with dynamic Huffman codes
   446  
   447  var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
   448  
   449  func (f *decompressor) readHuffman() error {
   450  	// HLIT[5], HDIST[5], HCLEN[4].
   451  	for f.nb < 5+5+4 {
   452  		if err := f.moreBits(); err != nil {
   453  			return err
   454  		}
   455  	}
   456  	nlit := int(f.b&0x1F) + 257
   457  	if nlit > maxNumLit {
   458  		if debugDecode {
   459  			fmt.Println("nlit > maxNumLit", nlit)
   460  		}
   461  		return CorruptInputError(f.roffset)
   462  	}
   463  	f.b >>= 5
   464  	ndist := int(f.b&0x1F) + 1
   465  	if ndist > maxNumDist {
   466  		if debugDecode {
   467  			fmt.Println("ndist > maxNumDist", ndist)
   468  		}
   469  		return CorruptInputError(f.roffset)
   470  	}
   471  	f.b >>= 5
   472  	nclen := int(f.b&0xF) + 4
   473  	// numCodes is 19, so nclen is always valid.
   474  	f.b >>= 4
   475  	f.nb -= 5 + 5 + 4
   476  
   477  	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
   478  	for i := 0; i < nclen; i++ {
   479  		for f.nb < 3 {
   480  			if err := f.moreBits(); err != nil {
   481  				return err
   482  			}
   483  		}
   484  		f.codebits[codeOrder[i]] = int(f.b & 0x7)
   485  		f.b >>= 3
   486  		f.nb -= 3
   487  	}
   488  	for i := nclen; i < len(codeOrder); i++ {
   489  		f.codebits[codeOrder[i]] = 0
   490  	}
   491  	if !f.h1.init(f.codebits[0:]) {
   492  		if debugDecode {
   493  			fmt.Println("init codebits failed")
   494  		}
   495  		return CorruptInputError(f.roffset)
   496  	}
   497  
   498  	// HLIT + 257 code lengths, HDIST + 1 code lengths,
   499  	// using the code length Huffman code.
   500  	for i, n := 0, nlit+ndist; i < n; {
   501  		x, err := f.huffSym(&f.h1)
   502  		if err != nil {
   503  			return err
   504  		}
   505  		if x < 16 {
   506  			// Actual length.
   507  			f.bits[i] = x
   508  			i++
   509  			continue
   510  		}
   511  		// Repeat previous length or zero.
   512  		var rep int
   513  		var nb uint
   514  		var b int
   515  		switch x {
   516  		default:
   517  			return InternalError("unexpected length code")
   518  		case 16:
   519  			rep = 3
   520  			nb = 2
   521  			if i == 0 {
   522  				if debugDecode {
   523  					fmt.Println("i==0")
   524  				}
   525  				return CorruptInputError(f.roffset)
   526  			}
   527  			b = f.bits[i-1]
   528  		case 17:
   529  			rep = 3
   530  			nb = 3
   531  			b = 0
   532  		case 18:
   533  			rep = 11
   534  			nb = 7
   535  			b = 0
   536  		}
   537  		for f.nb < nb {
   538  			if err := f.moreBits(); err != nil {
   539  				if debugDecode {
   540  					fmt.Println("morebits:", err)
   541  				}
   542  				return err
   543  			}
   544  		}
   545  		rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
   546  		f.b >>= nb & regSizeMaskUint32
   547  		f.nb -= nb
   548  		if i+rep > n {
   549  			if debugDecode {
   550  				fmt.Println("i+rep > n", i, rep, n)
   551  			}
   552  			return CorruptInputError(f.roffset)
   553  		}
   554  		for j := 0; j < rep; j++ {
   555  			f.bits[i] = b
   556  			i++
   557  		}
   558  	}
   559  
   560  	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
   561  		if debugDecode {
   562  			fmt.Println("init2 failed")
   563  		}
   564  		return CorruptInputError(f.roffset)
   565  	}
   566  
   567  	// As an optimization, we can initialize the maxRead bits to read at a time
   568  	// for the HLIT tree to the length of the EOB marker since we know that
   569  	// every block must terminate with one. This preserves the property that
   570  	// we never read any extra bytes after the end of the DEFLATE stream.
   571  	if f.h1.maxRead < f.bits[endBlockMarker] {
   572  		f.h1.maxRead = f.bits[endBlockMarker]
   573  	}
   574  	if !f.final {
   575  		// If not the final block, the smallest block possible is
   576  		// a predefined table, BTYPE=01, with a single EOB marker.
   577  		// This will take up 3 + 7 bits.
   578  		f.h1.maxRead += 10
   579  	}
   580  
   581  	return nil
   582  }
   583  
   584  // Copy a single uncompressed data block from input to output.
   585  func (f *decompressor) dataBlock() {
   586  	// Uncompressed.
   587  	// Discard current half-byte.
   588  	left := (f.nb) & 7
   589  	f.nb -= left
   590  	f.b >>= left
   591  
   592  	offBytes := f.nb >> 3
   593  	// Unfilled values will be overwritten.
   594  	f.buf[0] = uint8(f.b)
   595  	f.buf[1] = uint8(f.b >> 8)
   596  	f.buf[2] = uint8(f.b >> 16)
   597  	f.buf[3] = uint8(f.b >> 24)
   598  
   599  	f.roffset += int64(offBytes)
   600  	f.nb, f.b = 0, 0
   601  
   602  	// Length then ones-complement of length.
   603  	nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
   604  	f.roffset += int64(nr)
   605  	if err != nil {
   606  		f.err = noEOF(err)
   607  		return
   608  	}
   609  	n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
   610  	nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
   611  	if nn != ^n {
   612  		if debugDecode {
   613  			ncomp := ^n
   614  			fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
   615  		}
   616  		f.err = CorruptInputError(f.roffset)
   617  		return
   618  	}
   619  
   620  	if n == 0 {
   621  		f.toRead = f.dict.readFlush()
   622  		f.finishBlock()
   623  		return
   624  	}
   625  
   626  	f.copyLen = int(n)
   627  	f.copyData()
   628  }
   629  
   630  // copyData copies f.copyLen bytes from the underlying reader into f.hist.
   631  // It pauses for reads when f.hist is full.
   632  func (f *decompressor) copyData() {
   633  	buf := f.dict.writeSlice()
   634  	if len(buf) > f.copyLen {
   635  		buf = buf[:f.copyLen]
   636  	}
   637  
   638  	cnt, err := io.ReadFull(f.r, buf)
   639  	f.roffset += int64(cnt)
   640  	f.copyLen -= cnt
   641  	f.dict.writeMark(cnt)
   642  	if err != nil {
   643  		f.err = noEOF(err)
   644  		return
   645  	}
   646  
   647  	if f.dict.availWrite() == 0 || f.copyLen > 0 {
   648  		f.toRead = f.dict.readFlush()
   649  		f.step = copyData
   650  		return
   651  	}
   652  	f.finishBlock()
   653  }
   654  
   655  func (f *decompressor) finishBlock() {
   656  	if f.final {
   657  		if f.dict.availRead() > 0 {
   658  			f.toRead = f.dict.readFlush()
   659  		}
   660  		f.err = io.EOF
   661  	}
   662  	f.step = nextBlock
   663  }
   664  
   665  func (f *decompressor) doStep() {
   666  	switch f.step {
   667  	case copyData:
   668  		f.copyData()
   669  	case nextBlock:
   670  		f.nextBlock()
   671  	case huffmanBytesBuffer:
   672  		f.huffmanBytesBuffer()
   673  	case huffmanBytesReader:
   674  		f.huffmanBytesReader()
   675  	case huffmanBufioReader:
   676  		f.huffmanBufioReader()
   677  	case huffmanStringsReader:
   678  		f.huffmanStringsReader()
   679  	case huffmanGenericReader:
   680  		f.huffmanGenericReader()
   681  	default:
   682  		panic("BUG: unexpected step state")
   683  	}
   684  }
   685  
   686  // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
   687  func noEOF(e error) error {
   688  	if e == io.EOF {
   689  		return io.ErrUnexpectedEOF
   690  	}
   691  	return e
   692  }
   693  
   694  func (f *decompressor) moreBits() error {
   695  	c, err := f.r.ReadByte()
   696  	if err != nil {
   697  		return noEOF(err)
   698  	}
   699  	f.roffset++
   700  	f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
   701  	f.nb += 8
   702  	return nil
   703  }
   704  
   705  // Read the next Huffman-encoded symbol from f according to h.
   706  func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
   707  	// Since a huffmanDecoder can be empty or be composed of a degenerate tree
   708  	// with single element, huffSym must error on these two edge cases. In both
   709  	// cases, the chunks slice will be 0 for the invalid sequence, leading it
   710  	// satisfy the n == 0 check below.
   711  	n := uint(h.maxRead)
   712  	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
   713  	// but is smart enough to keep local variables in registers, so use nb and b,
   714  	// inline call to moreBits and reassign b,nb back to f on return.
   715  	nb, b := f.nb, f.b
   716  	for {
   717  		for nb < n {
   718  			c, err := f.r.ReadByte()
   719  			if err != nil {
   720  				f.b = b
   721  				f.nb = nb
   722  				return 0, noEOF(err)
   723  			}
   724  			f.roffset++
   725  			b |= uint32(c) << (nb & regSizeMaskUint32)
   726  			nb += 8
   727  		}
   728  		chunk := h.chunks[b&(huffmanNumChunks-1)]
   729  		n = uint(chunk & huffmanCountMask)
   730  		if n > huffmanChunkBits {
   731  			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
   732  			n = uint(chunk & huffmanCountMask)
   733  		}
   734  		if n <= nb {
   735  			if n == 0 {
   736  				f.b = b
   737  				f.nb = nb
   738  				if debugDecode {
   739  					fmt.Println("huffsym: n==0")
   740  				}
   741  				f.err = CorruptInputError(f.roffset)
   742  				return 0, f.err
   743  			}
   744  			f.b = b >> (n & regSizeMaskUint32)
   745  			f.nb = nb - n
   746  			return int(chunk >> huffmanValueShift), nil
   747  		}
   748  	}
   749  }
   750  
   751  func makeReader(r io.Reader) Reader {
   752  	if rr, ok := r.(Reader); ok {
   753  		return rr
   754  	}
   755  	return bufio.NewReader(r)
   756  }
   757  
   758  func fixedHuffmanDecoderInit() {
   759  	fixedOnce.Do(func() {
   760  		// These come from the RFC section 3.2.6.
   761  		var bits [288]int
   762  		for i := 0; i < 144; i++ {
   763  			bits[i] = 8
   764  		}
   765  		for i := 144; i < 256; i++ {
   766  			bits[i] = 9
   767  		}
   768  		for i := 256; i < 280; i++ {
   769  			bits[i] = 7
   770  		}
   771  		for i := 280; i < 288; i++ {
   772  			bits[i] = 8
   773  		}
   774  		fixedHuffmanDecoder.init(bits[:])
   775  	})
   776  }
   777  
   778  func (f *decompressor) Reset(r io.Reader, dict []byte) error {
   779  	*f = decompressor{
   780  		r:        makeReader(r),
   781  		bits:     f.bits,
   782  		codebits: f.codebits,
   783  		h1:       f.h1,
   784  		h2:       f.h2,
   785  		dict:     f.dict,
   786  		step:     nextBlock,
   787  	}
   788  	f.dict.init(maxMatchOffset, dict)
   789  	return nil
   790  }
   791  
   792  // NewReader returns a new ReadCloser that can be used
   793  // to read the uncompressed version of r.
   794  // If r does not also implement io.ByteReader,
   795  // the decompressor may read more data than necessary from r.
   796  // It is the caller's responsibility to call Close on the ReadCloser
   797  // when finished reading.
   798  //
   799  // The ReadCloser returned by NewReader also implements Resetter.
   800  func NewReader(r io.Reader) io.ReadCloser {
   801  	fixedHuffmanDecoderInit()
   802  
   803  	var f decompressor
   804  	f.r = makeReader(r)
   805  	f.bits = new([maxNumLit + maxNumDist]int)
   806  	f.codebits = new([numCodes]int)
   807  	f.step = nextBlock
   808  	f.dict.init(maxMatchOffset, nil)
   809  	return &f
   810  }
   811  
   812  // NewReaderDict is like NewReader but initializes the reader
   813  // with a preset dictionary. The returned Reader behaves as if
   814  // the uncompressed data stream started with the given dictionary,
   815  // which has already been read. NewReaderDict is typically used
   816  // to read data compressed by NewWriterDict.
   817  //
   818  // The ReadCloser returned by NewReader also implements Resetter.
   819  func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
   820  	fixedHuffmanDecoderInit()
   821  
   822  	var f decompressor
   823  	f.r = makeReader(r)
   824  	f.bits = new([maxNumLit + maxNumDist]int)
   825  	f.codebits = new([numCodes]int)
   826  	f.step = nextBlock
   827  	f.dict.init(maxMatchOffset, dict)
   828  	return &f
   829  }
   830
View as plain text