huffman_bit_writer.go

Documentation: github.com/klauspost/compress/flate

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package flate
     6  
     7  import (
     8  	"encoding/binary"
     9  	"fmt"
    10  	"io"
    11  	"math"
    12  )
    13  
    14  const (
    15  	// The largest offset code.
    16  	offsetCodeCount = 30
    17  
    18  	// The special code used to mark the end of a block.
    19  	endBlockMarker = 256
    20  
    21  	// The first length code.
    22  	lengthCodesStart = 257
    23  
    24  	// The number of codegen codes.
    25  	codegenCodeCount = 19
    26  	badCode          = 255
    27  
    28  	// maxPredefinedTokens is the maximum number of tokens
    29  	// where we check if fixed size is smaller.
    30  	maxPredefinedTokens = 250
    31  
    32  	// bufferFlushSize indicates the buffer size
    33  	// after which bytes are flushed to the writer.
    34  	// Should preferably be a multiple of 6, since
    35  	// we accumulate 6 bytes between writes to the buffer.
    36  	bufferFlushSize = 246
    37  )
    38  
    39  // Minimum length code that emits bits.
    40  const lengthExtraBitsMinCode = 8
    41  
    42  // The number of extra bits needed by length code X - LENGTH_CODES_START.
    43  var lengthExtraBits = [32]uint8{
    44  	/* 257 */ 0, 0, 0,
    45  	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
    46  	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
    47  	/* 280 */ 4, 5, 5, 5, 5, 0,
    48  }
    49  
    50  // The length indicated by length code X - LENGTH_CODES_START.
    51  var lengthBase = [32]uint8{
    52  	0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
    53  	12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
    54  	64, 80, 96, 112, 128, 160, 192, 224, 255,
    55  }
    56  
    57  // Minimum offset code that emits bits.
    58  const offsetExtraBitsMinCode = 4
    59  
    60  // offset code word extra bits.
    61  var offsetExtraBits = [32]int8{
    62  	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
    63  	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
    64  	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
    65  	/* extended window */
    66  	14, 14,
    67  }
    68  
    69  var offsetCombined = [32]uint32{}
    70  
    71  func init() {
    72  	var offsetBase = [32]uint32{
    73  		/* normal deflate */
    74  		0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
    75  		0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
    76  		0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
    77  		0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
    78  		0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
    79  		0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
    80  
    81  		/* extended window */
    82  		0x008000, 0x00c000,
    83  	}
    84  
    85  	for i := range offsetCombined[:] {
    86  		// Don't use extended window values...
    87  		if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 {
    88  			continue
    89  		}
    90  		offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8)
    91  	}
    92  }
    93  
    94  // The odd order in which the codegen code sizes are written.
    95  var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
    96  
    97  type huffmanBitWriter struct {
    98  	// writer is the underlying writer.
    99  	// Do not use it directly; use the write method, which ensures
   100  	// that Write errors are sticky.
   101  	writer io.Writer
   102  
   103  	// Data waiting to be written is bytes[0:nbytes]
   104  	// and then the low nbits of bits.
   105  	bits            uint64
   106  	nbits           uint8
   107  	nbytes          uint8
   108  	lastHuffMan     bool
   109  	literalEncoding *huffmanEncoder
   110  	tmpLitEncoding  *huffmanEncoder
   111  	offsetEncoding  *huffmanEncoder
   112  	codegenEncoding *huffmanEncoder
   113  	err             error
   114  	lastHeader      int
   115  	// Set between 0 (reused block can be up to 2x the size)
   116  	logNewTablePenalty uint
   117  	bytes              [256 + 8]byte
   118  	literalFreq        [lengthCodesStart + 32]uint16
   119  	offsetFreq         [32]uint16
   120  	codegenFreq        [codegenCodeCount]uint16
   121  
   122  	// codegen must have an extra space for the final symbol.
   123  	codegen [literalCount + offsetCodeCount + 1]uint8
   124  }
   125  
   126  // Huffman reuse.
   127  //
   128  // The huffmanBitWriter supports reusing huffman tables and thereby combining block sections.
   129  //
   130  // This is controlled by several variables:
   131  //
   132  // If lastHeader is non-zero the Huffman table can be reused.
   133  // This also indicates that a Huffman table has been generated that can output all
   134  // possible symbols.
   135  // It also indicates that an EOB has not yet been emitted, so if a new tabel is generated
   136  // an EOB with the previous table must be written.
   137  //
   138  // If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
   139  //
   140  // An incoming block estimates the output size of a new table using a 'fresh' by calculating the
   141  // optimal size and adding a penalty in 'logNewTablePenalty'.
   142  // A Huffman table is not optimal, which is why we add a penalty, and generating a new table
   143  // is slower both for compression and decompression.
   144  
   145  func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
   146  	return &huffmanBitWriter{
   147  		writer:          w,
   148  		literalEncoding: newHuffmanEncoder(literalCount),
   149  		tmpLitEncoding:  newHuffmanEncoder(literalCount),
   150  		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
   151  		offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
   152  	}
   153  }
   154  
   155  func (w *huffmanBitWriter) reset(writer io.Writer) {
   156  	w.writer = writer
   157  	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
   158  	w.lastHeader = 0
   159  	w.lastHuffMan = false
   160  }
   161  
   162  func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) {
   163  	a := t.offHist[:offsetCodeCount]
   164  	b := w.offsetEncoding.codes
   165  	b = b[:len(a)]
   166  	for i, v := range a {
   167  		if v != 0 && b[i].zero() {
   168  			return false
   169  		}
   170  	}
   171  
   172  	a = t.extraHist[:literalCount-256]
   173  	b = w.literalEncoding.codes[256:literalCount]
   174  	b = b[:len(a)]
   175  	for i, v := range a {
   176  		if v != 0 && b[i].zero() {
   177  			return false
   178  		}
   179  	}
   180  
   181  	a = t.litHist[:256]
   182  	b = w.literalEncoding.codes[:len(a)]
   183  	for i, v := range a {
   184  		if v != 0 && b[i].zero() {
   185  			return false
   186  		}
   187  	}
   188  	return true
   189  }
   190  
   191  func (w *huffmanBitWriter) flush() {
   192  	if w.err != nil {
   193  		w.nbits = 0
   194  		return
   195  	}
   196  	if w.lastHeader > 0 {
   197  		// We owe an EOB
   198  		w.writeCode(w.literalEncoding.codes[endBlockMarker])
   199  		w.lastHeader = 0
   200  	}
   201  	n := w.nbytes
   202  	for w.nbits != 0 {
   203  		w.bytes[n] = byte(w.bits)
   204  		w.bits >>= 8
   205  		if w.nbits > 8 { // Avoid underflow
   206  			w.nbits -= 8
   207  		} else {
   208  			w.nbits = 0
   209  		}
   210  		n++
   211  	}
   212  	w.bits = 0
   213  	w.write(w.bytes[:n])
   214  	w.nbytes = 0
   215  }
   216  
   217  func (w *huffmanBitWriter) write(b []byte) {
   218  	if w.err != nil {
   219  		return
   220  	}
   221  	_, w.err = w.writer.Write(b)
   222  }
   223  
   224  func (w *huffmanBitWriter) writeBits(b int32, nb uint8) {
   225  	w.bits |= uint64(b) << (w.nbits & 63)
   226  	w.nbits += nb
   227  	if w.nbits >= 48 {
   228  		w.writeOutBits()
   229  	}
   230  }
   231  
   232  func (w *huffmanBitWriter) writeBytes(bytes []byte) {
   233  	if w.err != nil {
   234  		return
   235  	}
   236  	n := w.nbytes
   237  	if w.nbits&7 != 0 {
   238  		w.err = InternalError("writeBytes with unfinished bits")
   239  		return
   240  	}
   241  	for w.nbits != 0 {
   242  		w.bytes[n] = byte(w.bits)
   243  		w.bits >>= 8
   244  		w.nbits -= 8
   245  		n++
   246  	}
   247  	if n != 0 {
   248  		w.write(w.bytes[:n])
   249  	}
   250  	w.nbytes = 0
   251  	w.write(bytes)
   252  }
   253  
   254  // RFC 1951 3.2.7 specifies a special run-length encoding for specifying
   255  // the literal and offset lengths arrays (which are concatenated into a single
   256  // array).  This method generates that run-length encoding.
   257  //
   258  // The result is written into the codegen array, and the frequencies
   259  // of each code is written into the codegenFreq array.
   260  // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
   261  // information. Code badCode is an end marker
   262  //
   263  //	numLiterals      The number of literals in literalEncoding
   264  //	numOffsets       The number of offsets in offsetEncoding
   265  //	litenc, offenc   The literal and offset encoder to use
   266  func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
   267  	for i := range w.codegenFreq {
   268  		w.codegenFreq[i] = 0
   269  	}
   270  	// Note that we are using codegen both as a temporary variable for holding
   271  	// a copy of the frequencies, and as the place where we put the result.
   272  	// This is fine because the output is always shorter than the input used
   273  	// so far.
   274  	codegen := w.codegen[:] // cache
   275  	// Copy the concatenated code sizes to codegen. Put a marker at the end.
   276  	cgnl := codegen[:numLiterals]
   277  	for i := range cgnl {
   278  		cgnl[i] = litEnc.codes[i].len()
   279  	}
   280  
   281  	cgnl = codegen[numLiterals : numLiterals+numOffsets]
   282  	for i := range cgnl {
   283  		cgnl[i] = offEnc.codes[i].len()
   284  	}
   285  	codegen[numLiterals+numOffsets] = badCode
   286  
   287  	size := codegen[0]
   288  	count := 1
   289  	outIndex := 0
   290  	for inIndex := 1; size != badCode; inIndex++ {
   291  		// INVARIANT: We have seen "count" copies of size that have not yet
   292  		// had output generated for them.
   293  		nextSize := codegen[inIndex]
   294  		if nextSize == size {
   295  			count++
   296  			continue
   297  		}
   298  		// We need to generate codegen indicating "count" of size.
   299  		if size != 0 {
   300  			codegen[outIndex] = size
   301  			outIndex++
   302  			w.codegenFreq[size]++
   303  			count--
   304  			for count >= 3 {
   305  				n := 6
   306  				if n > count {
   307  					n = count
   308  				}
   309  				codegen[outIndex] = 16
   310  				outIndex++
   311  				codegen[outIndex] = uint8(n - 3)
   312  				outIndex++
   313  				w.codegenFreq[16]++
   314  				count -= n
   315  			}
   316  		} else {
   317  			for count >= 11 {
   318  				n := 138
   319  				if n > count {
   320  					n = count
   321  				}
   322  				codegen[outIndex] = 18
   323  				outIndex++
   324  				codegen[outIndex] = uint8(n - 11)
   325  				outIndex++
   326  				w.codegenFreq[18]++
   327  				count -= n
   328  			}
   329  			if count >= 3 {
   330  				// count >= 3 && count <= 10
   331  				codegen[outIndex] = 17
   332  				outIndex++
   333  				codegen[outIndex] = uint8(count - 3)
   334  				outIndex++
   335  				w.codegenFreq[17]++
   336  				count = 0
   337  			}
   338  		}
   339  		count--
   340  		for ; count >= 0; count-- {
   341  			codegen[outIndex] = size
   342  			outIndex++
   343  			w.codegenFreq[size]++
   344  		}
   345  		// Set up invariant for next time through the loop.
   346  		size = nextSize
   347  		count = 1
   348  	}
   349  	// Marker indicating the end of the codegen.
   350  	codegen[outIndex] = badCode
   351  }
   352  
   353  func (w *huffmanBitWriter) codegens() int {
   354  	numCodegens := len(w.codegenFreq)
   355  	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
   356  		numCodegens--
   357  	}
   358  	return numCodegens
   359  }
   360  
   361  func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
   362  	numCodegens = len(w.codegenFreq)
   363  	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
   364  		numCodegens--
   365  	}
   366  	return 3 + 5 + 5 + 4 + (3 * numCodegens) +
   367  		w.codegenEncoding.bitLength(w.codegenFreq[:]) +
   368  		int(w.codegenFreq[16])*2 +
   369  		int(w.codegenFreq[17])*3 +
   370  		int(w.codegenFreq[18])*7, numCodegens
   371  }
   372  
   373  // dynamicSize returns the size of dynamically encoded data in bits.
   374  func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) {
   375  	size = litEnc.bitLength(w.literalFreq[:]) +
   376  		offEnc.bitLength(w.offsetFreq[:])
   377  	return size
   378  }
   379  
   380  // dynamicSize returns the size of dynamically encoded data in bits.
   381  func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
   382  	header, numCodegens := w.headerSize()
   383  	size = header +
   384  		litEnc.bitLength(w.literalFreq[:]) +
   385  		offEnc.bitLength(w.offsetFreq[:]) +
   386  		extraBits
   387  	return size, numCodegens
   388  }
   389  
   390  // extraBitSize will return the number of bits that will be written
   391  // as "extra" bits on matches.
   392  func (w *huffmanBitWriter) extraBitSize() int {
   393  	total := 0
   394  	for i, n := range w.literalFreq[257:literalCount] {
   395  		total += int(n) * int(lengthExtraBits[i&31])
   396  	}
   397  	for i, n := range w.offsetFreq[:offsetCodeCount] {
   398  		total += int(n) * int(offsetExtraBits[i&31])
   399  	}
   400  	return total
   401  }
   402  
   403  // fixedSize returns the size of dynamically encoded data in bits.
   404  func (w *huffmanBitWriter) fixedSize(extraBits int) int {
   405  	return 3 +
   406  		fixedLiteralEncoding.bitLength(w.literalFreq[:]) +
   407  		fixedOffsetEncoding.bitLength(w.offsetFreq[:]) +
   408  		extraBits
   409  }
   410  
   411  // storedSize calculates the stored size, including header.
   412  // The function returns the size in bits and whether the block
   413  // fits inside a single block.
   414  func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
   415  	if in == nil {
   416  		return 0, false
   417  	}
   418  	if len(in) <= maxStoreBlockSize {
   419  		return (len(in) + 5) * 8, true
   420  	}
   421  	return 0, false
   422  }
   423  
   424  func (w *huffmanBitWriter) writeCode(c hcode) {
   425  	// The function does not get inlined if we "& 63" the shift.
   426  	w.bits |= c.code64() << (w.nbits & 63)
   427  	w.nbits += c.len()
   428  	if w.nbits >= 48 {
   429  		w.writeOutBits()
   430  	}
   431  }
   432  
   433  // writeOutBits will write bits to the buffer.
   434  func (w *huffmanBitWriter) writeOutBits() {
   435  	bits := w.bits
   436  	w.bits >>= 48
   437  	w.nbits -= 48
   438  	n := w.nbytes
   439  
   440  	// We over-write, but faster...
   441  	binary.LittleEndian.PutUint64(w.bytes[n:], bits)
   442  	n += 6
   443  
   444  	if n >= bufferFlushSize {
   445  		if w.err != nil {
   446  			n = 0
   447  			return
   448  		}
   449  		w.write(w.bytes[:n])
   450  		n = 0
   451  	}
   452  
   453  	w.nbytes = n
   454  }
   455  
   456  // Write the header of a dynamic Huffman block to the output stream.
   457  //
   458  //	numLiterals  The number of literals specified in codegen
   459  //	numOffsets   The number of offsets specified in codegen
   460  //	numCodegens  The number of codegens used in codegen
   461  func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
   462  	if w.err != nil {
   463  		return
   464  	}
   465  	var firstBits int32 = 4
   466  	if isEof {
   467  		firstBits = 5
   468  	}
   469  	w.writeBits(firstBits, 3)
   470  	w.writeBits(int32(numLiterals-257), 5)
   471  	w.writeBits(int32(numOffsets-1), 5)
   472  	w.writeBits(int32(numCodegens-4), 4)
   473  
   474  	for i := 0; i < numCodegens; i++ {
   475  		value := uint(w.codegenEncoding.codes[codegenOrder[i]].len())
   476  		w.writeBits(int32(value), 3)
   477  	}
   478  
   479  	i := 0
   480  	for {
   481  		var codeWord = uint32(w.codegen[i])
   482  		i++
   483  		if codeWord == badCode {
   484  			break
   485  		}
   486  		w.writeCode(w.codegenEncoding.codes[codeWord])
   487  
   488  		switch codeWord {
   489  		case 16:
   490  			w.writeBits(int32(w.codegen[i]), 2)
   491  			i++
   492  		case 17:
   493  			w.writeBits(int32(w.codegen[i]), 3)
   494  			i++
   495  		case 18:
   496  			w.writeBits(int32(w.codegen[i]), 7)
   497  			i++
   498  		}
   499  	}
   500  }
   501  
   502  // writeStoredHeader will write a stored header.
   503  // If the stored block is only used for EOF,
   504  // it is replaced with a fixed huffman block.
   505  func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
   506  	if w.err != nil {
   507  		return
   508  	}
   509  	if w.lastHeader > 0 {
   510  		// We owe an EOB
   511  		w.writeCode(w.literalEncoding.codes[endBlockMarker])
   512  		w.lastHeader = 0
   513  	}
   514  
   515  	// To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes.
   516  	if length == 0 && isEof {
   517  		w.writeFixedHeader(isEof)
   518  		// EOB: 7 bits, value: 0
   519  		w.writeBits(0, 7)
   520  		w.flush()
   521  		return
   522  	}
   523  
   524  	var flag int32
   525  	if isEof {
   526  		flag = 1
   527  	}
   528  	w.writeBits(flag, 3)
   529  	w.flush()
   530  	w.writeBits(int32(length), 16)
   531  	w.writeBits(int32(^uint16(length)), 16)
   532  }
   533  
   534  func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
   535  	if w.err != nil {
   536  		return
   537  	}
   538  	if w.lastHeader > 0 {
   539  		// We owe an EOB
   540  		w.writeCode(w.literalEncoding.codes[endBlockMarker])
   541  		w.lastHeader = 0
   542  	}
   543  
   544  	// Indicate that we are a fixed Huffman block
   545  	var value int32 = 2
   546  	if isEof {
   547  		value = 3
   548  	}
   549  	w.writeBits(value, 3)
   550  }
   551  
   552  // writeBlock will write a block of tokens with the smallest encoding.
   553  // The original input can be supplied, and if the huffman encoded data
   554  // is larger than the original bytes, the data will be written as a
   555  // stored block.
   556  // If the input is nil, the tokens will always be Huffman encoded.
   557  func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
   558  	if w.err != nil {
   559  		return
   560  	}
   561  
   562  	tokens.AddEOB()
   563  	if w.lastHeader > 0 {
   564  		// We owe an EOB
   565  		w.writeCode(w.literalEncoding.codes[endBlockMarker])
   566  		w.lastHeader = 0
   567  	}
   568  	numLiterals, numOffsets := w.indexTokens(tokens, false)
   569  	w.generate()
   570  	var extraBits int
   571  	storedSize, storable := w.storedSize(input)
   572  	if storable {
   573  		extraBits = w.extraBitSize()
   574  	}
   575  
   576  	// Figure out smallest code.
   577  	// Fixed Huffman baseline.
   578  	var literalEncoding = fixedLiteralEncoding
   579  	var offsetEncoding = fixedOffsetEncoding
   580  	var size = math.MaxInt32
   581  	if tokens.n < maxPredefinedTokens {
   582  		size = w.fixedSize(extraBits)
   583  	}
   584  
   585  	// Dynamic Huffman?
   586  	var numCodegens int
   587  
   588  	// Generate codegen and codegenFrequencies, which indicates how to encode
   589  	// the literalEncoding and the offsetEncoding.
   590  	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
   591  	w.codegenEncoding.generate(w.codegenFreq[:], 7)
   592  	dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
   593  
   594  	if dynamicSize < size {
   595  		size = dynamicSize
   596  		literalEncoding = w.literalEncoding
   597  		offsetEncoding = w.offsetEncoding
   598  	}
   599  
   600  	// Stored bytes?
   601  	if storable && storedSize <= size {
   602  		w.writeStoredHeader(len(input), eof)
   603  		w.writeBytes(input)
   604  		return
   605  	}
   606  
   607  	// Huffman.
   608  	if literalEncoding == fixedLiteralEncoding {
   609  		w.writeFixedHeader(eof)
   610  	} else {
   611  		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
   612  	}
   613  
   614  	// Write the tokens.
   615  	w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes)
   616  }
   617  
   618  // writeBlockDynamic encodes a block using a dynamic Huffman table.
   619  // This should be used if the symbols used have a disproportionate
   620  // histogram distribution.
   621  // If input is supplied and the compression savings are below 1/16th of the
   622  // input size the block is stored.
   623  func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) {
   624  	if w.err != nil {
   625  		return
   626  	}
   627  
   628  	sync = sync || eof
   629  	if sync {
   630  		tokens.AddEOB()
   631  	}
   632  
   633  	// We cannot reuse pure huffman table, and must mark as EOF.
   634  	if (w.lastHuffMan || eof) && w.lastHeader > 0 {
   635  		// We will not try to reuse.
   636  		w.writeCode(w.literalEncoding.codes[endBlockMarker])
   637  		w.lastHeader = 0
   638  		w.lastHuffMan = false
   639  	}
   640  
   641  	// fillReuse enables filling of empty values.
   642  	// This will make encodings always reusable without testing.
   643  	// However, this does not appear to benefit on most cases.
   644  	const fillReuse = false
   645  
   646  	// Check if we can reuse...
   647  	if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) {
   648  		w.writeCode(w.literalEncoding.codes[endBlockMarker])
   649  		w.lastHeader = 0
   650  	}
   651  
   652  	numLiterals, numOffsets := w.indexTokens(tokens, !sync)
   653  	extraBits := 0
   654  	ssize, storable := w.storedSize(input)
   655  
   656  	const usePrefs = true
   657  	if storable || w.lastHeader > 0 {
   658  		extraBits = w.extraBitSize()
   659  	}
   660  
   661  	var size int
   662  
   663  	// Check if we should reuse.
   664  	if w.lastHeader > 0 {
   665  		// Estimate size for using a new table.
   666  		// Use the previous header size as the best estimate.
   667  		newSize := w.lastHeader + tokens.EstimatedBits()
   668  		newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty
   669  
   670  		// The estimated size is calculated as an optimal table.
   671  		// We add a penalty to make it more realistic and re-use a bit more.
   672  		reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits
   673  
   674  		// Check if a new table is better.
   675  		if newSize < reuseSize {
   676  			// Write the EOB we owe.
   677  			w.writeCode(w.literalEncoding.codes[endBlockMarker])
   678  			size = newSize
   679  			w.lastHeader = 0
   680  		} else {
   681  			size = reuseSize
   682  		}
   683  
   684  		if tokens.n < maxPredefinedTokens {
   685  			if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size {
   686  				// Check if we get a reasonable size decrease.
   687  				if storable && ssize <= size {
   688  					w.writeStoredHeader(len(input), eof)
   689  					w.writeBytes(input)
   690  					return
   691  				}
   692  				w.writeFixedHeader(eof)
   693  				if !sync {
   694  					tokens.AddEOB()
   695  				}
   696  				w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
   697  				return
   698  			}
   699  		}
   700  		// Check if we get a reasonable size decrease.
   701  		if storable && ssize <= size {
   702  			w.writeStoredHeader(len(input), eof)
   703  			w.writeBytes(input)
   704  			return
   705  		}
   706  	}
   707  
   708  	// We want a new block/table
   709  	if w.lastHeader == 0 {
   710  		if fillReuse && !sync {
   711  			w.fillTokens()
   712  			numLiterals, numOffsets = maxNumLit, maxNumDist
   713  		} else {
   714  			w.literalFreq[endBlockMarker] = 1
   715  		}
   716  
   717  		w.generate()
   718  		// Generate codegen and codegenFrequencies, which indicates how to encode
   719  		// the literalEncoding and the offsetEncoding.
   720  		w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
   721  		w.codegenEncoding.generate(w.codegenFreq[:], 7)
   722  
   723  		var numCodegens int
   724  		if fillReuse && !sync {
   725  			// Reindex for accurate size...
   726  			w.indexTokens(tokens, true)
   727  		}
   728  		size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
   729  
   730  		// Store predefined, if we don't get a reasonable improvement.
   731  		if tokens.n < maxPredefinedTokens {
   732  			if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size {
   733  				// Store bytes, if we don't get an improvement.
   734  				if storable && ssize <= preSize {
   735  					w.writeStoredHeader(len(input), eof)
   736  					w.writeBytes(input)
   737  					return
   738  				}
   739  				w.writeFixedHeader(eof)
   740  				if !sync {
   741  					tokens.AddEOB()
   742  				}
   743  				w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
   744  				return
   745  			}
   746  		}
   747  
   748  		if storable && ssize <= size {
   749  			// Store bytes, if we don't get an improvement.
   750  			w.writeStoredHeader(len(input), eof)
   751  			w.writeBytes(input)
   752  			return
   753  		}
   754  
   755  		// Write Huffman table.
   756  		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
   757  		if !sync {
   758  			w.lastHeader, _ = w.headerSize()
   759  		}
   760  		w.lastHuffMan = false
   761  	}
   762  
   763  	if sync {
   764  		w.lastHeader = 0
   765  	}
   766  	// Write the tokens.
   767  	w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
   768  }
   769  
   770  func (w *huffmanBitWriter) fillTokens() {
   771  	for i, v := range w.literalFreq[:literalCount] {
   772  		if v == 0 {
   773  			w.literalFreq[i] = 1
   774  		}
   775  	}
   776  	for i, v := range w.offsetFreq[:offsetCodeCount] {
   777  		if v == 0 {
   778  			w.offsetFreq[i] = 1
   779  		}
   780  	}
   781  }
   782  
   783  // indexTokens indexes a slice of tokens, and updates
   784  // literalFreq and offsetFreq, and generates literalEncoding
   785  // and offsetEncoding.
   786  // The number of literal and offset tokens is returned.
   787  func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
   788  	//copy(w.literalFreq[:], t.litHist[:])
   789  	*(*[256]uint16)(w.literalFreq[:]) = t.litHist
   790  	//copy(w.literalFreq[256:], t.extraHist[:])
   791  	*(*[32]uint16)(w.literalFreq[256:]) = t.extraHist
   792  	w.offsetFreq = t.offHist
   793  
   794  	if t.n == 0 {
   795  		return
   796  	}
   797  	if filled {
   798  		return maxNumLit, maxNumDist
   799  	}
   800  	// get the number of literals
   801  	numLiterals = len(w.literalFreq)
   802  	for w.literalFreq[numLiterals-1] == 0 {
   803  		numLiterals--
   804  	}
   805  	// get the number of offsets
   806  	numOffsets = len(w.offsetFreq)
   807  	for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
   808  		numOffsets--
   809  	}
   810  	if numOffsets == 0 {
   811  		// We haven't found a single match. If we want to go with the dynamic encoding,
   812  		// we should count at least one offset to be sure that the offset huffman tree could be encoded.
   813  		w.offsetFreq[0] = 1
   814  		numOffsets = 1
   815  	}
   816  	return
   817  }
   818  
   819  func (w *huffmanBitWriter) generate() {
   820  	w.literalEncoding.generate(w.literalFreq[:literalCount], 15)
   821  	w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
   822  }
   823  
   824  // writeTokens writes a slice of tokens to the output.
   825  // codes for literal and offset encoding must be supplied.
   826  func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
   827  	if w.err != nil {
   828  		return
   829  	}
   830  	if len(tokens) == 0 {
   831  		return
   832  	}
   833  
   834  	// Only last token should be endBlockMarker.
   835  	var deferEOB bool
   836  	if tokens[len(tokens)-1] == endBlockMarker {
   837  		tokens = tokens[:len(tokens)-1]
   838  		deferEOB = true
   839  	}
   840  
   841  	// Create slices up to the next power of two to avoid bounds checks.
   842  	lits := leCodes[:256]
   843  	offs := oeCodes[:32]
   844  	lengths := leCodes[lengthCodesStart:]
   845  	lengths = lengths[:32]
   846  
   847  	// Go 1.16 LOVES having these on stack.
   848  	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
   849  
   850  	for _, t := range tokens {
   851  		if t < 256 {
   852  			//w.writeCode(lits[t.literal()])
   853  			c := lits[t]
   854  			bits |= c.code64() << (nbits & 63)
   855  			nbits += c.len()
   856  			if nbits >= 48 {
   857  				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
   858  				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
   859  				bits >>= 48
   860  				nbits -= 48
   861  				nbytes += 6
   862  				if nbytes >= bufferFlushSize {
   863  					if w.err != nil {
   864  						nbytes = 0
   865  						return
   866  					}
   867  					_, w.err = w.writer.Write(w.bytes[:nbytes])
   868  					nbytes = 0
   869  				}
   870  			}
   871  			continue
   872  		}
   873  
   874  		// Write the length
   875  		length := t.length()
   876  		lengthCode := lengthCode(length) & 31
   877  		if false {
   878  			w.writeCode(lengths[lengthCode])
   879  		} else {
   880  			// inlined
   881  			c := lengths[lengthCode]
   882  			bits |= c.code64() << (nbits & 63)
   883  			nbits += c.len()
   884  			if nbits >= 48 {
   885  				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
   886  				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
   887  				bits >>= 48
   888  				nbits -= 48
   889  				nbytes += 6
   890  				if nbytes >= bufferFlushSize {
   891  					if w.err != nil {
   892  						nbytes = 0
   893  						return
   894  					}
   895  					_, w.err = w.writer.Write(w.bytes[:nbytes])
   896  					nbytes = 0
   897  				}
   898  			}
   899  		}
   900  
   901  		if lengthCode >= lengthExtraBitsMinCode {
   902  			extraLengthBits := lengthExtraBits[lengthCode]
   903  			//w.writeBits(extraLength, extraLengthBits)
   904  			extraLength := int32(length - lengthBase[lengthCode])
   905  			bits |= uint64(extraLength) << (nbits & 63)
   906  			nbits += extraLengthBits
   907  			if nbits >= 48 {
   908  				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
   909  				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
   910  				bits >>= 48
   911  				nbits -= 48
   912  				nbytes += 6
   913  				if nbytes >= bufferFlushSize {
   914  					if w.err != nil {
   915  						nbytes = 0
   916  						return
   917  					}
   918  					_, w.err = w.writer.Write(w.bytes[:nbytes])
   919  					nbytes = 0
   920  				}
   921  			}
   922  		}
   923  		// Write the offset
   924  		offset := t.offset()
   925  		offsetCode := (offset >> 16) & 31
   926  		if false {
   927  			w.writeCode(offs[offsetCode])
   928  		} else {
   929  			// inlined
   930  			c := offs[offsetCode]
   931  			bits |= c.code64() << (nbits & 63)
   932  			nbits += c.len()
   933  			if nbits >= 48 {
   934  				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
   935  				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
   936  				bits >>= 48
   937  				nbits -= 48
   938  				nbytes += 6
   939  				if nbytes >= bufferFlushSize {
   940  					if w.err != nil {
   941  						nbytes = 0
   942  						return
   943  					}
   944  					_, w.err = w.writer.Write(w.bytes[:nbytes])
   945  					nbytes = 0
   946  				}
   947  			}
   948  		}
   949  
   950  		if offsetCode >= offsetExtraBitsMinCode {
   951  			offsetComb := offsetCombined[offsetCode]
   952  			//w.writeBits(extraOffset, extraOffsetBits)
   953  			bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
   954  			nbits += uint8(offsetComb)
   955  			if nbits >= 48 {
   956  				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
   957  				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
   958  				bits >>= 48
   959  				nbits -= 48
   960  				nbytes += 6
   961  				if nbytes >= bufferFlushSize {
   962  					if w.err != nil {
   963  						nbytes = 0
   964  						return
   965  					}
   966  					_, w.err = w.writer.Write(w.bytes[:nbytes])
   967  					nbytes = 0
   968  				}
   969  			}
   970  		}
   971  	}
   972  	// Restore...
   973  	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
   974  
   975  	if deferEOB {
   976  		w.writeCode(leCodes[endBlockMarker])
   977  	}
   978  }
   979  
   980  // huffOffset is a static offset encoder used for huffman only encoding.
   981  // It can be reused since we will not be encoding offset values.
   982  var huffOffset *huffmanEncoder
   983  
   984  func init() {
   985  	w := newHuffmanBitWriter(nil)
   986  	w.offsetFreq[0] = 1
   987  	huffOffset = newHuffmanEncoder(offsetCodeCount)
   988  	huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15)
   989  }
   990  
   991  // writeBlockHuff encodes a block of bytes as either
   992  // Huffman encoded literals or uncompressed bytes if the
   993  // results only gains very little from compression.
   994  func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
   995  	if w.err != nil {
   996  		return
   997  	}
   998  
   999  	// Clear histogram
  1000  	for i := range w.literalFreq[:] {
  1001  		w.literalFreq[i] = 0
  1002  	}
  1003  	if !w.lastHuffMan {
  1004  		for i := range w.offsetFreq[:] {
  1005  			w.offsetFreq[i] = 0
  1006  		}
  1007  	}
  1008  
  1009  	const numLiterals = endBlockMarker + 1
  1010  	const numOffsets = 1
  1011  
  1012  	// Add everything as literals
  1013  	// We have to estimate the header size.
  1014  	// Assume header is around 70 bytes:
  1015  	// https://stackoverflow.com/a/25454430
  1016  	const guessHeaderSizeBits = 70 * 8
  1017  	histogram(input, w.literalFreq[:numLiterals])
  1018  	ssize, storable := w.storedSize(input)
  1019  	if storable && len(input) > 1024 {
  1020  		// Quick check for incompressible content.
  1021  		abs := float64(0)
  1022  		avg := float64(len(input)) / 256
  1023  		max := float64(len(input) * 2)
  1024  		for _, v := range w.literalFreq[:256] {
  1025  			diff := float64(v) - avg
  1026  			abs += diff * diff
  1027  			if abs > max {
  1028  				break
  1029  			}
  1030  		}
  1031  		if abs < max {
  1032  			if debugDeflate {
  1033  				fmt.Println("stored", abs, "<", max)
  1034  			}
  1035  			// No chance we can compress this...
  1036  			w.writeStoredHeader(len(input), eof)
  1037  			w.writeBytes(input)
  1038  			return
  1039  		}
  1040  	}
  1041  	w.literalFreq[endBlockMarker] = 1
  1042  	w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
  1043  	estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals])
  1044  	if estBits < math.MaxInt32 {
  1045  		estBits += w.lastHeader
  1046  		if w.lastHeader == 0 {
  1047  			estBits += guessHeaderSizeBits
  1048  		}
  1049  		estBits += estBits >> w.logNewTablePenalty
  1050  	}
  1051  
  1052  	// Store bytes, if we don't get a reasonable improvement.
  1053  	if storable && ssize <= estBits {
  1054  		if debugDeflate {
  1055  			fmt.Println("stored,", ssize, "<=", estBits)
  1056  		}
  1057  		w.writeStoredHeader(len(input), eof)
  1058  		w.writeBytes(input)
  1059  		return
  1060  	}
  1061  
  1062  	if w.lastHeader > 0 {
  1063  		reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256])
  1064  
  1065  		if estBits < reuseSize {
  1066  			if debugDeflate {
  1067  				fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes")
  1068  			}
  1069  			// We owe an EOB
  1070  			w.writeCode(w.literalEncoding.codes[endBlockMarker])
  1071  			w.lastHeader = 0
  1072  		} else if debugDeflate {
  1073  			fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
  1074  		}
  1075  	}
  1076  
  1077  	count := 0
  1078  	if w.lastHeader == 0 {
  1079  		// Use the temp encoding, so swap.
  1080  		w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding
  1081  		// Generate codegen and codegenFrequencies, which indicates how to encode
  1082  		// the literalEncoding and the offsetEncoding.
  1083  		w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
  1084  		w.codegenEncoding.generate(w.codegenFreq[:], 7)
  1085  		numCodegens := w.codegens()
  1086  
  1087  		// Huffman.
  1088  		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
  1089  		w.lastHuffMan = true
  1090  		w.lastHeader, _ = w.headerSize()
  1091  		if debugDeflate {
  1092  			count += w.lastHeader
  1093  			fmt.Println("header:", count/8)
  1094  		}
  1095  	}
  1096  
  1097  	encoding := w.literalEncoding.codes[:256]
  1098  	// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
  1099  	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
  1100  
  1101  	if debugDeflate {
  1102  		count -= int(nbytes)*8 + int(nbits)
  1103  	}
  1104  	// Unroll, write 3 codes/loop.
  1105  	// Fastest number of unrolls.
  1106  	for len(input) > 3 {
  1107  		// We must have at least 48 bits free.
  1108  		if nbits >= 8 {
  1109  			n := nbits >> 3
  1110  			binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
  1111  			bits >>= (n * 8) & 63
  1112  			nbits -= n * 8
  1113  			nbytes += n
  1114  		}
  1115  		if nbytes >= bufferFlushSize {
  1116  			if w.err != nil {
  1117  				nbytes = 0
  1118  				return
  1119  			}
  1120  			if debugDeflate {
  1121  				count += int(nbytes) * 8
  1122  			}
  1123  			_, w.err = w.writer.Write(w.bytes[:nbytes])
  1124  			nbytes = 0
  1125  		}
  1126  		a, b := encoding[input[0]], encoding[input[1]]
  1127  		bits |= a.code64() << (nbits & 63)
  1128  		bits |= b.code64() << ((nbits + a.len()) & 63)
  1129  		c := encoding[input[2]]
  1130  		nbits += b.len() + a.len()
  1131  		bits |= c.code64() << (nbits & 63)
  1132  		nbits += c.len()
  1133  		input = input[3:]
  1134  	}
  1135  
  1136  	// Remaining...
  1137  	for _, t := range input {
  1138  		if nbits >= 48 {
  1139  			binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
  1140  			//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
  1141  			bits >>= 48
  1142  			nbits -= 48
  1143  			nbytes += 6
  1144  			if nbytes >= bufferFlushSize {
  1145  				if w.err != nil {
  1146  					nbytes = 0
  1147  					return
  1148  				}
  1149  				if debugDeflate {
  1150  					count += int(nbytes) * 8
  1151  				}
  1152  				_, w.err = w.writer.Write(w.bytes[:nbytes])
  1153  				nbytes = 0
  1154  			}
  1155  		}
  1156  		// Bitwriting inlined, ~30% speedup
  1157  		c := encoding[t]
  1158  		bits |= c.code64() << (nbits & 63)
  1159  
  1160  		nbits += c.len()
  1161  		if debugDeflate {
  1162  			count += int(c.len())
  1163  		}
  1164  	}
  1165  	// Restore...
  1166  	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
  1167  
  1168  	if debugDeflate {
  1169  		nb := count + int(nbytes)*8 + int(nbits)
  1170  		fmt.Println("wrote", nb, "bits,", nb/8, "bytes.")
  1171  	}
  1172  	// Flush if needed to have space.
  1173  	if w.nbits >= 48 {
  1174  		w.writeOutBits()
  1175  	}
  1176  
  1177  	if eof || sync {
  1178  		w.writeCode(w.literalEncoding.codes[endBlockMarker])
  1179  		w.lastHeader = 0
  1180  		w.lastHuffMan = false
  1181  	}
  1182  }
  1183
View as plain text