...

Source file src/github.com/klauspost/compress/zstd/blockenc.go

Documentation: github.com/klauspost/compress/zstd

// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"errors"
	"fmt"
	"math"
	"math/bits"

	"github.com/klauspost/compress/huff0"
)

type blockEnc struct {
	size       int
	literals   []byte
	sequences  []seq
	coders     seqCoders
	litEnc     *huff0.Scratch
	dictLitEnc *huff0.Scratch
	wr         bitWriter

	extraLits         int
	output            []byte
	recentOffsets     [3]uint32
	prevRecentOffsets [3]uint32

	last   bool
	lowMem bool
}

// init should be used once the block has been created.
// If called more than once, the effect is the same as calling reset.
func (b *blockEnc) init() {
	if b.lowMem {
		// 1K literals
		if cap(b.literals) < 1<<10 {
			b.literals = make([]byte, 0, 1<<10)
		}
		const defSeqs = 20
		if cap(b.sequences) < defSeqs {
			b.sequences = make([]seq, 0, defSeqs)
		}
		// 1K
		if cap(b.output) < 1<<10 {
			b.output = make([]byte, 0, 1<<10)
		}
	} else {
		if cap(b.literals) < maxCompressedBlockSize {
			b.literals = make([]byte, 0, maxCompressedBlockSize)
		}
		const defSeqs = 2000
		if cap(b.sequences) < defSeqs {
			b.sequences = make([]seq, 0, defSeqs)
		}
		if cap(b.output) < maxCompressedBlockSize {
			b.output = make([]byte, 0, maxCompressedBlockSize)
		}
	}

	if b.coders.mlEnc == nil {
		b.coders.mlEnc = &fseEncoder{}
		b.coders.mlPrev = &fseEncoder{}
		b.coders.ofEnc = &fseEncoder{}
		b.coders.ofPrev = &fseEncoder{}
		b.coders.llEnc = &fseEncoder{}
		b.coders.llPrev = &fseEncoder{}
	}
	b.litEnc = &huff0.Scratch{WantLogLess: 4}
	b.reset(nil)
}

// initNewEncode can be used to reset offsets and encoders to the initial state.
func (b *blockEnc) initNewEncode() {
	b.recentOffsets = [3]uint32{1, 4, 8}
	b.litEnc.Reuse = huff0.ReusePolicyNone
	b.coders.setPrev(nil, nil, nil)
}

// reset will reset the block for a new encode, but in the same stream,
// meaning that state will be carried over, but the block content is reset.
// If a previous block is provided, the recent offsets are carried over.
func (b *blockEnc) reset(prev *blockEnc) {
	b.extraLits = 0
	b.literals = b.literals[:0]
	b.size = 0
	b.sequences = b.sequences[:0]
	b.output = b.output[:0]
	b.last = false
	if prev != nil {
		b.recentOffsets = prev.prevRecentOffsets
	}
	b.dictLitEnc = nil
}

// swapEncoders will swap this block's sequence coders and literal encoder
// with those of the previous block, so their tables can be reused.
func (b *blockEnc) swapEncoders(prev *blockEnc) {
	b.coders.swap(&prev.coders)
	b.litEnc, prev.litEnc = prev.litEnc, b.litEnc
}

// blockHeader contains the information for a block header.
type blockHeader uint32

// setLast sets the 'last' indicator on a block.
func (h *blockHeader) setLast(b bool) {
	if b {
		*h = *h | 1
	} else {
		const mask = (1 << 24) - 2
		*h = *h & mask
	}
}

// setSize will store the compressed size of a block.
func (h *blockHeader) setSize(v uint32) {
	const mask = 7
	*h = (*h)&mask | blockHeader(v<<3)
}

// setType sets the block type.
func (h *blockHeader) setType(t blockType) {
	const mask = 1 | (((1 << 24) - 1) ^ 7)
	*h = (*h & mask) | blockHeader(t<<1)
}

// appendTo will append the block header to a slice.
func (h blockHeader) appendTo(b []byte) []byte {
	return append(b, uint8(h), uint8(h>>8), uint8(h>>16))
}

// String returns a string representation of the block.
func (h blockHeader) String() string {
	return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1)
}

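// blockHeaderLayoutExample is an illustrative sketch, not part of the
// original file: it shows how the three header fields above pack into the
// low 24 bits (bit 0 = last, bits 1-2 = type, bits 3-23 = size), which
// appendTo then emits little-endian. The expected bytes assume the format
// spec's type ordering, where raw is type 0.
func blockHeaderLayoutExample() {
	var bh blockHeader
	bh.setLast(true)         // bit 0
	bh.setType(blockTypeRaw) // bits 1-2
	bh.setSize(5)            // bits 3-23
	hdr := bh.appendTo(nil)  // 3 bytes, little-endian
	fmt.Printf("% x  %v\n", hdr, bh) // expected: 29 00 00  Type: 0, Size: 5, Last:true
}
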
// literalsHeader contains literals header information.
type literalsHeader uint64

// setType can be used to set the type of literal block.
func (h *literalsHeader) setType(t literalsBlockType) {
	const mask = math.MaxUint64 - 3
	*h = (*h & mask) | literalsHeader(t)
}

// setSize can be used to set a single size, for uncompressed and RLE content.
func (h *literalsHeader) setSize(regenLen int) {
	inBits := bits.Len32(uint32(regenLen))
	// Only retain 2 bits
	const mask = 3
	lh := uint64(*h & mask)
	switch {
	case inBits < 5:
		lh |= (uint64(regenLen) << 3) | (1 << 60)
		if debugEncoder {
			got := int(lh>>3) & 0xff
			if got != regenLen {
				panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
			}
		}
	case inBits < 12:
		lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60)
	case inBits < 20:
		lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60)
	default:
		panic(fmt.Errorf("internal error: block too big (%d)", regenLen))
	}
	*h = literalsHeader(lh)
}

// setSizes will set the size of a compressed literals section and the input length.
func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
	compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
	// Only retain 2 bits
	const mask = 3
	lh := uint64(*h & mask)
	switch {
	case compBits <= 10 && inBits <= 10:
		if !single {
			lh |= 1 << 2
		}
		lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
		if debugEncoder {
			const mmask = (1 << 24) - 1
			n := (lh >> 4) & mmask
			if int(n&1023) != inLen {
				panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits))
			}
			if int(n>>10) != compLen {
				panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits))
			}
		}
	case compBits <= 14 && inBits <= 14:
		lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
		if single {
			panic("single stream used with more than 10 bits length.")
		}
	case compBits <= 18 && inBits <= 18:
		lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
		if single {
			panic("single stream used with more than 10 bits length.")
		}
	default:
		panic("internal error: block too big")
	}
	*h = literalsHeader(lh)
}

// appendTo will append the literals header to a byte slice.
func (h literalsHeader) appendTo(b []byte) []byte {
	size := uint8(h >> 60)
	switch size {
	case 1:
		b = append(b, uint8(h))
	case 2:
		b = append(b, uint8(h), uint8(h>>8))
	case 3:
		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16))
	case 4:
		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24))
	case 5:
		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32))
	default:
		panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size))
	}
	return b
}

// size returns the output size with currently set values.
func (h literalsHeader) size() int {
	return int(h >> 60)
}

func (h literalsHeader) String() string {
	return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: %d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60)
}

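// literalsHeaderSizeExample is an illustrative sketch, not part of the
// original file: a regenerated size that fits in 5 bits selects the
// one-byte header form in setSize above (bits 60-63 record how many bytes
// appendTo must emit), while larger sizes grow the header to 2 or 3 bytes.
func literalsHeaderSizeExample() {
	var lh literalsHeader
	lh.setType(literalsBlockRaw) // type in bits 0-1
	lh.setSize(7)                // fits the single-byte form
	fmt.Println(lh.size())       // expected: 1
	lh.setSize(1000)             // 10 bits, needs the two-byte form
	fmt.Println(lh.size())       // expected: 2
}
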
// pushOffsets will push the recent offsets to the backup store.
func (b *blockEnc) pushOffsets() {
	b.prevRecentOffsets = b.recentOffsets
}

// popOffsets will restore the recent offsets from the backup store.
func (b *blockEnc) popOffsets() {
	b.recentOffsets = b.prevRecentOffsets
}

// matchOffset will adjust recent offsets and return the adjusted one,
// if it matches a previous offset.
func (b *blockEnc) matchOffset(offset, lits uint32) uint32 {
	// Check if offset is one of the recent offsets.
	// Adjusts the output offset accordingly.
	// Gives a tiny bit of compression, typically around 1%.
	if lits > 0 {
		switch offset {
		case b.recentOffsets[0]:
			offset = 1
		case b.recentOffsets[1]:
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 2
		case b.recentOffsets[2]:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 3
		default:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset += 3
		}
	} else {
		switch offset {
		case b.recentOffsets[1]:
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 1
		case b.recentOffsets[2]:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 2
		case b.recentOffsets[0] - 1:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset = 3
		default:
			b.recentOffsets[2] = b.recentOffsets[1]
			b.recentOffsets[1] = b.recentOffsets[0]
			b.recentOffsets[0] = offset
			offset += 3
		}
	}
	return offset
}

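// matchOffsetExample is an illustrative sketch, not part of the original
// file: with the initial offset history {1, 4, 8}, a match at absolute
// offset 4 preceded by literals is emitted as repeat code 2, and the
// offset is promoted to the front of the history.
func matchOffsetExample() {
	var b blockEnc
	b.recentOffsets = [3]uint32{1, 4, 8}
	code := b.matchOffset(4, 1)        // lits > 0, hits recentOffsets[1]
	fmt.Println(code, b.recentOffsets) // expected: 2 [4 1 8]
}
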
// encodeRaw can be used to set the output to a raw representation of supplied bytes.
func (b *blockEnc) encodeRaw(a []byte) {
	var bh blockHeader
	bh.setLast(b.last)
	bh.setSize(uint32(len(a)))
	bh.setType(blockTypeRaw)
	b.output = bh.appendTo(b.output[:0])
	b.output = append(b.output, a...)
	if debugEncoder {
		println("Adding RAW block, length", len(a), "last:", b.last)
	}
}

// encodeRawTo will write a raw block header followed by src to dst and return the result.
func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
	var bh blockHeader
	bh.setLast(b.last)
	bh.setSize(uint32(len(src)))
	bh.setType(blockTypeRaw)
	dst = bh.appendTo(dst)
	dst = append(dst, src...)
	if debugEncoder {
		println("Adding RAW block, length", len(src), "last:", b.last)
	}
	return dst
}

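// encodeRawToExample is an illustrative sketch, not part of the original
// file: a raw block is just the 3-byte header followed by the verbatim
// payload, so 5 input bytes cost 8 output bytes. The expected bytes assume
// raw is block type 0, per the format spec.
func encodeRawToExample() {
	var b blockEnc
	b.last = true
	dst := b.encodeRawTo(nil, []byte("hello"))
	fmt.Printf("% x\n", dst) // expected: 29 00 00 68 65 6c 6c 6f
}
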
// encodeLits can be used if the block is only litLen.
func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
	var bh blockHeader
	bh.setLast(b.last)
	bh.setSize(uint32(len(lits)))

	// Don't compress extremely small blocks
	if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
		if debugEncoder {
			println("Adding RAW block, length", len(lits), "last:", b.last)
		}
		bh.setType(blockTypeRaw)
		b.output = bh.appendTo(b.output)
		b.output = append(b.output, lits...)
		return nil
	}

	var (
		out            []byte
		reUsed, single bool
		err            error
	)
	if b.dictLitEnc != nil {
		b.litEnc.TransferCTable(b.dictLitEnc)
		b.litEnc.Reuse = huff0.ReusePolicyAllow
		b.dictLitEnc = nil
	}
	if len(lits) >= 1024 {
		// Use 4 Streams.
		out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
	} else if len(lits) > 16 {
		// Use 1 stream
		single = true
		out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
	} else {
		err = huff0.ErrIncompressible
	}
	if err == nil && len(out)+5 > len(lits) {
		// If we are close, we may still be worse or equal to raw.
		var lh literalsHeader
		lh.setSizes(len(out), len(lits), single)
		if len(out)+lh.size() >= len(lits) {
			err = huff0.ErrIncompressible
		}
	}
	switch err {
	case huff0.ErrIncompressible:
		if debugEncoder {
			println("Adding RAW block, length", len(lits), "last:", b.last)
		}
		bh.setType(blockTypeRaw)
		b.output = bh.appendTo(b.output)
		b.output = append(b.output, lits...)
		return nil
	case huff0.ErrUseRLE:
		if debugEncoder {
			println("Adding RLE block, length", len(lits))
		}
		bh.setType(blockTypeRLE)
		b.output = bh.appendTo(b.output)
		b.output = append(b.output, lits[0])
		return nil
	case nil:
	default:
		return err
	}
	// Compressed...
	// Now, allow reuse
	b.litEnc.Reuse = huff0.ReusePolicyAllow
	bh.setType(blockTypeCompressed)
	var lh literalsHeader
	if reUsed {
		if debugEncoder {
			println("Reused tree, compressed to", len(out))
		}
		lh.setType(literalsBlockTreeless)
	} else {
		if debugEncoder {
			println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
		}
		lh.setType(literalsBlockCompressed)
	}
	// Set sizes
	lh.setSizes(len(out), len(lits), single)
	bh.setSize(uint32(len(out) + lh.size() + 1))

	// Write block headers.
	b.output = bh.appendTo(b.output)
	b.output = lh.appendTo(b.output)
	// Add compressed data.
	b.output = append(b.output, out...)
	// No sequences.
	b.output = append(b.output, 0)
	return nil
}

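// encodeLitsExample is an illustrative sketch, not part of the original
// file: literal runs below the compression thresholds above fall through
// to a raw block, here 3 header bytes followed by the literals (assuming
// raw is block type 0, per the format spec).
func encodeLitsExample() {
	var b blockEnc
	if err := b.encodeLits([]byte("abc"), false); err != nil {
		panic(err)
	}
	fmt.Printf("% x\n", b.output) // expected: 18 00 00 61 62 63
}
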
// encodeRLE will encode an RLE block.
func (b *blockEnc) encodeRLE(val byte, length uint32) {
	var bh blockHeader
	bh.setLast(b.last)
	bh.setSize(length)
	bh.setType(blockTypeRLE)
	b.output = bh.appendTo(b.output)
	b.output = append(b.output, val)
}

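// encodeRLEExample is an illustrative sketch, not part of the original
// file: an RLE block stores the regenerated length in the header and a
// single payload byte, which a decoder expands back to `length` copies.
// The expected bytes assume RLE is block type 1, per the format spec.
func encodeRLEExample() {
	var b blockEnc
	b.encodeRLE('a', 10)
	fmt.Printf("% x\n", b.output) // expected: 52 00 00 61
}
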
// fuzzFseEncoder can be used to fuzz the FSE encoder.
func fuzzFseEncoder(data []byte) int {
	if len(data) > maxSequences || len(data) < 2 {
		return 0
	}
	enc := fseEncoder{}
	hist := enc.Histogram()
	maxSym := uint8(0)
	for i, v := range data {
		v = v & 63
		data[i] = v
		hist[v]++
		if v > maxSym {
			maxSym = v
		}
	}
	if maxSym == 0 {
		// All 0
		return 0
	}
	maxCount := func(a []uint32) int {
		var max uint32
		for _, v := range a {
			if v > max {
				max = v
			}
		}
		return int(max)
	}
	cnt := maxCount(hist[:maxSym])
	if cnt == len(data) {
		// RLE
		return 0
	}
	enc.HistogramFinished(maxSym, cnt)
	err := enc.normalizeCount(len(data))
	if err != nil {
		return 0
	}
	_, err = enc.writeCount(nil)
	if err != nil {
		panic(err)
	}
	return 1
}

// encode will encode the block and append the output in b.output.
// Previous offset codes must be pushed if more blocks are expected.
func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
	if len(b.sequences) == 0 {
		return b.encodeLits(b.literals, rawAllLits)
	}
	if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 {
		// Check common RLE cases.
		seq := b.sequences[0]
		if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 {
			// Offset == 1 and 0 or 1 literals.
			b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen)
			return nil
		}
	}

	// We want some difference to at least account for the headers.
	saved := b.size - len(b.literals) - (b.size >> 6)
	if saved < 16 {
		if org == nil {
			return errIncompressible
		}
		b.popOffsets()
		return b.encodeLits(org, rawAllLits)
	}

	var bh blockHeader
	var lh literalsHeader
	bh.setLast(b.last)
	bh.setType(blockTypeCompressed)
	// Store offset of the block header. Needed when we know the size.
	bhOffset := len(b.output)
	b.output = bh.appendTo(b.output)

	var (
		out            []byte
		reUsed, single bool
		err            error
	)
	if b.dictLitEnc != nil {
		b.litEnc.TransferCTable(b.dictLitEnc)
		b.litEnc.Reuse = huff0.ReusePolicyAllow
		b.dictLitEnc = nil
	}
	if len(b.literals) >= 1024 && !raw {
		// Use 4 Streams.
		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
	} else if len(b.literals) > 16 && !raw {
		// Use 1 stream
		single = true
		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
	} else {
		err = huff0.ErrIncompressible
	}

	if err == nil && len(out)+5 > len(b.literals) {
		// If we are close, we may still be worse or equal to raw.
		var lh literalsHeader
		lh.setSize(len(b.literals))
		szRaw := lh.size()
		lh.setSizes(len(out), len(b.literals), single)
		szComp := lh.size()
		if len(out)+szComp >= len(b.literals)+szRaw {
			err = huff0.ErrIncompressible
		}
	}
	switch err {
	case huff0.ErrIncompressible:
		lh.setType(literalsBlockRaw)
		lh.setSize(len(b.literals))
		b.output = lh.appendTo(b.output)
		b.output = append(b.output, b.literals...)
		if debugEncoder {
			println("Adding literals RAW, length", len(b.literals))
		}
	case huff0.ErrUseRLE:
		lh.setType(literalsBlockRLE)
		lh.setSize(len(b.literals))
		b.output = lh.appendTo(b.output)
		b.output = append(b.output, b.literals[0])
		if debugEncoder {
			println("Adding literals RLE")
		}
	case nil:
		// Compressed litLen...
		if reUsed {
			if debugEncoder {
				println("reused tree")
			}
			lh.setType(literalsBlockTreeless)
		} else {
			if debugEncoder {
				println("new tree, size:", len(b.litEnc.OutTable))
			}
			lh.setType(literalsBlockCompressed)
			if debugEncoder {
				_, _, err := huff0.ReadTable(out, nil)
				if err != nil {
					panic(err)
				}
			}
		}
		lh.setSizes(len(out), len(b.literals), single)
		if debugEncoder {
			printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
			println("Adding literal header:", lh)
		}
		b.output = lh.appendTo(b.output)
		b.output = append(b.output, out...)
		b.litEnc.Reuse = huff0.ReusePolicyAllow
		if debugEncoder {
			println("Adding literals compressed")
		}
	default:
		if debugEncoder {
			println("Adding literals ERROR:", err)
		}
		return err
	}
	// Sequence compression

	// Write the number of sequences
	switch {
	case len(b.sequences) < 128:
		b.output = append(b.output, uint8(len(b.sequences)))
	case len(b.sequences) < 0x7f00: // TODO: this could be wrong
		n := len(b.sequences)
		b.output = append(b.output, 128+uint8(n>>8), uint8(n))
	default:
		n := len(b.sequences) - 0x7f00
		b.output = append(b.output, 255, uint8(n), uint8(n>>8))
	}
	if debugEncoder {
		println("Encoding", len(b.sequences), "sequences")
	}
	b.genCodes()
	llEnc := b.coders.llEnc
	ofEnc := b.coders.ofEnc
	mlEnc := b.coders.mlEnc
	err = llEnc.normalizeCount(len(b.sequences))
	if err != nil {
		return err
	}
	err = ofEnc.normalizeCount(len(b.sequences))
	if err != nil {
		return err
	}
	err = mlEnc.normalizeCount(len(b.sequences))
	if err != nil {
		return err
	}

	// Choose the best compression mode for each type.
	// Will evaluate the new vs predefined and previous.
	chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) {
		// See if predefined/previous is better
		hist := cur.count[:cur.symbolLen]
		nSize := cur.approxSize(hist) + cur.maxHeaderSize()
		predefSize := preDef.approxSize(hist)
		prevSize := prev.approxSize(hist)

		// Add a small penalty for new encoders.
		// Don't bother with extremely small (<2 byte gains).
		nSize = nSize + (nSize+2*8*16)>>4
		switch {
		case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
			if debugEncoder {
				println("Using predefined", predefSize>>3, "<=", nSize>>3)
			}
			return preDef, compModePredefined
		case prevSize <= nSize:
			if debugEncoder {
				println("Using previous", prevSize>>3, "<=", nSize>>3)
			}
			return prev, compModeRepeat
		default:
			if debugEncoder {
				println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
				println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
			}
			return cur, compModeFSE
		}
	}

	// Write compression mode
	var mode uint8
	if llEnc.useRLE {
		mode |= uint8(compModeRLE) << 6
		llEnc.setRLE(b.sequences[0].llCode)
		if debugEncoder {
			println("llEnc.useRLE")
		}
	} else {
		var m seqCompMode
		llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths])
		mode |= uint8(m) << 6
	}
	if ofEnc.useRLE {
		mode |= uint8(compModeRLE) << 4
		ofEnc.setRLE(b.sequences[0].ofCode)
		if debugEncoder {
			println("ofEnc.useRLE")
		}
	} else {
		var m seqCompMode
		ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets])
		mode |= uint8(m) << 4
	}

	if mlEnc.useRLE {
		mode |= uint8(compModeRLE) << 2
		mlEnc.setRLE(b.sequences[0].mlCode)
		if debugEncoder {
			println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
		}
	} else {
		var m seqCompMode
		mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths])
		mode |= uint8(m) << 2
	}
	b.output = append(b.output, mode)
	if debugEncoder {
		printf("Compression modes: 0b%b", mode)
	}
	b.output, err = llEnc.writeCount(b.output)
	if err != nil {
		return err
	}
	start := len(b.output)
	b.output, err = ofEnc.writeCount(b.output)
	if err != nil {
		return err
	}
	if false {
		println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount)
		fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen)
		for i, v := range ofEnc.norm[:ofEnc.symbolLen] {
			fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v)
		}
	}
	b.output, err = mlEnc.writeCount(b.output)
	if err != nil {
		return err
	}

	// Maybe in block?
	wr := &b.wr
	wr.reset(b.output)

	var ll, of, ml cState

	// Current sequence
	seq := len(b.sequences) - 1
	s := b.sequences[seq]
	llEnc.setBits(llBitsTable[:])
	mlEnc.setBits(mlBitsTable[:])
	ofEnc.setBits(nil)

	llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256]

	// We have 3 bounds checks here (and in the loop).
	// Since we are iterating backwards it is kinda hard to avoid.
	llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
	ll.init(wr, &llEnc.ct, llB)
	of.init(wr, &ofEnc.ct, ofB)
	wr.flush32()
	ml.init(wr, &mlEnc.ct, mlB)

	// Each of these lookups also generates a bounds check.
	wr.addBits32NC(s.litLen, llB.outBits)
	wr.addBits32NC(s.matchLen, mlB.outBits)
	wr.flush32()
	wr.addBits32NC(s.offset, ofB.outBits)
	if debugSequences {
		println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
	}
	seq--
	// Store sequences in reverse...
	for seq >= 0 {
		s = b.sequences[seq]

		ofB := ofTT[s.ofCode]
		wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits.
		//of.encode(ofB)
		nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16
		dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState)
		wr.addBits16NC(of.state, uint8(nbBitsOut))
		of.state = of.stateTable[dstState]

		// Accumulate extra bits.
		outBits := ofB.outBits & 31
		extraBits := uint64(s.offset & bitMask32[outBits])
		extraBitsN := outBits

		mlB := mlTT[s.mlCode]
		//ml.encode(mlB)
		nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16
		dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState)
		wr.addBits16NC(ml.state, uint8(nbBitsOut))
		ml.state = ml.stateTable[dstState]

		outBits = mlB.outBits & 31
		extraBits = extraBits<<outBits | uint64(s.matchLen&bitMask32[outBits])
		extraBitsN += outBits

		llB := llTT[s.llCode]
		//ll.encode(llB)
		nbBitsOut = (uint32(ll.state) + llB.deltaNbBits) >> 16
		dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState)
		wr.addBits16NC(ll.state, uint8(nbBitsOut))
		ll.state = ll.stateTable[dstState]

		outBits = llB.outBits & 31
		extraBits = extraBits<<outBits | uint64(s.litLen&bitMask32[outBits])
		extraBitsN += outBits

		wr.flush32()
		wr.addBits64NC(extraBits, extraBitsN)

		if debugSequences {
			println("Encoded seq", seq, s)
		}

		seq--
	}
	ml.flush(mlEnc.actualTableLog)
	of.flush(ofEnc.actualTableLog)
	ll.flush(llEnc.actualTableLog)
	wr.close()
	b.output = wr.out

	// Maybe even add a bigger margin.
	if len(b.output)-3-bhOffset >= b.size {
		// Discard and encode as raw block.
		b.output = b.encodeRawTo(b.output[:bhOffset], org)
		b.popOffsets()
		b.litEnc.Reuse = huff0.ReusePolicyNone
		return nil
	}

	// Size is output minus block header.
	bh.setSize(uint32(len(b.output)-bhOffset) - 3)
	if debugEncoder {
		println("Rewriting block header", bh)
	}
	_ = bh.appendTo(b.output[bhOffset:bhOffset])
	b.coders.setPrev(llEnc, mlEnc, ofEnc)
	return nil
}

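// appendSeqCountExample is an illustrative sketch, not part of the
// original file, mirroring the sequence-count encoding used in encode
// above: one byte below 128, two bytes below 0x7F00 (high byte first,
// offset by 128), otherwise 0xFF plus a little-endian 16-bit remainder.
func appendSeqCountExample(dst []byte, n int) []byte {
	switch {
	case n < 128:
		return append(dst, uint8(n))
	case n < 0x7f00:
		return append(dst, 128+uint8(n>>8), uint8(n))
	default:
		r := n - 0x7f00
		return append(dst, 255, uint8(r), uint8(r>>8))
	}
}
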
var errIncompressible = errors.New("incompressible")

func (b *blockEnc) genCodes() {
	if len(b.sequences) == 0 {
		// nothing to do
		return
	}
	if len(b.sequences) > math.MaxUint16 {
		panic("can only encode up to 64K sequences")
	}
	// No bounds checks after here:
	llH := b.coders.llEnc.Histogram()
	ofH := b.coders.ofEnc.Histogram()
	mlH := b.coders.mlEnc.Histogram()
	for i := range llH {
		llH[i] = 0
	}
	for i := range ofH {
		ofH[i] = 0
	}
	for i := range mlH {
		mlH[i] = 0
	}

	var llMax, ofMax, mlMax uint8
	for i := range b.sequences {
		seq := &b.sequences[i]
		v := llCode(seq.litLen)
		seq.llCode = v
		llH[v]++
		if v > llMax {
			llMax = v
		}

		v = ofCode(seq.offset)
		seq.ofCode = v
		ofH[v]++
		if v > ofMax {
			ofMax = v
		}

		v = mlCode(seq.matchLen)
		seq.mlCode = v
		mlH[v]++
		if v > mlMax {
			mlMax = v
			if debugAsserts && mlMax > maxMatchLengthSymbol {
				panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
			}
		}
	}
	maxCount := func(a []uint32) int {
		var max uint32
		for _, v := range a {
			if v > max {
				max = v
			}
		}
		return int(max)
	}
	if debugAsserts && mlMax > maxMatchLengthSymbol {
		panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
	}
	if debugAsserts && ofMax > maxOffsetBits {
		panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
	}
	if debugAsserts && llMax > maxLiteralLengthSymbol {
		panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
	}

	b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1]))
	b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1]))
	b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1]))
}
