...

Source file src/github.com/klauspost/compress/zstd/enc_fast.go

Documentation: github.com/klauspost/compress/zstd

     1  // Copyright 2019+ Klaus Post. All rights reserved.
     2  // License information can be found in the LICENSE file.
     3  // Based on work by Yann Collet, released under BSD License.
     4  
     5  package zstd
     6  
     7  import (
     8  	"fmt"
     9  )
    10  
const (
	tableBits        = 15                               // Bits used in the table
	tableSize        = 1 << tableBits                   // Size of the table
	tableShardCnt    = 1 << (tableBits - dictShardBits) // Number of shards in the table
	tableShardSize   = tableSize / tableShardCnt        // Size of an individual shard
	tableFastHashLen = 6                                // Number of input bytes hashed per table entry (see hashLen calls below)
	tableMask        = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
	maxMatchLength   = 131074        // Upper bound used to stop match extension (128 KiB + 2) — see the l < maxMatchLength loops.
)
    20  
// tableEntry is a single hash-table slot: the first 4 bytes seen at a
// position plus that position's absolute offset (s + e.cur).
type tableEntry struct {
	val    uint32 // first 4 bytes of the input at offset, used to verify a candidate match
	offset int32  // absolute position; subtract e.cur to get the in-history index
}
    25  
// fastEncoder is the fastest zstd encoder level. It keeps a single
// hash table of 6-byte hashes on top of the shared fastBase state.
type fastEncoder struct {
	fastBase
	table [tableSize]tableEntry
}
    30  
// fastEncoderDict is a fastEncoder that can be primed from a dictionary.
// dictTable holds the pristine dictionary hashes; tableShardDirty tracks
// which shards of table were overwritten so Reset can restore only those.
type fastEncoderDict struct {
	fastEncoder
	dictTable       []tableEntry       // immutable snapshot of the table built from the dictionary
	tableShardDirty [tableShardCnt]bool // per-shard dirty flags set by markShardDirty
	allDirty        bool                // when true, every shard must be restored from dictTable
}
    37  
// Encode mimics functionality in zstd_fast.c.
// It appends src to the encoder history, searches it for matches using a
// single hash table, and appends the resulting sequences and literals to blk.
func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			// No history: a full table clear is enough.
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		// Too small to find matches; emit everything as literals.
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src so all indices below are into the full history buffer.
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals copies src[nextEmit:until] into the block and records the
	// literal length on the sequence.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			// Store current positions before checking, as zstd_fast.c does.
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)
				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)

					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match at s+1
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			// No match: skip ahead, accelerating as the literal run grows.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	// Flush any trailing literals and persist the repeat offsets for the next block.
	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
   296  
// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debugEncoder {
		if len(src) > maxCompressedBlockSize {
			panic("src too big")
		}
	}

	// Protect against e.cur wraparound.
	if e.cur >= e.bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	// src is not added to history, so we index it directly from 0.
	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		// Too small to find matches; emit everything as literals.
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals copies src[nextEmit:until] into the block and records the
	// literal length on the sequence.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches

		for {
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			// Store current positions before checking, as zstd_fast.c does.
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)

					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match at s+1
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			// No match: skip ahead, accelerating as the literal run grows.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && t < 0 {
			panic(fmt.Sprintf("t (%d) < 0 ", t))
		}
		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	// Flush any trailing literals. Recent offsets are intentionally not
	// stored back: there are no following blocks.
	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < e.bufferReset {
		e.cur += int32(len(src))
	}
}
   544  
// Encode will encode the content, with a dictionary if initialized for it.
// It mirrors fastEncoder.Encode but also marks overwritten table shards dirty
// so Reset can cheaply restore the dictionary table afterwards.
func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	// Large inputs touch most shards anyway; fall back to the plain encoder
	// (skipping the per-write dirty bookkeeping) and mark everything dirty.
	if e.allDirty || len(src) > 32<<10 {
		e.fastEncoder.Encode(blk, src)
		e.allDirty = true
		return
	}
	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [tableSize]tableEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		// Too small to find matches; emit everything as literals.
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src so all indices below are into the full history buffer.
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals copies src[nextEmit:until] into the block and records the
	// literal length on the sequence.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			// Store current positions and flag the touched shards as dirty.
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
			e.markShardDirty(nextHash2)

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)

					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match at s+1
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			// No match: skip ahead, accelerating as the literal run grows.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	// Flush any trailing literals and persist the repeat offsets for the next block.
	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
   809  
   810  // ResetDict will reset and set a dictionary if not nil
   811  func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
   812  	e.resetBase(d, singleBlock)
   813  	if d != nil {
   814  		panic("fastEncoder: Reset with dict")
   815  	}
   816  }
   817  
   818  // ResetDict will reset and set a dictionary if not nil
   819  func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
   820  	e.resetBase(d, singleBlock)
   821  	if d == nil {
   822  		return
   823  	}
   824  
   825  	// Init or copy dict table
   826  	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
   827  		if len(e.dictTable) != len(e.table) {
   828  			e.dictTable = make([]tableEntry, len(e.table))
   829  		}
   830  		if true {
   831  			end := e.maxMatchOff + int32(len(d.content)) - 8
   832  			for i := e.maxMatchOff; i < end; i += 2 {
   833  				const hashLog = tableBits
   834  
   835  				cv := load6432(d.content, i-e.maxMatchOff)
   836  				nextHash := hashLen(cv, hashLog, tableFastHashLen)     // 0 -> 6
   837  				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
   838  				e.dictTable[nextHash] = tableEntry{
   839  					val:    uint32(cv),
   840  					offset: i,
   841  				}
   842  				e.dictTable[nextHash1] = tableEntry{
   843  					val:    uint32(cv >> 8),
   844  					offset: i + 1,
   845  				}
   846  			}
   847  		}
   848  		e.lastDictID = d.id
   849  		e.allDirty = true
   850  	}
   851  
   852  	e.cur = e.maxMatchOff
   853  	dirtyShardCnt := 0
   854  	if !e.allDirty {
   855  		for i := range e.tableShardDirty {
   856  			if e.tableShardDirty[i] {
   857  				dirtyShardCnt++
   858  			}
   859  		}
   860  	}
   861  
   862  	const shardCnt = tableShardCnt
   863  	const shardSize = tableShardSize
   864  	if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
   865  		//copy(e.table[:], e.dictTable)
   866  		e.table = *(*[tableSize]tableEntry)(e.dictTable)
   867  		for i := range e.tableShardDirty {
   868  			e.tableShardDirty[i] = false
   869  		}
   870  		e.allDirty = false
   871  		return
   872  	}
   873  	for i := range e.tableShardDirty {
   874  		if !e.tableShardDirty[i] {
   875  			continue
   876  		}
   877  
   878  		//copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
   879  		*(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
   880  		e.tableShardDirty[i] = false
   881  	}
   882  	e.allDirty = false
   883  }
   884  
   885  func (e *fastEncoderDict) markAllShardsDirty() {
   886  	e.allDirty = true
   887  }
   888  
   889  func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
   890  	e.tableShardDirty[entryNum/tableShardSize] = true
   891  }
   892  

View as plain text