...

Source file src/github.com/klauspost/compress/s2/encode_all.go

Documentation: github.com/klauspost/compress/s2

     1  // Copyright 2016 The Snappy-Go Authors. All rights reserved.
     2  // Copyright (c) 2019 Klaus Post. All rights reserved.
     3  // Use of this source code is governed by a BSD-style
     4  // license that can be found in the LICENSE file.
     5  
     6  package s2
     7  
     8  import (
     9  	"bytes"
    10  	"encoding/binary"
    11  	"fmt"
    12  	"math/bits"
    13  )
    14  
    15  func load32(b []byte, i int) uint32 {
    16  	return binary.LittleEndian.Uint32(b[i:])
    17  }
    18  
    19  func load64(b []byte, i int) uint64 {
    20  	return binary.LittleEndian.Uint64(b[i:])
    21  }
    22  
    23  // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
    24  // Preferably h should be a constant and should always be <64.
    25  func hash6(u uint64, h uint8) uint32 {
    26  	const prime6bytes = 227718039650203
    27  	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
    28  }
    29  
    30  func encodeGo(dst, src []byte) []byte {
    31  	if n := MaxEncodedLen(len(src)); n < 0 {
    32  		panic(ErrTooLarge)
    33  	} else if len(dst) < n {
    34  		dst = make([]byte, n)
    35  	}
    36  
    37  	// The block starts with the varint-encoded length of the decompressed bytes.
    38  	d := binary.PutUvarint(dst, uint64(len(src)))
    39  
    40  	if len(src) == 0 {
    41  		return dst[:d]
    42  	}
    43  	if len(src) < minNonLiteralBlockSize {
    44  		d += emitLiteral(dst[d:], src)
    45  		return dst[:d]
    46  	}
    47  	n := encodeBlockGo(dst[d:], src)
    48  	if n > 0 {
    49  		d += n
    50  		return dst[:d]
    51  	}
    52  	// Not compressible
    53  	d += emitLiteral(dst[d:], src)
    54  	return dst[:d]
    55  }
    56  
    57  // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
    58  // assumes that the varint-encoded length of the decompressed bytes has already
    59  // been written.
    60  //
    61  // It also assumes that:
    62  //
    63  //	len(dst) >= MaxEncodedLen(len(src)) &&
    64  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
    65  func encodeBlockGo(dst, src []byte) (d int) {
    66  	// Initialize the hash table.
    67  	const (
    68  		tableBits    = 14
    69  		maxTableSize = 1 << tableBits
    70  
    71  		debug = false
    72  	)
    73  
    74  	var table [maxTableSize]uint32
    75  
    76  	// sLimit is when to stop looking for offset/length copies. The inputMargin
    77  	// lets us use a fast path for emitLiteral in the main loop, while we are
    78  	// looking for copies.
    79  	sLimit := len(src) - inputMargin
    80  
    81  	// Bail if we can't compress to at least this.
    82  	dstLimit := len(src) - len(src)>>5 - 5
    83  
    84  	// nextEmit is where in src the next emitLiteral should start from.
    85  	nextEmit := 0
    86  
    87  	// The encoded form must start with a literal, as there are no previous
    88  	// bytes to copy, so we start looking for hash matches at s == 1.
    89  	s := 1
    90  	cv := load64(src, s)
    91  
    92  	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
    93  	repeat := 1
    94  
    95  	for {
    96  		candidate := 0
    97  		for {
    98  			// Next src position to check
    99  			nextS := s + (s-nextEmit)>>6 + 4
   100  			if nextS > sLimit {
   101  				goto emitRemainder
   102  			}
   103  			hash0 := hash6(cv, tableBits)
   104  			hash1 := hash6(cv>>8, tableBits)
   105  			candidate = int(table[hash0])
   106  			candidate2 := int(table[hash1])
   107  			table[hash0] = uint32(s)
   108  			table[hash1] = uint32(s + 1)
   109  			hash2 := hash6(cv>>16, tableBits)
   110  
   111  			// Check repeat at offset checkRep.
   112  			const checkRep = 1
   113  			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
   114  				base := s + checkRep
   115  				// Extend back
   116  				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
   117  					i--
   118  					base--
   119  				}
   120  
   121  				// Bail if we exceed the maximum size.
   122  				if d+(base-nextEmit) > dstLimit {
   123  					return 0
   124  				}
   125  
   126  				d += emitLiteral(dst[d:], src[nextEmit:base])
   127  
   128  				// Extend forward
   129  				candidate := s - repeat + 4 + checkRep
   130  				s += 4 + checkRep
   131  				for s <= sLimit {
   132  					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
   133  						s += bits.TrailingZeros64(diff) >> 3
   134  						break
   135  					}
   136  					s += 8
   137  					candidate += 8
   138  				}
   139  				if debug {
   140  					// Validate match.
   141  					if s <= candidate {
   142  						panic("s <= candidate")
   143  					}
   144  					a := src[base:s]
   145  					b := src[base-repeat : base-repeat+(s-base)]
   146  					if !bytes.Equal(a, b) {
   147  						panic("mismatch")
   148  					}
   149  				}
   150  				if nextEmit > 0 {
   151  					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
   152  					d += emitRepeat(dst[d:], repeat, s-base)
   153  				} else {
   154  					// First match, cannot be repeat.
   155  					d += emitCopy(dst[d:], repeat, s-base)
   156  				}
   157  				nextEmit = s
   158  				if s >= sLimit {
   159  					goto emitRemainder
   160  				}
   161  				cv = load64(src, s)
   162  				continue
   163  			}
   164  
   165  			if uint32(cv) == load32(src, candidate) {
   166  				break
   167  			}
   168  			candidate = int(table[hash2])
   169  			if uint32(cv>>8) == load32(src, candidate2) {
   170  				table[hash2] = uint32(s + 2)
   171  				candidate = candidate2
   172  				s++
   173  				break
   174  			}
   175  			table[hash2] = uint32(s + 2)
   176  			if uint32(cv>>16) == load32(src, candidate) {
   177  				s += 2
   178  				break
   179  			}
   180  
   181  			cv = load64(src, nextS)
   182  			s = nextS
   183  		}
   184  
   185  		// Extend backwards.
   186  		// The top bytes will be rechecked to get the full match.
   187  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
   188  			candidate--
   189  			s--
   190  		}
   191  
   192  		// Bail if we exceed the maximum size.
   193  		if d+(s-nextEmit) > dstLimit {
   194  			return 0
   195  		}
   196  
   197  		// A 4-byte match has been found. We'll later see if more than 4 bytes
   198  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
   199  		// them as literal bytes.
   200  
   201  		d += emitLiteral(dst[d:], src[nextEmit:s])
   202  
   203  		// Call emitCopy, and then see if another emitCopy could be our next
   204  		// move. Repeat until we find no match for the input immediately after
   205  		// what was consumed by the last emitCopy call.
   206  		//
   207  		// If we exit this loop normally then we need to call emitLiteral next,
   208  		// though we don't yet know how big the literal will be. We handle that
   209  		// by proceeding to the next iteration of the main loop. We also can
   210  		// exit this loop via goto if we get close to exhausting the input.
   211  		for {
   212  			// Invariant: we have a 4-byte match at s, and no need to emit any
   213  			// literal bytes prior to s.
   214  			base := s
   215  			repeat = base - candidate
   216  
   217  			// Extend the 4-byte match as long as possible.
   218  			s += 4
   219  			candidate += 4
   220  			for s <= len(src)-8 {
   221  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
   222  					s += bits.TrailingZeros64(diff) >> 3
   223  					break
   224  				}
   225  				s += 8
   226  				candidate += 8
   227  			}
   228  
   229  			d += emitCopy(dst[d:], repeat, s-base)
   230  			if debug {
   231  				// Validate match.
   232  				if s <= candidate {
   233  					panic("s <= candidate")
   234  				}
   235  				a := src[base:s]
   236  				b := src[base-repeat : base-repeat+(s-base)]
   237  				if !bytes.Equal(a, b) {
   238  					panic("mismatch")
   239  				}
   240  			}
   241  
   242  			nextEmit = s
   243  			if s >= sLimit {
   244  				goto emitRemainder
   245  			}
   246  
   247  			if d > dstLimit {
   248  				// Do we have space for more, if not bail.
   249  				return 0
   250  			}
   251  			// Check for an immediate match, otherwise start search at s+1
   252  			x := load64(src, s-2)
   253  			m2Hash := hash6(x, tableBits)
   254  			currHash := hash6(x>>16, tableBits)
   255  			candidate = int(table[currHash])
   256  			table[m2Hash] = uint32(s - 2)
   257  			table[currHash] = uint32(s)
   258  			if debug && s == candidate {
   259  				panic("s == candidate")
   260  			}
   261  			if uint32(x>>16) != load32(src, candidate) {
   262  				cv = load64(src, s+1)
   263  				s++
   264  				break
   265  			}
   266  		}
   267  	}
   268  
   269  emitRemainder:
   270  	if nextEmit < len(src) {
   271  		// Bail if we exceed the maximum size.
   272  		if d+len(src)-nextEmit > dstLimit {
   273  			return 0
   274  		}
   275  		d += emitLiteral(dst[d:], src[nextEmit:])
   276  	}
   277  	return d
   278  }
   279  
   280  func encodeBlockSnappyGo(dst, src []byte) (d int) {
   281  	// Initialize the hash table.
   282  	const (
   283  		tableBits    = 14
   284  		maxTableSize = 1 << tableBits
   285  	)
   286  
   287  	var table [maxTableSize]uint32
   288  
   289  	// sLimit is when to stop looking for offset/length copies. The inputMargin
   290  	// lets us use a fast path for emitLiteral in the main loop, while we are
   291  	// looking for copies.
   292  	sLimit := len(src) - inputMargin
   293  
   294  	// Bail if we can't compress to at least this.
   295  	dstLimit := len(src) - len(src)>>5 - 5
   296  
   297  	// nextEmit is where in src the next emitLiteral should start from.
   298  	nextEmit := 0
   299  
   300  	// The encoded form must start with a literal, as there are no previous
   301  	// bytes to copy, so we start looking for hash matches at s == 1.
   302  	s := 1
   303  	cv := load64(src, s)
   304  
   305  	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
   306  	repeat := 1
   307  
   308  	for {
   309  		candidate := 0
   310  		for {
   311  			// Next src position to check
   312  			nextS := s + (s-nextEmit)>>6 + 4
   313  			if nextS > sLimit {
   314  				goto emitRemainder
   315  			}
   316  			hash0 := hash6(cv, tableBits)
   317  			hash1 := hash6(cv>>8, tableBits)
   318  			candidate = int(table[hash0])
   319  			candidate2 := int(table[hash1])
   320  			table[hash0] = uint32(s)
   321  			table[hash1] = uint32(s + 1)
   322  			hash2 := hash6(cv>>16, tableBits)
   323  
   324  			// Check repeat at offset checkRep.
   325  			const checkRep = 1
   326  			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
   327  				base := s + checkRep
   328  				// Extend back
   329  				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
   330  					i--
   331  					base--
   332  				}
   333  				// Bail if we exceed the maximum size.
   334  				if d+(base-nextEmit) > dstLimit {
   335  					return 0
   336  				}
   337  
   338  				d += emitLiteral(dst[d:], src[nextEmit:base])
   339  
   340  				// Extend forward
   341  				candidate := s - repeat + 4 + checkRep
   342  				s += 4 + checkRep
   343  				for s <= sLimit {
   344  					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
   345  						s += bits.TrailingZeros64(diff) >> 3
   346  						break
   347  					}
   348  					s += 8
   349  					candidate += 8
   350  				}
   351  
   352  				d += emitCopyNoRepeat(dst[d:], repeat, s-base)
   353  				nextEmit = s
   354  				if s >= sLimit {
   355  					goto emitRemainder
   356  				}
   357  
   358  				cv = load64(src, s)
   359  				continue
   360  			}
   361  
   362  			if uint32(cv) == load32(src, candidate) {
   363  				break
   364  			}
   365  			candidate = int(table[hash2])
   366  			if uint32(cv>>8) == load32(src, candidate2) {
   367  				table[hash2] = uint32(s + 2)
   368  				candidate = candidate2
   369  				s++
   370  				break
   371  			}
   372  			table[hash2] = uint32(s + 2)
   373  			if uint32(cv>>16) == load32(src, candidate) {
   374  				s += 2
   375  				break
   376  			}
   377  
   378  			cv = load64(src, nextS)
   379  			s = nextS
   380  		}
   381  
   382  		// Extend backwards
   383  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
   384  			candidate--
   385  			s--
   386  		}
   387  
   388  		// Bail if we exceed the maximum size.
   389  		if d+(s-nextEmit) > dstLimit {
   390  			return 0
   391  		}
   392  
   393  		// A 4-byte match has been found. We'll later see if more than 4 bytes
   394  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
   395  		// them as literal bytes.
   396  
   397  		d += emitLiteral(dst[d:], src[nextEmit:s])
   398  
   399  		// Call emitCopy, and then see if another emitCopy could be our next
   400  		// move. Repeat until we find no match for the input immediately after
   401  		// what was consumed by the last emitCopy call.
   402  		//
   403  		// If we exit this loop normally then we need to call emitLiteral next,
   404  		// though we don't yet know how big the literal will be. We handle that
   405  		// by proceeding to the next iteration of the main loop. We also can
   406  		// exit this loop via goto if we get close to exhausting the input.
   407  		for {
   408  			// Invariant: we have a 4-byte match at s, and no need to emit any
   409  			// literal bytes prior to s.
   410  			base := s
   411  			repeat = base - candidate
   412  
   413  			// Extend the 4-byte match as long as possible.
   414  			s += 4
   415  			candidate += 4
   416  			for s <= len(src)-8 {
   417  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
   418  					s += bits.TrailingZeros64(diff) >> 3
   419  					break
   420  				}
   421  				s += 8
   422  				candidate += 8
   423  			}
   424  
   425  			d += emitCopyNoRepeat(dst[d:], repeat, s-base)
   426  			if false {
   427  				// Validate match.
   428  				a := src[base:s]
   429  				b := src[base-repeat : base-repeat+(s-base)]
   430  				if !bytes.Equal(a, b) {
   431  					panic("mismatch")
   432  				}
   433  			}
   434  
   435  			nextEmit = s
   436  			if s >= sLimit {
   437  				goto emitRemainder
   438  			}
   439  
   440  			if d > dstLimit {
   441  				// Do we have space for more, if not bail.
   442  				return 0
   443  			}
   444  			// Check for an immediate match, otherwise start search at s+1
   445  			x := load64(src, s-2)
   446  			m2Hash := hash6(x, tableBits)
   447  			currHash := hash6(x>>16, tableBits)
   448  			candidate = int(table[currHash])
   449  			table[m2Hash] = uint32(s - 2)
   450  			table[currHash] = uint32(s)
   451  			if uint32(x>>16) != load32(src, candidate) {
   452  				cv = load64(src, s+1)
   453  				s++
   454  				break
   455  			}
   456  		}
   457  	}
   458  
   459  emitRemainder:
   460  	if nextEmit < len(src) {
   461  		// Bail if we exceed the maximum size.
   462  		if d+len(src)-nextEmit > dstLimit {
   463  			return 0
   464  		}
   465  		d += emitLiteral(dst[d:], src[nextEmit:])
   466  	}
   467  	return d
   468  }
   469  
   470  // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
   471  // assumes that the varint-encoded length of the decompressed bytes has already
   472  // been written.
   473  //
   474  // It also assumes that:
   475  //
   476  //	len(dst) >= MaxEncodedLen(len(src)) &&
   477  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
   478  func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
   479  	// Initialize the hash table.
   480  	const (
   481  		tableBits    = 14
   482  		maxTableSize = 1 << tableBits
   483  		maxAhead     = 8 // maximum bytes ahead without checking sLimit
   484  
   485  		debug = false
   486  	)
   487  	dict.initFast()
   488  
   489  	var table [maxTableSize]uint32
   490  
   491  	// sLimit is when to stop looking for offset/length copies. The inputMargin
   492  	// lets us use a fast path for emitLiteral in the main loop, while we are
   493  	// looking for copies.
   494  	sLimit := len(src) - inputMargin
   495  	if sLimit > MaxDictSrcOffset-maxAhead {
   496  		sLimit = MaxDictSrcOffset - maxAhead
   497  	}
   498  
   499  	// Bail if we can't compress to at least this.
   500  	dstLimit := len(src) - len(src)>>5 - 5
   501  
   502  	// nextEmit is where in src the next emitLiteral should start from.
   503  	nextEmit := 0
   504  
   505  	// The encoded form can start with a dict entry (copy or repeat).
   506  	s := 0
   507  
   508  	// Convert dict repeat to offset
   509  	repeat := len(dict.dict) - dict.repeat
   510  	cv := load64(src, 0)
   511  
   512  	// While in dict
   513  searchDict:
   514  	for {
   515  		// Next src position to check
   516  		nextS := s + (s-nextEmit)>>6 + 4
   517  		hash0 := hash6(cv, tableBits)
   518  		hash1 := hash6(cv>>8, tableBits)
   519  		if nextS > sLimit {
   520  			if debug {
   521  				fmt.Println("slimit reached", s, nextS)
   522  			}
   523  			break searchDict
   524  		}
   525  		candidateDict := int(dict.fastTable[hash0])
   526  		candidateDict2 := int(dict.fastTable[hash1])
   527  		candidate2 := int(table[hash1])
   528  		candidate := int(table[hash0])
   529  		table[hash0] = uint32(s)
   530  		table[hash1] = uint32(s + 1)
   531  		hash2 := hash6(cv>>16, tableBits)
   532  
   533  		// Check repeat at offset checkRep.
   534  		const checkRep = 1
   535  
   536  		if repeat > s {
   537  			candidate := len(dict.dict) - repeat + s
   538  			if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
   539  				// Extend back
   540  				base := s
   541  				for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
   542  					i--
   543  					base--
   544  				}
   545  				// Bail if we exceed the maximum size.
   546  				if d+(base-nextEmit) > dstLimit {
   547  					return 0
   548  				}
   549  
   550  				d += emitLiteral(dst[d:], src[nextEmit:base])
   551  				if debug && nextEmit != base {
   552  					fmt.Println("emitted ", base-nextEmit, "literals")
   553  				}
   554  				s += 4
   555  				candidate += 4
   556  				for candidate < len(dict.dict)-8 && s <= len(src)-8 {
   557  					if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
   558  						s += bits.TrailingZeros64(diff) >> 3
   559  						break
   560  					}
   561  					s += 8
   562  					candidate += 8
   563  				}
   564  				d += emitRepeat(dst[d:], repeat, s-base)
   565  				if debug {
   566  					fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
   567  				}
   568  				nextEmit = s
   569  				if s >= sLimit {
   570  					break searchDict
   571  				}
   572  				cv = load64(src, s)
   573  				continue
   574  			}
   575  		} else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
   576  			base := s + checkRep
   577  			// Extend back
   578  			for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
   579  				i--
   580  				base--
   581  			}
   582  			d += emitLiteral(dst[d:], src[nextEmit:base])
   583  			if debug && nextEmit != base {
   584  				fmt.Println("emitted ", base-nextEmit, "literals")
   585  			}
   586  
   587  			// Extend forward
   588  			candidate := s - repeat + 4 + checkRep
   589  			s += 4 + checkRep
   590  			for s <= sLimit {
   591  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
   592  					s += bits.TrailingZeros64(diff) >> 3
   593  					break
   594  				}
   595  				s += 8
   596  				candidate += 8
   597  			}
   598  			if debug {
   599  				// Validate match.
   600  				if s <= candidate {
   601  					panic("s <= candidate")
   602  				}
   603  				a := src[base:s]
   604  				b := src[base-repeat : base-repeat+(s-base)]
   605  				if !bytes.Equal(a, b) {
   606  					panic("mismatch")
   607  				}
   608  			}
   609  
   610  			if nextEmit > 0 {
   611  				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
   612  				d += emitRepeat(dst[d:], repeat, s-base)
   613  			} else {
   614  				// First match, cannot be repeat.
   615  				d += emitCopy(dst[d:], repeat, s-base)
   616  			}
   617  
   618  			nextEmit = s
   619  			if s >= sLimit {
   620  				break searchDict
   621  			}
   622  			if debug {
   623  				fmt.Println("emitted reg repeat", s-base, "s:", s)
   624  			}
   625  			cv = load64(src, s)
   626  			continue searchDict
   627  		}
   628  		if s == 0 {
   629  			cv = load64(src, nextS)
   630  			s = nextS
   631  			continue searchDict
   632  		}
   633  		// Start with table. These matches will always be closer.
   634  		if uint32(cv) == load32(src, candidate) {
   635  			goto emitMatch
   636  		}
   637  		candidate = int(table[hash2])
   638  		if uint32(cv>>8) == load32(src, candidate2) {
   639  			table[hash2] = uint32(s + 2)
   640  			candidate = candidate2
   641  			s++
   642  			goto emitMatch
   643  		}
   644  
   645  		// Check dict. Dicts have longer offsets, so we want longer matches.
   646  		if cv == load64(dict.dict, candidateDict) {
   647  			table[hash2] = uint32(s + 2)
   648  			goto emitDict
   649  		}
   650  
   651  		candidateDict = int(dict.fastTable[hash2])
   652  		// Check if upper 7 bytes match
   653  		if candidateDict2 >= 1 {
   654  			if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
   655  				table[hash2] = uint32(s + 2)
   656  				candidateDict = candidateDict2
   657  				s++
   658  				goto emitDict
   659  			}
   660  		}
   661  
   662  		table[hash2] = uint32(s + 2)
   663  		if uint32(cv>>16) == load32(src, candidate) {
   664  			s += 2
   665  			goto emitMatch
   666  		}
   667  		if candidateDict >= 2 {
   668  			// Check if upper 6 bytes match
   669  			if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
   670  				s += 2
   671  				goto emitDict
   672  			}
   673  		}
   674  
   675  		cv = load64(src, nextS)
   676  		s = nextS
   677  		continue searchDict
   678  
   679  	emitDict:
   680  		{
   681  			if debug {
   682  				if load32(dict.dict, candidateDict) != load32(src, s) {
   683  					panic("dict emit mismatch")
   684  				}
   685  			}
   686  			// Extend backwards.
   687  			// The top bytes will be rechecked to get the full match.
   688  			for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
   689  				candidateDict--
   690  				s--
   691  			}
   692  
   693  			// Bail if we exceed the maximum size.
   694  			if d+(s-nextEmit) > dstLimit {
   695  				return 0
   696  			}
   697  
   698  			// A 4-byte match has been found. We'll later see if more than 4 bytes
   699  			// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
   700  			// them as literal bytes.
   701  
   702  			d += emitLiteral(dst[d:], src[nextEmit:s])
   703  			if debug && nextEmit != s {
   704  				fmt.Println("emitted ", s-nextEmit, "literals")
   705  			}
   706  			{
   707  				// Invariant: we have a 4-byte match at s, and no need to emit any
   708  				// literal bytes prior to s.
   709  				base := s
   710  				repeat = s + (len(dict.dict)) - candidateDict
   711  
   712  				// Extend the 4-byte match as long as possible.
   713  				s += 4
   714  				candidateDict += 4
   715  				for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
   716  					if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
   717  						s += bits.TrailingZeros64(diff) >> 3
   718  						break
   719  					}
   720  					s += 8
   721  					candidateDict += 8
   722  				}
   723  
   724  				// Matches longer than 64 are split.
   725  				if s <= sLimit || s-base < 8 {
   726  					d += emitCopy(dst[d:], repeat, s-base)
   727  				} else {
   728  					// Split to ensure we don't start a copy within next block
   729  					d += emitCopy(dst[d:], repeat, 4)
   730  					d += emitRepeat(dst[d:], repeat, s-base-4)
   731  				}
   732  				if false {
   733  					// Validate match.
   734  					if s <= candidate {
   735  						panic("s <= candidate")
   736  					}
   737  					a := src[base:s]
   738  					b := dict.dict[base-repeat : base-repeat+(s-base)]
   739  					if !bytes.Equal(a, b) {
   740  						panic("mismatch")
   741  					}
   742  				}
   743  				if debug {
   744  					fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
   745  				}
   746  				nextEmit = s
   747  				if s >= sLimit {
   748  					break searchDict
   749  				}
   750  
   751  				if d > dstLimit {
   752  					// Do we have space for more, if not bail.
   753  					return 0
   754  				}
   755  
   756  				// Index and continue loop to try new candidate.
   757  				x := load64(src, s-2)
   758  				m2Hash := hash6(x, tableBits)
   759  				currHash := hash6(x>>8, tableBits)
   760  				table[m2Hash] = uint32(s - 2)
   761  				table[currHash] = uint32(s - 1)
   762  				cv = load64(src, s)
   763  			}
   764  			continue
   765  		}
   766  	emitMatch:
   767  
   768  		// Extend backwards.
   769  		// The top bytes will be rechecked to get the full match.
   770  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
   771  			candidate--
   772  			s--
   773  		}
   774  
   775  		// Bail if we exceed the maximum size.
   776  		if d+(s-nextEmit) > dstLimit {
   777  			return 0
   778  		}
   779  
   780  		// A 4-byte match has been found. We'll later see if more than 4 bytes
   781  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
   782  		// them as literal bytes.
   783  
   784  		d += emitLiteral(dst[d:], src[nextEmit:s])
   785  		if debug && nextEmit != s {
   786  			fmt.Println("emitted ", s-nextEmit, "literals")
   787  		}
   788  		// Call emitCopy, and then see if another emitCopy could be our next
   789  		// move. Repeat until we find no match for the input immediately after
   790  		// what was consumed by the last emitCopy call.
   791  		//
   792  		// If we exit this loop normally then we need to call emitLiteral next,
   793  		// though we don't yet know how big the literal will be. We handle that
   794  		// by proceeding to the next iteration of the main loop. We also can
   795  		// exit this loop via goto if we get close to exhausting the input.
   796  		for {
   797  			// Invariant: we have a 4-byte match at s, and no need to emit any
   798  			// literal bytes prior to s.
   799  			base := s
   800  			repeat = base - candidate
   801  
   802  			// Extend the 4-byte match as long as possible.
   803  			s += 4
   804  			candidate += 4
   805  			for s <= len(src)-8 {
   806  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
   807  					s += bits.TrailingZeros64(diff) >> 3
   808  					break
   809  				}
   810  				s += 8
   811  				candidate += 8
   812  			}
   813  
   814  			d += emitCopy(dst[d:], repeat, s-base)
   815  			if debug {
   816  				// Validate match.
   817  				if s <= candidate {
   818  					panic("s <= candidate")
   819  				}
   820  				a := src[base:s]
   821  				b := src[base-repeat : base-repeat+(s-base)]
   822  				if !bytes.Equal(a, b) {
   823  					panic("mismatch")
   824  				}
   825  			}
   826  			if debug {
   827  				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
   828  			}
   829  			nextEmit = s
   830  			if s >= sLimit {
   831  				break searchDict
   832  			}
   833  
   834  			if d > dstLimit {
   835  				// Do we have space for more, if not bail.
   836  				return 0
   837  			}
   838  			// Check for an immediate match, otherwise start search at s+1
   839  			x := load64(src, s-2)
   840  			m2Hash := hash6(x, tableBits)
   841  			currHash := hash6(x>>16, tableBits)
   842  			candidate = int(table[currHash])
   843  			table[m2Hash] = uint32(s - 2)
   844  			table[currHash] = uint32(s)
   845  			if debug && s == candidate {
   846  				panic("s == candidate")
   847  			}
   848  			if uint32(x>>16) != load32(src, candidate) {
   849  				cv = load64(src, s+1)
   850  				s++
   851  				break
   852  			}
   853  		}
   854  	}
   855  
   856  	// Search without dict:
   857  	if repeat > s {
   858  		repeat = 0
   859  	}
   860  
   861  	// No more dict
   862  	sLimit = len(src) - inputMargin
   863  	if s >= sLimit {
   864  		goto emitRemainder
   865  	}
   866  	if debug {
   867  		fmt.Println("non-dict matching at", s, "repeat:", repeat)
   868  	}
   869  	cv = load64(src, s)
   870  	if debug {
   871  		fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
   872  	}
   873  	for {
   874  		candidate := 0
   875  		for {
   876  			// Next src position to check
   877  			nextS := s + (s-nextEmit)>>6 + 4
   878  			if nextS > sLimit {
   879  				goto emitRemainder
   880  			}
   881  			hash0 := hash6(cv, tableBits)
   882  			hash1 := hash6(cv>>8, tableBits)
   883  			candidate = int(table[hash0])
   884  			candidate2 := int(table[hash1])
   885  			table[hash0] = uint32(s)
   886  			table[hash1] = uint32(s + 1)
   887  			hash2 := hash6(cv>>16, tableBits)
   888  
   889  			// Check repeat at offset checkRep.
   890  			const checkRep = 1
   891  			if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
   892  				base := s + checkRep
   893  				// Extend back
   894  				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
   895  					i--
   896  					base--
   897  				}
   898  				// Bail if we exceed the maximum size.
   899  				if d+(base-nextEmit) > dstLimit {
   900  					return 0
   901  				}
   902  
   903  				d += emitLiteral(dst[d:], src[nextEmit:base])
   904  				if debug && nextEmit != base {
   905  					fmt.Println("emitted ", base-nextEmit, "literals")
   906  				}
   907  				// Extend forward
   908  				candidate := s - repeat + 4 + checkRep
   909  				s += 4 + checkRep
   910  				for s <= sLimit {
   911  					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
   912  						s += bits.TrailingZeros64(diff) >> 3
   913  						break
   914  					}
   915  					s += 8
   916  					candidate += 8
   917  				}
   918  				if debug {
   919  					// Validate match.
   920  					if s <= candidate {
   921  						panic("s <= candidate")
   922  					}
   923  					a := src[base:s]
   924  					b := src[base-repeat : base-repeat+(s-base)]
   925  					if !bytes.Equal(a, b) {
   926  						panic("mismatch")
   927  					}
   928  				}
   929  				if nextEmit > 0 {
   930  					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
   931  					d += emitRepeat(dst[d:], repeat, s-base)
   932  				} else {
   933  					// First match, cannot be repeat.
   934  					d += emitCopy(dst[d:], repeat, s-base)
   935  				}
   936  				if debug {
   937  					fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
   938  				}
   939  				nextEmit = s
   940  				if s >= sLimit {
   941  					goto emitRemainder
   942  				}
   943  
   944  				cv = load64(src, s)
   945  				continue
   946  			}
   947  
   948  			if uint32(cv) == load32(src, candidate) {
   949  				break
   950  			}
   951  			candidate = int(table[hash2])
   952  			if uint32(cv>>8) == load32(src, candidate2) {
   953  				table[hash2] = uint32(s + 2)
   954  				candidate = candidate2
   955  				s++
   956  				break
   957  			}
   958  			table[hash2] = uint32(s + 2)
   959  			if uint32(cv>>16) == load32(src, candidate) {
   960  				s += 2
   961  				break
   962  			}
   963  
   964  			cv = load64(src, nextS)
   965  			s = nextS
   966  		}
   967  
   968  		// Extend backwards.
   969  		// The top bytes will be rechecked to get the full match.
   970  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
   971  			candidate--
   972  			s--
   973  		}
   974  
   975  		// Bail if we exceed the maximum size.
   976  		if d+(s-nextEmit) > dstLimit {
   977  			return 0
   978  		}
   979  
   980  		// A 4-byte match has been found. We'll later see if more than 4 bytes
   981  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
   982  		// them as literal bytes.
   983  
   984  		d += emitLiteral(dst[d:], src[nextEmit:s])
   985  		if debug && nextEmit != s {
   986  			fmt.Println("emitted ", s-nextEmit, "literals")
   987  		}
   988  		// Call emitCopy, and then see if another emitCopy could be our next
   989  		// move. Repeat until we find no match for the input immediately after
   990  		// what was consumed by the last emitCopy call.
   991  		//
   992  		// If we exit this loop normally then we need to call emitLiteral next,
   993  		// though we don't yet know how big the literal will be. We handle that
   994  		// by proceeding to the next iteration of the main loop. We also can
   995  		// exit this loop via goto if we get close to exhausting the input.
   996  		for {
   997  			// Invariant: we have a 4-byte match at s, and no need to emit any
   998  			// literal bytes prior to s.
   999  			base := s
  1000  			repeat = base - candidate
  1001  
  1002  			// Extend the 4-byte match as long as possible.
  1003  			s += 4
  1004  			candidate += 4
  1005  			for s <= len(src)-8 {
  1006  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  1007  					s += bits.TrailingZeros64(diff) >> 3
  1008  					break
  1009  				}
  1010  				s += 8
  1011  				candidate += 8
  1012  			}
  1013  
  1014  			d += emitCopy(dst[d:], repeat, s-base)
  1015  			if debug {
  1016  				// Validate match.
  1017  				if s <= candidate {
  1018  					panic("s <= candidate")
  1019  				}
  1020  				a := src[base:s]
  1021  				b := src[base-repeat : base-repeat+(s-base)]
  1022  				if !bytes.Equal(a, b) {
  1023  					panic("mismatch")
  1024  				}
  1025  			}
  1026  			if debug {
  1027  				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
  1028  			}
  1029  			nextEmit = s
  1030  			if s >= sLimit {
  1031  				goto emitRemainder
  1032  			}
  1033  
  1034  			if d > dstLimit {
  1035  				// Do we have space for more, if not bail.
  1036  				return 0
  1037  			}
  1038  			// Check for an immediate match, otherwise start search at s+1
  1039  			x := load64(src, s-2)
  1040  			m2Hash := hash6(x, tableBits)
  1041  			currHash := hash6(x>>16, tableBits)
  1042  			candidate = int(table[currHash])
  1043  			table[m2Hash] = uint32(s - 2)
  1044  			table[currHash] = uint32(s)
  1045  			if debug && s == candidate {
  1046  				panic("s == candidate")
  1047  			}
  1048  			if uint32(x>>16) != load32(src, candidate) {
  1049  				cv = load64(src, s+1)
  1050  				s++
  1051  				break
  1052  			}
  1053  		}
  1054  	}
  1055  
  1056  emitRemainder:
  1057  	if nextEmit < len(src) {
  1058  		// Bail if we exceed the maximum size.
  1059  		if d+len(src)-nextEmit > dstLimit {
  1060  			return 0
  1061  		}
  1062  		d += emitLiteral(dst[d:], src[nextEmit:])
  1063  		if debug && nextEmit != s {
  1064  			fmt.Println("emitted ", len(src)-nextEmit, "literals")
  1065  		}
  1066  	}
  1067  	return d
  1068  }
  1069  

View as plain text