
Source file src/github.com/klauspost/compress/flate/level2.go

Documentation: github.com/klauspost/compress/flate

package flate

import "fmt"

// fastEncL2 maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL2 struct {
	fastGen
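	// table maps the hash of the next hashBytes input bytes to the absolute
	// offset (position in e.hist plus e.cur) of their most recent occurrence.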
	table [bTableSize]tableEntry
}

// Encode uses a similar algorithm to level 1, but is capable
// of matching across blocks, giving better compression at a small slowdown.
func (e *fastEncL2) Encode(dst *tokens, src []byte) {
	const (
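		// inputMargin keeps the main loop far enough from the end of src that
		// the unconditional multi-byte loads below stay in bounds; hashBytes is
		// the number of input bytes that feed each hash.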
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
		hashBytes              = 5
	)

	if debugDeflate && e.cur < 0 {
		panic(fmt.Sprint("e.cur < 0: ", e.cur))
	}

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = maxMatchOffset
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
		for i := range e.table[:] {
			v := e.table[i].offset
			if v <= minOff {
				v = 0
			} else {
				v = v - e.cur + maxMatchOffset
			}
			e.table[i].offset = v
		}
		e.cur = maxMatchOffset
	}

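	// addBlock appends src to the history buffer e.hist and returns the
	// offset at which the new block starts within it.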
	s := e.addBlock(src)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		return
	}

	// Override src
	src = e.hist
	nextEmit := s

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load6432(src, s)
	for {
		// skipLog controls how quickly we start skipping ahead when we
		// haven't found a match in a long while.
		const skipLog = 5
		const doEvery = 2

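		// Each probe advances by doEvery plus an extra (s-nextEmit)>>skipLog
		// bytes, so the step grows as the distance since the last emitted
		// token increases and long incompressible runs are skipped quickly.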
		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashLen(cv, bTableBits, hashBytes)
			s = nextS
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = e.table[nextHash]
			now := load6432(src, nextS)
			e.table[nextHash] = tableEntry{offset: s + e.cur}
			nextHash = hashLen(now, bTableBits, hashBytes)

			offset := s - (candidate.offset - e.cur)
			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
				break
			}

			// Do one right away...
			cv = now
			s = nextS
			nextS++
			candidate = e.table[nextHash]
			now >>= 8
			e.table[nextHash] = tableEntry{offset: s + e.cur}

			offset = s - (candidate.offset - e.cur)
			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
				break
			}
			cv = now
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset - e.cur
			l := e.matchlenLong(s+4, t+4, src) + 4

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
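			// Emit any pending literals before the match. The else branch
			// below inlines emitLiteral: it copies the bytes as literal
			// tokens and updates the literal histogram directly.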
			if nextEmit < s {
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

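			// Record the match; s-t is the distance back to the start of the
			// match, stored relative to baseMatchOffset. AddMatchLong handles
			// lengths longer than a single token allows.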
			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}

			if s >= sLimit {
				// Index first pair after match end.
				if int(s+l+8) < len(src) {
					cv := load6432(src, s)
					e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
				}
				goto emitRemainder
			}

			// Store every second hash in-between, but offset by 1.
			for i := s - l + 2; i < s-5; i += 7 {
				x := load6432(src, i)
				nextHash := hashLen(x, bTableBits, hashBytes)
				e.table[nextHash] = tableEntry{offset: e.cur + i}
				// Skip one
				x >>= 16
				nextHash = hashLen(x, bTableBits, hashBytes)
				e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
				// Skip one
				x >>= 16
				nextHash = hashLen(x, bTableBits, hashBytes)
				e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 to s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6432(src, s-2)
			o := e.cur + s - 2
			prevHash := hashLen(x, bTableBits, hashBytes)
			prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
			e.table[prevHash] = tableEntry{offset: o}
			e.table[prevHash2] = tableEntry{offset: o + 1}
			currHash := hashLen(x>>16, bTableBits, hashBytes)
			candidate = e.table[currHash]
			e.table[currHash] = tableEntry{offset: o + 2}

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
				cv = x >> 24
				s++
				break
			}
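			// Otherwise the entry at currHash is an immediate match at s:
			// stay in this loop and emit it without any literals in between.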
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}

		emitLiteral(dst, src[nextEmit:])
	}
}

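fastEncL2 is internal to the package; callers normally reach it through the public NewWriter API by requesting compression level 2. The sketch below shows that usage under the assumption that level 2 is routed to this encoder; the input string and variable names are illustrative only.

package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/klauspost/compress/flate"
)

func main() {
	// Compress with level 2, which the package handles with the fast encoder above.
	var buf bytes.Buffer
	w, err := flate.NewWriter(&buf, 2)
	if err != nil {
		panic(err)
	}
	if _, err := w.Write([]byte("example input, example input, example input")); err != nil {
		panic(err)
	}
	if err := w.Close(); err != nil {
		panic(err)
	}

	// Decompress again to verify the round trip.
	r := flate.NewReader(&buf)
	out, err := io.ReadAll(r)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}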
