Source file src/github.com/klauspost/compress/internal/lz4ref/block.go

Documentation: github.com/klauspost/compress/internal/lz4ref

package lz4ref

import (
	"encoding/binary"
	"fmt"
	"math/bits"
	"sync"
)

const (
	// The following constants are used to set up the compression algorithm.
	minMatch   = 4  // the minimum length of a match (4 bytes)
	winSizeLog = 16 // LZ4 64KiB window size limit
	winSize    = 1 << winSizeLog
	winMask    = winSize - 1 // 64KiB window of previous data for dependent blocks

	// hashLog determines the size of the hash table used to quickly find a previous match position.
	// Its value influences the compression speed and memory usage: a lower value is faster,
	// but at the expense of the compression ratio.
	// 16 seems to be the best compromise for fast compression.
	hashLog = 16
	htSize  = 1 << hashLog

	mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
)

// blockHash hashes the lower five bytes of x into a value < htSize.
func blockHash(x uint64) uint32 {
	const prime6bytes = 227718039650203
	x &= 1<<40 - 1
	return uint32((x * prime6bytes) >> (64 - hashLog))
}
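
// blockHashDemo is a hypothetical illustration (not part of the package API):
// because blockHash masks x to its lower 40 bits, two 8-byte loads that differ
// only beyond the fifth byte hash identically, and every result is < htSize.
func blockHashDemo() {
	a := binary.LittleEndian.Uint64([]byte{1, 2, 3, 4, 5, 6, 7, 8})
	b := binary.LittleEndian.Uint64([]byte{1, 2, 3, 4, 5, 9, 9, 9})
	fmt.Println(blockHash(a) == blockHash(b)) // true: the lower 5 bytes match
	fmt.Println(blockHash(a) < htSize)        // always true
}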

func CompressBlockBound(n int) int {
	return n + n/255 + 16
}
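
// compressAlloc is a hypothetical usage sketch: sizing dst with
// CompressBlockBound guarantees the compressed block always fits, so
// CompressBlock can never return the (0, nil) "incompressible" result.
func compressAlloc(src []byte) ([]byte, error) {
	dst := make([]byte, CompressBlockBound(len(src)))
	n, err := CompressBlock(src, dst)
	if err != nil {
		return nil, err
	}
	return dst[:n], nil
}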

type Compressor struct {
	// Offsets are at most 64KiB, so we can store only the lower 16 bits of
	// match positions: effectively, an offset from some 64KiB block boundary.
	//
	// When we retrieve such an offset, we interpret it as relative to the last
	// block boundary si &^ 0xffff, or the one before, (si &^ 0xffff) - 0x10000,
	// depending on which of these is inside the current window. If a table
	// entry was generated more than 64KiB back in the input, we find out by
	// inspecting the input stream.
	table [htSize]uint16

	// Bitmap indicating which positions in the table are in use.
	// This allows us to quickly reset the table for reuse,
	// without having to zero everything.
	inUse [htSize / 32]uint32
}

// get returns the position of a presumptive match for the hash h.
// The match may be a false positive due to a hash collision or an old entry.
// If si < winSize, the return value may be negative.
func (c *Compressor) get(h uint32, si int) int {
	h &= htSize - 1
	i := 0
	if c.inUse[h/32]&(1<<(h%32)) != 0 {
		i = int(c.table[h])
	}
	i += si &^ winMask
	if i >= si {
		// Try previous 64KiB block (negative when in first block).
		i -= winSize
	}
	return i
}

func (c *Compressor) put(h uint32, si int) {
	h &= htSize - 1
	c.table[h] = uint16(si)
	c.inUse[h/32] |= 1 << (h % 32)
}

func (c *Compressor) reset() { c.inUse = [htSize / 32]uint32{} }
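
// offsetDemo is a hypothetical walk-through of get's boundary arithmetic:
// put stores only uint16(si), and get rebuilds an absolute position by
// anchoring those 16 bits to the current 64KiB block, stepping one block
// back whenever the naive reconstruction lands at or ahead of si.
func offsetDemo() {
	var c Compressor
	c.put(42, 0x1fff0)        // table[42] = 0xfff0; position 0x1fff0 recorded
	i := c.get(42, 0x20010)   // 0xfff0 + 0x20000 = 0x2fff0 >= si, so -winSize
	fmt.Println(i == 0x1fff0) // true: the stored position, 32 bytes back
}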

var compressorPool = sync.Pool{New: func() interface{} { return new(Compressor) }}

func CompressBlock(src, dst []byte) (int, error) {
	c := compressorPool.Get().(*Compressor)
	n, err := c.CompressBlock(src, dst)
	compressorPool.Put(c)
	return n, err
}

func CompressBlockLZ4s(src, dst []byte) (int, error) {
	c := compressorPool.Get().(*Compressor)
	n, err := c.CompressBlockLZ4s(src, dst)
	compressorPool.Put(c)
	return n, err
}
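
// roundTripDemo is a hypothetical sanity check pairing the pooled compressor
// with UncompressBlock (defined later in this file).
func roundTripDemo(src []byte) bool {
	comp := make([]byte, CompressBlockBound(len(src)))
	n, err := CompressBlock(src, comp)
	if err != nil {
		return false
	}
	dec := make([]byte, len(src))
	return UncompressBlock(dec, comp[:n]) == len(src) && string(dec) == string(src)
}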

func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
	// Zero out reused table to avoid non-deterministic output (issue #65).
	c.reset()

	const debug = false

	if debug {
		fmt.Printf("lz4 block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
	}

	// Return 0, nil only if the destination buffer size is < CompressBlockBound.
	isNotCompressible := len(dst) < CompressBlockBound(len(src))

	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
	// This significantly speeds up incompressible data and usually has very small impact on compression.
	// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
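	// For example, 256 bytes past the last match, each probe advances
	// 1 + (256>>7) = 3 bytes instead of 1.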
	const adaptSkipLog = 7

	// si: Current position of the search.
	// anchor: Position of the current literals.
	var si, di, anchor int
	sn := len(src) - mfLimit
	if sn <= 0 {
		goto lastLiterals
	}

	// Fast scan strategy: the hash table only stores the last five-byte sequences.
	for si < sn {
		// Hash the next five bytes (sequence)...
		match := binary.LittleEndian.Uint64(src[si:])
		h := blockHash(match)
		h2 := blockHash(match >> 8)

		// We check a match at si, si+1 and si+2 and pick the first one we get.
		// Checking all three only requires us to load the source once.
		ref := c.get(h, si)
		ref2 := c.get(h2, si+1)
		c.put(h, si)
		c.put(h2, si+1)

		offset := si - ref

		if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
			// No match. Start calculating another hash.
			// The processor can usually do this out-of-order.
			h = blockHash(match >> 16)
			ref3 := c.get(h, si+2)

			// Check the second match at si+1
			si += 1
			offset = si - ref2

			if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
				// No match. Check the third match at si+2
				si += 1
				offset = si - ref3
				c.put(h, si)

				if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
					// Skip one extra byte (at si+3) before we check 3 matches again.
					si += 2 + (si-anchor)>>adaptSkipLog
					continue
				}
			}
		}

		// Match found.
		lLen := si - anchor // Literal length.
		// We already matched 4 bytes.
		mLen := 4

		// Extend backwards if we can, reducing literals.
		tOff := si - offset - 1
		for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
			si--
			tOff--
			lLen--
			mLen++
		}

		// Add the match length, so we continue search at the end.
		// Use mLen to store the offset base.
		si, mLen = si+mLen, si+minMatch

		// Find the longest match by looking in batches of 8 bytes.
		for si+8 <= sn {
			x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
			if x == 0 {
				si += 8
			} else {
				// Stop at the first non-zero byte.
				si += bits.TrailingZeros64(x) >> 3
				break
			}
		}

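		// Emit the sequence: a token byte holding the literal length in its
		// high nibble and (match length - minMatch) in its low nibble, any
		// extra literal-length bytes, the literals themselves, the 2-byte
		// little-endian offset, and any extra match-length bytes.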
		mLen = si - mLen
		if di >= len(dst) {
			return 0, ErrInvalidSourceShortBuffer
		}
		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}

		// Encode literals length.
		if debug {
			fmt.Printf("emit %d literals\n", lLen)
		}
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			di++
			l := lLen - 0xF
			for ; l >= 0xFF && di < len(dst); l -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			if di >= len(dst) {
				return 0, ErrInvalidSourceShortBuffer
			}
			dst[di] = byte(l)
		}
		di++

		// Literals.
		if di+lLen > len(dst) {
			return 0, ErrInvalidSourceShortBuffer
		}
		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
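		// Advance past the literals plus 2 bytes reserved for the offset,
		// which is filled in below at dst[di-2] and dst[di-1].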
		di += lLen + 2
		anchor = si

		// Encode offset.
		if debug {
			fmt.Printf("emit copy, length: %d, offset: %d\n", mLen+minMatch, offset)
		}
		if di > len(dst) {
			return 0, ErrInvalidSourceShortBuffer
		}
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// Encode match length part 2.
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			if di >= len(dst) {
				return 0, ErrInvalidSourceShortBuffer
			}
			dst[di] = byte(mLen)
			di++
		}
		// Check if we can load next values.
		if si >= sn {
			break
		}
		// Hash match end-2
		h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
		c.put(h, si-2)
	}

lastLiterals:
	if isNotCompressible && anchor == 0 {
		// Incompressible.
		return 0, nil
	}

	// Last literals.
	if di >= len(dst) {
		return 0, ErrInvalidSourceShortBuffer
	}
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		di++
		for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
			dst[di] = 0xFF
			di++
		}
		if di >= len(dst) {
			return 0, ErrInvalidSourceShortBuffer
		}
		dst[di] = byte(lLen)
	}
	di++

	// Write the last literals.
	if isNotCompressible && di >= anchor {
		// Incompressible.
		return 0, nil
	}
	if di+len(src)-anchor > len(dst) {
		return 0, ErrInvalidSourceShortBuffer
	}
	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
	return di, nil
}
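
// appendLZ4Len is a hypothetical helper isolating the length encoding used
// above: once a token nibble is saturated at 0xF, the remainder n (already
// reduced by 15) is emitted as a run of 0xFF bytes plus one final byte, which
// is always < 0xFF so the decoder knows where the run stops.
func appendLZ4Len(dst []byte, n int) []byte {
	for n >= 0xFF {
		dst = append(dst, 0xFF)
		n -= 0xFF
	}
	return append(dst, byte(n))
}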

func (c *Compressor) CompressBlockLZ4s(src, dst []byte) (int, error) {
	// Zero out reused table to avoid non-deterministic output (issue #65).
	c.reset()

	const debug = false
	const minMatch = 3
	const addExtraLits = 32 // Suboptimal, but test emitting literals without matches. Set to 0 to disable.

	if debug {
		fmt.Printf("lz4 block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
	}

	// Return 0, nil only if the destination buffer size is < CompressBlockBound.
	isNotCompressible := len(dst) < CompressBlockBound(len(src))

	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
	// This significantly speeds up incompressible data and usually has very small impact on compression.
	// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
	const adaptSkipLog = 7

	// si: Current position of the search.
	// anchor: Position of the current literals.
	var si, di, anchor int
	sn := len(src) - mfLimit
	if sn <= 0 {
		goto lastLiterals
	}

	// Fast scan strategy: the hash table only stores the last five-byte sequences.
	for si < sn {
		// Hash the next five bytes (sequence)...
		match := binary.LittleEndian.Uint64(src[si:])
		h := blockHash(match)
		h2 := blockHash(match >> 8)

		// We check a match at si, si+1 and si+2 and pick the first one we get.
		// Checking all three only requires us to load the source once.
		ref := c.get(h, si)
		ref2 := c.get(h2, si+1)
		c.put(h, si)
		c.put(h2, si+1)

		offset := si - ref

		if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
			// No match. Start calculating another hash.
			// The processor can usually do this out-of-order.
			h = blockHash(match >> 16)
			ref3 := c.get(h, si+2)

			// Check the second match at si+1
			si += 1
			offset = si - ref2

			if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
				// No match. Check the third match at si+2
				si += 1
				offset = si - ref3
				c.put(h, si)

				if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
					// Skip one extra byte (at si+3) before we check 3 matches again.
					si += 2 + (si-anchor)>>adaptSkipLog
					continue
				}
			}
		}

		// Match found.
		lLen := si - anchor // Literal length.
		// We already matched 4 bytes.
		mLen := 4

		// Extend backwards if we can, reducing literals.
		tOff := si - offset - 1
		for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
			si--
			tOff--
			lLen--
			mLen++
		}

		// Add the match length, so we continue search at the end.
		// Use mLen to store the offset base.
		si, mLen = si+mLen, si+minMatch

		// Find the longest match by looking in batches of 8 bytes.
		for si+8 <= sn {
			x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
			if x == 0 {
				si += 8
			} else {
				// Stop at the first non-zero byte.
				si += bits.TrailingZeros64(x) >> 3
				break
			}
		}
		if addExtraLits > 15 {
			// Emit addExtraLits literals as their own sequence (no match).
			if lLen > addExtraLits {
				dst[di] = 0xf0
				dst[di+1] = byte(int(addExtraLits-15) & 0xff) // hack to compile
				di += 2
				copy(dst[di:di+addExtraLits], src[anchor:anchor+lLen])
				di += addExtraLits
				lLen -= addExtraLits
				anchor += addExtraLits
			}
		}
		mLen = si - mLen
		if di >= len(dst) {
			return 0, ErrInvalidSourceShortBuffer
		}
		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}

		// Encode literals length.
		if debug {
			fmt.Printf("emit %d literals\n", lLen)
		}
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			di++
			l := lLen - 0xF
			for ; l >= 0xFF && di < len(dst); l -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			if di >= len(dst) {
				return 0, ErrInvalidSourceShortBuffer
			}
			dst[di] = byte(l)
		}
		di++

		// Literals.
		if di+lLen > len(dst) {
			return 0, ErrInvalidSourceShortBuffer
		}
		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
		di += lLen + 2
		anchor = si

		// Encode offset.
		if debug {
			fmt.Printf("emit copy, length: %d, offset: %d\n", mLen+minMatch, offset)
		}
		if di > len(dst) {
			return 0, ErrInvalidSourceShortBuffer
		}
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// Encode match length part 2.
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			if di >= len(dst) {
				return 0, ErrInvalidSourceShortBuffer
			}
			dst[di] = byte(mLen)
			di++
		}
		// Check if we can load next values.
		if si >= sn {
			break
		}
		// Hash match end-2
		h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
		c.put(h, si-2)
	}

lastLiterals:
	if isNotCompressible && anchor == 0 {
		// Incompressible.
		return 0, nil
	}

	// Last literals.
	if di >= len(dst) {
		return 0, ErrInvalidSourceShortBuffer
	}
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		di++
		for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
			dst[di] = 0xFF
			di++
		}
		if di >= len(dst) {
			return 0, ErrInvalidSourceShortBuffer
		}
		dst[di] = byte(lLen)
	}
	di++

	// Write the last literals.
	if isNotCompressible && di >= anchor {
		// Incompressible.
		return 0, nil
	}
	if di+len(src)-anchor > len(dst) {
		return 0, ErrInvalidSourceShortBuffer
	}
	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
	return di, nil
}

func UncompressBlock(dst, src []byte) (ret int) {
	// Restrict capacities so we don't read or write out of bounds.
	dst = dst[:len(dst):len(dst)]
	src = src[:len(src):len(src)]

	const debug = false

	const hasError = -2

	if len(src) == 0 {
		return hasError
	}

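	// Bounds checking is left to the Go runtime: any out-of-range slice
	// access below panics, and the deferred recover converts that panic
	// into the hasError return value.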
	defer func() {
		if r := recover(); r != nil {
			if debug {
				fmt.Println("recover:", r)
			}
			ret = hasError
		}
	}()

	var si, di uint
	for {
		if si >= uint(len(src)) {
			return hasError
		}
		// Literals and match lengths (token).
		b := uint(src[si])
		si++

		// Literals.
		if lLen := b >> 4; lLen > 0 {
			switch {
			case lLen < 0xF && si+16 < uint(len(src)):
				// Shortcut 1
				// if we have enough room in src and dst, and the literals length
				// is small enough (0..14) then copy all 16 bytes, even if not all
				// are part of the literals.
				copy(dst[di:], src[si:si+16])
				si += lLen
				di += lLen
				if debug {
					fmt.Println("ll:", lLen)
				}
				if mLen := b & 0xF; mLen < 0xF {
					// Shortcut 2
					// if the match length (4..18) fits within the literals, then copy
					// all 18 bytes, even if not all are part of the literals.
					mLen += 4
					if offset := u16(src[si:]); mLen <= offset && offset < di {
						i := di - offset
						// The remaining buffer may not hold 18 bytes.
						// See https://github.com/pierrec/lz4/issues/51.
						if end := i + 18; end <= uint(len(dst)) {
							copy(dst[di:], dst[i:end])
							si += 2
							di += mLen
							continue
						}
					}
				}
			case lLen == 0xF:
				for {
					x := uint(src[si])
					if lLen += x; int(lLen) < 0 {
						if debug {
							fmt.Println("int(lLen) < 0")
						}
						return hasError
					}
					si++
					if x != 0xFF {
						break
					}
				}
				fallthrough
			default:
				copy(dst[di:di+lLen], src[si:si+lLen])
				si += lLen
				di += lLen
				if debug {
					fmt.Println("ll:", lLen)
				}
			}
		}

		mLen := b & 0xF
		if si == uint(len(src)) && mLen == 0 {
			break
		} else if si >= uint(len(src))-2 {
			return hasError
		}

		offset := u16(src[si:])
		if offset == 0 {
			return hasError
		}
		si += 2

		// Match.
		mLen += minMatch
		if mLen == minMatch+0xF {
			for {
				x := uint(src[si])
				if mLen += x; int(mLen) < 0 {
					return hasError
				}
				si++
				if x != 0xFF {
					break
				}
			}
		}
		if debug {
			fmt.Println("ml:", mLen, "offset:", offset)
		}

		// Copy the match.
		if di < offset {
			return hasError
		}

		expanded := dst[di-offset:]
		if mLen > offset {
			// Efficiently copy the match dst[di-offset:di] into the dst slice.
			bytesToCopy := offset * (mLen / offset)
			for n := offset; n <= bytesToCopy+offset; n *= 2 {
				copy(expanded[n:], expanded[:n])
			}
			di += bytesToCopy
			mLen -= bytesToCopy
		}
		di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
	}

	return int(di)
}
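
// expandOverlap is a hypothetical standalone version of the doubling copy
// above: it appends count bytes repeating the trailing offset bytes of dst,
// doubling the copied region instead of copying byte by byte. For example,
// expandOverlap([]byte("ab"), 2, 6) returns []byte("abababab").
func expandOverlap(dst []byte, offset, count int) []byte {
	start := len(dst) - offset
	dst = append(dst, make([]byte, count)...)
	buf := dst[start:] // the pattern plus room for the expansion
	for n := offset; n < offset+count; n *= 2 {
		copy(buf[n:], buf[:n])
	}
	return dst
}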

func u16(p []byte) uint { return uint(binary.LittleEndian.Uint16(p)) }
