encode.go

Documentation: github.com/klauspost/compress/s2

     1  // Copyright 2011 The Snappy-Go Authors. All rights reserved.
     2  // Copyright (c) 2019 Klaus Post. All rights reserved.
     3  // Use of this source code is governed by a BSD-style
     4  // license that can be found in the LICENSE file.
     5  
     6  package s2
     7  
     8  import (
     9  	"encoding/binary"
    10  	"math"
    11  	"math/bits"
    12  )
    13  
    14  // Encode returns the encoded form of src. The returned slice may be a sub-
    15  // slice of dst if dst was large enough to hold the entire encoded block.
    16  // Otherwise, a newly allocated slice will be returned.
    17  //
    18  // The dst and src must not overlap. It is valid to pass a nil dst.
    19  //
    20  // The blocks will require the same amount of memory to decode as encoding,
    21  // and does not make for concurrent decoding.
    22  // Also note that blocks do not contain CRC information, so corruption may be undetected.
    23  //
    24  // If you need to encode larger amounts of data, consider using
    25  // the streaming interface which gives all of these features.
    26  func Encode(dst, src []byte) []byte {
    27  	if n := MaxEncodedLen(len(src)); n < 0 {
    28  		panic(ErrTooLarge)
    29  	} else if cap(dst) < n {
    30  		dst = make([]byte, n)
    31  	} else {
    32  		dst = dst[:n]
    33  	}
    34  
    35  	// The block starts with the varint-encoded length of the decompressed bytes.
    36  	d := binary.PutUvarint(dst, uint64(len(src)))
    37  
    38  	if len(src) == 0 {
    39  		return dst[:d]
    40  	}
    41  	if len(src) < minNonLiteralBlockSize {
    42  		d += emitLiteral(dst[d:], src)
    43  		return dst[:d]
    44  	}
    45  	n := encodeBlock(dst[d:], src)
    46  	if n > 0 {
    47  		d += n
    48  		return dst[:d]
    49  	}
    50  	// Not compressible
    51  	d += emitLiteral(dst[d:], src)
    52  	return dst[:d]
    53  }
    54  
    55  // EstimateBlockSize will perform a very fast compression
    56  // without outputting the result and return the compressed output size.
    57  // The function returns -1 if no improvement could be achieved.
    58  // Using actual compression will most often produce better compression than the estimate.
    59  func EstimateBlockSize(src []byte) (d int) {
    60  	if len(src) <= inputMargin || int64(len(src)) > 0xffffffff {
    61  		return -1
    62  	}
    63  	if len(src) <= 1024 {
    64  		d = calcBlockSizeSmall(src)
    65  	} else {
    66  		d = calcBlockSize(src)
    67  	}
    68  
    69  	if d == 0 {
    70  		return -1
    71  	}
    72  	// Size of the varint encoded block size.
    73  	d += (bits.Len64(uint64(len(src))) + 7) / 7
    74  
    75  	if d >= len(src) {
    76  		return -1
    77  	}
    78  	return d
    79  }
    80  
    81  // EncodeBetter returns the encoded form of src. The returned slice may be a sub-
    82  // slice of dst if dst was large enough to hold the entire encoded block.
    83  // Otherwise, a newly allocated slice will be returned.
    84  //
    85  // EncodeBetter compresses better than Encode but typically with a
    86  // 10-40% speed decrease on both compression and decompression.
    87  //
    88  // The dst and src must not overlap. It is valid to pass a nil dst.
    89  //
    90  // The blocks will require the same amount of memory to decode as encoding,
    91  // and does not make for concurrent decoding.
    92  // Also note that blocks do not contain CRC information, so corruption may be undetected.
    93  //
    94  // If you need to encode larger amounts of data, consider using
    95  // the streaming interface which gives all of these features.
    96  func EncodeBetter(dst, src []byte) []byte {
    97  	if n := MaxEncodedLen(len(src)); n < 0 {
    98  		panic(ErrTooLarge)
    99  	} else if len(dst) < n {
   100  		dst = make([]byte, n)
   101  	}
   102  
   103  	// The block starts with the varint-encoded length of the decompressed bytes.
   104  	d := binary.PutUvarint(dst, uint64(len(src)))
   105  
   106  	if len(src) == 0 {
   107  		return dst[:d]
   108  	}
   109  	if len(src) < minNonLiteralBlockSize {
   110  		d += emitLiteral(dst[d:], src)
   111  		return dst[:d]
   112  	}
   113  	n := encodeBlockBetter(dst[d:], src)
   114  	if n > 0 {
   115  		d += n
   116  		return dst[:d]
   117  	}
   118  	// Not compressible
   119  	d += emitLiteral(dst[d:], src)
   120  	return dst[:d]
   121  }
   122  
   123  // EncodeBest returns the encoded form of src. The returned slice may be a sub-
   124  // slice of dst if dst was large enough to hold the entire encoded block.
   125  // Otherwise, a newly allocated slice will be returned.
   126  //
   127  // EncodeBest compresses as good as reasonably possible but with a
   128  // big speed decrease.
   129  //
   130  // The dst and src must not overlap. It is valid to pass a nil dst.
   131  //
   132  // The blocks will require the same amount of memory to decode as encoding,
   133  // and does not make for concurrent decoding.
   134  // Also note that blocks do not contain CRC information, so corruption may be undetected.
   135  //
   136  // If you need to encode larger amounts of data, consider using
   137  // the streaming interface which gives all of these features.
   138  func EncodeBest(dst, src []byte) []byte {
   139  	if n := MaxEncodedLen(len(src)); n < 0 {
   140  		panic(ErrTooLarge)
   141  	} else if len(dst) < n {
   142  		dst = make([]byte, n)
   143  	}
   144  
   145  	// The block starts with the varint-encoded length of the decompressed bytes.
   146  	d := binary.PutUvarint(dst, uint64(len(src)))
   147  
   148  	if len(src) == 0 {
   149  		return dst[:d]
   150  	}
   151  	if len(src) < minNonLiteralBlockSize {
   152  		d += emitLiteral(dst[d:], src)
   153  		return dst[:d]
   154  	}
   155  	n := encodeBlockBest(dst[d:], src, nil)
   156  	if n > 0 {
   157  		d += n
   158  		return dst[:d]
   159  	}
   160  	// Not compressible
   161  	d += emitLiteral(dst[d:], src)
   162  	return dst[:d]
   163  }
   164  
   165  // EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
   166  // slice of dst if dst was large enough to hold the entire encoded block.
   167  // Otherwise, a newly allocated slice will be returned.
   168  //
   169  // The output is Snappy compatible and will likely decompress faster.
   170  //
   171  // The dst and src must not overlap. It is valid to pass a nil dst.
   172  //
   173  // The blocks will require the same amount of memory to decode as encoding,
   174  // and does not make for concurrent decoding.
   175  // Also note that blocks do not contain CRC information, so corruption may be undetected.
   176  //
   177  // If you need to encode larger amounts of data, consider using
   178  // the streaming interface which gives all of these features.
   179  func EncodeSnappy(dst, src []byte) []byte {
   180  	if n := MaxEncodedLen(len(src)); n < 0 {
   181  		panic(ErrTooLarge)
   182  	} else if cap(dst) < n {
   183  		dst = make([]byte, n)
   184  	} else {
   185  		dst = dst[:n]
   186  	}
   187  
   188  	// The block starts with the varint-encoded length of the decompressed bytes.
   189  	d := binary.PutUvarint(dst, uint64(len(src)))
   190  
   191  	if len(src) == 0 {
   192  		return dst[:d]
   193  	}
   194  	if len(src) < minNonLiteralBlockSize {
   195  		d += emitLiteral(dst[d:], src)
   196  		return dst[:d]
   197  	}
   198  
   199  	n := encodeBlockSnappy(dst[d:], src)
   200  	if n > 0 {
   201  		d += n
   202  		return dst[:d]
   203  	}
   204  	// Not compressible
   205  	d += emitLiteral(dst[d:], src)
   206  	return dst[:d]
   207  }
   208  
   209  // EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
   210  // slice of dst if dst was large enough to hold the entire encoded block.
   211  // Otherwise, a newly allocated slice will be returned.
   212  //
   213  // The output is Snappy compatible and will likely decompress faster.
   214  //
   215  // The dst and src must not overlap. It is valid to pass a nil dst.
   216  //
   217  // The blocks will require the same amount of memory to decode as encoding,
   218  // and does not make for concurrent decoding.
   219  // Also note that blocks do not contain CRC information, so corruption may be undetected.
   220  //
   221  // If you need to encode larger amounts of data, consider using
   222  // the streaming interface which gives all of these features.
   223  func EncodeSnappyBetter(dst, src []byte) []byte {
   224  	if n := MaxEncodedLen(len(src)); n < 0 {
   225  		panic(ErrTooLarge)
   226  	} else if cap(dst) < n {
   227  		dst = make([]byte, n)
   228  	} else {
   229  		dst = dst[:n]
   230  	}
   231  
   232  	// The block starts with the varint-encoded length of the decompressed bytes.
   233  	d := binary.PutUvarint(dst, uint64(len(src)))
   234  
   235  	if len(src) == 0 {
   236  		return dst[:d]
   237  	}
   238  	if len(src) < minNonLiteralBlockSize {
   239  		d += emitLiteral(dst[d:], src)
   240  		return dst[:d]
   241  	}
   242  
   243  	n := encodeBlockBetterSnappy(dst[d:], src)
   244  	if n > 0 {
   245  		d += n
   246  		return dst[:d]
   247  	}
   248  	// Not compressible
   249  	d += emitLiteral(dst[d:], src)
   250  	return dst[:d]
   251  }
   252  
   253  // EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
   254  // slice of dst if dst was large enough to hold the entire encoded block.
   255  // Otherwise, a newly allocated slice will be returned.
   256  //
   257  // The output is Snappy compatible and will likely decompress faster.
   258  //
   259  // The dst and src must not overlap. It is valid to pass a nil dst.
   260  //
   261  // The blocks will require the same amount of memory to decode as encoding,
   262  // and does not make for concurrent decoding.
   263  // Also note that blocks do not contain CRC information, so corruption may be undetected.
   264  //
   265  // If you need to encode larger amounts of data, consider using
   266  // the streaming interface which gives all of these features.
   267  func EncodeSnappyBest(dst, src []byte) []byte {
   268  	if n := MaxEncodedLen(len(src)); n < 0 {
   269  		panic(ErrTooLarge)
   270  	} else if cap(dst) < n {
   271  		dst = make([]byte, n)
   272  	} else {
   273  		dst = dst[:n]
   274  	}
   275  
   276  	// The block starts with the varint-encoded length of the decompressed bytes.
   277  	d := binary.PutUvarint(dst, uint64(len(src)))
   278  
   279  	if len(src) == 0 {
   280  		return dst[:d]
   281  	}
   282  	if len(src) < minNonLiteralBlockSize {
   283  		d += emitLiteral(dst[d:], src)
   284  		return dst[:d]
   285  	}
   286  
   287  	n := encodeBlockBestSnappy(dst[d:], src)
   288  	if n > 0 {
   289  		d += n
   290  		return dst[:d]
   291  	}
   292  	// Not compressible
   293  	d += emitLiteral(dst[d:], src)
   294  	return dst[:d]
   295  }
   296  
   297  // ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
   298  // If the destination is nil or too small, a new will be allocated.
   299  // The blocks are not validated, so garbage in = garbage out.
   300  // dst may not overlap block data.
   301  // Any data in dst is preserved as is, so it will not be considered a block.
   302  func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
   303  	totalSize := uint64(0)
   304  	compSize := 0
   305  	for _, b := range blocks {
   306  		l, hdr, err := decodedLen(b)
   307  		if err != nil {
   308  			return nil, err
   309  		}
   310  		totalSize += uint64(l)
   311  		compSize += len(b) - hdr
   312  	}
   313  	if totalSize == 0 {
   314  		dst = append(dst, 0)
   315  		return dst, nil
   316  	}
   317  	if totalSize > math.MaxUint32 {
   318  		return nil, ErrTooLarge
   319  	}
   320  	var tmp [binary.MaxVarintLen32]byte
   321  	hdrSize := binary.PutUvarint(tmp[:], totalSize)
   322  	wantSize := hdrSize + compSize
   323  
   324  	if cap(dst)-len(dst) < wantSize {
   325  		dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
   326  	}
   327  	dst = append(dst, tmp[:hdrSize]...)
   328  	for _, b := range blocks {
   329  		_, hdr, err := decodedLen(b)
   330  		if err != nil {
   331  			return nil, err
   332  		}
   333  		dst = append(dst, b[hdr:]...)
   334  	}
   335  	return dst, nil
   336  }
   337  
   338  // inputMargin is the minimum number of extra input bytes to keep, inside
   339  // encodeBlock's inner loop. On some architectures, this margin lets us
   340  // implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
   341  // literals can be implemented as a single load to and store from a 16-byte
   342  // register. That literal's actual length can be as short as 1 byte, so this
   343  // can copy up to 15 bytes too much, but that's OK as subsequent iterations of
   344  // the encoding loop will fix up the copy overrun, and this inputMargin ensures
   345  // that we don't overrun the dst and src buffers.
   346  const inputMargin = 8
   347  
   348  // minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
   349  // will be accepted by the encoder.
   350  const minNonLiteralBlockSize = 32
   351  
   352  const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)
   353  
   354  // MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
   355  // Blocks this big are highly discouraged, though.
   356  // Half the size on 32 bit systems.
   357  const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5
   358  
   359  // MaxEncodedLen returns the maximum length of a snappy block, given its
   360  // uncompressed length.
   361  //
   362  // It will return a negative value if srcLen is too large to encode.
   363  // 32 bit platforms will have lower thresholds for rejecting big content.
   364  func MaxEncodedLen(srcLen int) int {
   365  	n := uint64(srcLen)
   366  	if intReduction == 1 {
   367  		// 32 bits
   368  		if n > math.MaxInt32 {
   369  			// Also includes negative.
   370  			return -1
   371  		}
   372  	} else if n > 0xffffffff {
   373  		// 64 bits
   374  		// Also includes negative.
   375  		return -1
   376  	}
   377  	// Size of the varint encoded block size.
   378  	n = n + uint64((bits.Len64(n)+7)/7)
   379  
   380  	// Add maximum size of encoding block as literals.
   381  	n += uint64(literalExtraSize(int64(srcLen)))
   382  	if intReduction == 1 {
   383  		// 32 bits
   384  		if n > math.MaxInt32 {
   385  			return -1
   386  		}
   387  	} else if n > 0xffffffff {
   388  		// 64 bits
   389  		// Also includes negative.
   390  		return -1
   391  	}
   392  	return int(n)
   393  }
   394
View as plain text