...

Source file src/github.com/klauspost/compress/snappy/xerial/xerial.go

Documentation: github.com/klauspost/compress/snappy/xerial

     1  package xerial
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"errors"
     7  
     8  	"github.com/klauspost/compress/s2"
     9  )
    10  
    11  var (
    12  	xerialHeader = []byte{130, 83, 78, 65, 80, 80, 89, 0}
    13  
    14  	// This is xerial version 1 and minimally compatible with version 1
    15  	xerialVersionInfo = []byte{0, 0, 0, 1, 0, 0, 0, 1}
    16  
    17  	// ErrMalformed is returned by the decoder when the xerial framing
    18  	// is malformed
    19  	ErrMalformed = errors.New("malformed xerial framing")
    20  )
    21  
    22  // Encode *appends* to the specified 'dst' the compressed
    23  // 'src' in xerial framing format. If 'dst' does not have enough
    24  // capacity, then a new slice will be allocated. If 'dst' has
    25  // non-zero length, then if *must* have been built using this function.
    26  func Encode(dst, src []byte) []byte {
    27  	if len(dst) == 0 {
    28  		dst = append(dst, xerialHeader...)
    29  		dst = append(dst, xerialVersionInfo...)
    30  	}
    31  
    32  	// Snappy encode in blocks of maximum 32KB
    33  	var (
    34  		max       = len(src)
    35  		blockSize = 32 * 1024
    36  		pos       = 0
    37  		chunk     []byte
    38  	)
    39  
    40  	for pos < max {
    41  		newPos := min(pos+blockSize, max)
    42  		// Find maximum length we need
    43  		needLen := s2.MaxEncodedLen(newPos-pos) + 4
    44  		if cap(dst)-len(dst) >= needLen {
    45  			// Encode directly into dst
    46  			dstStart := len(dst) + 4             // Start offset in dst
    47  			dstSizePos := dst[len(dst):dstStart] // Reserve space for compressed size
    48  			dstEnd := len(dst) + needLen         // End offset in dst
    49  			// Compress into dst and get actual size.
    50  			actual := s2.EncodeSnappy(dst[dstStart:dstEnd], src[pos:newPos])
    51  			// Update dst size
    52  			dst = dst[:dstStart+len(actual)]
    53  			// Store compressed size
    54  			binary.BigEndian.PutUint32(dstSizePos, uint32(len(actual)))
    55  		} else {
    56  			chunk = s2.EncodeSnappy(chunk[:cap(chunk)], src[pos:newPos])
    57  			origLen := len(dst)
    58  			// First encode the compressed size (big-endian)
    59  			// Put* panics if the buffer is too small, so pad 4 bytes first
    60  			dst = append(dst, dst[0:4]...)
    61  			binary.BigEndian.PutUint32(dst[origLen:], uint32(len(chunk)))
    62  			// And now the compressed data
    63  			dst = append(dst, chunk...)
    64  		}
    65  		pos = newPos
    66  	}
    67  	return dst
    68  }
    69  
    70  // EncodeBetter *appends* to the specified 'dst' the compressed
    71  // 'src' in xerial framing format. If 'dst' does not have enough
    72  // capacity, then a new slice will be allocated. If 'dst' has
    73  // non-zero length, then if *must* have been built using this function.
    74  func EncodeBetter(dst, src []byte) []byte {
    75  	if len(dst) == 0 {
    76  		dst = append(dst, xerialHeader...)
    77  		dst = append(dst, xerialVersionInfo...)
    78  	}
    79  
    80  	// Snappy encode in blocks of maximum 32KB
    81  	var (
    82  		max       = len(src)
    83  		blockSize = 32 * 1024
    84  		pos       = 0
    85  		chunk     []byte
    86  	)
    87  
    88  	for pos < max {
    89  		newPos := min(pos+blockSize, max)
    90  		// Find maximum length we need
    91  		needLen := s2.MaxEncodedLen(newPos-pos) + 4
    92  		if cap(dst)-len(dst) >= needLen {
    93  			// Encode directly into dst
    94  			dstStart := len(dst) + 4             // Start offset in dst
    95  			dstSizePos := dst[len(dst):dstStart] // Reserve space for compressed size
    96  			dstEnd := len(dst) + needLen         // End offset in dst
    97  			// Compress into dst and get actual size.
    98  			actual := s2.EncodeSnappyBetter(dst[dstStart:dstEnd], src[pos:newPos])
    99  			// Update dst size
   100  			dst = dst[:dstStart+len(actual)]
   101  			// Store compressed size
   102  			binary.BigEndian.PutUint32(dstSizePos, uint32(len(actual)))
   103  		} else {
   104  			chunk = s2.EncodeSnappyBetter(chunk[:cap(chunk)], src[pos:newPos])
   105  			origLen := len(dst)
   106  			// First encode the compressed size (big-endian)
   107  			// Put* panics if the buffer is too small, so pad 4 bytes first
   108  			dst = append(dst, dst[0:4]...)
   109  			binary.BigEndian.PutUint32(dst[origLen:], uint32(len(chunk)))
   110  			// And now the compressed data
   111  			dst = append(dst, chunk...)
   112  		}
   113  		pos = newPos
   114  	}
   115  	return dst
   116  }
   117  
   118  func min(x, y int) int {
   119  	if x < y {
   120  		return x
   121  	}
   122  	return y
   123  }
   124  
   125  const (
   126  	sizeOffset = 16
   127  	sizeBytes  = 4
   128  )
   129  
   130  // Decode decodes snappy data whether it is traditional unframed
   131  // or includes the xerial framing format.
   132  func Decode(src []byte) ([]byte, error) {
   133  	return DecodeInto(nil, src)
   134  }
   135  
   136  // DecodeInto decodes snappy data whether it is traditional unframed
   137  // or includes the xerial framing format into the specified `dst`.
   138  // It is assumed that the entirety of `dst` including all capacity is available
   139  // for use by this function. If `dst` is nil *or* insufficiently large to hold
   140  // the decoded `src`, new space will be allocated.
   141  // To never allocate bigger destination, use DecodeCapped.
   142  func DecodeInto(dst, src []byte) ([]byte, error) {
   143  	var max = len(src)
   144  
   145  	if max < len(xerialHeader) || !bytes.Equal(src[:8], xerialHeader) {
   146  		dst, err := s2.Decode(dst[:cap(dst)], src)
   147  		if err != nil {
   148  			return dst, ErrMalformed
   149  		}
   150  		return dst, nil
   151  	}
   152  	if max == sizeOffset {
   153  		return []byte{}, nil
   154  	}
   155  	if max < sizeOffset+sizeBytes {
   156  		return nil, ErrMalformed
   157  	}
   158  	if len(dst) > 0 {
   159  		dst = dst[:0]
   160  	}
   161  	var (
   162  		pos   = sizeOffset
   163  		chunk []byte
   164  	)
   165  
   166  	for pos+sizeBytes <= max {
   167  		size := int(binary.BigEndian.Uint32(src[pos : pos+sizeBytes]))
   168  		pos += sizeBytes
   169  
   170  		nextPos := pos + size
   171  		// On architectures where int is 32-bytes wide size + pos could
   172  		// overflow so we need to check the low bound as well as the
   173  		// high
   174  		if nextPos < pos || nextPos > max {
   175  			return nil, ErrMalformed
   176  		}
   177  		nextLen, err := s2.DecodedLen(src[pos:nextPos])
   178  		if err != nil {
   179  			return nil, err
   180  		}
   181  		if cap(dst)-len(dst) >= nextLen {
   182  			// Decode directly into dst
   183  			dstStart := len(dst)
   184  			dstEnd := dstStart + nextLen
   185  			_, err = s2.Decode(dst[dstStart:dstEnd], src[pos:nextPos])
   186  			if err != nil {
   187  				return nil, err
   188  			}
   189  			dst = dst[:dstEnd]
   190  		} else {
   191  			chunk, err = s2.Decode(chunk[:cap(chunk)], src[pos:nextPos])
   192  			if err != nil {
   193  				return nil, err
   194  			}
   195  			dst = append(dst, chunk...)
   196  		}
   197  		pos = nextPos
   198  	}
   199  	return dst, nil
   200  }
   201  
// ErrDstTooSmall is returned by DecodeCapped when the destination is nil
// or does not have enough capacity to hold the decoded data.
var ErrDstTooSmall = errors.New("destination buffer too small")
   203  
   204  // DecodeCapped decodes snappy data whether it is traditional unframed
   205  // or includes the xerial framing format into the specified `dst`.
   206  // It is assumed that the entirety of `dst` including all capacity is available
   207  // for use by this function. If `dst` is nil *or* insufficiently large to hold
   208  // the decoded `src`, ErrDstTooSmall is returned.
   209  func DecodeCapped(dst, src []byte) ([]byte, error) {
   210  	var max = len(src)
   211  	if dst == nil {
   212  		return nil, ErrDstTooSmall
   213  	}
   214  	if max < len(xerialHeader) || !bytes.Equal(src[:8], xerialHeader) {
   215  		l, err := s2.DecodedLen(src)
   216  		if err != nil {
   217  			return nil, ErrMalformed
   218  		}
   219  		if l > cap(dst) {
   220  			return nil, ErrDstTooSmall
   221  		}
   222  		return s2.Decode(dst[:cap(dst)], src)
   223  	}
   224  	dst = dst[:0]
   225  	if max == sizeOffset {
   226  		return dst, nil
   227  	}
   228  	if max < sizeOffset+sizeBytes {
   229  		return nil, ErrMalformed
   230  	}
   231  	pos := sizeOffset
   232  
   233  	for pos+sizeBytes <= max {
   234  		size := int(binary.BigEndian.Uint32(src[pos : pos+sizeBytes]))
   235  		pos += sizeBytes
   236  
   237  		nextPos := pos + size
   238  		// On architectures where int is 32-bytes wide size + pos could
   239  		// overflow so we need to check the low bound as well as the
   240  		// high
   241  		if nextPos < pos || nextPos > max {
   242  			return nil, ErrMalformed
   243  		}
   244  		nextLen, err := s2.DecodedLen(src[pos:nextPos])
   245  		if err != nil {
   246  			return nil, err
   247  		}
   248  		if cap(dst)-len(dst) < nextLen {
   249  			return nil, ErrDstTooSmall
   250  		}
   251  		// Decode directly into dst
   252  		dstStart := len(dst)
   253  		dstEnd := dstStart + nextLen
   254  		_, err = s2.Decode(dst[dstStart:dstEnd], src[pos:nextPos])
   255  		if err != nil {
   256  			return nil, err
   257  		}
   258  		dst = dst[:dstEnd]
   259  		pos = nextPos
   260  	}
   261  	return dst, nil
   262  }
   263  

View as plain text