...

Source file src/github.com/klauspost/compress/s2/s2_test.go

Documentation: github.com/klauspost/compress/s2

     1  // Copyright 2011 The Snappy-Go Authors. All rights reserved.
     2  // Copyright (c) 2019 Klaus Post. All rights reserved.
     3  // Use of this source code is governed by a BSD-style
     4  // license that can be found in the LICENSE file.
     5  
     6  package s2
     7  
     8  import (
     9  	"bytes"
    10  	"encoding/binary"
    11  	"flag"
    12  	"fmt"
    13  	"io"
    14  	"math"
    15  	"math/rand"
    16  	"net/http"
    17  	"os"
    18  	"path/filepath"
    19  	"runtime"
    20  	"strings"
    21  	"testing"
    22  
    23  	"github.com/klauspost/compress/internal/snapref"
    24  	"github.com/klauspost/compress/zip"
    25  	"github.com/klauspost/compress/zstd"
    26  )
    27  
    28  const maxUint = ^uint(0)
    29  const maxInt = int(maxUint >> 1)
    30  
// Command-line flags for the tests and benchmarks: download controls whether
// missing files are fetched before running benchmarks, testdataDir names the
// directory holding test data and benchdataDir the directory holding
// benchmark data.
var (
	download     = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
	testdataDir  = flag.String("testdataDir", "testdata", "Directory containing the test data")
	benchdataDir = flag.String("benchdataDir", "testdata/bench", "Directory containing the benchmark data")
)
    36  
    37  func TestMaxEncodedLen(t *testing.T) {
    38  	testSet := []struct {
    39  		in, out int64
    40  	}{
    41  		0:  {in: 0, out: 1},
    42  		1:  {in: 1 << 24, out: 1<<24 + int64(binary.PutVarint([]byte{binary.MaxVarintLen32: 0}, int64(1<<24))) + literalExtraSize(1<<24)},
    43  		2:  {in: MaxBlockSize, out: math.MaxUint32},
    44  		3:  {in: math.MaxUint32 - binary.MaxVarintLen32 - literalExtraSize(math.MaxUint32), out: math.MaxUint32},
    45  		4:  {in: math.MaxUint32 - 9, out: -1},
    46  		5:  {in: math.MaxUint32 - 8, out: -1},
    47  		6:  {in: math.MaxUint32 - 7, out: -1},
    48  		7:  {in: math.MaxUint32 - 6, out: -1},
    49  		8:  {in: math.MaxUint32 - 5, out: -1},
    50  		9:  {in: math.MaxUint32 - 4, out: -1},
    51  		10: {in: math.MaxUint32 - 3, out: -1},
    52  		11: {in: math.MaxUint32 - 2, out: -1},
    53  		12: {in: math.MaxUint32 - 1, out: -1},
    54  		13: {in: math.MaxUint32, out: -1},
    55  		14: {in: -1, out: -1},
    56  		15: {in: -2, out: -1},
    57  	}
    58  	// 32 bit platforms have a different threshold.
    59  	if maxInt == math.MaxInt32 {
    60  		testSet[2].out = math.MaxInt32
    61  		testSet[3].out = -1
    62  	}
    63  	t.Log("Maxblock:", MaxBlockSize, "reduction:", intReduction)
    64  	// Test all sizes up to maxBlockSize.
    65  	for i := int64(0); i < maxBlockSize; i++ {
    66  		testSet = append(testSet, struct{ in, out int64 }{in: i, out: i + int64(binary.PutVarint([]byte{binary.MaxVarintLen32: 0}, i)) + literalExtraSize(i)})
    67  	}
    68  	for i := range testSet {
    69  		tt := testSet[i]
    70  		want := tt.out
    71  		got := int64(MaxEncodedLen(int(tt.in)))
    72  		if got != want {
    73  			t.Errorf("test %d: input: %d, want: %d, got: %d", i, tt.in, want, got)
    74  		}
    75  	}
    76  }
    77  
    78  func cmp(got, want []byte) error {
    79  	if bytes.Equal(got, want) {
    80  		return nil
    81  	}
    82  	if len(got) != len(want) {
    83  		return fmt.Errorf("got %d bytes, want %d", len(got), len(want))
    84  	}
    85  	for i := range got {
    86  		if got[i] != want[i] {
    87  			return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, got[i], want[i])
    88  		}
    89  	}
    90  	return nil
    91  }
    92  
    93  func roundtrip(b, ebuf, dbuf []byte) error {
    94  	bOrg := make([]byte, len(b))
    95  	copy(bOrg, b)
    96  	asmEnc := Encode(nil, b)
    97  	if err := cmp(bOrg, b); err != nil {
    98  		return fmt.Errorf("src was changed: %v", err)
    99  	}
   100  	goEnc := encodeGo(nil, b)
   101  	if err := cmp(bOrg, b); err != nil {
   102  		return fmt.Errorf("src was changed: %v", err)
   103  	}
   104  
   105  	//fmt.Println("asm:", len(asmEnc), "go:", len(goEnc))
   106  	dGo, err := Decode(nil, goEnc)
   107  	if err != nil {
   108  		return fmt.Errorf("decoding error: %v", err)
   109  	}
   110  
   111  	if err := cmp(dGo, b); err != nil {
   112  		return fmt.Errorf("roundtrip mismatch: %v", err)
   113  	}
   114  
   115  	// fmt.Println("decode asm...")
   116  	d, err := Decode(nil, asmEnc)
   117  	if err != nil {
   118  		return fmt.Errorf("decoding error: %v", err)
   119  	}
   120  	if err := cmp(d, b); err != nil {
   121  		return fmt.Errorf("roundtrip mismatch: %v", err)
   122  	}
   123  	d, err = Decode(dbuf, EncodeBetter(ebuf, b))
   124  	if err != nil {
   125  		return fmt.Errorf("decoding better error: %v", err)
   126  	}
   127  	if err := cmp(d, b); err != nil {
   128  		return fmt.Errorf("roundtrip better mismatch: %v", err)
   129  	}
   130  
   131  	// Test concat with some existing data.
   132  	dst := []byte("existing")
   133  	// Add 3 different encodes and a 0 length block.
   134  	concat, err := ConcatBlocks(dst, Encode(nil, b), EncodeBetter(nil, b), []byte{0}, EncodeSnappy(nil, b))
   135  	if err != nil {
   136  		return fmt.Errorf("concat error: %v", err)
   137  	}
   138  	if err := cmp(concat[:len(dst)], dst); err != nil {
   139  		return fmt.Errorf("concat existing mismatch: %v", err)
   140  	}
   141  	concat = concat[len(dst):]
   142  
   143  	d, _ = Decode(nil, concat)
   144  	want := append(make([]byte, 0, len(b)*3), b...)
   145  	want = append(want, b...)
   146  	want = append(want, b...)
   147  
   148  	if err := cmp(d, want); err != nil {
   149  		return fmt.Errorf("roundtrip concat mismatch: %v", err)
   150  	}
   151  
   152  	return nil
   153  }
   154  
   155  func TestEmpty(t *testing.T) {
   156  	if err := roundtrip(nil, nil, nil); err != nil {
   157  		t.Fatal(err)
   158  	}
   159  }
   160  
   161  func TestSmallCopy(t *testing.T) {
   162  	for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
   163  		for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
   164  			for i := 0; i < 32; i++ {
   165  				s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
   166  				if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
   167  					t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
   168  				}
   169  			}
   170  		}
   171  	}
   172  }
   173  
   174  func TestSmallRand(t *testing.T) {
   175  	rng := rand.New(rand.NewSource(1))
   176  	for n := 1; n < 20000; n += 23 {
   177  		b := make([]byte, n)
   178  		for i := range b {
   179  			b[i] = uint8(rng.Intn(256))
   180  		}
   181  		if err := roundtrip(b, nil, nil); err != nil {
   182  			t.Fatal(err)
   183  		}
   184  	}
   185  }
   186  
   187  func TestSmallRegular(t *testing.T) {
   188  	for n := 1; n < 20000; n += 23 {
   189  		b := make([]byte, n)
   190  		for i := range b {
   191  			b[i] = uint8(i%10 + 'a')
   192  		}
   193  		if err := roundtrip(b, nil, nil); err != nil {
   194  			t.Fatal(err)
   195  		}
   196  	}
   197  }
   198  
   199  func TestSmallRepeat(t *testing.T) {
   200  	for n := 1; n < 20000; n += 23 {
   201  		b := make([]byte, n)
   202  		for i := range b[:n/2] {
   203  			b[i] = uint8(i * 255 / n)
   204  		}
   205  		for i := range b[n/2:] {
   206  			b[i+n/2] = uint8(i%10 + 'a')
   207  		}
   208  		if err := roundtrip(b, nil, nil); err != nil {
   209  			t.Fatal(err)
   210  		}
   211  	}
   212  }
   213  
   214  func TestInvalidVarint(t *testing.T) {
   215  	testCases := []struct {
   216  		desc  string
   217  		input string
   218  	}{{
   219  		"invalid varint, final byte has continuation bit set",
   220  		"\xff",
   221  	}, {
   222  		"invalid varint, value overflows uint64",
   223  		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
   224  	}, {
   225  		// https://github.com/google/snappy/blob/master/format_description.txt
   226  		// says that "the stream starts with the uncompressed length [as a
   227  		// varint] (up to a maximum of 2^32 - 1)".
   228  		"valid varint (as uint64), but value overflows uint32",
   229  		"\x80\x80\x80\x80\x10",
   230  	}}
   231  
   232  	for _, tc := range testCases {
   233  		input := []byte(tc.input)
   234  		if _, err := DecodedLen(input); err != ErrCorrupt {
   235  			t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
   236  		}
   237  		if _, err := Decode(nil, input); err != ErrCorrupt {
   238  			t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
   239  		}
   240  	}
   241  }
   242  
// TestDecode feeds a table of hand-built encoded blocks to Decode and checks
// both the decoded bytes and the returned error against the expectation for
// each case. It also verifies that Decode does not modify the destination
// buffer beyond the decoded length announced by the input.
func TestDecode(t *testing.T) {
	// lit40 is the 40-byte literal 0x00..0x27 used by the 40-byte literal case.
	lit40Bytes := make([]byte, 40)
	for i := range lit40Bytes {
		lit40Bytes[i] = byte(i)
	}
	lit40 := string(lit40Bytes)

	// Each input starts with the varint-encoded decoded length (it is parsed
	// with binary.Uvarint below), followed by the encoded elements named in
	// the description.
	testCases := []struct {
		desc    string
		input   string
		want    string
		wantErr error
	}{{
		`decodedLen=0; valid input`,
		"\x00",
		"",
		nil,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
		"\x03" + "\x08\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
		"\x02" + "\x08\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
		"\x03" + "\x08\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
		"\x28" + "\x9c" + lit40,
		lit40,
		nil,
	}, {
		`decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
		"\x01" + "\xf0",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
		"\x03" + "\xf0\x02\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
		"\x01" + "\xf4\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
		"\x03" + "\xf4\x02\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
		"\x01" + "\xf8\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
		"\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
		"\x01" + "\xfc\x00\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
		"\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
		"\x04" + "\xfc\x02\x00\x00\x00\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
		"\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
		"\x04" + "\x01",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
		"\x04" + "\x02\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy4, 4 extra length|offset bytes; not enough extra bytes`,
		"\x04" + "\x03\x00\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
		"\x04" + "\x0cabcd",
		"abcd",
		nil,
	}, {
		`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
		"\x0d" + "\x0cabcd" + "\x15\x04",
		"abcdabcdabcda",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x04",
		"abcdabcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x02",
		"abcdcdcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x01",
		"abcddddd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; repeat offset as first match`,
		"\x08" + "\x0cabcd" + "\x01\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; literal: 'z'; tagCopy1; length=4 offset=0; repeat offset as second match`,
		"\x0d" + "\x0cabcd" + "\x01\x01" + "\x00z" + "\x01\x00",
		"abcdddddzzzzz",
		nil,
	}, {
		`decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
		"\x09" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
		"\x08" + "\x0cabcd" + "\x01\x05",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
		"\x07" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
		"\x06" + "\x0cabcd" + "\x06\x03\x00",
		"abcdbc",
		nil,
	}, {
		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy4; length=2 offset=3; valid input`,
		"\x06" + "\x0cabcd" + "\x07\x03\x00\x00\x00",
		"abcdbc",
		nil,
	}}

	const (
		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
		// not present in either the input or the output. It is written to dBuf
		// to check that Decode does not write bytes past the end of
		// dBuf[:dLen].
		//
		// The magic number 37 was chosen because it is prime. A more 'natural'
		// number like 32 might lead to a false negative if, for example, a
		// byte was incorrectly copied 4*8 bytes later.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)

	// dBuf is the destination handed to Decode; it is refilled with the
	// sentinel pattern before every case.
	var dBuf [100]byte
loop:
	for i, tc := range testCases {
		// Sanity-check the test case itself: sentinel bytes in the input would
		// make the overrun check below unreliable.
		input := []byte(tc.input)
		for _, x := range input {
			if notPresentBase <= x && x < notPresentBase+notPresentLen {
				t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
				continue loop
			}
		}

		// Read the announced decoded length so we know which part of dBuf
		// Decode is allowed to touch.
		dLen, n := binary.Uvarint(input)
		if n <= 0 {
			t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
			continue
		}
		if dLen > uint64(len(dBuf)) {
			t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
			continue
		}

		for j := range dBuf {
			dBuf[j] = byte(notPresentBase + j%notPresentLen)
		}
		g, gotErr := Decode(dBuf[:], input)
		if got := string(g); got != tc.want || gotErr != tc.wantErr {
			t.Errorf("#%d (%s):\ngot  %q, %v\nwant %q, %v",
				i, tc.desc, got, gotErr, tc.want, tc.wantErr)
			continue
		}
		// Everything at or beyond dLen must still hold the sentinel pattern.
		for j, x := range dBuf {
			if uint64(j) < dLen {
				continue
			}
			if w := byte(notPresentBase + j%notPresentLen); x != w {
				t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
					i, tc.desc, j, x, w, dBuf)
				continue loop
			}
		}
	}
}
   462  
   463  func TestDecodeCopy4(t *testing.T) {
   464  	dots := strings.Repeat(".", 65536)
   465  
   466  	input := strings.Join([]string{
   467  		"\x89\x80\x04",         // decodedLen = 65545.
   468  		"\x0cpqrs",             // 4-byte literal "pqrs".
   469  		"\xf4\xff\xff" + dots,  // 65536-byte literal dots.
   470  		"\x13\x04\x00\x01\x00", // tagCopy4; length=5 offset=65540.
   471  	}, "")
   472  
   473  	gotBytes, err := Decode(nil, []byte(input))
   474  	if err != nil {
   475  		t.Fatal(err)
   476  	}
   477  	got := string(gotBytes)
   478  	want := "pqrs" + dots + "pqrs."
   479  	if len(got) != len(want) {
   480  		t.Fatalf("got %d bytes, want %d", len(got), len(want))
   481  	}
   482  	if got != want {
   483  		for i := 0; i < len(got); i++ {
   484  			if g, w := got[i], want[i]; g != w {
   485  				t.Fatalf("byte #%d: got %#02x, want %#02x", i, g, w)
   486  			}
   487  		}
   488  	}
   489  }
   490  
// TestDecodeLengthOffset tests decoding an encoding of the form literal +
// copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
//
// Every combination of copy length 1..18, copy offset 1..18 and suffix length
// 0..18 is built by hand, decoded, and compared with an independently
// computed expected output.
func TestDecodeLengthOffset(t *testing.T) {
	const (
		prefix = "abcdefghijklmnopqr"
		suffix = "ABCDEFGHIJKLMNOPQR"

		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
		// not present in either the input or the output. It is written to
		// gotBuf to check that Decode does not write bytes past the end of
		// gotBuf[:totalLen].
		//
		// The magic number 37 was chosen because it is prime. A more 'natural'
		// number like 32 might lead to a false negative if, for example, a
		// byte was incorrectly copied 4*8 bytes later.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)
	var gotBuf, wantBuf, inputBuf [128]byte
	for length := 1; length <= 18; length++ {
		for offset := 1; offset <= 18; offset++ {
		loop:
			for suffixLen := 0; suffixLen <= 18; suffixLen++ {
				totalLen := len(prefix) + length + suffixLen

				// Build the encoded input: uvarint total length, the prefix as
				// a literal, a tagCopy2 element carrying length and a two-byte
				// offset, then (optionally) part of the suffix as a literal.
				inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
				inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
				inputLen++
				inputLen += copy(inputBuf[inputLen:], prefix)
				inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
				inputBuf[inputLen+1] = byte(offset)
				inputBuf[inputLen+2] = 0x00
				inputLen += 3
				if suffixLen > 0 {
					inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
					inputLen++
					inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
				}
				input := inputBuf[:inputLen]

				// Fill the destination with the sentinel pattern so writes past
				// totalLen can be detected afterwards.
				for i := range gotBuf {
					gotBuf[i] = byte(notPresentBase + i%notPresentLen)
				}
				got, err := Decode(gotBuf[:], input)
				if err != nil {
					t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
					continue
				}

				// Compute the expected output by replaying the copy one byte
				// at a time, which also handles copies that overlap their own
				// output (offset < length).
				wantLen := 0
				wantLen += copy(wantBuf[wantLen:], prefix)
				for i := 0; i < length; i++ {
					wantBuf[wantLen] = wantBuf[wantLen-offset]
					wantLen++
				}
				wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
				want := wantBuf[:wantLen]

				// The sentinel range must not occur in the input.
				for _, x := range input {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
							length, offset, suffixLen, x, input)
						continue loop
					}
				}
				// Bytes at or beyond totalLen must still hold the sentinel.
				for i, x := range gotBuf {
					if i < totalLen {
						continue
					}
					if w := byte(notPresentBase + i%notPresentLen); x != w {
						t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
							"Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
							length, offset, suffixLen, totalLen, i, x, w, gotBuf)
						continue loop
					}
				}
				// The sentinel range must not occur in the expected output.
				for _, x := range want {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
							length, offset, suffixLen, x, want)
						continue loop
					}
				}

				if !bytes.Equal(got, want) {
					t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot   % x\nwant  % x",
						length, offset, suffixLen, input, got, want)
					continue
				}
			}
		}
	}
}
   584  
// File names of the golden test data: goldenText is the reference text and
// goldenCompressed is the same name with a ".rawsnappy" suffix, which
// TestDecodeGoldenInput expects to decode to the reference text.
const (
	goldenText       = "Mark.Twain-Tom.Sawyer.txt"
	goldenCompressed = goldenText + ".rawsnappy"
)
   589  
   590  func TestDecodeGoldenInput(t *testing.T) {
   591  	tDir := filepath.FromSlash(*testdataDir)
   592  	src, err := os.ReadFile(filepath.Join(tDir, goldenCompressed))
   593  	if err != nil {
   594  		t.Fatalf("ReadFile: %v", err)
   595  	}
   596  	got, err := Decode(nil, src)
   597  	if err != nil {
   598  		t.Fatalf("Decode: %v", err)
   599  	}
   600  	want, err := os.ReadFile(filepath.Join(tDir, goldenText))
   601  	if err != nil {
   602  		t.Fatalf("ReadFile: %v", err)
   603  	}
   604  	if err := cmp(got, want); err != nil {
   605  		t.Fatal(err)
   606  	}
   607  }
   608  
   609  // TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
   610  // described in decode_amd64.s and its claim of a 10 byte overrun worst case.
   611  func TestSlowForwardCopyOverrun(t *testing.T) {
   612  	const base = 100
   613  
   614  	for length := 1; length < 18; length++ {
   615  		for offset := 1; offset < 18; offset++ {
   616  			highWaterMark := base
   617  			d := base
   618  			l := length
   619  			o := offset
   620  
   621  			// makeOffsetAtLeast8
   622  			for o < 8 {
   623  				if end := d + 8; highWaterMark < end {
   624  					highWaterMark = end
   625  				}
   626  				l -= o
   627  				d += o
   628  				o += o
   629  			}
   630  
   631  			// fixUpSlowForwardCopy
   632  			a := d
   633  			d += l
   634  
   635  			// finishSlowForwardCopy
   636  			for l > 0 {
   637  				if end := a + 8; highWaterMark < end {
   638  					highWaterMark = end
   639  				}
   640  				a += 8
   641  				l -= 8
   642  			}
   643  
   644  			dWant := base + length
   645  			overrun := highWaterMark - dWant
   646  			if d != dWant || overrun < 0 || 10 < overrun {
   647  				t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
   648  					length, offset, d, overrun, dWant)
   649  			}
   650  		}
   651  	}
   652  }
   653  
// TestEncoderSkip will test skipping various sizes and block types.
//
// For each input size it writes a buffer made of incompressible and easily
// compressible regions through a Writer, then, for a series of skip
// distances, reads part of the stream, calls Skip, and checks that the rest
// of the stream matches the corresponding tail of the input. In short mode
// only the first two sizes and the first two skip distances are exercised.
func TestEncoderSkip(t *testing.T) {
	for ti, origLen := range []int{10 << 10, 256 << 10, 2 << 20, 8 << 20} {
		if testing.Short() && ti > 1 {
			break
		}
		t.Run(fmt.Sprint(origLen), func(t *testing.T) {
			src := make([]byte, origLen)
			rng := rand.New(rand.NewSource(1))
			firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
			// bonus is the last origLen/10 bytes of secondHalf; it is
			// overwritten with random data after secondHalf has been filled.
			bonus := secondHalf[len(secondHalf)-origLen/10:]
			for i := range firstHalf {
				// Incompressible.
				firstHalf[i] = uint8(rng.Intn(256))
			}
			for i := range secondHalf {
				// Easy to compress.
				secondHalf[i] = uint8(i & 32)
			}
			for i := range bonus {
				// Incompressible.
				bonus[i] = uint8(rng.Intn(256))
			}
			var dst bytes.Buffer
			enc := NewWriter(&dst, WriterBlockSize(64<<10))
			_, err := io.Copy(enc, bytes.NewBuffer(src))
			if err != nil {
				t.Fatal(err)
			}
			err = enc.Close()
			if err != nil {
				t.Fatal(err)
			}
			compressed := dst.Bytes()
			// One Reader is reused for all skip distances and Reset per subtest.
			dec := NewReader(nil)
			for i := 0; i < len(src); i += len(src)/20 - 17 {
				t.Run(fmt.Sprint("skip-", i), func(t *testing.T) {
					want := src[i:]
					dec.Reset(bytes.NewBuffer(compressed))
					// Read some of it first
					read, err := io.CopyN(io.Discard, dec, int64(len(want)/10))
					if err != nil {
						t.Fatal(err)
					}
					// skip what we just read.
					want = want[read:]
					err = dec.Skip(int64(i))
					if err != nil {
						t.Fatal(err)
					}
					// After reading `read` bytes and skipping i more, the
					// remainder should equal src[i+read:].
					got, err := io.ReadAll(dec)
					if err != nil {
						t.Errorf("Skipping %d returned error: %v", i, err)
						return
					}
					if !bytes.Equal(want, got) {
						t.Log("got  len:", len(got))
						t.Log("want len:", len(want))
						t.Errorf("Skipping %d did not return correct data (content mismatch)", i)
						return
					}
				})
				if testing.Short() && i > 0 {
					return
				}
			}
		})
	}
}
   723  
   724  // TestEncodeNoiseThenRepeats encodes input for which the first half is very
   725  // incompressible and the second half is very compressible. The encoded form's
   726  // length should be closer to 50% of the original length than 100%.
   727  func TestEncodeNoiseThenRepeats(t *testing.T) {
   728  	for _, origLen := range []int{256 * 1024, 2048 * 1024} {
   729  		src := make([]byte, origLen)
   730  		rng := rand.New(rand.NewSource(1))
   731  		firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
   732  		for i := range firstHalf {
   733  			firstHalf[i] = uint8(rng.Intn(256))
   734  		}
   735  		for i := range secondHalf {
   736  			secondHalf[i] = uint8(i >> 8)
   737  		}
   738  		dst := Encode(nil, src)
   739  		if got, want := len(dst), origLen*3/4; got >= want {
   740  			t.Fatalf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
   741  		}
   742  		t.Log(len(dst))
   743  	}
   744  }
   745  
   746  func TestFramingFormat(t *testing.T) {
   747  	// src is comprised of alternating 1e5-sized sequences of random
   748  	// (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
   749  	// because it is larger than maxBlockSize (64k).
   750  	src := make([]byte, 1e6)
   751  	rng := rand.New(rand.NewSource(1))
   752  	for i := 0; i < 10; i++ {
   753  		if i%2 == 0 {
   754  			for j := 0; j < 1e5; j++ {
   755  				src[1e5*i+j] = uint8(rng.Intn(256))
   756  			}
   757  		} else {
   758  			for j := 0; j < 1e5; j++ {
   759  				src[1e5*i+j] = uint8(i)
   760  			}
   761  		}
   762  	}
   763  
   764  	buf := new(bytes.Buffer)
   765  	bw := NewWriter(buf)
   766  	if _, err := bw.Write(src); err != nil {
   767  		t.Fatalf("Write: encoding: %v", err)
   768  	}
   769  	err := bw.Close()
   770  	if err != nil {
   771  		t.Fatal(err)
   772  	}
   773  	dst, err := io.ReadAll(NewReader(buf))
   774  	if err != nil {
   775  		t.Fatalf("ReadAll: decoding: %v", err)
   776  	}
   777  	if err := cmp(dst, src); err != nil {
   778  		t.Fatal(err)
   779  	}
   780  }
   781  
   782  func TestFramingFormatBetter(t *testing.T) {
   783  	// src is comprised of alternating 1e5-sized sequences of random
   784  	// (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
   785  	// because it is larger than maxBlockSize (64k).
   786  	src := make([]byte, 1e6)
   787  	rng := rand.New(rand.NewSource(1))
   788  	for i := 0; i < 10; i++ {
   789  		if i%2 == 0 {
   790  			for j := 0; j < 1e5; j++ {
   791  				src[1e5*i+j] = uint8(rng.Intn(256))
   792  			}
   793  		} else {
   794  			for j := 0; j < 1e5; j++ {
   795  				src[1e5*i+j] = uint8(i)
   796  			}
   797  		}
   798  	}
   799  
   800  	buf := new(bytes.Buffer)
   801  	bw := NewWriter(buf, WriterBetterCompression())
   802  	if _, err := bw.Write(src); err != nil {
   803  		t.Fatalf("Write: encoding: %v", err)
   804  	}
   805  	err := bw.Close()
   806  	if err != nil {
   807  		t.Fatal(err)
   808  	}
   809  	dst, err := io.ReadAll(NewReader(buf))
   810  	if err != nil {
   811  		t.Fatalf("ReadAll: decoding: %v", err)
   812  	}
   813  	if err := cmp(dst, src); err != nil {
   814  		t.Fatal(err)
   815  	}
   816  }
   817  
   818  func TestEmitLiteral(t *testing.T) {
   819  	testCases := []struct {
   820  		length int
   821  		want   string
   822  	}{
   823  		{1, "\x00"},
   824  		{2, "\x04"},
   825  		{59, "\xe8"},
   826  		{60, "\xec"},
   827  		{61, "\xf0\x3c"},
   828  		{62, "\xf0\x3d"},
   829  		{254, "\xf0\xfd"},
   830  		{255, "\xf0\xfe"},
   831  		{256, "\xf0\xff"},
   832  		{257, "\xf4\x00\x01"},
   833  		{65534, "\xf4\xfd\xff"},
   834  		{65535, "\xf4\xfe\xff"},
   835  		{65536, "\xf4\xff\xff"},
   836  	}
   837  
   838  	dst := make([]byte, 70000)
   839  	nines := bytes.Repeat([]byte{0x99}, 65536)
   840  	for _, tc := range testCases {
   841  		lit := nines[:tc.length]
   842  		n := emitLiteral(dst, lit)
   843  		if !bytes.HasSuffix(dst[:n], lit) {
   844  			t.Errorf("length=%d: did not end with that many literal bytes", tc.length)
   845  			continue
   846  		}
   847  		got := string(dst[:n-tc.length])
   848  		if got != tc.want {
   849  			t.Errorf("length=%d:\ngot  % x\nwant % x", tc.length, got, tc.want)
   850  			continue
   851  		}
   852  	}
   853  }
   854  
   855  func TestEmitCopy(t *testing.T) {
   856  	testCases := []struct {
   857  		offset int
   858  		length int
   859  		want   string
   860  	}{
   861  		{8, 04, "\x01\x08"},
   862  		{8, 11, "\x1d\x08"},
   863  		{8, 12, "\x2e\x08\x00"},
   864  		{8, 13, "\x32\x08\x00"},
   865  		{8, 59, "\xea\x08\x00"},
   866  		{8, 60, "\xee\x08\x00"},
   867  		{8, 61, "\xf2\x08\x00"},
   868  		{8, 62, "\xf6\x08\x00"},
   869  		{8, 63, "\xfa\x08\x00"},
   870  		{8, 64, "\xfe\x08\x00"},
   871  		{8, 65, "\x11\b\x15\x001"},
   872  		{8, 66, "\x11\b\x15\x002"},
   873  		{8, 67, "\x11\b\x15\x003"},
   874  		{8, 68, "\x11\b\x15\x004"},
   875  		{8, 69, "\x11\b\x15\x005"},
   876  		{8, 80, "\x11\b\x15\x00@"},
   877  		{8, 800, "\x11\b\x19\x00\x14\x02"},
   878  		{8, 800000, "\x11\b\x1d\x00\xf44\v"},
   879  
   880  		{256, 04, "\x21\x00"},
   881  		{256, 11, "\x3d\x00"},
   882  		{256, 12, "\x2e\x00\x01"},
   883  		{256, 13, "\x32\x00\x01"},
   884  		{256, 59, "\xea\x00\x01"},
   885  		{256, 60, "\xee\x00\x01"},
   886  		{256, 61, "\xf2\x00\x01"},
   887  		{256, 62, "\xf6\x00\x01"},
   888  		{256, 63, "\xfa\x00\x01"},
   889  		{256, 64, "\xfe\x00\x01"},
   890  		{256, 65, "1\x00\x15\x001"},
   891  		{256, 66, "1\x00\x15\x002"},
   892  		{256, 67, "1\x00\x15\x003"},
   893  		{256, 68, "1\x00\x15\x004"},
   894  		{256, 69, "1\x00\x15\x005"},
   895  		{256, 80, "1\x00\x15\x00@"},
   896  		{256, 800, "1\x00\x19\x00\x14\x02"},
   897  		{256, 80000, "1\x00\x1d\x00t8\x00"},
   898  
   899  		{2048, 04, "\x0e\x00\x08"},
   900  		{2048, 11, "\x2a\x00\x08"},
   901  		{2048, 12, "\x2e\x00\x08"},
   902  		{2048, 13, "\x32\x00\x08"},
   903  		{2048, 59, "\xea\x00\x08"},
   904  		{2048, 60, "\xee\x00\x08"},
   905  		{2048, 61, "\xf2\x00\x08"},
   906  		{2048, 62, "\xf6\x00\x08"},
   907  		{2048, 63, "\xfa\x00\x08"},
   908  		{2048, 64, "\xfe\x00\x08"},
   909  		{2048, 65, "\xee\x00\x08\x05\x00"},
   910  		{2048, 66, "\xee\x00\x08\x09\x00"},
   911  		{2048, 67, "\xee\x00\x08\x0d\x00"},
   912  		{2048, 68, "\xee\x00\x08\x11\x00"},
   913  		{2048, 69, "\xee\x00\x08\x15\x00\x01"},
   914  		{2048, 80, "\xee\x00\x08\x15\x00\x0c"},
   915  		{2048, 800, "\xee\x00\x08\x19\x00\xe0\x01"},
   916  		{2048, 80000, "\xee\x00\x08\x1d\x00\x40\x38\x00"},
   917  
   918  		{204800, 04, "\x0f\x00\x20\x03\x00"},
   919  		{204800, 65, "\xff\x00\x20\x03\x00\x03\x00\x20\x03\x00"},
   920  		{204800, 69, "\xff\x00\x20\x03\x00\x05\x00"},
   921  		{204800, 800, "\xff\x00\x20\x03\x00\x19\x00\xdc\x01"},
   922  		{204800, 80000, "\xff\x00\x20\x03\x00\x1d\x00\x3c\x38\x00"},
   923  	}
   924  
   925  	dst := make([]byte, 1024)
   926  	for _, tc := range testCases {
   927  		n := emitCopy(dst, tc.offset, tc.length)
   928  		got := string(dst[:n])
   929  		if got != tc.want {
   930  			t.Errorf("offset=%d, length=%d:\ngot  %q\nwant %q", tc.offset, tc.length, got, tc.want)
   931  		}
   932  	}
   933  }
   934  
   935  func TestNewWriter(t *testing.T) {
   936  	// Test all 32 possible sub-sequences of these 5 input slices.
   937  	//
   938  	// Their lengths sum to 400,000, which is over 6 times the Writer ibuf
   939  	// capacity: 6 * maxBlockSize is 393,216.
   940  	inputs := [][]byte{
   941  		bytes.Repeat([]byte{'a'}, 40000),
   942  		bytes.Repeat([]byte{'b'}, 150000),
   943  		bytes.Repeat([]byte{'c'}, 60000),
   944  		bytes.Repeat([]byte{'d'}, 120000),
   945  		bytes.Repeat([]byte{'e'}, 30000),
   946  	}
   947  loop:
   948  	for i := 0; i < 1<<uint(len(inputs)); i++ {
   949  		var want []byte
   950  		buf := new(bytes.Buffer)
   951  		w := NewWriter(buf)
   952  		for j, input := range inputs {
   953  			if i&(1<<uint(j)) == 0 {
   954  				continue
   955  			}
   956  			if _, err := w.Write(input); err != nil {
   957  				t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
   958  				continue loop
   959  			}
   960  			want = append(want, input...)
   961  		}
   962  		if err := w.Close(); err != nil {
   963  			t.Errorf("i=%#02x: Close: %v", i, err)
   964  			continue
   965  		}
   966  		got, err := io.ReadAll(NewReader(buf))
   967  		if err != nil {
   968  			t.Errorf("i=%#02x: ReadAll: %v", i, err)
   969  			continue
   970  		}
   971  		if err := cmp(got, want); err != nil {
   972  			t.Errorf("i=%#02x: %v", i, err)
   973  			continue
   974  		}
   975  	}
   976  }
   977  
   978  func TestFlush(t *testing.T) {
   979  	buf := new(bytes.Buffer)
   980  	w := NewWriter(buf)
   981  	defer w.Close()
   982  	if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
   983  		t.Fatalf("Write: %v", err)
   984  	}
   985  	if n := buf.Len(); n != 0 {
   986  		t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
   987  	}
   988  	if err := w.Flush(); err != nil {
   989  		t.Fatalf("Flush: %v", err)
   990  	}
   991  	if n := buf.Len(); n == 0 {
   992  		t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
   993  	}
   994  }
   995  
   996  func TestReaderUncompressedDataOK(t *testing.T) {
   997  	r := NewReader(strings.NewReader(magicChunk +
   998  		"\x01\x08\x00\x00" + // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
   999  		"\x68\x10\xe6\xb6" + // Checksum.
  1000  		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
  1001  	))
  1002  	g, err := io.ReadAll(r)
  1003  	if err != nil {
  1004  		t.Fatal(err)
  1005  	}
  1006  	if got, want := string(g), "abcd"; got != want {
  1007  		t.Fatalf("got %q, want %q", got, want)
  1008  	}
  1009  }
  1010  
  1011  func TestReaderUncompressedDataNoPayload(t *testing.T) {
  1012  	r := NewReader(strings.NewReader(magicChunk +
  1013  		"\x01\x04\x00\x00" + // Uncompressed chunk, 4 bytes long.
  1014  		"", // No payload; corrupt input.
  1015  	))
  1016  	if _, err := io.ReadAll(r); err != ErrCorrupt {
  1017  		t.Fatalf("got %v, want %v", err, ErrCorrupt)
  1018  	}
  1019  }
  1020  
  1021  func TestReaderUncompressedDataTooLong(t *testing.T) {
  1022  	// The maximum legal chunk length... is 4MB + 4 bytes checksum.
  1023  	n := maxBlockSize + checksumSize
  1024  	n32 := uint32(n)
  1025  	r := NewReader(strings.NewReader(magicChunk +
  1026  		// Uncompressed chunk, n bytes long.
  1027  		string([]byte{chunkTypeUncompressedData, uint8(n32), uint8(n32 >> 8), uint8(n32 >> 16)}) +
  1028  		strings.Repeat("\x00", n),
  1029  	))
  1030  	// CRC is not set, so we should expect that error.
  1031  	if _, err := io.ReadAll(r); err != ErrCRC {
  1032  		t.Fatalf("got %v, want %v", err, ErrCRC)
  1033  	}
  1034  
  1035  	// test first invalid.
  1036  	n++
  1037  	n32 = uint32(n)
  1038  	r = NewReader(strings.NewReader(magicChunk +
  1039  		// Uncompressed chunk, n bytes long.
  1040  		string([]byte{chunkTypeUncompressedData, uint8(n32), uint8(n32 >> 8), uint8(n32 >> 16)}) +
  1041  		strings.Repeat("\x00", n),
  1042  	))
  1043  	if _, err := io.ReadAll(r); err != ErrCorrupt {
  1044  		t.Fatalf("got %v, want %v", err, ErrCorrupt)
  1045  	}
  1046  }
  1047  
  1048  func TestReaderReset(t *testing.T) {
  1049  	gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
  1050  	buf := new(bytes.Buffer)
  1051  	w := NewWriter(buf)
  1052  	_, err := w.Write(gold)
  1053  	if err != nil {
  1054  		t.Fatalf("Write: %v", err)
  1055  	}
  1056  	err = w.Close()
  1057  	if err != nil {
  1058  		t.Fatalf("Close: %v", err)
  1059  	}
  1060  
  1061  	encoded, invalid, partial := buf.String(), "invalid", "partial"
  1062  	r := NewReader(nil)
  1063  	for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
  1064  		if s == partial {
  1065  			r.Reset(strings.NewReader(encoded))
  1066  			if _, err := r.Read(make([]byte, 101)); err != nil {
  1067  				t.Errorf("#%d: %v", i, err)
  1068  				continue
  1069  			}
  1070  			continue
  1071  		}
  1072  		r.Reset(strings.NewReader(s))
  1073  		got, err := io.ReadAll(r)
  1074  		switch s {
  1075  		case encoded:
  1076  			if err != nil {
  1077  				t.Errorf("#%d: %v", i, err)
  1078  				continue
  1079  			}
  1080  			if err := cmp(got, gold); err != nil {
  1081  				t.Errorf("#%d: %v", i, err)
  1082  				continue
  1083  			}
  1084  		case invalid:
  1085  			if err == nil {
  1086  				t.Errorf("#%d: got nil error, want non-nil", i)
  1087  				continue
  1088  			}
  1089  		}
  1090  	}
  1091  }
  1092  
  1093  func TestWriterReset(t *testing.T) {
  1094  	gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
  1095  	const n = 20
  1096  	w := NewWriter(nil)
  1097  	defer w.Close()
  1098  
  1099  	var gots, wants [][]byte
  1100  	failed := false
  1101  	for i := 0; i <= n; i++ {
  1102  		buf := new(bytes.Buffer)
  1103  		w.Reset(buf)
  1104  		want := gold[:len(gold)*i/n]
  1105  		if _, err := w.Write(want); err != nil {
  1106  			t.Errorf("#%d: Write: %v", i, err)
  1107  			failed = true
  1108  			continue
  1109  		}
  1110  		if err := w.Flush(); err != nil {
  1111  			t.Errorf("#%d: Flush: %v", i, err)
  1112  			failed = true
  1113  			got, err := io.ReadAll(NewReader(buf))
  1114  			if err != nil {
  1115  				t.Errorf("#%d: ReadAll: %v", i, err)
  1116  				failed = true
  1117  				continue
  1118  			}
  1119  			gots = append(gots, got)
  1120  			wants = append(wants, want)
  1121  		}
  1122  		if failed {
  1123  			continue
  1124  		}
  1125  		for i := range gots {
  1126  			if err := cmp(gots[i], wants[i]); err != nil {
  1127  				t.Errorf("#%d: %v", i, err)
  1128  			}
  1129  		}
  1130  	}
  1131  }
  1132  
  1133  func TestWriterResetWithoutFlush(t *testing.T) {
  1134  	buf0 := new(bytes.Buffer)
  1135  	buf1 := new(bytes.Buffer)
  1136  	w := NewWriter(buf0)
  1137  	if _, err := w.Write([]byte("xxx")); err != nil {
  1138  		t.Fatalf("Write #0: %v", err)
  1139  	}
  1140  	// Note that we don't Flush the Writer before calling Reset.
  1141  	w.Reset(buf1)
  1142  	if _, err := w.Write([]byte("yyy")); err != nil {
  1143  		t.Fatalf("Write #1: %v", err)
  1144  	}
  1145  	if err := w.Flush(); err != nil {
  1146  		t.Fatalf("Flush: %v", err)
  1147  	}
  1148  	got, err := io.ReadAll(NewReader(buf1))
  1149  	if err != nil {
  1150  		t.Fatalf("ReadAll: %v", err)
  1151  	}
  1152  	if err := cmp(got, []byte("yyy")); err != nil {
  1153  		t.Fatal(err)
  1154  	}
  1155  }
  1156  
  1157  type writeCounter int
  1158  
  1159  func (c *writeCounter) Write(p []byte) (int, error) {
  1160  	*c++
  1161  	return len(p), nil
  1162  }
  1163  
  1164  // TestNumUnderlyingWrites tests that each Writer flush only makes one or two
  1165  // Write calls on its underlying io.Writer, depending on whether or not the
  1166  // flushed buffer was compressible.
  1167  func TestNumUnderlyingWrites(t *testing.T) {
  1168  	testCases := []struct {
  1169  		input []byte
  1170  		want  int
  1171  	}{
  1172  		// Magic header + block
  1173  		{bytes.Repeat([]byte{'x'}, 100), 2},
  1174  		// One block each:
  1175  		{bytes.Repeat([]byte{'y'}, 100), 1},
  1176  		{[]byte("ABCDEFGHIJKLMNOPQRST"), 1},
  1177  	}
  1178  
  1179  	// If we are doing sync writes, we write uncompressed as two writes.
  1180  	if runtime.GOMAXPROCS(0) == 1 {
  1181  		testCases[2].want++
  1182  	}
  1183  	var c writeCounter
  1184  	w := NewWriter(&c)
  1185  	defer w.Close()
  1186  	for i, tc := range testCases {
  1187  		c = 0
  1188  		if _, err := w.Write(tc.input); err != nil {
  1189  			t.Errorf("#%d: Write: %v", i, err)
  1190  			continue
  1191  		}
  1192  		if err := w.Flush(); err != nil {
  1193  			t.Errorf("#%d: Flush: %v", i, err)
  1194  			continue
  1195  		}
  1196  		if int(c) != tc.want {
  1197  			t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
  1198  			continue
  1199  		}
  1200  	}
  1201  }
  1202  
  1203  func testWriterRoundtrip(t *testing.T, src []byte, opts ...WriterOption) {
  1204  	var buf bytes.Buffer
  1205  	enc := NewWriter(&buf, opts...)
  1206  	n, err := enc.Write(src)
  1207  	if err != nil {
  1208  		t.Error(err)
  1209  		return
  1210  	}
  1211  	if n != len(src) {
  1212  		t.Error(io.ErrShortWrite)
  1213  		return
  1214  	}
  1215  	err = enc.Flush()
  1216  	if err != nil {
  1217  		t.Error(err)
  1218  		return
  1219  	}
  1220  	// Extra flush and close should be noops.
  1221  	err = enc.Flush()
  1222  	if err != nil {
  1223  		t.Error(err)
  1224  		return
  1225  	}
  1226  	err = enc.Close()
  1227  	if err != nil {
  1228  		t.Error(err)
  1229  		return
  1230  	}
  1231  
  1232  	t.Logf("encoded to %d -> %d bytes", len(src), buf.Len())
  1233  	dec := NewReader(&buf)
  1234  	decoded, err := io.ReadAll(dec)
  1235  	if err != nil {
  1236  		t.Error(err)
  1237  		return
  1238  	}
  1239  	if len(decoded) != len(src) {
  1240  		t.Error("decoded len:", len(decoded), "!=", len(src))
  1241  		return
  1242  	}
  1243  	err = cmp(src, decoded)
  1244  	if err != nil {
  1245  		t.Error(err)
  1246  	}
  1247  }
  1248  
  1249  func testBlockRoundtrip(t *testing.T, src []byte) {
  1250  	dst := Encode(nil, src)
  1251  	t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
  1252  	decoded, err := Decode(nil, dst)
  1253  	if err != nil {
  1254  		t.Error(err)
  1255  		return
  1256  	}
  1257  	if len(decoded) != len(src) {
  1258  		t.Error("decoded len:", len(decoded), "!=", len(src))
  1259  		return
  1260  	}
  1261  	err = cmp(decoded, src)
  1262  	if err != nil {
  1263  		t.Error(err)
  1264  	}
  1265  }
  1266  
  1267  func testBetterBlockRoundtrip(t *testing.T, src []byte) {
  1268  	dst := EncodeBetter(nil, src)
  1269  	t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
  1270  	decoded, err := Decode(nil, dst)
  1271  	if err != nil {
  1272  		t.Error(err)
  1273  		return
  1274  	}
  1275  	if len(decoded) != len(src) {
  1276  		t.Error("decoded len:", len(decoded), "!=", len(src))
  1277  		return
  1278  	}
  1279  	err = cmp(src, decoded)
  1280  	if err != nil {
  1281  		t.Error(err)
  1282  	}
  1283  }
  1284  
  1285  func testBestBlockRoundtrip(t *testing.T, src []byte) {
  1286  	dst := EncodeBest(nil, src)
  1287  	t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
  1288  	decoded, err := Decode(nil, dst)
  1289  	if err != nil {
  1290  		t.Error(err)
  1291  		return
  1292  	}
  1293  	if len(decoded) != len(src) {
  1294  		t.Error("decoded len:", len(decoded), "!=", len(src))
  1295  		return
  1296  	}
  1297  	err = cmp(src, decoded)
  1298  	if err != nil {
  1299  		t.Error(err)
  1300  	}
  1301  }
  1302  
  1303  func testSnappyBlockRoundtrip(t *testing.T, src []byte) {
  1304  	// Write with s2, decode with snapref.
  1305  	t.Run("regular", func(t *testing.T) {
  1306  		dst := EncodeSnappy(nil, src)
  1307  		t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
  1308  		decoded, err := snapref.Decode(nil, dst)
  1309  		if err != nil {
  1310  			t.Error(err)
  1311  			return
  1312  		}
  1313  		if len(decoded) != len(src) {
  1314  			t.Error("decoded len:", len(decoded), "!=", len(src))
  1315  			return
  1316  		}
  1317  		err = cmp(src, decoded)
  1318  		if err != nil {
  1319  			t.Error(err)
  1320  		}
  1321  	})
  1322  	t.Run("better", func(t *testing.T) {
  1323  		dst := EncodeSnappyBetter(nil, src)
  1324  		t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
  1325  		decoded, err := snapref.Decode(nil, dst)
  1326  		if err != nil {
  1327  			t.Error(err)
  1328  			return
  1329  		}
  1330  		if len(decoded) != len(src) {
  1331  			t.Error("decoded len:", len(decoded), "!=", len(src))
  1332  			return
  1333  		}
  1334  		err = cmp(src, decoded)
  1335  		if err != nil {
  1336  			t.Error(err)
  1337  		}
  1338  	})
  1339  	t.Run("best", func(t *testing.T) {
  1340  		dst := EncodeSnappyBest(nil, src)
  1341  		t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
  1342  		decoded, err := snapref.Decode(nil, dst)
  1343  		if err != nil {
  1344  			t.Error(err)
  1345  			return
  1346  		}
  1347  		if len(decoded) != len(src) {
  1348  			t.Error("decoded len:", len(decoded), "!=", len(src))
  1349  			return
  1350  		}
  1351  		err = cmp(src, decoded)
  1352  		if err != nil {
  1353  			t.Error(err)
  1354  		}
  1355  	})
  1356  }
  1357  
  1358  func testSnappyDecode(t *testing.T, src []byte) {
  1359  	var buf bytes.Buffer
  1360  	enc := snapref.NewBufferedWriter(&buf)
  1361  	n, err := enc.Write(src)
  1362  	if err != nil {
  1363  		t.Error(err)
  1364  		return
  1365  	}
  1366  	if n != len(src) {
  1367  		t.Error(io.ErrShortWrite)
  1368  		return
  1369  	}
  1370  	enc.Close()
  1371  	t.Logf("encoded to %d -> %d bytes", len(src), buf.Len())
  1372  	dec := NewReader(&buf)
  1373  	decoded, err := io.ReadAll(dec)
  1374  	if err != nil {
  1375  		t.Error(err)
  1376  		return
  1377  	}
  1378  	if len(decoded) != len(src) {
  1379  		t.Error("decoded len:", len(decoded), "!=", len(src))
  1380  		return
  1381  	}
  1382  	err = cmp(src, decoded)
  1383  	if err != nil {
  1384  		t.Error(err)
  1385  	}
  1386  }
  1387  
  1388  func benchDecode(b *testing.B, src []byte) {
  1389  	b.Run("default", func(b *testing.B) {
  1390  		encoded := Encode(nil, src)
  1391  		b.SetBytes(int64(len(src)))
  1392  		b.ReportAllocs()
  1393  		b.ResetTimer()
  1394  		for i := 0; i < b.N; i++ {
  1395  			_, err := Decode(src[:0], encoded)
  1396  			if err != nil {
  1397  				b.Fatal(err)
  1398  			}
  1399  		}
  1400  		b.ReportMetric(100*float64(len(encoded))/float64(len(src)), "pct")
  1401  	})
  1402  	b.Run("better", func(b *testing.B) {
  1403  		encoded := EncodeBetter(nil, src)
  1404  		b.SetBytes(int64(len(src)))
  1405  		b.ReportAllocs()
  1406  		b.ResetTimer()
  1407  		for i := 0; i < b.N; i++ {
  1408  			_, err := Decode(src[:0], encoded)
  1409  			if err != nil {
  1410  				b.Fatal(err)
  1411  			}
  1412  		}
  1413  		b.ReportMetric(100*float64(len(encoded))/float64(len(src)), "pct")
  1414  	})
  1415  	b.Run("best", func(b *testing.B) {
  1416  		encoded := EncodeBest(nil, src)
  1417  		b.SetBytes(int64(len(src)))
  1418  		b.ReportAllocs()
  1419  		b.ResetTimer()
  1420  		for i := 0; i < b.N; i++ {
  1421  			_, err := Decode(src[:0], encoded)
  1422  			if err != nil {
  1423  				b.Fatal(err)
  1424  			}
  1425  		}
  1426  		b.ReportMetric(100*float64(len(encoded))/float64(len(src)), "pct")
  1427  	})
  1428  	b.Run("snappy-input", func(b *testing.B) {
  1429  		encoded := snapref.Encode(nil, src)
  1430  		b.SetBytes(int64(len(src)))
  1431  		b.ReportAllocs()
  1432  		b.ResetTimer()
  1433  		for i := 0; i < b.N; i++ {
  1434  			_, err := Decode(src[:0], encoded)
  1435  			if err != nil {
  1436  				b.Fatal(err)
  1437  			}
  1438  		}
  1439  		b.ReportMetric(100*float64(len(encoded))/float64(len(src)), "pct")
  1440  	})
  1441  }
  1442  
  1443  func benchEncode(b *testing.B, src []byte) {
  1444  	// Bandwidth is in amount of uncompressed data.
  1445  	dst := make([]byte, snapref.MaxEncodedLen(len(src)))
  1446  	b.ResetTimer()
  1447  	b.Run("default", func(b *testing.B) {
  1448  		b.SetBytes(int64(len(src)))
  1449  		b.ReportAllocs()
  1450  		for i := 0; i < b.N; i++ {
  1451  			Encode(dst, src)
  1452  		}
  1453  		b.ReportMetric(100*float64(len(Encode(dst, src)))/float64(len(src)), "pct")
  1454  	})
  1455  	b.Run("better", func(b *testing.B) {
  1456  		b.SetBytes(int64(len(src)))
  1457  		b.ReportAllocs()
  1458  		for i := 0; i < b.N; i++ {
  1459  			EncodeBetter(dst, src)
  1460  		}
  1461  		b.ReportMetric(100*float64(len(EncodeBetter(dst, src)))/float64(len(src)), "pct")
  1462  	})
  1463  	b.Run("best", func(b *testing.B) {
  1464  		b.SetBytes(int64(len(src)))
  1465  		b.ReportAllocs()
  1466  		for i := 0; i < b.N; i++ {
  1467  			EncodeBest(dst, src)
  1468  		}
  1469  		b.ReportMetric(100*float64(len(EncodeBest(dst, src)))/float64(len(src)), "pct")
  1470  	})
  1471  	b.Run("snappy-default", func(b *testing.B) {
  1472  		b.SetBytes(int64(len(src)))
  1473  		b.ReportAllocs()
  1474  		for i := 0; i < b.N; i++ {
  1475  			EncodeSnappy(dst, src)
  1476  		}
  1477  		b.ReportMetric(100*float64(len(EncodeSnappy(dst, src)))/float64(len(src)), "pct")
  1478  	})
  1479  	b.Run("snappy-better", func(b *testing.B) {
  1480  		b.SetBytes(int64(len(src)))
  1481  		b.ReportAllocs()
  1482  		for i := 0; i < b.N; i++ {
  1483  			EncodeSnappyBetter(dst, src)
  1484  		}
  1485  		b.ReportMetric(100*float64(len(EncodeSnappyBetter(dst, src)))/float64(len(src)), "pct")
  1486  	})
  1487  	b.Run("snappy-best", func(b *testing.B) {
  1488  		b.SetBytes(int64(len(src)))
  1489  		b.ReportAllocs()
  1490  		for i := 0; i < b.N; i++ {
  1491  			EncodeSnappyBest(dst, src)
  1492  		}
  1493  		b.ReportMetric(100*float64(len(EncodeSnappyBest(dst, src)))/float64(len(src)), "pct")
  1494  	})
  1495  	b.Run("snappy-ref-noasm", func(b *testing.B) {
  1496  		b.SetBytes(int64(len(src)))
  1497  		b.ReportAllocs()
  1498  		for i := 0; i < b.N; i++ {
  1499  			snapref.Encode(dst, src)
  1500  		}
  1501  		b.ReportMetric(100*float64(len(snapref.Encode(dst, src)))/float64(len(src)), "pct")
  1502  	})
  1503  }
  1504  
  1505  func testOrBenchmark(b testing.TB) string {
  1506  	if _, ok := b.(*testing.B); ok {
  1507  		return "benchmark"
  1508  	}
  1509  	return "test"
  1510  }
  1511  
  1512  func readFile(b testing.TB, filename string) []byte {
  1513  	src, err := os.ReadFile(filename)
  1514  	if err != nil {
  1515  		b.Skipf("skipping %s: %v", testOrBenchmark(b), err)
  1516  	}
  1517  	if len(src) == 0 {
  1518  		b.Fatalf("%s has zero length", filename)
  1519  	}
  1520  	return src
  1521  }
  1522  
  1523  // expand returns a slice of length n containing mutated copies of src.
  1524  func expand(src []byte, n int) []byte {
  1525  	dst := make([]byte, n)
  1526  	cnt := uint8(0)
  1527  	for x := dst; len(x) > 0; cnt++ {
  1528  		idx := copy(x, src)
  1529  		for i := range x {
  1530  			if i >= len(src) {
  1531  				break
  1532  			}
  1533  			x[i] = src[i] ^ cnt
  1534  		}
  1535  		x = x[idx:]
  1536  	}
  1537  	return dst
  1538  }
  1539  
  1540  func benchTwain(b *testing.B, n int, decode bool) {
  1541  	data := expand(readFile(b, "../testdata/Mark.Twain-Tom.Sawyer.txt"), n)
  1542  	if decode {
  1543  		benchDecode(b, data)
  1544  	} else {
  1545  		benchEncode(b, data)
  1546  	}
  1547  }
  1548  
  1549  func BenchmarkTwainDecode1e1(b *testing.B) { benchTwain(b, 1e1, true) }
  1550  func BenchmarkTwainDecode1e2(b *testing.B) { benchTwain(b, 1e2, true) }
  1551  func BenchmarkTwainDecode1e3(b *testing.B) { benchTwain(b, 1e3, true) }
  1552  func BenchmarkTwainDecode1e4(b *testing.B) { benchTwain(b, 1e4, true) }
  1553  func BenchmarkTwainDecode1e5(b *testing.B) { benchTwain(b, 1e5, true) }
  1554  func BenchmarkTwainDecode1e6(b *testing.B) { benchTwain(b, 1e6, true) }
  1555  func BenchmarkTwainDecode1e7(b *testing.B) { benchTwain(b, 1e7, true) }
  1556  func BenchmarkTwainEncode1e1(b *testing.B) { benchTwain(b, 1e1, false) }
  1557  func BenchmarkTwainEncode1e2(b *testing.B) { benchTwain(b, 1e2, false) }
  1558  func BenchmarkTwainEncode1e3(b *testing.B) { benchTwain(b, 1e3, false) }
  1559  func BenchmarkTwainEncode1e4(b *testing.B) { benchTwain(b, 1e4, false) }
  1560  func BenchmarkTwainEncode1e5(b *testing.B) { benchTwain(b, 1e5, false) }
  1561  func BenchmarkTwainEncode1e6(b *testing.B) { benchTwain(b, 1e6, false) }
  1562  func BenchmarkTwainEncode1e7(b *testing.B) { benchTwain(b, 1e7, false) }
  1563  
  1564  func BenchmarkRandomEncodeBlock1MB(b *testing.B) {
  1565  	rng := rand.New(rand.NewSource(1))
  1566  	data := make([]byte, 1<<20)
  1567  	for i := range data {
  1568  		data[i] = uint8(rng.Intn(256))
  1569  	}
  1570  	benchEncode(b, data)
  1571  }
  1572  
  1573  func BenchmarkRandomEncodeBetterBlock16MB(b *testing.B) {
  1574  	rng := rand.New(rand.NewSource(1))
  1575  	data := make([]byte, 16<<20)
  1576  	for i := range data {
  1577  		data[i] = uint8(rng.Intn(256))
  1578  	}
  1579  	benchEncode(b, data)
  1580  }
  1581  
  1582  // testFiles' values are copied directly from
  1583  // https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
  1584  // The label field is unused in snappy-go.
  1585  var testFiles = []struct {
  1586  	label     string
  1587  	filename  string
  1588  	sizeLimit int
  1589  }{
  1590  	{"html", "html", 0},
  1591  	{"urls", "urls.10K", 0},
  1592  	{"jpg", "fireworks.jpeg", 0},
  1593  	{"jpg_200b", "fireworks.jpeg", 200},
  1594  	{"pdf", "paper-100k.pdf", 0},
  1595  	{"html4", "html_x_4", 0},
  1596  	{"txt1", "alice29.txt", 0},
  1597  	{"txt2", "asyoulik.txt", 0},
  1598  	{"txt3", "lcet10.txt", 0},
  1599  	{"txt4", "plrabn12.txt", 0},
  1600  	{"pb", "geo.protodata", 0},
  1601  	{"gaviota", "kppkn.gtb", 0},
  1602  	{"txt1_128b", "alice29.txt", 128},
  1603  	{"txt1_1000b", "alice29.txt", 1000},
  1604  	{"txt1_10000b", "alice29.txt", 10000},
  1605  	{"txt1_20000b", "alice29.txt", 20000},
  1606  }
  1607  
  1608  const (
  1609  	// The benchmark data files are at this canonical URL.
  1610  	benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
  1611  )
  1612  
  1613  func downloadBenchmarkFiles(b testing.TB, basename string) (errRet error) {
  1614  	bDir := filepath.FromSlash(*benchdataDir)
  1615  	filename := filepath.Join(bDir, basename)
  1616  	if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
  1617  		return nil
  1618  	}
  1619  
  1620  	if !*download {
  1621  		b.Skipf("test data not found; skipping %s without the -download flag", testOrBenchmark(b))
  1622  	}
  1623  	// Download the official snappy C++ implementation reference test data
  1624  	// files for benchmarking.
  1625  	if err := os.MkdirAll(bDir, 0777); err != nil && !os.IsExist(err) {
  1626  		return fmt.Errorf("failed to create %s: %s", bDir, err)
  1627  	}
  1628  
  1629  	f, err := os.Create(filename)
  1630  	if err != nil {
  1631  		return fmt.Errorf("failed to create %s: %s", filename, err)
  1632  	}
  1633  	defer f.Close()
  1634  	defer func() {
  1635  		if errRet != nil {
  1636  			os.Remove(filename)
  1637  		}
  1638  	}()
  1639  	url := benchURL + basename
  1640  	resp, err := http.Get(url)
  1641  	if err != nil {
  1642  		return fmt.Errorf("failed to download %s: %s", url, err)
  1643  	}
  1644  	defer resp.Body.Close()
  1645  	if s := resp.StatusCode; s != http.StatusOK {
  1646  		return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
  1647  	}
  1648  	_, err = io.Copy(f, resp.Body)
  1649  	if err != nil {
  1650  		return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
  1651  	}
  1652  	return nil
  1653  }
  1654  
  1655  func TestEstimateBlockSize(t *testing.T) {
  1656  	var input []byte
  1657  	for i := 0; i < 100; i++ {
  1658  		EstimateBlockSize(input)
  1659  		input = append(input, 0)
  1660  	}
  1661  }
  1662  
  1663  func benchFile(b *testing.B, i int, decode bool) {
  1664  	if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil {
  1665  		b.Fatalf("failed to download testdata: %s", err)
  1666  	}
  1667  	bDir := filepath.FromSlash(*benchdataDir)
  1668  	data := readFile(b, filepath.Join(bDir, testFiles[i].filename))
  1669  	if !decode {
  1670  		b.Run("est-size", func(b *testing.B) {
  1671  			if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
  1672  				data = data[:n]
  1673  			}
  1674  			b.SetBytes(int64(len(data)))
  1675  			b.ReportAllocs()
  1676  			b.ResetTimer()
  1677  			b.RunParallel(func(pb *testing.PB) {
  1678  				for pb.Next() {
  1679  					_ = EstimateBlockSize(data)
  1680  				}
  1681  			})
  1682  			sz := float64(EstimateBlockSize(data))
  1683  			if sz > 0 {
  1684  				b.ReportMetric(100*sz/float64(len(data)), "pct")
  1685  				b.ReportMetric(sz, "B")
  1686  			}
  1687  		})
  1688  	}
  1689  	b.Run("block", func(b *testing.B) {
  1690  		if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
  1691  			data = data[:n]
  1692  		}
  1693  		if decode {
  1694  			b.SetBytes(int64(len(data)))
  1695  			b.ReportAllocs()
  1696  			b.ResetTimer()
  1697  			b.RunParallel(func(pb *testing.PB) {
  1698  				encoded := Encode(nil, data)
  1699  				tmp := make([]byte, len(data))
  1700  				for pb.Next() {
  1701  					var err error
  1702  					tmp, err = Decode(tmp, encoded)
  1703  					if err != nil {
  1704  						b.Fatal(err)
  1705  					}
  1706  				}
  1707  			})
  1708  		} else {
  1709  			b.SetBytes(int64(len(data)))
  1710  			b.ReportAllocs()
  1711  			b.ResetTimer()
  1712  			b.RunParallel(func(pb *testing.PB) {
  1713  				dst := make([]byte, MaxEncodedLen(len(data)))
  1714  				tmp := make([]byte, len(data))
  1715  				for pb.Next() {
  1716  					res := Encode(dst, data)
  1717  					if len(res) == 0 {
  1718  						panic(0)
  1719  					}
  1720  					if false {
  1721  						tmp, _ = Decode(tmp, res)
  1722  						if !bytes.Equal(tmp, data) {
  1723  							panic("wrong")
  1724  						}
  1725  					}
  1726  				}
  1727  			})
  1728  		}
  1729  		b.ReportMetric(100*float64(len(Encode(nil, data)))/float64(len(data)), "pct")
  1730  		b.ReportMetric(float64(len(Encode(nil, data))), "B")
  1731  	})
  1732  	b.Run("block-better", func(b *testing.B) {
  1733  		if decode {
  1734  			b.SetBytes(int64(len(data)))
  1735  			b.ReportAllocs()
  1736  			b.ResetTimer()
  1737  			b.RunParallel(func(pb *testing.PB) {
  1738  				encoded := EncodeBetter(nil, data)
  1739  				tmp := make([]byte, len(data))
  1740  				for pb.Next() {
  1741  					var err error
  1742  					tmp, err = Decode(tmp, encoded)
  1743  					if err != nil {
  1744  						b.Fatal(err)
  1745  					}
  1746  				}
  1747  			})
  1748  		} else {
  1749  			b.SetBytes(int64(len(data)))
  1750  			b.ReportAllocs()
  1751  			b.ResetTimer()
  1752  			b.RunParallel(func(pb *testing.PB) {
  1753  				dst := make([]byte, MaxEncodedLen(len(data)))
  1754  				tmp := make([]byte, len(data))
  1755  				for pb.Next() {
  1756  					res := EncodeBetter(dst, data)
  1757  					if len(res) == 0 {
  1758  						panic(0)
  1759  					}
  1760  					if false {
  1761  						tmp, _ = Decode(tmp, res)
  1762  						if !bytes.Equal(tmp, data) {
  1763  							panic("wrong")
  1764  						}
  1765  					}
  1766  				}
  1767  			})
  1768  		}
  1769  		b.ReportMetric(100*float64(len(EncodeBetter(nil, data)))/float64(len(data)), "pct")
  1770  		b.ReportMetric(float64(len(EncodeBetter(nil, data))), "B")
  1771  	})
  1772  
  1773  	b.Run("block-best", func(b *testing.B) {
  1774  		if decode {
  1775  			b.SetBytes(int64(len(data)))
  1776  			b.ReportAllocs()
  1777  			b.ResetTimer()
  1778  			b.RunParallel(func(pb *testing.PB) {
  1779  				encoded := EncodeBest(nil, data)
  1780  				tmp := make([]byte, len(data))
  1781  				for pb.Next() {
  1782  					var err error
  1783  					tmp, err = Decode(tmp, encoded)
  1784  					if err != nil {
  1785  						b.Fatal(err)
  1786  					}
  1787  				}
  1788  			})
  1789  			b.ReportMetric(100*float64(len(EncodeBest(nil, data)))/float64(len(data)), "pct")
  1790  		} else {
  1791  			b.SetBytes(int64(len(data)))
  1792  			b.ReportAllocs()
  1793  			b.ResetTimer()
  1794  			b.RunParallel(func(pb *testing.PB) {
  1795  				dst := make([]byte, MaxEncodedLen(len(data)))
  1796  				tmp := make([]byte, len(data))
  1797  				for pb.Next() {
  1798  					res := EncodeBest(dst, data)
  1799  					if len(res) == 0 {
  1800  						panic(0)
  1801  					}
  1802  					if false {
  1803  						tmp, _ = Decode(tmp, res)
  1804  						if !bytes.Equal(tmp, data) {
  1805  							panic("wrong")
  1806  						}
  1807  					}
  1808  				}
  1809  			})
  1810  			b.ReportMetric(100*float64(len(EncodeBest(nil, data)))/float64(len(data)), "pct")
  1811  		}
  1812  		b.ReportMetric(float64(len(EncodeBest(nil, data))), "B")
  1813  	})
  1814  }
  1815  
  1816  func benchFileSnappy(b *testing.B, i int, decode bool) {
  1817  	if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil {
  1818  		b.Fatalf("failed to download testdata: %s", err)
  1819  	}
  1820  	bDir := filepath.FromSlash(*benchdataDir)
  1821  	data := readFile(b, filepath.Join(bDir, testFiles[i].filename))
  1822  	if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
  1823  		data = data[:n]
  1824  	}
  1825  
  1826  	b.Run("s2-snappy", func(b *testing.B) {
  1827  		if decode {
  1828  			b.SetBytes(int64(len(data)))
  1829  			b.ResetTimer()
  1830  			b.ReportAllocs()
  1831  			b.RunParallel(func(pb *testing.PB) {
  1832  				encoded := EncodeSnappy(nil, data)
  1833  				tmp := make([]byte, len(data))
  1834  				for pb.Next() {
  1835  					var err error
  1836  					tmp, err = Decode(tmp, encoded)
  1837  					if err != nil {
  1838  						b.Fatal(err)
  1839  					}
  1840  				}
  1841  			})
  1842  			b.ReportMetric(100*float64(len(EncodeSnappy(nil, data)))/float64(len(data)), "pct")
  1843  		} else {
  1844  			b.SetBytes(int64(len(data)))
  1845  			b.ReportAllocs()
  1846  			b.ResetTimer()
  1847  
  1848  			b.RunParallel(func(pb *testing.PB) {
  1849  				dst := make([]byte, MaxEncodedLen(len(data)))
  1850  				for pb.Next() {
  1851  					res := EncodeSnappy(dst, data)
  1852  					if len(res) == 0 {
  1853  						panic(0)
  1854  					}
  1855  				}
  1856  			})
  1857  			b.ReportMetric(100*float64(len(EncodeSnappy(nil, data)))/float64(len(data)), "pct")
  1858  		}
  1859  		b.ReportMetric(float64(len(EncodeSnappy(nil, data))), "B")
  1860  	})
  1861  
  1862  	b.Run("s2-snappy-better", func(b *testing.B) {
  1863  		if decode {
  1864  			b.SetBytes(int64(len(data)))
  1865  			b.ResetTimer()
  1866  			b.RunParallel(func(pb *testing.PB) {
  1867  				encoded := EncodeSnappyBetter(nil, data)
  1868  				tmp := make([]byte, len(data))
  1869  				b.ReportAllocs()
  1870  				b.ResetTimer()
  1871  
  1872  				for pb.Next() {
  1873  					var err error
  1874  					tmp, err = Decode(tmp, encoded)
  1875  					if err != nil {
  1876  						b.Fatal(err)
  1877  					}
  1878  				}
  1879  			})
  1880  			b.ReportMetric(100*float64(len(EncodeSnappyBetter(nil, data)))/float64(len(data)), "pct")
  1881  		} else {
  1882  			b.SetBytes(int64(len(data)))
  1883  			b.ReportAllocs()
  1884  			b.ResetTimer()
  1885  			b.RunParallel(func(pb *testing.PB) {
  1886  				dst := make([]byte, MaxEncodedLen(len(data)))
  1887  				tmp := make([]byte, len(data))
  1888  				for pb.Next() {
  1889  					res := EncodeSnappyBetter(dst, data)
  1890  					if len(res) == 0 {
  1891  						panic(0)
  1892  					}
  1893  					if false {
  1894  						tmp, _ = Decode(tmp, res)
  1895  						if !bytes.Equal(tmp, data) {
  1896  							panic("wrong")
  1897  						}
  1898  					}
  1899  				}
  1900  			})
  1901  			b.ReportMetric(100*float64(len(EncodeSnappyBetter(nil, data)))/float64(len(data)), "pct")
  1902  		}
  1903  		b.ReportMetric(float64(len(EncodeSnappyBetter(nil, data))), "B")
  1904  	})
  1905  
  1906  	b.Run("s2-snappy-best", func(b *testing.B) {
  1907  		if decode {
  1908  			b.SetBytes(int64(len(data)))
  1909  			b.ReportAllocs()
  1910  			b.ResetTimer()
  1911  			b.RunParallel(func(pb *testing.PB) {
  1912  				encoded := EncodeSnappyBest(nil, data)
  1913  				tmp := make([]byte, len(data))
  1914  				for pb.Next() {
  1915  					var err error
  1916  					tmp, err = Decode(tmp, encoded)
  1917  					if err != nil {
  1918  						b.Fatal(err)
  1919  					}
  1920  				}
  1921  			})
  1922  			b.ReportMetric(100*float64(len(EncodeSnappyBest(nil, data)))/float64(len(data)), "pct")
  1923  		} else {
  1924  			b.SetBytes(int64(len(data)))
  1925  			b.ReportAllocs()
  1926  			b.ResetTimer()
  1927  			b.RunParallel(func(pb *testing.PB) {
  1928  				dst := make([]byte, MaxEncodedLen(len(data)))
  1929  				tmp := make([]byte, len(data))
  1930  				for pb.Next() {
  1931  					res := EncodeSnappyBest(dst, data)
  1932  					if len(res) == 0 {
  1933  						panic(0)
  1934  					}
  1935  					if false {
  1936  						tmp, _ = snapref.Decode(tmp, res)
  1937  						if !bytes.Equal(tmp, data) {
  1938  							panic("wrong")
  1939  						}
  1940  					}
  1941  				}
  1942  			})
  1943  			b.ReportMetric(100*float64(len(EncodeSnappyBest(nil, data)))/float64(len(data)), "pct")
  1944  		}
  1945  		b.ReportMetric(float64(len(EncodeSnappyBest(nil, data))), "B")
  1946  	})
  1947  	b.Run("snappy-noasm", func(b *testing.B) {
  1948  		if decode {
  1949  			b.SetBytes(int64(len(data)))
  1950  			b.ReportAllocs()
  1951  			b.ResetTimer()
  1952  			b.RunParallel(func(pb *testing.PB) {
  1953  				encoded := snapref.Encode(nil, data)
  1954  				tmp := make([]byte, len(data))
  1955  				for pb.Next() {
  1956  					var err error
  1957  					tmp, err = snapref.Decode(tmp, encoded)
  1958  					if err != nil {
  1959  						b.Fatal(err)
  1960  					}
  1961  				}
  1962  			})
  1963  			b.ReportMetric(100*float64(len(snapref.Encode(nil, data)))/float64(len(data)), "pct")
  1964  		} else {
  1965  			b.SetBytes(int64(len(data)))
  1966  			b.ReportAllocs()
  1967  			b.ResetTimer()
  1968  			b.RunParallel(func(pb *testing.PB) {
  1969  				dst := make([]byte, snapref.MaxEncodedLen(len(data)))
  1970  				tmp := make([]byte, len(data))
  1971  				for pb.Next() {
  1972  					res := snapref.Encode(dst, data)
  1973  					if len(res) == 0 {
  1974  						panic(0)
  1975  					}
  1976  					if false {
  1977  						tmp, _ = snapref.Decode(tmp, res)
  1978  						if !bytes.Equal(tmp, data) {
  1979  							panic("wrong")
  1980  						}
  1981  					}
  1982  				}
  1983  			})
  1984  			b.ReportMetric(100*float64(len(snapref.Encode(nil, data)))/float64(len(data)), "pct")
  1985  		}
  1986  	})
  1987  }
  1988  
  1989  func TestRoundtrips(t *testing.T) {
  1990  	testFile(t, 0, 10)
  1991  	testFile(t, 1, 10)
  1992  	testFile(t, 2, 10)
  1993  	testFile(t, 3, 10)
  1994  	testFile(t, 4, 10)
  1995  	testFile(t, 5, 10)
  1996  	testFile(t, 6, 10)
  1997  	testFile(t, 7, 10)
  1998  	testFile(t, 8, 10)
  1999  	testFile(t, 9, 10)
  2000  	testFile(t, 10, 10)
  2001  	testFile(t, 11, 10)
  2002  	testFile(t, 12, 0)
  2003  	testFile(t, 13, 0)
  2004  	testFile(t, 14, 0)
  2005  	testFile(t, 15, 0)
  2006  }
  2007  
  2008  func testFile(t *testing.T, i, repeat int) {
  2009  	if err := downloadBenchmarkFiles(t, testFiles[i].filename); err != nil {
  2010  		t.Skipf("failed to download testdata: %s", err)
  2011  	}
  2012  
  2013  	if testing.Short() {
  2014  		repeat = 0
  2015  	}
  2016  	t.Run(fmt.Sprint(i, "-", testFiles[i].label), func(t *testing.T) {
  2017  		bDir := filepath.FromSlash(*benchdataDir)
  2018  		data := readFile(t, filepath.Join(bDir, testFiles[i].filename))
  2019  		if testing.Short() && len(data) > 10000 {
  2020  			t.SkipNow()
  2021  		}
  2022  		oSize := len(data)
  2023  		for i := 0; i < repeat; i++ {
  2024  			data = append(data, data[:oSize]...)
  2025  		}
  2026  		t.Run("s2", func(t *testing.T) {
  2027  			testWriterRoundtrip(t, data)
  2028  		})
  2029  		t.Run("s2-better", func(t *testing.T) {
  2030  			testWriterRoundtrip(t, data, WriterBetterCompression())
  2031  		})
  2032  		t.Run("s2-best", func(t *testing.T) {
  2033  			testWriterRoundtrip(t, data, WriterBestCompression())
  2034  		})
  2035  		t.Run("s2-uncompressed", func(t *testing.T) {
  2036  			testWriterRoundtrip(t, data, WriterUncompressed())
  2037  		})
  2038  		t.Run("block", func(t *testing.T) {
  2039  			d := data
  2040  			testBlockRoundtrip(t, d)
  2041  		})
  2042  		t.Run("block-better", func(t *testing.T) {
  2043  			d := data
  2044  			testBetterBlockRoundtrip(t, d)
  2045  		})
  2046  		t.Run("block-best", func(t *testing.T) {
  2047  			d := data
  2048  			testBestBlockRoundtrip(t, d)
  2049  		})
  2050  		t.Run("s2-snappy", func(t *testing.T) {
  2051  			d := data
  2052  			testSnappyBlockRoundtrip(t, d)
  2053  		})
  2054  		t.Run("snappy", func(t *testing.T) {
  2055  			testSnappyDecode(t, data)
  2056  		})
  2057  	})
  2058  }
  2059  
  2060  func TestDataRoundtrips(t *testing.T) {
  2061  	test := func(t *testing.T, data []byte) {
  2062  		t.Run("s2", func(t *testing.T) {
  2063  			testWriterRoundtrip(t, data)
  2064  		})
  2065  		t.Run("s2-better", func(t *testing.T) {
  2066  			testWriterRoundtrip(t, data, WriterBetterCompression())
  2067  		})
  2068  		t.Run("s2-best", func(t *testing.T) {
  2069  			testWriterRoundtrip(t, data, WriterBestCompression())
  2070  		})
  2071  		t.Run("block", func(t *testing.T) {
  2072  			d := data
  2073  			testBlockRoundtrip(t, d)
  2074  		})
  2075  		t.Run("block-better", func(t *testing.T) {
  2076  			d := data
  2077  			testBetterBlockRoundtrip(t, d)
  2078  		})
  2079  		t.Run("block-best", func(t *testing.T) {
  2080  			d := data
  2081  			testBestBlockRoundtrip(t, d)
  2082  		})
  2083  		t.Run("s2-snappy", func(t *testing.T) {
  2084  			d := data
  2085  			testSnappyBlockRoundtrip(t, d)
  2086  		})
  2087  		t.Run("snappy", func(t *testing.T) {
  2088  			testSnappyDecode(t, data)
  2089  		})
  2090  	}
  2091  	t.Run("longblock", func(t *testing.T) {
  2092  		data := make([]byte, 1<<25)
  2093  		if testing.Short() {
  2094  			data = data[:1<<20]
  2095  		}
  2096  		test(t, data)
  2097  	})
  2098  	t.Run("4f9e1a0", func(t *testing.T) {
  2099  		comp, _ := os.ReadFile("testdata/4f9e1a0da7915a3d69632f5613ed78bc998a8a23.zst")
  2100  		dec, _ := zstd.NewReader(bytes.NewBuffer(comp))
  2101  		data, _ := io.ReadAll(dec)
  2102  		test(t, data)
  2103  	})
  2104  	data, err := os.ReadFile("testdata/enc_regressions.zip")
  2105  	if err != nil {
  2106  		t.Fatal(err)
  2107  	}
  2108  	zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
  2109  	if err != nil {
  2110  		t.Fatal(err)
  2111  	}
  2112  	for _, tt := range zr.File {
  2113  		if !strings.HasSuffix(t.Name(), "") {
  2114  			continue
  2115  		}
  2116  		t.Run(tt.Name, func(t *testing.T) {
  2117  			r, err := tt.Open()
  2118  			if err != nil {
  2119  				t.Error(err)
  2120  				return
  2121  			}
  2122  			b, err := io.ReadAll(r)
  2123  			if err != nil {
  2124  				t.Error(err)
  2125  				return
  2126  			}
  2127  			test(t, b[:len(b):len(b)])
  2128  		})
  2129  	}
  2130  
  2131  }
  2132  
  2133  func BenchmarkDecodeS2BlockParallel(b *testing.B) {
  2134  	for i := range testFiles {
  2135  		b.Run(fmt.Sprint(i, "-", testFiles[i].label), func(b *testing.B) {
  2136  			benchFile(b, i, true)
  2137  		})
  2138  	}
  2139  }
  2140  
  2141  func BenchmarkEncodeS2BlockParallel(b *testing.B) {
  2142  	for i := range testFiles {
  2143  		b.Run(fmt.Sprint(i, "-", testFiles[i].label), func(b *testing.B) {
  2144  			benchFile(b, i, false)
  2145  		})
  2146  	}
  2147  }
  2148  
  2149  func BenchmarkDecodeSnappyBlockParallel(b *testing.B) {
  2150  	for i := range testFiles {
  2151  		b.Run(fmt.Sprint(i, "-", testFiles[i].label), func(b *testing.B) {
  2152  			benchFileSnappy(b, i, true)
  2153  		})
  2154  	}
  2155  }
  2156  
  2157  func BenchmarkEncodeSnappyBlockParallel(b *testing.B) {
  2158  	for i := range testFiles {
  2159  		b.Run(fmt.Sprint(i, "-", testFiles[i].label), func(b *testing.B) {
  2160  			benchFileSnappy(b, i, false)
  2161  		})
  2162  	}
  2163  }
  2164  
  2165  func TestMatchLen(t *testing.T) {
  2166  	// ref is a simple, reference implementation of matchLen.
  2167  	ref := func(a, b []byte) int {
  2168  		n := 0
  2169  		for i := range a {
  2170  			if a[i] != b[i] {
  2171  				break
  2172  			}
  2173  			n++
  2174  		}
  2175  		return n
  2176  	}
  2177  
  2178  	// We allow slightly shorter matches at the end of slices
  2179  	const maxBelow = 0
  2180  	nums := []int{0, 1, 2, 7, 8, 9, 16, 20, 29, 30, 31, 32, 33, 34, 38, 39, 40}
  2181  	for yIndex := 40; yIndex > 30; yIndex-- {
  2182  		xxx := bytes.Repeat([]byte("x"), 40)
  2183  		if yIndex < len(xxx) {
  2184  			xxx[yIndex] = 'y'
  2185  		}
  2186  		for _, i := range nums {
  2187  			for _, j := range nums {
  2188  				if i >= j {
  2189  					continue
  2190  				}
  2191  				got := matchLen(xxx[j:], xxx[i:])
  2192  				want := ref(xxx[j:], xxx[i:])
  2193  				if got > want {
  2194  					t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
  2195  					continue
  2196  				}
  2197  				if got < want-maxBelow {
  2198  					t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
  2199  				}
  2200  			}
  2201  		}
  2202  	}
  2203  }
  2204  

View as plain text