snappy_test.go

Documentation: github.com/golang/snappy

     1  // Copyright 2011 The Snappy-Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package snappy
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"flag"
    11  	"fmt"
    12  	"io"
    13  	"io/ioutil"
    14  	"math/rand"
    15  	"net/http"
    16  	"os"
    17  	"os/exec"
    18  	"path/filepath"
    19  	"runtime"
    20  	"strings"
    21  	"testing"
    22  )
    23  
// Command-line flags that control where the tests and benchmarks look for
// their input files.
var (
	// download, if true, asks for any missing files to be downloaded before
	// running benchmarks. It defaults to false.
	download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
	// testdataDir is the slash-separated directory containing the test data.
	testdataDir = flag.String("testdataDir", "testdata", "Directory containing the test data")
	// benchdataDir is the slash-separated directory containing the benchmark
	// data.
	benchdataDir = flag.String("benchdataDir", "testdata/bench", "Directory containing the benchmark data")
)
    29  
// goEncoderShouldMatchCppEncoder is whether to test that the algorithm used by
// Go's encoder matches byte-for-byte what the C++ snappy encoder produces, on
// this GOARCH. There is more than one valid encoding of any given input, and
// there is more than one good algorithm along the frontier of trading off
// throughput for output size. Nonetheless, we presume that the C++ encoder's
// algorithm is a good one and has been tested on a wide range of inputs, so
// matching that exactly should mean that the Go encoder's algorithm is also
// good, without needing to gather our own corpus of test data.
//
// The exact algorithm used by the C++ code is potentially endian dependent, as
// it puns a byte pointer to a uint32 pointer to load, hash and compare 4 bytes
// at a time. The Go implementation is endian agnostic, in that its output is
// the same (as little-endian C++ code), regardless of the CPU's endianness.
//
// Thus, when comparing Go's output to C++ output generated beforehand, such as
// the "testdata/pi.txt.rawsnappy" file generated by C++ code on a little-
// endian system, we can run that test regardless of the runtime.GOARCH value.
//
// When comparing Go's output to dynamically generated C++ output, i.e. the
// result of fork/exec'ing a C++ program, we can run that test only on
// little-endian systems, because the C++ output might be different on
// big-endian systems. The runtime package doesn't export endianness per se,
// but we can restrict this match-C++ test to common little-endian systems.
//
// The value is true only when runtime.GOARCH is "386", "amd64" or "arm"; on
// every other architecture the match-C++ tests are skipped.
const goEncoderShouldMatchCppEncoder = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "arm"
    54  
    55  func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
    56  	got := maxEncodedLenOfMaxBlockSize
    57  	want := MaxEncodedLen(maxBlockSize)
    58  	if got != want {
    59  		t.Fatalf("got %d, want %d", got, want)
    60  	}
    61  }
    62  
    63  func cmp(a, b []byte) error {
    64  	if bytes.Equal(a, b) {
    65  		return nil
    66  	}
    67  	if len(a) != len(b) {
    68  		return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
    69  	}
    70  	for i := range a {
    71  		if a[i] != b[i] {
    72  			return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i])
    73  		}
    74  	}
    75  	return nil
    76  }
    77  
    78  func roundtrip(b, ebuf, dbuf []byte) error {
    79  	d, err := Decode(dbuf, Encode(ebuf, b))
    80  	if err != nil {
    81  		return fmt.Errorf("decoding error: %v", err)
    82  	}
    83  	if err := cmp(d, b); err != nil {
    84  		return fmt.Errorf("roundtrip mismatch: %v", err)
    85  	}
    86  	return nil
    87  }
    88  
    89  func TestEmpty(t *testing.T) {
    90  	if err := roundtrip(nil, nil, nil); err != nil {
    91  		t.Fatal(err)
    92  	}
    93  }
    94  
    95  func TestSmallCopy(t *testing.T) {
    96  	for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
    97  		for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
    98  			for i := 0; i < 32; i++ {
    99  				s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
   100  				if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
   101  					t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
   102  				}
   103  			}
   104  		}
   105  	}
   106  }
   107  
   108  func TestSmallRand(t *testing.T) {
   109  	rng := rand.New(rand.NewSource(1))
   110  	for n := 1; n < 20000; n += 23 {
   111  		b := make([]byte, n)
   112  		for i := range b {
   113  			b[i] = uint8(rng.Intn(256))
   114  		}
   115  		if err := roundtrip(b, nil, nil); err != nil {
   116  			t.Fatal(err)
   117  		}
   118  	}
   119  }
   120  
   121  func TestSmallRegular(t *testing.T) {
   122  	for n := 1; n < 20000; n += 23 {
   123  		b := make([]byte, n)
   124  		for i := range b {
   125  			b[i] = uint8(i%10 + 'a')
   126  		}
   127  		if err := roundtrip(b, nil, nil); err != nil {
   128  			t.Fatal(err)
   129  		}
   130  	}
   131  }
   132  
   133  func TestInvalidVarint(t *testing.T) {
   134  	testCases := []struct {
   135  		desc  string
   136  		input string
   137  	}{{
   138  		"invalid varint, final byte has continuation bit set",
   139  		"\xff",
   140  	}, {
   141  		"invalid varint, value overflows uint64",
   142  		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
   143  	}, {
   144  		// https://github.com/google/snappy/blob/master/format_description.txt
   145  		// says that "the stream starts with the uncompressed length [as a
   146  		// varint] (up to a maximum of 2^32 - 1)".
   147  		"valid varint (as uint64), but value overflows uint32",
   148  		"\x80\x80\x80\x80\x10",
   149  	}}
   150  
   151  	for _, tc := range testCases {
   152  		input := []byte(tc.input)
   153  		if _, err := DecodedLen(input); err != ErrCorrupt {
   154  			t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
   155  		}
   156  		if _, err := Decode(nil, input); err != ErrCorrupt {
   157  			t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
   158  		}
   159  	}
   160  }
   161  
   162  func TestDecode(t *testing.T) {
   163  	lit40Bytes := make([]byte, 40)
   164  	for i := range lit40Bytes {
   165  		lit40Bytes[i] = byte(i)
   166  	}
   167  	lit40 := string(lit40Bytes)
   168  
   169  	testCases := []struct {
   170  		desc    string
   171  		input   string
   172  		want    string
   173  		wantErr error
   174  	}{{
   175  		`decodedLen=0; valid input`,
   176  		"\x00",
   177  		"",
   178  		nil,
   179  	}, {
   180  		`decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
   181  		"\x03" + "\x08\xff\xff\xff",
   182  		"\xff\xff\xff",
   183  		nil,
   184  	}, {
   185  		`decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
   186  		"\x02" + "\x08\xff\xff\xff",
   187  		"",
   188  		ErrCorrupt,
   189  	}, {
   190  		`decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
   191  		"\x03" + "\x08\xff\xff",
   192  		"",
   193  		ErrCorrupt,
   194  	}, {
   195  		`decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
   196  		"\x28" + "\x9c" + lit40,
   197  		lit40,
   198  		nil,
   199  	}, {
   200  		`decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
   201  		"\x01" + "\xf0",
   202  		"",
   203  		ErrCorrupt,
   204  	}, {
   205  		`decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
   206  		"\x03" + "\xf0\x02\xff\xff\xff",
   207  		"\xff\xff\xff",
   208  		nil,
   209  	}, {
   210  		`decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
   211  		"\x01" + "\xf4\x00",
   212  		"",
   213  		ErrCorrupt,
   214  	}, {
   215  		`decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
   216  		"\x03" + "\xf4\x02\x00\xff\xff\xff",
   217  		"\xff\xff\xff",
   218  		nil,
   219  	}, {
   220  		`decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
   221  		"\x01" + "\xf8\x00\x00",
   222  		"",
   223  		ErrCorrupt,
   224  	}, {
   225  		`decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
   226  		"\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
   227  		"\xff\xff\xff",
   228  		nil,
   229  	}, {
   230  		`decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
   231  		"\x01" + "\xfc\x00\x00\x00",
   232  		"",
   233  		ErrCorrupt,
   234  	}, {
   235  		`decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
   236  		"\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
   237  		"",
   238  		ErrCorrupt,
   239  	}, {
   240  		`decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
   241  		"\x04" + "\xfc\x02\x00\x00\x00\xff",
   242  		"",
   243  		ErrCorrupt,
   244  	}, {
   245  		`decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
   246  		"\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
   247  		"\xff\xff\xff",
   248  		nil,
   249  	}, {
   250  		`decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
   251  		"\x04" + "\x01",
   252  		"",
   253  		ErrCorrupt,
   254  	}, {
   255  		`decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
   256  		"\x04" + "\x02\x00",
   257  		"",
   258  		ErrCorrupt,
   259  	}, {
   260  		`decodedLen=4; tagCopy4, 4 extra length|offset bytes; not enough extra bytes`,
   261  		"\x04" + "\x03\x00\x00\x00",
   262  		"",
   263  		ErrCorrupt,
   264  	}, {
   265  		`decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
   266  		"\x04" + "\x0cabcd",
   267  		"abcd",
   268  		nil,
   269  	}, {
   270  		`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
   271  		"\x0d" + "\x0cabcd" + "\x15\x04",
   272  		"abcdabcdabcda",
   273  		nil,
   274  	}, {
   275  		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
   276  		"\x08" + "\x0cabcd" + "\x01\x04",
   277  		"abcdabcd",
   278  		nil,
   279  	}, {
   280  		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
   281  		"\x08" + "\x0cabcd" + "\x01\x02",
   282  		"abcdcdcd",
   283  		nil,
   284  	}, {
   285  		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
   286  		"\x08" + "\x0cabcd" + "\x01\x01",
   287  		"abcddddd",
   288  		nil,
   289  	}, {
   290  		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
   291  		"\x08" + "\x0cabcd" + "\x01\x00",
   292  		"",
   293  		ErrCorrupt,
   294  	}, {
   295  		`decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
   296  		"\x09" + "\x0cabcd" + "\x01\x04",
   297  		"",
   298  		ErrCorrupt,
   299  	}, {
   300  		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
   301  		"\x08" + "\x0cabcd" + "\x01\x05",
   302  		"",
   303  		ErrCorrupt,
   304  	}, {
   305  		`decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
   306  		"\x07" + "\x0cabcd" + "\x01\x04",
   307  		"",
   308  		ErrCorrupt,
   309  	}, {
   310  		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
   311  		"\x06" + "\x0cabcd" + "\x06\x03\x00",
   312  		"abcdbc",
   313  		nil,
   314  	}, {
   315  		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy4; length=2 offset=3; valid input`,
   316  		"\x06" + "\x0cabcd" + "\x07\x03\x00\x00\x00",
   317  		"abcdbc",
   318  		nil,
   319  	}, {
   320  		`decodedLen=0; tagCopy4, 4 extra length|offset bytes; with msb set (0x93); discovered by go-fuzz`,
   321  		"\x00\xfc000\x93",
   322  		"",
   323  		ErrCorrupt,
   324  	}}
   325  
   326  	const (
   327  		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
   328  		// not present in either the input or the output. It is written to dBuf
   329  		// to check that Decode does not write bytes past the end of
   330  		// dBuf[:dLen].
   331  		//
   332  		// The magic number 37 was chosen because it is prime. A more 'natural'
   333  		// number like 32 might lead to a false negative if, for example, a
   334  		// byte was incorrectly copied 4*8 bytes later.
   335  		notPresentBase = 0xa0
   336  		notPresentLen  = 37
   337  	)
   338  
   339  	var dBuf [100]byte
   340  loop:
   341  	for i, tc := range testCases {
   342  		input := []byte(tc.input)
   343  		for _, x := range input {
   344  			if notPresentBase <= x && x < notPresentBase+notPresentLen {
   345  				t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
   346  				continue loop
   347  			}
   348  		}
   349  
   350  		dLen, n := binary.Uvarint(input)
   351  		if n <= 0 {
   352  			t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
   353  			continue
   354  		}
   355  		if dLen > uint64(len(dBuf)) {
   356  			t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
   357  			continue
   358  		}
   359  
   360  		for j := range dBuf {
   361  			dBuf[j] = byte(notPresentBase + j%notPresentLen)
   362  		}
   363  		g, gotErr := Decode(dBuf[:], input)
   364  		if got := string(g); got != tc.want || gotErr != tc.wantErr {
   365  			t.Errorf("#%d (%s):\ngot  %q, %v\nwant %q, %v",
   366  				i, tc.desc, got, gotErr, tc.want, tc.wantErr)
   367  			continue
   368  		}
   369  		for j, x := range dBuf {
   370  			if uint64(j) < dLen {
   371  				continue
   372  			}
   373  			if w := byte(notPresentBase + j%notPresentLen); x != w {
   374  				t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
   375  					i, tc.desc, j, x, w, dBuf)
   376  				continue loop
   377  			}
   378  		}
   379  	}
   380  }
   381  
   382  func TestDecodeCopy4(t *testing.T) {
   383  	dots := strings.Repeat(".", 65536)
   384  
   385  	input := strings.Join([]string{
   386  		"\x89\x80\x04",         // decodedLen = 65545.
   387  		"\x0cpqrs",             // 4-byte literal "pqrs".
   388  		"\xf4\xff\xff" + dots,  // 65536-byte literal dots.
   389  		"\x13\x04\x00\x01\x00", // tagCopy4; length=5 offset=65540.
   390  	}, "")
   391  
   392  	gotBytes, err := Decode(nil, []byte(input))
   393  	if err != nil {
   394  		t.Fatal(err)
   395  	}
   396  	got := string(gotBytes)
   397  	want := "pqrs" + dots + "pqrs."
   398  	if len(got) != len(want) {
   399  		t.Fatalf("got %d bytes, want %d", len(got), len(want))
   400  	}
   401  	if got != want {
   402  		for i := 0; i < len(got); i++ {
   403  			if g, w := got[i], want[i]; g != w {
   404  				t.Fatalf("byte #%d: got %#02x, want %#02x", i, g, w)
   405  			}
   406  		}
   407  	}
   408  }
   409  
// TestDecodeLengthOffset tests decoding an encoding of the form literal +
// copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
//
// It tries every combination of copy length in [1, 18], copy offset in
// [1, 18] and trailing literal length in [0, 18], and checks both the decoded
// bytes and that Decode did not write past the end of the decoded output.
func TestDecodeLengthOffset(t *testing.T) {
	const (
		prefix = "abcdefghijklmnopqr"
		suffix = "ABCDEFGHIJKLMNOPQR"

		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
		// not present in either the input or the output. It is written to
		// gotBuf to check that Decode does not write bytes past the end of
		// gotBuf[:totalLen].
		//
		// The magic number 37 was chosen because it is prime. A more 'natural'
		// number like 32 might lead to a false negative if, for example, a
		// byte was incorrectly copied 4*8 bytes later.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)
	// The three buffers are reused across all iterations.
	var gotBuf, wantBuf, inputBuf [128]byte
	for length := 1; length <= 18; length++ {
		for offset := 1; offset <= 18; offset++ {
		loop:
			for suffixLen := 0; suffixLen <= 18; suffixLen++ {
				totalLen := len(prefix) + length + suffixLen

				// Build the encoded input: the varint decoded length, a
				// literal holding prefix, a tagCopy2 element with the given
				// length and offset, and (if suffixLen > 0) a literal holding
				// suffix[:suffixLen].
				inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
				inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
				inputLen++
				inputLen += copy(inputBuf[inputLen:], prefix)
				inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
				// The offset fits in one byte here, so the second offset byte
				// is always zero.
				inputBuf[inputLen+1] = byte(offset)
				inputBuf[inputLen+2] = 0x00
				inputLen += 3
				if suffixLen > 0 {
					inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
					inputLen++
					inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
				}
				input := inputBuf[:inputLen]

				// Fill gotBuf with the sentinel pattern so that any write past
				// gotBuf[:totalLen] can be detected after decoding.
				for i := range gotBuf {
					gotBuf[i] = byte(notPresentBase + i%notPresentLen)
				}
				got, err := Decode(gotBuf[:], input)
				if err != nil {
					t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
					continue
				}

				// Compute the expected output. The copy is replayed one byte
				// at a time so that overlapping copies (offset < length)
				// repeat the already-copied bytes.
				wantLen := 0
				wantLen += copy(wantBuf[wantLen:], prefix)
				for i := 0; i < length; i++ {
					wantBuf[wantLen] = wantBuf[wantLen-offset]
					wantLen++
				}
				wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
				want := wantBuf[:wantLen]

				// Sanity check: the input must not contain sentinel bytes.
				for _, x := range input {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
							length, offset, suffixLen, x, input)
						continue loop
					}
				}
				// Every byte at or beyond totalLen must still hold the
				// sentinel pattern.
				for i, x := range gotBuf {
					if i < totalLen {
						continue
					}
					if w := byte(notPresentBase + i%notPresentLen); x != w {
						t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
							"Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
							length, offset, suffixLen, totalLen, i, x, w, gotBuf)
						continue loop
					}
				}
				// Sanity check: the expected output must not contain sentinel
				// bytes either.
				for _, x := range want {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
							length, offset, suffixLen, x, want)
						continue loop
					}
				}

				// Finally, compare the decoded bytes with the expectation.
				if !bytes.Equal(got, want) {
					t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot   % x\nwant  % x",
						length, offset, suffixLen, input, got, want)
					continue
				}
			}
		}
	}
}
   503  
// File names, relative to the test data directory, used by the golden tests.
const (
	// goldenText is the name of the uncompressed golden file.
	goldenText = "Isaac.Newton-Opticks.txt"
	// goldenCompressed is the name of the file holding the expected encoding
	// of goldenText.
	goldenCompressed = goldenText + ".rawsnappy"
)
   508  
   509  func TestDecodeGoldenInput(t *testing.T) {
   510  	tDir := filepath.FromSlash(*testdataDir)
   511  	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
   512  	if err != nil {
   513  		t.Fatalf("ReadFile: %v", err)
   514  	}
   515  	got, err := Decode(nil, src)
   516  	if err != nil {
   517  		t.Fatalf("Decode: %v", err)
   518  	}
   519  	want, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
   520  	if err != nil {
   521  		t.Fatalf("ReadFile: %v", err)
   522  	}
   523  	if err := cmp(got, want); err != nil {
   524  		t.Fatal(err)
   525  	}
   526  }
   527  
   528  func TestEncodeGoldenInput(t *testing.T) {
   529  	tDir := filepath.FromSlash(*testdataDir)
   530  	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
   531  	if err != nil {
   532  		t.Fatalf("ReadFile: %v", err)
   533  	}
   534  	got := Encode(nil, src)
   535  	want, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
   536  	if err != nil {
   537  		t.Fatalf("ReadFile: %v", err)
   538  	}
   539  	if err := cmp(got, want); err != nil {
   540  		t.Fatal(err)
   541  	}
   542  }
   543  
   544  func TestExtendMatchGoldenInput(t *testing.T) {
   545  	tDir := filepath.FromSlash(*testdataDir)
   546  	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
   547  	if err != nil {
   548  		t.Fatalf("ReadFile: %v", err)
   549  	}
   550  	for i, tc := range extendMatchGoldenTestCases {
   551  		got := extendMatch(src, tc.i, tc.j)
   552  		if got != tc.want {
   553  			t.Errorf("test #%d: i, j = %5d, %5d: got %5d (= j + %6d), want %5d (= j + %6d)",
   554  				i, tc.i, tc.j, got, got-tc.j, tc.want, tc.want-tc.j)
   555  		}
   556  	}
   557  }
   558  
   559  func TestExtendMatch(t *testing.T) {
   560  	// ref is a simple, reference implementation of extendMatch.
   561  	ref := func(src []byte, i, j int) int {
   562  		for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
   563  		}
   564  		return j
   565  	}
   566  
   567  	nums := []int{0, 1, 2, 7, 8, 9, 29, 30, 31, 32, 33, 34, 38, 39, 40}
   568  	for yIndex := 40; yIndex > 30; yIndex-- {
   569  		xxx := bytes.Repeat([]byte("x"), 40)
   570  		if yIndex < len(xxx) {
   571  			xxx[yIndex] = 'y'
   572  		}
   573  		for _, i := range nums {
   574  			for _, j := range nums {
   575  				if i >= j {
   576  					continue
   577  				}
   578  				got := extendMatch(xxx, i, j)
   579  				want := ref(xxx, i, j)
   580  				if got != want {
   581  					t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
   582  				}
   583  			}
   584  		}
   585  	}
   586  }
   587  
// snappytoolCmdName is the path of the snappytool executable. The
// same-encoding-as-C++ tests check that it exists and run it with the "-e"
// argument to obtain the encoding to compare against.
const snappytoolCmdName = "cmd/snappytool/snappytool"
   589  
   590  func skipTestSameEncodingAsCpp() (msg string) {
   591  	if !goEncoderShouldMatchCppEncoder {
   592  		return fmt.Sprintf("skipping testing that the encoding is byte-for-byte identical to C++: GOARCH=%s", runtime.GOARCH)
   593  	}
   594  	if _, err := os.Stat(snappytoolCmdName); err != nil {
   595  		return fmt.Sprintf("could not find snappytool: %v", err)
   596  	}
   597  	return ""
   598  }
   599  
   600  func runTestSameEncodingAsCpp(src []byte) error {
   601  	got := Encode(nil, src)
   602  
   603  	cmd := exec.Command(snappytoolCmdName, "-e")
   604  	cmd.Stdin = bytes.NewReader(src)
   605  	want, err := cmd.Output()
   606  	if err != nil {
   607  		return fmt.Errorf("could not run snappytool: %v", err)
   608  	}
   609  	return cmp(got, want)
   610  }
   611  
   612  func TestSameEncodingAsCppShortCopies(t *testing.T) {
   613  	if msg := skipTestSameEncodingAsCpp(); msg != "" {
   614  		t.Skip(msg)
   615  	}
   616  	src := bytes.Repeat([]byte{'a'}, 20)
   617  	for i := 0; i <= len(src); i++ {
   618  		if err := runTestSameEncodingAsCpp(src[:i]); err != nil {
   619  			t.Errorf("i=%d: %v", i, err)
   620  		}
   621  	}
   622  }
   623  
   624  func TestSameEncodingAsCppLongFiles(t *testing.T) {
   625  	if msg := skipTestSameEncodingAsCpp(); msg != "" {
   626  		t.Skip(msg)
   627  	}
   628  	bDir := filepath.FromSlash(*benchdataDir)
   629  	failed := false
   630  	for i, tf := range testFiles {
   631  		if err := downloadBenchmarkFiles(t, tf.filename); err != nil {
   632  			t.Fatalf("failed to download testdata: %s", err)
   633  		}
   634  		data := readFile(t, filepath.Join(bDir, tf.filename))
   635  		if n := tf.sizeLimit; 0 < n && n < len(data) {
   636  			data = data[:n]
   637  		}
   638  		if err := runTestSameEncodingAsCpp(data); err != nil {
   639  			t.Errorf("i=%d: %v", i, err)
   640  			failed = true
   641  		}
   642  	}
   643  	if failed {
   644  		t.Errorf("was the snappytool program built against the C++ snappy library version " +
   645  			"d53de187 or later, committed on 2016-04-05? See " +
   646  			"https://github.com/google/snappy/commit/d53de18799418e113e44444252a39b12a0e4e0cc")
   647  	}
   648  }
   649  
   650  // TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
   651  // described in decode_amd64.s and its claim of a 10 byte overrun worst case.
   652  func TestSlowForwardCopyOverrun(t *testing.T) {
   653  	const base = 100
   654  
   655  	for length := 1; length < 18; length++ {
   656  		for offset := 1; offset < 18; offset++ {
   657  			highWaterMark := base
   658  			d := base
   659  			l := length
   660  			o := offset
   661  
   662  			// makeOffsetAtLeast8
   663  			for o < 8 {
   664  				if end := d + 8; highWaterMark < end {
   665  					highWaterMark = end
   666  				}
   667  				l -= o
   668  				d += o
   669  				o += o
   670  			}
   671  
   672  			// fixUpSlowForwardCopy
   673  			a := d
   674  			d += l
   675  
   676  			// finishSlowForwardCopy
   677  			for l > 0 {
   678  				if end := a + 8; highWaterMark < end {
   679  					highWaterMark = end
   680  				}
   681  				a += 8
   682  				l -= 8
   683  			}
   684  
   685  			dWant := base + length
   686  			overrun := highWaterMark - dWant
   687  			if d != dWant || overrun < 0 || 10 < overrun {
   688  				t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
   689  					length, offset, d, overrun, dWant)
   690  			}
   691  		}
   692  	}
   693  }
   694  
   695  // TestEncodeNoiseThenRepeats encodes input for which the first half is very
   696  // incompressible and the second half is very compressible. The encoded form's
   697  // length should be closer to 50% of the original length than 100%.
   698  func TestEncodeNoiseThenRepeats(t *testing.T) {
   699  	for _, origLen := range []int{256 * 1024, 2048 * 1024} {
   700  		src := make([]byte, origLen)
   701  		rng := rand.New(rand.NewSource(1))
   702  		firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
   703  		for i := range firstHalf {
   704  			firstHalf[i] = uint8(rng.Intn(256))
   705  		}
   706  		for i := range secondHalf {
   707  			secondHalf[i] = uint8(i >> 8)
   708  		}
   709  		dst := Encode(nil, src)
   710  		if got, want := len(dst), origLen*3/4; got >= want {
   711  			t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
   712  		}
   713  	}
   714  }
   715  
   716  func TestFramingFormat(t *testing.T) {
   717  	// src is comprised of alternating 1e5-sized sequences of random
   718  	// (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
   719  	// because it is larger than maxBlockSize (64k).
   720  	src := make([]byte, 1e6)
   721  	rng := rand.New(rand.NewSource(1))
   722  	for i := 0; i < 10; i++ {
   723  		if i%2 == 0 {
   724  			for j := 0; j < 1e5; j++ {
   725  				src[1e5*i+j] = uint8(rng.Intn(256))
   726  			}
   727  		} else {
   728  			for j := 0; j < 1e5; j++ {
   729  				src[1e5*i+j] = uint8(i)
   730  			}
   731  		}
   732  	}
   733  
   734  	buf := new(bytes.Buffer)
   735  	if _, err := NewWriter(buf).Write(src); err != nil {
   736  		t.Fatalf("Write: encoding: %v", err)
   737  	}
   738  	dst, err := ioutil.ReadAll(NewReader(buf))
   739  	if err != nil {
   740  		t.Fatalf("ReadAll: decoding: %v", err)
   741  	}
   742  	if err := cmp(dst, src); err != nil {
   743  		t.Fatal(err)
   744  	}
   745  }
   746  
   747  func TestWriterGoldenOutput(t *testing.T) {
   748  	buf := new(bytes.Buffer)
   749  	w := NewBufferedWriter(buf)
   750  	defer w.Close()
   751  	w.Write([]byte("abcd")) // Not compressible.
   752  	w.Flush()
   753  	w.Write(bytes.Repeat([]byte{'A'}, 150)) // Compressible.
   754  	w.Flush()
   755  	// The next chunk is also compressible, but a naive, greedy encoding of the
   756  	// overall length 67 copy as a length 64 copy (the longest expressible as a
   757  	// tagCopy1 or tagCopy2) plus a length 3 remainder would be two 3-byte
   758  	// tagCopy2 tags (6 bytes), since the minimum length for a tagCopy1 is 4
   759  	// bytes. Instead, we could do it shorter, in 5 bytes: a 3-byte tagCopy2
   760  	// (of length 60) and a 2-byte tagCopy1 (of length 7).
   761  	w.Write(bytes.Repeat([]byte{'B'}, 68))
   762  	w.Write([]byte("efC"))                 // Not compressible.
   763  	w.Write(bytes.Repeat([]byte{'C'}, 20)) // Compressible.
   764  	w.Write(bytes.Repeat([]byte{'B'}, 20)) // Compressible.
   765  	w.Write([]byte("g"))                   // Not compressible.
   766  	w.Flush()
   767  
   768  	got := buf.String()
   769  	want := strings.Join([]string{
   770  		magicChunk,
   771  		"\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
   772  		"\x68\x10\xe6\xb6", // Checksum.
   773  		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
   774  		"\x00\x11\x00\x00", // Compressed chunk, 17 bytes long (including 4 byte checksum).
   775  		"\x5f\xeb\xf2\x10", // Checksum.
   776  		"\x96\x01",         // Compressed payload: Uncompressed length (varint encoded): 150.
   777  		"\x00\x41",         // Compressed payload: tagLiteral, length=1,  "A".
   778  		"\xfe\x01\x00",     // Compressed payload: tagCopy2,   length=64, offset=1.
   779  		"\xfe\x01\x00",     // Compressed payload: tagCopy2,   length=64, offset=1.
   780  		"\x52\x01\x00",     // Compressed payload: tagCopy2,   length=21, offset=1.
   781  		"\x00\x18\x00\x00", // Compressed chunk, 24 bytes long (including 4 byte checksum).
   782  		"\x30\x85\x69\xeb", // Checksum.
   783  		"\x70",             // Compressed payload: Uncompressed length (varint encoded): 112.
   784  		"\x00\x42",         // Compressed payload: tagLiteral, length=1,  "B".
   785  		"\xee\x01\x00",     // Compressed payload: tagCopy2,   length=60, offset=1.
   786  		"\x0d\x01",         // Compressed payload: tagCopy1,   length=7,  offset=1.
   787  		"\x08\x65\x66\x43", // Compressed payload: tagLiteral, length=3,  "efC".
   788  		"\x4e\x01\x00",     // Compressed payload: tagCopy2,   length=20, offset=1.
   789  		"\x4e\x5a\x00",     // Compressed payload: tagCopy2,   length=20, offset=90.
   790  		"\x00\x67",         // Compressed payload: tagLiteral, length=1,  "g".
   791  	}, "")
   792  	if got != want {
   793  		t.Fatalf("\ngot:  % x\nwant: % x", got, want)
   794  	}
   795  }
   796  
   797  func TestEmitLiteral(t *testing.T) {
   798  	testCases := []struct {
   799  		length int
   800  		want   string
   801  	}{
   802  		{1, "\x00"},
   803  		{2, "\x04"},
   804  		{59, "\xe8"},
   805  		{60, "\xec"},
   806  		{61, "\xf0\x3c"},
   807  		{62, "\xf0\x3d"},
   808  		{254, "\xf0\xfd"},
   809  		{255, "\xf0\xfe"},
   810  		{256, "\xf0\xff"},
   811  		{257, "\xf4\x00\x01"},
   812  		{65534, "\xf4\xfd\xff"},
   813  		{65535, "\xf4\xfe\xff"},
   814  		{65536, "\xf4\xff\xff"},
   815  	}
   816  
   817  	dst := make([]byte, 70000)
   818  	nines := bytes.Repeat([]byte{0x99}, 65536)
   819  	for _, tc := range testCases {
   820  		lit := nines[:tc.length]
   821  		n := emitLiteral(dst, lit)
   822  		if !bytes.HasSuffix(dst[:n], lit) {
   823  			t.Errorf("length=%d: did not end with that many literal bytes", tc.length)
   824  			continue
   825  		}
   826  		got := string(dst[:n-tc.length])
   827  		if got != tc.want {
   828  			t.Errorf("length=%d:\ngot  % x\nwant % x", tc.length, got, tc.want)
   829  			continue
   830  		}
   831  	}
   832  }
   833  
   834  func TestEmitCopy(t *testing.T) {
   835  	testCases := []struct {
   836  		offset int
   837  		length int
   838  		want   string
   839  	}{
   840  		{8, 04, "\x01\x08"},
   841  		{8, 11, "\x1d\x08"},
   842  		{8, 12, "\x2e\x08\x00"},
   843  		{8, 13, "\x32\x08\x00"},
   844  		{8, 59, "\xea\x08\x00"},
   845  		{8, 60, "\xee\x08\x00"},
   846  		{8, 61, "\xf2\x08\x00"},
   847  		{8, 62, "\xf6\x08\x00"},
   848  		{8, 63, "\xfa\x08\x00"},
   849  		{8, 64, "\xfe\x08\x00"},
   850  		{8, 65, "\xee\x08\x00\x05\x08"},
   851  		{8, 66, "\xee\x08\x00\x09\x08"},
   852  		{8, 67, "\xee\x08\x00\x0d\x08"},
   853  		{8, 68, "\xfe\x08\x00\x01\x08"},
   854  		{8, 69, "\xfe\x08\x00\x05\x08"},
   855  		{8, 80, "\xfe\x08\x00\x3e\x08\x00"},
   856  
   857  		{256, 04, "\x21\x00"},
   858  		{256, 11, "\x3d\x00"},
   859  		{256, 12, "\x2e\x00\x01"},
   860  		{256, 13, "\x32\x00\x01"},
   861  		{256, 59, "\xea\x00\x01"},
   862  		{256, 60, "\xee\x00\x01"},
   863  		{256, 61, "\xf2\x00\x01"},
   864  		{256, 62, "\xf6\x00\x01"},
   865  		{256, 63, "\xfa\x00\x01"},
   866  		{256, 64, "\xfe\x00\x01"},
   867  		{256, 65, "\xee\x00\x01\x25\x00"},
   868  		{256, 66, "\xee\x00\x01\x29\x00"},
   869  		{256, 67, "\xee\x00\x01\x2d\x00"},
   870  		{256, 68, "\xfe\x00\x01\x21\x00"},
   871  		{256, 69, "\xfe\x00\x01\x25\x00"},
   872  		{256, 80, "\xfe\x00\x01\x3e\x00\x01"},
   873  
   874  		{2048, 04, "\x0e\x00\x08"},
   875  		{2048, 11, "\x2a\x00\x08"},
   876  		{2048, 12, "\x2e\x00\x08"},
   877  		{2048, 13, "\x32\x00\x08"},
   878  		{2048, 59, "\xea\x00\x08"},
   879  		{2048, 60, "\xee\x00\x08"},
   880  		{2048, 61, "\xf2\x00\x08"},
   881  		{2048, 62, "\xf6\x00\x08"},
   882  		{2048, 63, "\xfa\x00\x08"},
   883  		{2048, 64, "\xfe\x00\x08"},
   884  		{2048, 65, "\xee\x00\x08\x12\x00\x08"},
   885  		{2048, 66, "\xee\x00\x08\x16\x00\x08"},
   886  		{2048, 67, "\xee\x00\x08\x1a\x00\x08"},
   887  		{2048, 68, "\xfe\x00\x08\x0e\x00\x08"},
   888  		{2048, 69, "\xfe\x00\x08\x12\x00\x08"},
   889  		{2048, 80, "\xfe\x00\x08\x3e\x00\x08"},
   890  	}
   891  
   892  	dst := make([]byte, 1024)
   893  	for _, tc := range testCases {
   894  		n := emitCopy(dst, tc.offset, tc.length)
   895  		got := string(dst[:n])
   896  		if got != tc.want {
   897  			t.Errorf("offset=%d, length=%d:\ngot  % x\nwant % x", tc.offset, tc.length, got, tc.want)
   898  		}
   899  	}
   900  }
   901  
   902  func TestNewBufferedWriter(t *testing.T) {
   903  	// Test all 32 possible sub-sequences of these 5 input slices.
   904  	//
   905  	// Their lengths sum to 400,000, which is over 6 times the Writer ibuf
   906  	// capacity: 6 * maxBlockSize is 393,216.
   907  	inputs := [][]byte{
   908  		bytes.Repeat([]byte{'a'}, 40000),
   909  		bytes.Repeat([]byte{'b'}, 150000),
   910  		bytes.Repeat([]byte{'c'}, 60000),
   911  		bytes.Repeat([]byte{'d'}, 120000),
   912  		bytes.Repeat([]byte{'e'}, 30000),
   913  	}
   914  loop:
   915  	for i := 0; i < 1<<uint(len(inputs)); i++ {
   916  		var want []byte
   917  		buf := new(bytes.Buffer)
   918  		w := NewBufferedWriter(buf)
   919  		for j, input := range inputs {
   920  			if i&(1<<uint(j)) == 0 {
   921  				continue
   922  			}
   923  			if _, err := w.Write(input); err != nil {
   924  				t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
   925  				continue loop
   926  			}
   927  			want = append(want, input...)
   928  		}
   929  		if err := w.Close(); err != nil {
   930  			t.Errorf("i=%#02x: Close: %v", i, err)
   931  			continue
   932  		}
   933  		got, err := ioutil.ReadAll(NewReader(buf))
   934  		if err != nil {
   935  			t.Errorf("i=%#02x: ReadAll: %v", i, err)
   936  			continue
   937  		}
   938  		if err := cmp(got, want); err != nil {
   939  			t.Errorf("i=%#02x: %v", i, err)
   940  			continue
   941  		}
   942  	}
   943  }
   944  
   945  func TestFlush(t *testing.T) {
   946  	buf := new(bytes.Buffer)
   947  	w := NewBufferedWriter(buf)
   948  	defer w.Close()
   949  	if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
   950  		t.Fatalf("Write: %v", err)
   951  	}
   952  	if n := buf.Len(); n != 0 {
   953  		t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
   954  	}
   955  	if err := w.Flush(); err != nil {
   956  		t.Fatalf("Flush: %v", err)
   957  	}
   958  	if n := buf.Len(); n == 0 {
   959  		t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
   960  	}
   961  }
   962  
   963  func TestReaderUncompressedDataOK(t *testing.T) {
   964  	r := NewReader(strings.NewReader(magicChunk +
   965  		"\x01\x08\x00\x00" + // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
   966  		"\x68\x10\xe6\xb6" + // Checksum.
   967  		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
   968  	))
   969  	g, err := ioutil.ReadAll(r)
   970  	if err != nil {
   971  		t.Fatal(err)
   972  	}
   973  	if got, want := string(g), "abcd"; got != want {
   974  		t.Fatalf("got %q, want %q", got, want)
   975  	}
   976  }
   977  
   978  func TestReaderUncompressedDataNoPayload(t *testing.T) {
   979  	r := NewReader(strings.NewReader(magicChunk +
   980  		"\x01\x04\x00\x00" + // Uncompressed chunk, 4 bytes long.
   981  		"", // No payload; corrupt input.
   982  	))
   983  	if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
   984  		t.Fatalf("got %v, want %v", err, ErrCorrupt)
   985  	}
   986  }
   987  
   988  func TestReaderUncompressedDataTooLong(t *testing.T) {
   989  	// https://github.com/google/snappy/blob/master/framing_format.txt section
   990  	// 4.3 says that "the maximum legal chunk length... is 65540", or 0x10004.
   991  	const n = 0x10005
   992  
   993  	r := NewReader(strings.NewReader(magicChunk +
   994  		"\x01\x05\x00\x01" + // Uncompressed chunk, n bytes long.
   995  		strings.Repeat("\x00", n),
   996  	))
   997  	if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
   998  		t.Fatalf("got %v, want %v", err, ErrCorrupt)
   999  	}
  1000  }
  1001  
  1002  func TestReaderReset(t *testing.T) {
  1003  	gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
  1004  	buf := new(bytes.Buffer)
  1005  	if _, err := NewWriter(buf).Write(gold); err != nil {
  1006  		t.Fatalf("Write: %v", err)
  1007  	}
  1008  	encoded, invalid, partial := buf.String(), "invalid", "partial"
  1009  	r := NewReader(nil)
  1010  	for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
  1011  		if s == partial {
  1012  			r.Reset(strings.NewReader(encoded))
  1013  			if _, err := r.Read(make([]byte, 101)); err != nil {
  1014  				t.Errorf("#%d: %v", i, err)
  1015  				continue
  1016  			}
  1017  			continue
  1018  		}
  1019  		r.Reset(strings.NewReader(s))
  1020  		got, err := ioutil.ReadAll(r)
  1021  		switch s {
  1022  		case encoded:
  1023  			if err != nil {
  1024  				t.Errorf("#%d: %v", i, err)
  1025  				continue
  1026  			}
  1027  			if err := cmp(got, gold); err != nil {
  1028  				t.Errorf("#%d: %v", i, err)
  1029  				continue
  1030  			}
  1031  		case invalid:
  1032  			if err == nil {
  1033  				t.Errorf("#%d: got nil error, want non-nil", i)
  1034  				continue
  1035  			}
  1036  		}
  1037  	}
  1038  }
  1039  
  1040  func TestReaderReadByte(t *testing.T) {
  1041  	// Test all 32 possible sub-sequences of these 5 input slices prefixed by
  1042  	// their size encoded as a uvarint.
  1043  	//
  1044  	// Their lengths sum to 400,000, which is over 6 times the Writer ibuf
  1045  	// capacity: 6 * maxBlockSize is 393,216.
  1046  	inputs := [][]byte{
  1047  		bytes.Repeat([]byte{'a'}, 40000),
  1048  		bytes.Repeat([]byte{'b'}, 150000),
  1049  		bytes.Repeat([]byte{'c'}, 60000),
  1050  		bytes.Repeat([]byte{'d'}, 120000),
  1051  		bytes.Repeat([]byte{'e'}, 30000),
  1052  	}
  1053  loop:
  1054  	for i := 0; i < 1<<uint(len(inputs)); i++ {
  1055  		var want []int
  1056  		buf := new(bytes.Buffer)
  1057  		w := NewBufferedWriter(buf)
  1058  		p := make([]byte, binary.MaxVarintLen64)
  1059  		for j, input := range inputs {
  1060  			if i&(1<<uint(j)) == 0 {
  1061  				continue
  1062  			}
  1063  			n := binary.PutUvarint(p, uint64(len(input)))
  1064  			if _, err := w.Write(p[:n]); err != nil {
  1065  				t.Errorf("i=%#02x: j=%d: Write Uvarint: %v", i, j, err)
  1066  				continue loop
  1067  			}
  1068  			if _, err := w.Write(input); err != nil {
  1069  				t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
  1070  				continue loop
  1071  			}
  1072  			want = append(want, j)
  1073  		}
  1074  		if err := w.Close(); err != nil {
  1075  			t.Errorf("i=%#02x: Close: %v", i, err)
  1076  			continue
  1077  		}
  1078  		r := NewReader(buf)
  1079  		for _, j := range want {
  1080  			size, err := binary.ReadUvarint(r)
  1081  			if err != nil {
  1082  				t.Errorf("i=%#02x: ReadUvarint: %v", i, err)
  1083  				continue loop
  1084  			}
  1085  			if wantedSize := uint64(len(inputs[j])); size != wantedSize {
  1086  				t.Errorf("i=%#02x: expected size %d, got %d", i, wantedSize, size)
  1087  				continue loop
  1088  			}
  1089  			got := make([]byte, size)
  1090  			if _, err := io.ReadFull(r, got); err != nil {
  1091  				t.Errorf("i=%#02x: ReadFull: %v", i, err)
  1092  				continue loop
  1093  			}
  1094  			if err := cmp(got, inputs[j]); err != nil {
  1095  				t.Errorf("i=%#02x: %v", i, err)
  1096  				continue
  1097  			}
  1098  		}
  1099  		if _, err := r.ReadByte(); err != io.EOF {
  1100  			t.Errorf("i=%#02x: expected size EOF, got %v", i, err)
  1101  		}
  1102  	}
  1103  }
  1104  
  1105  func TestWriterReset(t *testing.T) {
  1106  	gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
  1107  	const n = 20
  1108  	for _, buffered := range []bool{false, true} {
  1109  		var w *Writer
  1110  		if buffered {
  1111  			w = NewBufferedWriter(nil)
  1112  			defer w.Close()
  1113  		} else {
  1114  			w = NewWriter(nil)
  1115  		}
  1116  
  1117  		var gots, wants [][]byte
  1118  		failed := false
  1119  		for i := 0; i <= n; i++ {
  1120  			buf := new(bytes.Buffer)
  1121  			w.Reset(buf)
  1122  			want := gold[:len(gold)*i/n]
  1123  			if _, err := w.Write(want); err != nil {
  1124  				t.Errorf("#%d: Write: %v", i, err)
  1125  				failed = true
  1126  				continue
  1127  			}
  1128  			if buffered {
  1129  				if err := w.Flush(); err != nil {
  1130  					t.Errorf("#%d: Flush: %v", i, err)
  1131  					failed = true
  1132  					continue
  1133  				}
  1134  			}
  1135  			got, err := ioutil.ReadAll(NewReader(buf))
  1136  			if err != nil {
  1137  				t.Errorf("#%d: ReadAll: %v", i, err)
  1138  				failed = true
  1139  				continue
  1140  			}
  1141  			gots = append(gots, got)
  1142  			wants = append(wants, want)
  1143  		}
  1144  		if failed {
  1145  			continue
  1146  		}
  1147  		for i := range gots {
  1148  			if err := cmp(gots[i], wants[i]); err != nil {
  1149  				t.Errorf("#%d: %v", i, err)
  1150  			}
  1151  		}
  1152  	}
  1153  }
  1154  
  1155  func TestWriterResetWithoutFlush(t *testing.T) {
  1156  	buf0 := new(bytes.Buffer)
  1157  	buf1 := new(bytes.Buffer)
  1158  	w := NewBufferedWriter(buf0)
  1159  	if _, err := w.Write([]byte("xxx")); err != nil {
  1160  		t.Fatalf("Write #0: %v", err)
  1161  	}
  1162  	// Note that we don't Flush the Writer before calling Reset.
  1163  	w.Reset(buf1)
  1164  	if _, err := w.Write([]byte("yyy")); err != nil {
  1165  		t.Fatalf("Write #1: %v", err)
  1166  	}
  1167  	if err := w.Flush(); err != nil {
  1168  		t.Fatalf("Flush: %v", err)
  1169  	}
  1170  	got, err := ioutil.ReadAll(NewReader(buf1))
  1171  	if err != nil {
  1172  		t.Fatalf("ReadAll: %v", err)
  1173  	}
  1174  	if err := cmp(got, []byte("yyy")); err != nil {
  1175  		t.Fatal(err)
  1176  	}
  1177  }
  1178  
  1179  type writeCounter int
  1180  
  1181  func (c *writeCounter) Write(p []byte) (int, error) {
  1182  	*c++
  1183  	return len(p), nil
  1184  }
  1185  
  1186  // TestNumUnderlyingWrites tests that each Writer flush only makes one or two
  1187  // Write calls on its underlying io.Writer, depending on whether or not the
  1188  // flushed buffer was compressible.
  1189  func TestNumUnderlyingWrites(t *testing.T) {
  1190  	testCases := []struct {
  1191  		input []byte
  1192  		want  int
  1193  	}{
  1194  		{bytes.Repeat([]byte{'x'}, 100), 1},
  1195  		{bytes.Repeat([]byte{'y'}, 100), 1},
  1196  		{[]byte("ABCDEFGHIJKLMNOPQRST"), 2},
  1197  	}
  1198  
  1199  	var c writeCounter
  1200  	w := NewBufferedWriter(&c)
  1201  	defer w.Close()
  1202  	for i, tc := range testCases {
  1203  		c = 0
  1204  		if _, err := w.Write(tc.input); err != nil {
  1205  			t.Errorf("#%d: Write: %v", i, err)
  1206  			continue
  1207  		}
  1208  		if err := w.Flush(); err != nil {
  1209  			t.Errorf("#%d: Flush: %v", i, err)
  1210  			continue
  1211  		}
  1212  		if int(c) != tc.want {
  1213  			t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
  1214  			continue
  1215  		}
  1216  	}
  1217  }
  1218  
  1219  func benchDecode(b *testing.B, src []byte) {
  1220  	encoded := Encode(nil, src)
  1221  	// Bandwidth is in amount of uncompressed data.
  1222  	b.SetBytes(int64(len(src)))
  1223  	b.ResetTimer()
  1224  	for i := 0; i < b.N; i++ {
  1225  		Decode(src, encoded)
  1226  	}
  1227  }
  1228  
  1229  func benchEncode(b *testing.B, src []byte) {
  1230  	// Bandwidth is in amount of uncompressed data.
  1231  	b.SetBytes(int64(len(src)))
  1232  	dst := make([]byte, MaxEncodedLen(len(src)))
  1233  	b.ResetTimer()
  1234  	for i := 0; i < b.N; i++ {
  1235  		Encode(dst, src)
  1236  	}
  1237  }
  1238  
  1239  func testOrBenchmark(b testing.TB) string {
  1240  	if _, ok := b.(*testing.B); ok {
  1241  		return "benchmark"
  1242  	}
  1243  	return "test"
  1244  }
  1245  
  1246  func readFile(b testing.TB, filename string) []byte {
  1247  	src, err := ioutil.ReadFile(filename)
  1248  	if err != nil {
  1249  		b.Skipf("skipping %s: %v", testOrBenchmark(b), err)
  1250  	}
  1251  	if len(src) == 0 {
  1252  		b.Fatalf("%s has zero length", filename)
  1253  	}
  1254  	return src
  1255  }
  1256  
  1257  // expand returns a slice of length n containing repeated copies of src.
  1258  func expand(src []byte, n int) []byte {
  1259  	dst := make([]byte, n)
  1260  	for x := dst; len(x) > 0; {
  1261  		i := copy(x, src)
  1262  		x = x[i:]
  1263  	}
  1264  	return dst
  1265  }
  1266  
  1267  func benchWords(b *testing.B, n int, decode bool) {
  1268  	// Note: the file is OS-language dependent so the resulting values are not
  1269  	// directly comparable for non-US-English OS installations.
  1270  	data := expand(readFile(b, "/usr/share/dict/words"), n)
  1271  	if decode {
  1272  		benchDecode(b, data)
  1273  	} else {
  1274  		benchEncode(b, data)
  1275  	}
  1276  }
  1277  
// The BenchmarkWords* functions run benchWords with the input size, in bytes,
// that is spelled out in the function name (1e1 through 1e6). The Decode
// variants pass decode=true and the Encode variants pass decode=false.
func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
  1290  
  1291  func BenchmarkRandomEncode(b *testing.B) {
  1292  	rng := rand.New(rand.NewSource(1))
  1293  	data := make([]byte, 1<<20)
  1294  	for i := range data {
  1295  		data[i] = uint8(rng.Intn(256))
  1296  	}
  1297  	benchEncode(b, data)
  1298  }
  1299  
  1300  // testFiles' values are copied directly from
  1301  // https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
  1302  // The label field is unused in snappy-go.
  1303  var testFiles = []struct {
  1304  	label     string
  1305  	filename  string
  1306  	sizeLimit int
  1307  }{
  1308  	{"html", "html", 0},
  1309  	{"urls", "urls.10K", 0},
  1310  	{"jpg", "fireworks.jpeg", 0},
  1311  	{"jpg_200", "fireworks.jpeg", 200},
  1312  	{"pdf", "paper-100k.pdf", 0},
  1313  	{"html4", "html_x_4", 0},
  1314  	{"txt1", "alice29.txt", 0},
  1315  	{"txt2", "asyoulik.txt", 0},
  1316  	{"txt3", "lcet10.txt", 0},
  1317  	{"txt4", "plrabn12.txt", 0},
  1318  	{"pb", "geo.protodata", 0},
  1319  	{"gaviota", "kppkn.gtb", 0},
  1320  }
  1321  
  1322  const (
  1323  	// The benchmark data files are at this canonical URL.
  1324  	benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
  1325  )
  1326  
  1327  func downloadBenchmarkFiles(b testing.TB, basename string) (errRet error) {
  1328  	bDir := filepath.FromSlash(*benchdataDir)
  1329  	filename := filepath.Join(bDir, basename)
  1330  	if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
  1331  		return nil
  1332  	}
  1333  
  1334  	if !*download {
  1335  		b.Skipf("test data not found; skipping %s without the -download flag", testOrBenchmark(b))
  1336  	}
  1337  	// Download the official snappy C++ implementation reference test data
  1338  	// files for benchmarking.
  1339  	if err := os.MkdirAll(bDir, 0777); err != nil && !os.IsExist(err) {
  1340  		return fmt.Errorf("failed to create %s: %s", bDir, err)
  1341  	}
  1342  
  1343  	f, err := os.Create(filename)
  1344  	if err != nil {
  1345  		return fmt.Errorf("failed to create %s: %s", filename, err)
  1346  	}
  1347  	defer f.Close()
  1348  	defer func() {
  1349  		if errRet != nil {
  1350  			os.Remove(filename)
  1351  		}
  1352  	}()
  1353  	url := benchURL + basename
  1354  	resp, err := http.Get(url)
  1355  	if err != nil {
  1356  		return fmt.Errorf("failed to download %s: %s", url, err)
  1357  	}
  1358  	defer resp.Body.Close()
  1359  	if s := resp.StatusCode; s != http.StatusOK {
  1360  		return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
  1361  	}
  1362  	_, err = io.Copy(f, resp.Body)
  1363  	if err != nil {
  1364  		return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
  1365  	}
  1366  	return nil
  1367  }
  1368  
  1369  func benchFile(b *testing.B, i int, decode bool) {
  1370  	if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil {
  1371  		b.Fatalf("failed to download testdata: %s", err)
  1372  	}
  1373  	bDir := filepath.FromSlash(*benchdataDir)
  1374  	data := readFile(b, filepath.Join(bDir, testFiles[i].filename))
  1375  	if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
  1376  		data = data[:n]
  1377  	}
  1378  	if decode {
  1379  		benchDecode(b, data)
  1380  	} else {
  1381  		benchEncode(b, data)
  1382  	}
  1383  }
  1384  
// Naming convention is kept similar to what snappy's C++ implementation uses.
//
// Each function calls benchFile with the index into testFiles given by the
// numeric suffix of its name. The UFlat benchmarks pass decode=true and the
// ZFlat benchmarks pass decode=false.
func Benchmark_UFlat0(b *testing.B)  { benchFile(b, 0, true) }
func Benchmark_UFlat1(b *testing.B)  { benchFile(b, 1, true) }
func Benchmark_UFlat2(b *testing.B)  { benchFile(b, 2, true) }
func Benchmark_UFlat3(b *testing.B)  { benchFile(b, 3, true) }
func Benchmark_UFlat4(b *testing.B)  { benchFile(b, 4, true) }
func Benchmark_UFlat5(b *testing.B)  { benchFile(b, 5, true) }
func Benchmark_UFlat6(b *testing.B)  { benchFile(b, 6, true) }
func Benchmark_UFlat7(b *testing.B)  { benchFile(b, 7, true) }
func Benchmark_UFlat8(b *testing.B)  { benchFile(b, 8, true) }
func Benchmark_UFlat9(b *testing.B)  { benchFile(b, 9, true) }
func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }
func Benchmark_ZFlat0(b *testing.B)  { benchFile(b, 0, false) }
func Benchmark_ZFlat1(b *testing.B)  { benchFile(b, 1, false) }
func Benchmark_ZFlat2(b *testing.B)  { benchFile(b, 2, false) }
func Benchmark_ZFlat3(b *testing.B)  { benchFile(b, 3, false) }
func Benchmark_ZFlat4(b *testing.B)  { benchFile(b, 4, false) }
func Benchmark_ZFlat5(b *testing.B)  { benchFile(b, 5, false) }
func Benchmark_ZFlat6(b *testing.B)  { benchFile(b, 6, false) }
func Benchmark_ZFlat7(b *testing.B)  { benchFile(b, 7, false) }
func Benchmark_ZFlat8(b *testing.B)  { benchFile(b, 8, false) }
func Benchmark_ZFlat9(b *testing.B)  { benchFile(b, 9, false) }
func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }
  1410  
  1411  func BenchmarkExtendMatch(b *testing.B) {
  1412  	tDir := filepath.FromSlash(*testdataDir)
  1413  	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
  1414  	if err != nil {
  1415  		b.Fatalf("ReadFile: %v", err)
  1416  	}
  1417  	b.ResetTimer()
  1418  	for i := 0; i < b.N; i++ {
  1419  		for _, tc := range extendMatchGoldenTestCases {
  1420  			extendMatch(src, tc.i, tc.j)
  1421  		}
  1422  	}
  1423  }
  1424
View as plain text