...

Source file src/github.com/klauspost/compress/gzip/gzip_test.go

Documentation: github.com/klauspost/compress/gzip

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gzip
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"fmt"
    11  	"io"
    12  	"math/rand"
    13  	"os"
    14  	"strconv"
    15  	"strings"
    16  	"testing"
    17  	"time"
    18  )
    19  
    20  // TestEmpty tests that an empty payload still forms a valid GZIP stream.
    21  func TestEmpty(t *testing.T) {
    22  	buf := new(bytes.Buffer)
    23  
    24  	if err := NewWriter(buf).Close(); err != nil {
    25  		t.Fatalf("Writer.Close: %v", err)
    26  	}
    27  
    28  	r, err := NewReader(buf)
    29  	if err != nil {
    30  		t.Fatalf("NewReader: %v", err)
    31  	}
    32  	b, err := io.ReadAll(r)
    33  	if err != nil {
    34  		t.Fatalf("ReadAll: %v", err)
    35  	}
    36  	if len(b) != 0 {
    37  		t.Fatalf("got %d bytes, want 0", len(b))
    38  	}
    39  	if err := r.Close(); err != nil {
    40  		t.Fatalf("Reader.Close: %v", err)
    41  	}
    42  }
    43  
    44  // TestRoundTrip tests that gzipping and then gunzipping is the identity
    45  // function.
    46  func TestRoundTrip(t *testing.T) {
    47  	buf := new(bytes.Buffer)
    48  
    49  	w := NewWriter(buf)
    50  	w.Comment = "comment"
    51  	w.Extra = []byte("extra")
    52  	w.ModTime = time.Unix(1e8, 0)
    53  	w.Name = "name"
    54  	if _, err := w.Write([]byte("payload")); err != nil {
    55  		t.Fatalf("Write: %v", err)
    56  	}
    57  	if err := w.Close(); err != nil {
    58  		t.Fatalf("Writer.Close: %v", err)
    59  	}
    60  
    61  	r, err := NewReader(buf)
    62  	if err != nil {
    63  		t.Fatalf("NewReader: %v", err)
    64  	}
    65  	b, err := io.ReadAll(r)
    66  	if err != nil {
    67  		t.Fatalf("ReadAll: %v", err)
    68  	}
    69  	if string(b) != "payload" {
    70  		t.Fatalf("payload is %q, want %q", string(b), "payload")
    71  	}
    72  	if r.Comment != "comment" {
    73  		t.Fatalf("comment is %q, want %q", r.Comment, "comment")
    74  	}
    75  	if string(r.Extra) != "extra" {
    76  		t.Fatalf("extra is %q, want %q", r.Extra, "extra")
    77  	}
    78  	if r.ModTime.Unix() != 1e8 {
    79  		t.Fatalf("mtime is %d, want %d", r.ModTime.Unix(), uint32(1e8))
    80  	}
    81  	if r.Name != "name" {
    82  		t.Fatalf("name is %q, want %q", r.Name, "name")
    83  	}
    84  	if err := r.Close(); err != nil {
    85  		t.Fatalf("Reader.Close: %v", err)
    86  	}
    87  }
    88  
    89  // TestLatin1 tests the internal functions for converting to and from Latin-1.
    90  func TestLatin1(t *testing.T) {
    91  	latin1 := []byte{0xc4, 'u', 0xdf, 'e', 'r', 'u', 'n', 'g', 0}
    92  	utf8 := "Äußerung"
    93  	z := Reader{r: bufio.NewReader(bytes.NewReader(latin1))}
    94  	s, err := z.readString()
    95  	if err != nil {
    96  		t.Fatalf("readString: %v", err)
    97  	}
    98  	if s != utf8 {
    99  		t.Fatalf("read latin-1: got %q, want %q", s, utf8)
   100  	}
   101  
   102  	buf := bytes.NewBuffer(make([]byte, 0, len(latin1)))
   103  	c := Writer{w: buf}
   104  	if err = c.writeString(utf8); err != nil {
   105  		t.Fatalf("writeString: %v", err)
   106  	}
   107  	s = buf.String()
   108  	if s != string(latin1) {
   109  		t.Fatalf("write utf-8: got %q, want %q", s, string(latin1))
   110  	}
   111  }
   112  
   113  // TestLatin1RoundTrip tests that metadata that is representable in Latin-1
   114  // survives a round trip.
   115  func TestLatin1RoundTrip(t *testing.T) {
   116  	testCases := []struct {
   117  		name string
   118  		ok   bool
   119  	}{
   120  		{"", true},
   121  		{"ASCII is OK", true},
   122  		{"unless it contains a NUL\x00", false},
   123  		{"no matter where \x00 occurs", false},
   124  		{"\x00\x00\x00", false},
   125  		{"Látin-1 also passes (U+00E1)", true},
   126  		{"but LĀtin Extended-A (U+0100) does not", false},
   127  		{"neither does 日本語", false},
   128  		{"invalid UTF-8 also \xffails", false},
   129  		{"\x00 as does Látin-1 with NUL", false},
   130  	}
   131  	for _, tc := range testCases {
   132  		buf := new(bytes.Buffer)
   133  
   134  		w := NewWriter(buf)
   135  		w.Name = tc.name
   136  		err := w.Close()
   137  		if (err == nil) != tc.ok {
   138  			t.Errorf("Writer.Close: name = %q, err = %v", tc.name, err)
   139  			continue
   140  		}
   141  		if !tc.ok {
   142  			continue
   143  		}
   144  
   145  		r, err := NewReader(buf)
   146  		if err != nil {
   147  			t.Errorf("NewReader: %v", err)
   148  			continue
   149  		}
   150  		_, err = io.ReadAll(r)
   151  		if err != nil {
   152  			t.Errorf("ReadAll: %v", err)
   153  			continue
   154  		}
   155  		if r.Name != tc.name {
   156  			t.Errorf("name is %q, want %q", r.Name, tc.name)
   157  			continue
   158  		}
   159  		if err := r.Close(); err != nil {
   160  			t.Errorf("Reader.Close: %v", err)
   161  			continue
   162  		}
   163  	}
   164  }
   165  
   166  func TestWriterFlush(t *testing.T) {
   167  	buf := new(bytes.Buffer)
   168  
   169  	w := NewWriter(buf)
   170  	w.Comment = "comment"
   171  	w.Extra = []byte("extra")
   172  	w.ModTime = time.Unix(1e8, 0)
   173  	w.Name = "name"
   174  
   175  	n0 := buf.Len()
   176  	if n0 != 0 {
   177  		t.Fatalf("buffer size = %d before writes; want 0", n0)
   178  	}
   179  
   180  	if err := w.Flush(); err != nil {
   181  		t.Fatal(err)
   182  	}
   183  
   184  	n1 := buf.Len()
   185  	if n1 == 0 {
   186  		t.Fatal("no data after first flush")
   187  	}
   188  
   189  	w.Write([]byte("x"))
   190  
   191  	n2 := buf.Len()
   192  	if n1 != n2 {
   193  		t.Fatalf("after writing a single byte, size changed from %d to %d; want no change", n1, n2)
   194  	}
   195  
   196  	if err := w.Flush(); err != nil {
   197  		t.Fatal(err)
   198  	}
   199  
   200  	n3 := buf.Len()
   201  	if n2 == n3 {
   202  		t.Fatal("Flush didn't flush any data")
   203  	}
   204  }
   205  
   206  // Multiple gzip files concatenated form a valid gzip file.
   207  func TestConcat(t *testing.T) {
   208  	var buf bytes.Buffer
   209  	w := NewWriter(&buf)
   210  	w.Write([]byte("hello "))
   211  	w.Close()
   212  	w = NewWriter(&buf)
   213  	w.Write([]byte("world\n"))
   214  	w.Close()
   215  
   216  	r, err := NewReader(&buf)
   217  	if err != nil {
   218  		t.Fatal(err)
   219  	}
   220  	data, err := io.ReadAll(r)
   221  	if string(data) != "hello world\n" || err != nil {
   222  		t.Fatalf("ReadAll = %q, %v, want %q, nil", data, err, "hello world")
   223  	}
   224  }
   225  
   226  func TestWriterReset(t *testing.T) {
   227  	buf := new(bytes.Buffer)
   228  	buf2 := new(bytes.Buffer)
   229  	z := NewWriter(buf)
   230  	msg := []byte("hello world")
   231  	z.Write(msg)
   232  	z.Close()
   233  	z.Reset(buf2)
   234  	z.Write(msg)
   235  	z.Close()
   236  	if buf.String() != buf2.String() {
   237  		t.Errorf("buf2 %q != original buf of %q", buf2.String(), buf.String())
   238  	}
   239  }
   240  
   241  var testbuf []byte
   242  
   243  func testFile(i, level int, t *testing.T) {
   244  	dat, _ := os.ReadFile("testdata/test.json")
   245  	dl := len(dat)
   246  	if len(testbuf) != i*dl {
   247  		// Make results predictable
   248  		testbuf = make([]byte, i*dl)
   249  		for j := 0; j < i; j++ {
   250  			copy(testbuf[j*dl:j*dl+dl], dat)
   251  		}
   252  	}
   253  
   254  	br := bytes.NewBuffer(testbuf)
   255  	var buf bytes.Buffer
   256  	w, err := NewWriterLevel(&buf, level)
   257  	if err != nil {
   258  		t.Fatal(err)
   259  	}
   260  	n, err := io.Copy(w, br)
   261  	if err != nil {
   262  		t.Fatal(err)
   263  	}
   264  	if int(n) != len(testbuf) {
   265  		t.Fatal("Short write:", n, "!=", testbuf)
   266  	}
   267  	err = w.Close()
   268  	if err != nil {
   269  		t.Fatal(err)
   270  	}
   271  	r, err := NewReader(&buf)
   272  	if err != nil {
   273  		t.Fatal(err.Error())
   274  	}
   275  	decoded, err := io.ReadAll(r)
   276  	if err != nil {
   277  		t.Fatal(err.Error())
   278  	}
   279  	if !bytes.Equal(testbuf, decoded) {
   280  		t.Errorf("decoded content does not match.")
   281  	}
   282  }
   283  
   284  func TestFile1xM3(t *testing.T) { testFile(1, -3, t) }
   285  func TestFile1xM2(t *testing.T) { testFile(1, -2, t) }
   286  func TestFile1xM1(t *testing.T) { testFile(1, -1, t) }
   287  func TestFile1x0(t *testing.T)  { testFile(1, 0, t) }
   288  func TestFile1x1(t *testing.T)  { testFile(1, 1, t) }
   289  func TestFile1x2(t *testing.T)  { testFile(1, 2, t) }
   290  func TestFile1x3(t *testing.T)  { testFile(1, 3, t) }
   291  func TestFile1x4(t *testing.T)  { testFile(1, 4, t) }
   292  func TestFile1x5(t *testing.T)  { testFile(1, 5, t) }
   293  func TestFile1x6(t *testing.T)  { testFile(1, 6, t) }
   294  func TestFile1x7(t *testing.T)  { testFile(1, 7, t) }
   295  func TestFile1x8(t *testing.T)  { testFile(1, 8, t) }
   296  func TestFile1x9(t *testing.T)  { testFile(1, 9, t) }
   297  func TestFile10(t *testing.T)   { testFile(10, DefaultCompression, t) }
   298  
   299  func TestFile50(t *testing.T) {
   300  	if testing.Short() {
   301  		t.Skip("skipping during short test")
   302  	}
   303  	testFile(50, DefaultCompression, t)
   304  }
   305  
   306  func TestFile200(t *testing.T) {
   307  	if testing.Short() {
   308  		t.Skip("skipping during short test")
   309  	}
   310  	testFile(200, BestSpeed, t)
   311  }
   312  
   313  func TestFileWindow(t *testing.T) {
   314  	for sz := MinCustomWindowSize; sz <= MaxCustomWindowSize; sz *= 2 {
   315  		t.Run(fmt.Sprint(sz), func(t *testing.T) {
   316  			testFileWindow(1, sz, t)
   317  		})
   318  	}
   319  }
   320  
   321  func testFileWindow(i, window int, t *testing.T) {
   322  	dat, _ := os.ReadFile("testdata/test.json")
   323  	dl := len(dat)
   324  	if len(testbuf) != i*dl {
   325  		// Make results predictable
   326  		testbuf = make([]byte, i*dl)
   327  		for j := 0; j < i; j++ {
   328  			copy(testbuf[j*dl:j*dl+dl], dat)
   329  		}
   330  	}
   331  
   332  	br := bytes.NewBuffer(testbuf)
   333  	var buf bytes.Buffer
   334  	w, err := NewWriterWindow(&buf, window)
   335  	if err != nil {
   336  		t.Fatal(err)
   337  	}
   338  	n, err := io.Copy(w, br)
   339  	if err != nil {
   340  		t.Fatal(err)
   341  	}
   342  	if int(n) != len(testbuf) {
   343  		t.Fatal("Short write:", n, "!=", testbuf)
   344  	}
   345  	err = w.Close()
   346  	if err != nil {
   347  		t.Fatal(err)
   348  	}
   349  	t.Logf("size: %d bytes", buf.Len())
   350  	r, err := NewReader(&buf)
   351  	if err != nil {
   352  		t.Fatal(err.Error())
   353  	}
   354  	decoded, err := io.ReadAll(r)
   355  	if err != nil {
   356  		t.Fatal(err.Error())
   357  	}
   358  	if !bytes.Equal(testbuf, decoded) {
   359  		t.Errorf("decoded content does not match.")
   360  	}
   361  }
   362  
   363  func testBigGzip(i int, t *testing.T) {
   364  	if len(testbuf) != i {
   365  		// Make results predictable
   366  		rand.Seed(1337)
   367  		testbuf = make([]byte, i)
   368  		for idx := range testbuf {
   369  			testbuf[idx] = byte(65 + rand.Intn(20))
   370  		}
   371  	}
   372  	c := BestCompression
   373  	if testing.Short() {
   374  		c = BestSpeed
   375  	}
   376  
   377  	br := bytes.NewBuffer(testbuf)
   378  	var buf bytes.Buffer
   379  	w, err := NewWriterLevel(&buf, c)
   380  	if err != nil {
   381  		t.Fatal(err)
   382  	}
   383  	n, err := io.Copy(w, br)
   384  	if err != nil {
   385  		t.Fatal(err)
   386  	}
   387  	if int(n) != len(testbuf) {
   388  		t.Fatal("Short write:", n, "!=", len(testbuf))
   389  	}
   390  	err = w.Close()
   391  	if err != nil {
   392  		t.Fatal(err.Error())
   393  	}
   394  
   395  	r, err := NewReader(&buf)
   396  	if err != nil {
   397  		t.Fatal(err.Error())
   398  	}
   399  	decoded, err := io.ReadAll(r)
   400  	if err != nil {
   401  		t.Fatal(err.Error())
   402  	}
   403  	if !bytes.Equal(testbuf, decoded) {
   404  		t.Errorf("decoded content does not match.")
   405  	}
   406  }
   407  
   408  func TestGzip1K(t *testing.T)   { testBigGzip(1000, t) }
   409  func TestGzip100K(t *testing.T) { testBigGzip(100000, t) }
   410  func TestGzip1M(t *testing.T) {
   411  	if testing.Short() {
   412  		t.Skip("skipping during short test")
   413  	}
   414  
   415  	testBigGzip(1000000, t)
   416  }
   417  func TestGzip10M(t *testing.T) {
   418  	if testing.Short() {
   419  		t.Skip("skipping during short test")
   420  	}
   421  	testBigGzip(10000000, t)
   422  }
   423  
   424  // Test if two runs produce identical results.
   425  func TestDeterministicLM2(t *testing.T) { testDeterm(-2, t) }
   426  
   427  // Level 0 is not deterministic since it depends on the size of each write.
   428  // func TestDeterministicL0(t *testing.T)  { testDeterm(0, t) }
   429  func TestDeterministicL1(t *testing.T) { testDeterm(1, t) }
   430  func TestDeterministicL2(t *testing.T) { testDeterm(2, t) }
   431  func TestDeterministicL3(t *testing.T) { testDeterm(3, t) }
   432  func TestDeterministicL4(t *testing.T) { testDeterm(4, t) }
   433  func TestDeterministicL5(t *testing.T) { testDeterm(5, t) }
   434  func TestDeterministicL6(t *testing.T) { testDeterm(6, t) }
   435  func TestDeterministicL7(t *testing.T) { testDeterm(7, t) }
   436  func TestDeterministicL8(t *testing.T) { testDeterm(8, t) }
   437  func TestDeterministicL9(t *testing.T) { testDeterm(9, t) }
   438  
   439  func testDeterm(level int, t *testing.T) {
   440  	var length = 500000
   441  	if testing.Short() {
   442  		length = 100000
   443  	}
   444  	rand.Seed(1337)
   445  	t1 := make([]byte, length)
   446  	for idx := range t1 {
   447  		t1[idx] = byte(65 + rand.Intn(8))
   448  	}
   449  
   450  	br := bytes.NewBuffer(t1)
   451  	var b1 bytes.Buffer
   452  	w, err := NewWriterLevel(&b1, level)
   453  	if err != nil {
   454  		t.Fatal(err)
   455  	}
   456  	_, err = io.Copy(w, br)
   457  	if err != nil {
   458  		t.Fatal(err)
   459  	}
   460  	w.Flush()
   461  	w.Close()
   462  
   463  	// We recreate the buffer, so we have a goos chance of getting a
   464  	// different memory address.
   465  	rand.Seed(1337)
   466  	t2 := make([]byte, length)
   467  	for idx := range t2 {
   468  		t2[idx] = byte(65 + rand.Intn(8))
   469  	}
   470  
   471  	br2 := bytes.NewBuffer(t2)
   472  	var b2 bytes.Buffer
   473  	w2, err := NewWriterLevel(&b2, level)
   474  	if err != nil {
   475  		t.Fatal(err)
   476  	}
   477  
   478  	// We write the same data, but with a different size than
   479  	// the default copy.
   480  	for {
   481  		_, err = io.CopyN(w2, br2, 1234)
   482  		if err == io.EOF {
   483  			err = nil
   484  			break
   485  		} else if err != nil {
   486  			break
   487  		}
   488  	}
   489  	if err != nil {
   490  		t.Fatal(err)
   491  	}
   492  	w2.Flush()
   493  	w2.Close()
   494  
   495  	b1b := b1.Bytes()
   496  	b2b := b2.Bytes()
   497  
   498  	if !bytes.Equal(b1b, b2b) {
   499  		t.Fatalf("Level %d did not produce deterministric result, len(a) = %d, len(b) = %d", level, len(b1b), len(b2b))
   500  	}
   501  }
   502  
   503  func BenchmarkGzipLM3(b *testing.B) { benchmarkGzipN(b, -3) }
   504  func BenchmarkGzipLM2(b *testing.B) { benchmarkGzipN(b, -2) }
   505  func BenchmarkGzipL1(b *testing.B)  { benchmarkGzipN(b, 1) }
   506  func BenchmarkGzipL2(b *testing.B)  { benchmarkGzipN(b, 2) }
   507  func BenchmarkGzipL3(b *testing.B)  { benchmarkGzipN(b, 3) }
   508  func BenchmarkGzipL4(b *testing.B)  { benchmarkGzipN(b, 4) }
   509  func BenchmarkGzipL5(b *testing.B)  { benchmarkGzipN(b, 5) }
   510  func BenchmarkGzipL6(b *testing.B)  { benchmarkGzipN(b, 6) }
   511  func BenchmarkGzipL7(b *testing.B)  { benchmarkGzipN(b, 7) }
   512  func BenchmarkGzipL8(b *testing.B)  { benchmarkGzipN(b, 8) }
   513  func BenchmarkGzipL9(b *testing.B)  { benchmarkGzipN(b, 9) }
   514  
   515  func benchmarkGzipN(b *testing.B, level int) {
   516  	dat, _ := os.ReadFile("testdata/test.json")
   517  	dat = append(dat, dat...)
   518  	dat = append(dat, dat...)
   519  	dat = append(dat, dat...)
   520  	dat = append(dat, dat...)
   521  	dat = append(dat, dat...)
   522  	b.SetBytes(int64(len(dat)))
   523  	w, _ := NewWriterLevel(io.Discard, level)
   524  	b.ResetTimer()
   525  	for n := 0; n < b.N; n++ {
   526  		w.Reset(io.Discard)
   527  		n, err := w.Write(dat)
   528  		if n != len(dat) {
   529  			panic("short write")
   530  		}
   531  		if err != nil {
   532  			panic(err)
   533  		}
   534  		err = w.Close()
   535  		if err != nil {
   536  			panic(err)
   537  		}
   538  	}
   539  }
   540  
   541  /*
   542  func BenchmarkOldGzipL1(b *testing.B) { benchmarkOldGzipN(b, 1) }
   543  func BenchmarkOldGzipL2(b *testing.B) { benchmarkOldGzipN(b, 2) }
   544  func BenchmarkOldGzipL3(b *testing.B) { benchmarkOldGzipN(b, 3) }
   545  func BenchmarkOldGzipL4(b *testing.B) { benchmarkOldGzipN(b, 4) }
   546  func BenchmarkOldGzipL5(b *testing.B) { benchmarkOldGzipN(b, 5) }
   547  func BenchmarkOldGzipL6(b *testing.B) { benchmarkOldGzipN(b, 6) }
   548  func BenchmarkOldGzipL7(b *testing.B) { benchmarkOldGzipN(b, 7) }
   549  func BenchmarkOldGzipL8(b *testing.B) { benchmarkOldGzipN(b, 8) }
   550  func BenchmarkOldGzipL9(b *testing.B) { benchmarkOldGzipN(b, 9) }
   551  
   552  func benchmarkOldGzipN(b *testing.B, level int) {
   553  	dat, _ := os.ReadFile("testdata/test.json")
   554  	dat = append(dat, dat...)
   555  	dat = append(dat, dat...)
   556  	dat = append(dat, dat...)
   557  	dat = append(dat, dat...)
   558  	dat = append(dat, dat...)
   559  
   560  	b.SetBytes(int64(len(dat)))
   561  	w, _ := oldgz.NewWriterLevel(io.Discard, level)
   562  	b.ResetTimer()
   563  	for n := 0; n < b.N; n++ {
   564  		w.Reset(io.Discard)
   565  		n, err := w.Write(dat)
   566  		if n != len(dat) {
   567  			panic("short write")
   568  		}
   569  		if err != nil {
   570  			panic(err)
   571  		}
   572  		err = w.Close()
   573  		if err != nil {
   574  			panic(err)
   575  		}
   576  	}
   577  }
   578  
   579  */
   580  
   581  func BenchmarkCompressAllocations(b *testing.B) {
   582  	payload := []byte(strings.Repeat("Tiny payload", 20))
   583  	for j := -2; j <= 9; j++ {
   584  		b.Run("level("+strconv.Itoa(j)+")", func(b *testing.B) {
   585  			b.Run("gzip", func(b *testing.B) {
   586  				b.ReportAllocs()
   587  
   588  				for i := 0; i < b.N; i++ {
   589  					w, err := NewWriterLevel(io.Discard, j)
   590  					if err != nil {
   591  						b.Fatal(err)
   592  					}
   593  					w.Write(payload)
   594  					w.Close()
   595  				}
   596  			})
   597  		})
   598  	}
   599  }
   600  
   601  func BenchmarkCompressAllocationsSingle(b *testing.B) {
   602  	payload := []byte(strings.Repeat("Tiny payload", 20))
   603  	const level = 2
   604  
   605  	b.Run("gzip", func(b *testing.B) {
   606  		b.ReportAllocs()
   607  
   608  		for i := 0; i < b.N; i++ {
   609  			w, err := NewWriterLevel(io.Discard, level)
   610  			if err != nil {
   611  				b.Fatal(err)
   612  			}
   613  			w.Write(payload)
   614  			w.Close()
   615  		}
   616  	})
   617  }
   618  

View as plain text