...

Source file src/github.com/klauspost/compress/s2/writer_test.go

Documentation: github.com/klauspost/compress/s2

     1  // Copyright (c) 2019+ Klaus Post. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package s2
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"math/rand"
    12  	"os"
    13  	"runtime"
    14  	"strings"
    15  	"testing"
    16  
    17  	"github.com/klauspost/compress/internal/snapref"
    18  	"github.com/klauspost/compress/zip"
    19  )
    20  
    21  func testOptions(_ testing.TB) map[string][]WriterOption {
    22  	var testOptions = map[string][]WriterOption{
    23  		"default": {WriterAddIndex()},
    24  		"better":  {WriterBetterCompression()},
    25  		"best":    {WriterBestCompression()},
    26  		"none":    {WriterUncompressed()},
    27  	}
    28  
    29  	x := make(map[string][]WriterOption)
    30  	cloneAdd := func(org []WriterOption, add ...WriterOption) []WriterOption {
    31  		y := make([]WriterOption, len(org)+len(add))
    32  		copy(y, org)
    33  		copy(y[len(org):], add)
    34  		return y
    35  	}
    36  	for name, opt := range testOptions {
    37  		x[name] = opt
    38  		x[name+"-c1"] = cloneAdd(opt, WriterConcurrency(1))
    39  	}
    40  	testOptions = x
    41  	x = make(map[string][]WriterOption)
    42  	for name, opt := range testOptions {
    43  		x[name] = opt
    44  		if !testing.Short() {
    45  			x[name+"-4k-win"] = cloneAdd(opt, WriterBlockSize(4<<10))
    46  			x[name+"-4M-win"] = cloneAdd(opt, WriterBlockSize(4<<20))
    47  		}
    48  	}
    49  	testOptions = x
    50  	x = make(map[string][]WriterOption)
    51  	for name, opt := range testOptions {
    52  		x[name] = opt
    53  		x[name+"-pad-min"] = cloneAdd(opt, WriterPadding(2), WriterPaddingSrc(zeroReader{}))
    54  		if !testing.Short() {
    55  			x[name+"-pad-8000"] = cloneAdd(opt, WriterPadding(8000), WriterPaddingSrc(zeroReader{}))
    56  			x[name+"-pad-max"] = cloneAdd(opt, WriterPadding(4<<20), WriterPaddingSrc(zeroReader{}))
    57  		}
    58  	}
    59  	for name, opt := range testOptions {
    60  		x[name] = opt
    61  		x[name+"-snappy"] = cloneAdd(opt, WriterSnappyCompat())
    62  		x[name+"-custom"] = cloneAdd(opt, WriterCustomEncoder(snapref.EncodeBlockInto))
    63  	}
    64  	testOptions = x
    65  	return testOptions
    66  }
    67  
    68  type zeroReader struct{}
    69  
    70  func (zeroReader) Read(p []byte) (int, error) {
    71  	for i := range p {
    72  		p[i] = 0
    73  	}
    74  	return len(p), nil
    75  }
    76  
    77  func TestEncoderRegression(t *testing.T) {
    78  	data, err := os.ReadFile("testdata/enc_regressions.zip")
    79  	if err != nil {
    80  		t.Fatal(err)
    81  	}
    82  	zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
    83  	if err != nil {
    84  		t.Fatal(err)
    85  	}
    86  	// Same as fuzz test...
    87  	test := func(t *testing.T, data []byte) {
    88  		if testing.Short() && len(data) > 10000 {
    89  			t.SkipNow()
    90  		}
    91  		var blocksTested bool
    92  		for name, opts := range testOptions(t) {
    93  			t.Run(name, func(t *testing.T) {
    94  				var buf bytes.Buffer
    95  				dec := NewReader(nil)
    96  				enc := NewWriter(&buf, opts...)
    97  
    98  				if !blocksTested {
    99  					comp := Encode(make([]byte, MaxEncodedLen(len(data))), data)
   100  					decoded, err := Decode(nil, comp)
   101  					if err != nil {
   102  						t.Error(err)
   103  						return
   104  					}
   105  					if !bytes.Equal(data, decoded) {
   106  						t.Error("block decoder mismatch")
   107  						return
   108  					}
   109  					if mel := MaxEncodedLen(len(data)); len(comp) > mel {
   110  						t.Error(fmt.Errorf("MaxEncodedLen Exceed: input: %d, mel: %d, got %d", len(data), mel, len(comp)))
   111  						return
   112  					}
   113  					comp = EncodeBetter(make([]byte, MaxEncodedLen(len(data))), data)
   114  					decoded, err = Decode(nil, comp)
   115  					if err != nil {
   116  						t.Error(err)
   117  						return
   118  					}
   119  					if !bytes.Equal(data, decoded) {
   120  						t.Error("block decoder mismatch")
   121  						return
   122  					}
   123  					if mel := MaxEncodedLen(len(data)); len(comp) > mel {
   124  						t.Error(fmt.Errorf("MaxEncodedLen Exceed: input: %d, mel: %d, got %d", len(data), mel, len(comp)))
   125  						return
   126  					}
   127  
   128  					comp = EncodeBest(make([]byte, MaxEncodedLen(len(data))), data)
   129  					decoded, err = Decode(nil, comp)
   130  					if err != nil {
   131  						t.Error(err)
   132  						return
   133  					}
   134  					if !bytes.Equal(data, decoded) {
   135  						t.Error("block decoder mismatch")
   136  						return
   137  					}
   138  					if mel := MaxEncodedLen(len(data)); len(comp) > mel {
   139  						t.Error(fmt.Errorf("MaxEncodedLen Exceed: input: %d, mel: %d, got %d", len(data), mel, len(comp)))
   140  						return
   141  					}
   142  					blocksTested = true
   143  				}
   144  
   145  				// Test writer.
   146  				n, err := enc.Write(data)
   147  				if err != nil {
   148  					t.Error(err)
   149  					return
   150  				}
   151  				if n != len(data) {
   152  					t.Error(fmt.Errorf("Write: Short write, want %d, got %d", len(data), n))
   153  					return
   154  				}
   155  				err = enc.Close()
   156  				if err != nil {
   157  					t.Error(err)
   158  					return
   159  				}
   160  				// Calling close twice should not affect anything.
   161  				err = enc.Close()
   162  				if err != nil {
   163  					t.Error(err)
   164  					return
   165  				}
   166  				comp := buf.Bytes()
   167  				if enc.pad > 0 && len(comp)%enc.pad != 0 {
   168  					t.Error(fmt.Errorf("wanted size to be mutiple of %d, got size %d with remainder %d", enc.pad, len(comp), len(comp)%enc.pad))
   169  					return
   170  				}
   171  				var got []byte
   172  				if !strings.Contains(name, "-snappy") {
   173  					dec.Reset(&buf)
   174  					got, err = io.ReadAll(dec)
   175  				} else {
   176  					sdec := snapref.NewReader(&buf)
   177  					got, err = io.ReadAll(sdec)
   178  				}
   179  				if err != nil {
   180  					t.Error(err)
   181  					return
   182  				}
   183  				if !bytes.Equal(data, got) {
   184  					t.Error("block (reset) decoder mismatch")
   185  					return
   186  				}
   187  
   188  				// Test Reset on both and use ReadFrom instead.
   189  				buf.Reset()
   190  				enc.Reset(&buf)
   191  				n2, err := enc.ReadFrom(bytes.NewBuffer(data))
   192  				if err != nil {
   193  					t.Error(err)
   194  					return
   195  				}
   196  				if n2 != int64(len(data)) {
   197  					t.Error(fmt.Errorf("ReadFrom: Short read, want %d, got %d", len(data), n2))
   198  					return
   199  				}
   200  				err = enc.Close()
   201  				if err != nil {
   202  					t.Error(err)
   203  					return
   204  				}
   205  				if enc.pad > 0 && buf.Len()%enc.pad != 0 {
   206  					t.Error(fmt.Errorf("wanted size to be mutiple of %d, got size %d with remainder %d", enc.pad, buf.Len(), buf.Len()%enc.pad))
   207  					return
   208  				}
   209  				if !strings.Contains(name, "-snappy") {
   210  					dec.Reset(&buf)
   211  					got, err = io.ReadAll(dec)
   212  				} else {
   213  					sdec := snapref.NewReader(&buf)
   214  					got, err = io.ReadAll(sdec)
   215  				}
   216  				if err != nil {
   217  					t.Error(err)
   218  					return
   219  				}
   220  				if !bytes.Equal(data, got) {
   221  					t.Error("frame (reset) decoder mismatch")
   222  					return
   223  				}
   224  			})
   225  		}
   226  	}
   227  	for _, tt := range zr.File {
   228  		if !strings.HasSuffix(t.Name(), "") {
   229  			continue
   230  		}
   231  		t.Run(tt.Name, func(t *testing.T) {
   232  			r, err := tt.Open()
   233  			if err != nil {
   234  				t.Error(err)
   235  				return
   236  			}
   237  			b, err := io.ReadAll(r)
   238  			if err != nil {
   239  				t.Error(err)
   240  				return
   241  			}
   242  			test(t, b[:len(b):len(b)])
   243  		})
   244  	}
   245  }
   246  
   247  func TestIndex(t *testing.T) {
   248  	fatalErr := func(t testing.TB, err error) {
   249  		if err != nil {
   250  			t.Fatal(err)
   251  		}
   252  	}
   253  
   254  	// Create a test corpus
   255  	var input []byte
   256  	if !testing.Short() {
   257  		input = make([]byte, 10<<20)
   258  	} else {
   259  		input = make([]byte, 500<<10)
   260  	}
   261  	rng := rand.New(rand.NewSource(0xabeefcafe))
   262  	rng.Read(input)
   263  	// Make it compressible...
   264  	for i, v := range input {
   265  		input[i] = '0' + v&3
   266  	}
   267  	// Compress it...
   268  	var buf bytes.Buffer
   269  	// We use smaller blocks just for the example...
   270  	enc := NewWriter(&buf, WriterBlockSize(100<<10), WriterAddIndex(), WriterBetterCompression(), WriterConcurrency(runtime.GOMAXPROCS(0)))
   271  	todo := input
   272  	for len(todo) > 0 {
   273  		// Write random sized inputs..
   274  		x := todo[:rng.Intn(1+len(todo)&65535)]
   275  		if len(x) == 0 {
   276  			x = todo[:1]
   277  		}
   278  		_, err := enc.Write(x)
   279  		fatalErr(t, err)
   280  		// Flush once in a while
   281  		if rng.Intn(8) == 0 {
   282  			err = enc.Flush()
   283  			fatalErr(t, err)
   284  		}
   285  		todo = todo[len(x):]
   286  	}
   287  
   288  	// Close and also get index...
   289  	idxBytes, err := enc.CloseIndex()
   290  	fatalErr(t, err)
   291  	if false {
   292  		// Load the index.
   293  		var index Index
   294  		_, err = index.Load(idxBytes)
   295  		fatalErr(t, err)
   296  		t.Log(string(index.JSON()))
   297  	}
   298  	// This is our compressed stream...
   299  	compressed := buf.Bytes()
   300  	for wantOffset := int64(0); wantOffset < int64(len(input)); wantOffset += 65531 {
   301  		t.Run(fmt.Sprintf("offset-%d", wantOffset), func(t *testing.T) {
   302  			// Let's assume we want to read from uncompressed offset 'i'
   303  			// and we cannot seek in input, but we have the index.
   304  			want := input[wantOffset:]
   305  
   306  			// Load the index.
   307  			var index Index
   308  			_, err = index.Load(idxBytes)
   309  			fatalErr(t, err)
   310  
   311  			// Find offset in file:
   312  			compressedOffset, uncompressedOffset, err := index.Find(wantOffset)
   313  			fatalErr(t, err)
   314  
   315  			// Offset the input to the compressed offset.
   316  			// Notice how we do not provide any bytes before the offset.
   317  			in := io.Reader(bytes.NewBuffer(compressed[compressedOffset:]))
   318  
   319  			// When creating the decoder we must specify that it should not
   320  			// expect a stream identifier at the beginning og the frame.
   321  			dec := NewReader(in, ReaderIgnoreStreamIdentifier())
   322  
   323  			// We now have a reader, but it will start outputting at uncompressedOffset,
   324  			// and not the actual offset we want, so skip forward to that.
   325  			toSkip := wantOffset - uncompressedOffset
   326  			err = dec.Skip(toSkip)
   327  			fatalErr(t, err)
   328  
   329  			// Read the rest of the stream...
   330  			got, err := io.ReadAll(dec)
   331  			fatalErr(t, err)
   332  			if !bytes.Equal(got, want) {
   333  				t.Error("Result mismatch", wantOffset)
   334  			}
   335  
   336  			// Test with stream index...
   337  			for i := io.SeekStart; i <= io.SeekEnd; i++ {
   338  				t.Run(fmt.Sprintf("seek-%d", i), func(t *testing.T) {
   339  					// Read it from a seekable stream
   340  					dec = NewReader(bytes.NewReader(compressed))
   341  
   342  					rs, err := dec.ReadSeeker(true, nil)
   343  					fatalErr(t, err)
   344  
   345  					// Read a little...
   346  					var tmp = make([]byte, len(input)/2)
   347  					_, err = io.ReadFull(rs, tmp[:])
   348  					fatalErr(t, err)
   349  
   350  					toSkip := wantOffset
   351  					switch i {
   352  					case io.SeekStart:
   353  					case io.SeekCurrent:
   354  						toSkip = wantOffset - int64(len(input)/2)
   355  					case io.SeekEnd:
   356  						toSkip = -(int64(len(input)) - wantOffset)
   357  					}
   358  					gotOffset, err := rs.Seek(toSkip, i)
   359  					if gotOffset != wantOffset {
   360  						t.Errorf("got offset %d, want %d", gotOffset, wantOffset)
   361  					}
   362  					// Read the rest of the stream...
   363  					got, err := io.ReadAll(dec)
   364  					fatalErr(t, err)
   365  					if !bytes.Equal(got, want) {
   366  						t.Error("Result mismatch", wantOffset)
   367  					}
   368  				})
   369  			}
   370  			t.Run("ReadAt", func(t *testing.T) {
   371  				// Read it from a seekable stream
   372  				dec = NewReader(bytes.NewReader(compressed))
   373  
   374  				rs, err := dec.ReadSeeker(true, nil)
   375  				fatalErr(t, err)
   376  
   377  				// Read a little...
   378  				var tmp = make([]byte, len(input)/2)
   379  				_, err = io.ReadFull(rs, tmp[:])
   380  				fatalErr(t, err)
   381  				wantLen := len(tmp)
   382  				if wantLen+int(wantOffset) > len(input) {
   383  					wantLen = len(input) - int(wantOffset)
   384  				}
   385  				// Read from wantOffset
   386  				n, err := rs.ReadAt(tmp, wantOffset)
   387  				if n != wantLen {
   388  					t.Errorf("got length %d, want %d", n, wantLen)
   389  				}
   390  				if err != io.EOF {
   391  					fatalErr(t, err)
   392  				}
   393  				want := want[:n]
   394  				got := tmp[:n]
   395  
   396  				// Read the rest of the stream...
   397  				if !bytes.Equal(got, want) {
   398  					t.Error("Result mismatch", wantOffset)
   399  				}
   400  			})
   401  		})
   402  	}
   403  }
   404  
   405  func TestWriterPadding(t *testing.T) {
   406  	n := 100
   407  	if testing.Short() {
   408  		n = 5
   409  	}
   410  	rng := rand.New(rand.NewSource(0x1337))
   411  	d := NewReader(nil)
   412  
   413  	for i := 0; i < n; i++ {
   414  		padding := (rng.Int() & 0xffff) + 1
   415  		src := make([]byte, (rng.Int()&0xfffff)+1)
   416  		for i := range src {
   417  			src[i] = uint8(rng.Uint32()) & 3
   418  		}
   419  		var dst bytes.Buffer
   420  		e := NewWriter(&dst, WriterPadding(padding))
   421  		// Test the added padding is invisible.
   422  		_, err := io.Copy(e, bytes.NewBuffer(src))
   423  		if err != nil {
   424  			t.Fatal(err)
   425  		}
   426  		err = e.Close()
   427  		if err != nil {
   428  			t.Fatal(err)
   429  		}
   430  		err = e.Close()
   431  		if err != nil {
   432  			t.Fatal(err)
   433  		}
   434  
   435  		if dst.Len()%padding != 0 {
   436  			t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, dst.Len(), dst.Len()%padding)
   437  		}
   438  		var got bytes.Buffer
   439  		d.Reset(&dst)
   440  		_, err = io.Copy(&got, d)
   441  		if err != nil {
   442  			t.Fatal(err)
   443  		}
   444  		if !bytes.Equal(src, got.Bytes()) {
   445  			t.Fatal("output mismatch")
   446  		}
   447  
   448  		// Try after reset
   449  		dst.Reset()
   450  		e.Reset(&dst)
   451  		_, err = io.Copy(e, bytes.NewBuffer(src))
   452  		if err != nil {
   453  			t.Fatal(err)
   454  		}
   455  		err = e.Close()
   456  		if err != nil {
   457  			t.Fatal(err)
   458  		}
   459  		if dst.Len()%padding != 0 {
   460  			t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, dst.Len(), dst.Len()%padding)
   461  		}
   462  
   463  		got.Reset()
   464  		d.Reset(&dst)
   465  		_, err = io.Copy(&got, d)
   466  		if err != nil {
   467  			t.Fatal(err)
   468  		}
   469  		if !bytes.Equal(src, got.Bytes()) {
   470  			t.Fatal("output mismatch after reset")
   471  		}
   472  	}
   473  }
   474  
   475  func TestBigRegularWrites(t *testing.T) {
   476  	var buf [maxBlockSize * 2]byte
   477  	dst := bytes.NewBuffer(nil)
   478  	enc := NewWriter(dst, WriterBestCompression())
   479  	max := uint8(10)
   480  	if testing.Short() {
   481  		max = 4
   482  	}
   483  	for n := uint8(0); n < max; n++ {
   484  		for i := range buf[:] {
   485  			buf[i] = n
   486  		}
   487  		// Writes may not keep a reference to the data beyond the Write call.
   488  		_, err := enc.Write(buf[:])
   489  		if err != nil {
   490  			t.Fatal(err)
   491  		}
   492  	}
   493  	err := enc.Close()
   494  	if err != nil {
   495  		t.Fatal(err)
   496  	}
   497  
   498  	dec := NewReader(dst)
   499  	_, err = io.Copy(io.Discard, dec)
   500  	if err != nil {
   501  		t.Fatal(err)
   502  	}
   503  }
   504  
   505  func TestBigEncodeBuffer(t *testing.T) {
   506  	const blockSize = 1 << 20
   507  	var buf [blockSize * 2]byte
   508  	dst := bytes.NewBuffer(nil)
   509  	enc := NewWriter(dst, WriterBlockSize(blockSize), WriterBestCompression())
   510  	max := uint8(10)
   511  	if testing.Short() {
   512  		max = 4
   513  	}
   514  	for n := uint8(0); n < max; n++ {
   515  		// Change the buffer to a new value.
   516  		for i := range buf[:] {
   517  			buf[i] = n
   518  		}
   519  		err := enc.EncodeBuffer(buf[:])
   520  		if err != nil {
   521  			t.Fatal(err)
   522  		}
   523  		// We can write it again since we aren't changing it.
   524  		err = enc.EncodeBuffer(buf[:])
   525  		if err != nil {
   526  			t.Fatal(err)
   527  		}
   528  		err = enc.Flush()
   529  		if err != nil {
   530  			t.Fatal(err)
   531  		}
   532  	}
   533  	err := enc.Close()
   534  	if err != nil {
   535  		t.Fatal(err)
   536  	}
   537  
   538  	dec := NewReader(dst)
   539  	n, err := io.Copy(io.Discard, dec)
   540  	if err != nil {
   541  		t.Fatal(err)
   542  	}
   543  	t.Log(n)
   544  }
   545  
   546  func TestBigEncodeBufferSync(t *testing.T) {
   547  	const blockSize = 1 << 20
   548  	var buf [blockSize * 2]byte
   549  	dst := bytes.NewBuffer(nil)
   550  	enc := NewWriter(dst, WriterBlockSize(blockSize), WriterConcurrency(1), WriterBestCompression())
   551  	max := uint8(10)
   552  	if testing.Short() {
   553  		max = 2
   554  	}
   555  	for n := uint8(0); n < max; n++ {
   556  		// Change the buffer to a new value.
   557  		for i := range buf[:] {
   558  			buf[i] = n
   559  		}
   560  		// When WriterConcurrency == 1 we can encode and reuse the buffer.
   561  		err := enc.EncodeBuffer(buf[:])
   562  		if err != nil {
   563  			t.Fatal(err)
   564  		}
   565  	}
   566  	err := enc.Close()
   567  	if err != nil {
   568  		t.Fatal(err)
   569  	}
   570  
   571  	dec := NewReader(dst)
   572  	n, err := io.Copy(io.Discard, dec)
   573  	if err != nil {
   574  		t.Fatal(err)
   575  	}
   576  	t.Log(n)
   577  }
   578  
   579  func BenchmarkWriterRandom(b *testing.B) {
   580  	rng := rand.New(rand.NewSource(1))
   581  	// Make max window so we never get matches.
   582  	data := make([]byte, 4<<20)
   583  	for i := range data {
   584  		data[i] = uint8(rng.Intn(256))
   585  	}
   586  
   587  	for name, opts := range testOptions(b) {
   588  		w := NewWriter(io.Discard, opts...)
   589  		b.Run(name, func(b *testing.B) {
   590  			b.ResetTimer()
   591  			b.ReportAllocs()
   592  			b.SetBytes(int64(len(data)))
   593  			for i := 0; i < b.N; i++ {
   594  				err := w.EncodeBuffer(data)
   595  				if err != nil {
   596  					b.Fatal(err)
   597  				}
   598  			}
   599  			// Flush output
   600  			w.Flush()
   601  		})
   602  		w.Close()
   603  	}
   604  }
   605  
   606  func BenchmarkIndexFind(b *testing.B) {
   607  	fatalErr := func(t testing.TB, err error) {
   608  		if err != nil {
   609  			t.Fatal(err)
   610  		}
   611  	}
   612  	for blocks := 1; blocks <= 65536; blocks *= 2 {
   613  		if blocks == 65536 {
   614  			blocks = 65535
   615  		}
   616  
   617  		var index Index
   618  		index.reset(100)
   619  		index.TotalUncompressed = int64(blocks) * 100
   620  		index.TotalCompressed = int64(blocks) * 100
   621  		for i := 0; i < blocks; i++ {
   622  			err := index.add(int64(i*100), int64(i*100))
   623  			fatalErr(b, err)
   624  		}
   625  
   626  		rng := rand.New(rand.NewSource(0xabeefcafe))
   627  		b.Run(fmt.Sprintf("blocks-%d", len(index.info)), func(b *testing.B) {
   628  			b.ResetTimer()
   629  			b.ReportAllocs()
   630  			const prime4bytes = 2654435761
   631  			rng2 := rng.Int63()
   632  			for i := 0; i < b.N; i++ {
   633  				rng2 = ((rng2 + prime4bytes) * prime4bytes) >> 32
   634  				// Find offset:
   635  				_, _, err := index.Find(rng2 % (int64(blocks) * 100))
   636  				fatalErr(b, err)
   637  			}
   638  		})
   639  	}
   640  }
   641  

View as plain text