...

Source file src/github.com/klauspost/compress/s2/lz4convert_test.go

Documentation: github.com/klauspost/compress/s2

     1  // Copyright (c) 2022 Klaus Post. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package s2
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"path/filepath"
    12  	"sort"
    13  	"testing"
    14  
    15  	"github.com/klauspost/compress/internal/fuzz"
    16  	"github.com/klauspost/compress/internal/lz4ref"
    17  	"github.com/klauspost/compress/internal/snapref"
    18  )
    19  
    20  func TestLZ4Converter_ConvertBlock(t *testing.T) {
    21  	for _, tf := range testFiles {
    22  		t.Run(tf.label, func(t *testing.T) {
    23  			if err := downloadBenchmarkFiles(t, tf.filename); err != nil {
    24  				t.Fatalf("failed to download testdata: %s", err)
    25  			}
    26  
    27  			bDir := filepath.FromSlash(*benchdataDir)
    28  			data := readFile(t, filepath.Join(bDir, tf.filename))
    29  			if n := tf.sizeLimit; 0 < n && n < len(data) {
    30  				data = data[:n]
    31  			}
    32  
    33  			lz4Data := make([]byte, lz4ref.CompressBlockBound(len(data)))
    34  			n, err := lz4ref.CompressBlock(data, lz4Data)
    35  			if err != nil {
    36  				t.Fatal(err)
    37  			}
    38  			if n == 0 {
    39  				t.Skip("incompressible")
    40  				return
    41  			}
    42  			t.Log("input size:", len(data))
    43  			t.Log("lz4 size:", n)
    44  			lz4Data = lz4Data[:n]
    45  			s2Dst := make([]byte, binary.MaxVarintLen32, MaxEncodedLen(len(data)))
    46  			s2Dst = s2Dst[:binary.PutUvarint(s2Dst, uint64(len(data)))]
    47  			hdr := len(s2Dst)
    48  
    49  			conv := LZ4Converter{}
    50  
    51  			szS := 0
    52  			out, n, err := conv.ConvertBlockSnappy(s2Dst, lz4Data)
    53  			if err != nil {
    54  				t.Fatal(err)
    55  			}
    56  			if n != len(data) {
    57  				t.Fatalf("length mismatch: want %d, got %d", len(data), n)
    58  			}
    59  			szS = len(out) - hdr
    60  			t.Log("lz4->snappy size:", szS)
    61  
    62  			decom, err := snapref.Decode(nil, out)
    63  			if err != nil {
    64  				t.Fatal(err)
    65  			}
    66  			if !bytes.Equal(decom, data) {
    67  				t.Errorf("output mismatch")
    68  			}
    69  
    70  			sz := 0
    71  			out, n, err = conv.ConvertBlock(s2Dst, lz4Data)
    72  			if err != nil {
    73  				t.Fatal(err)
    74  			}
    75  			if n != len(data) {
    76  				t.Fatalf("length mismatch: want %d, got %d", len(data), n)
    77  			}
    78  			sz = len(out) - hdr
    79  			t.Log("lz4->s2 size:", sz)
    80  
    81  			decom, err = Decode(nil, out)
    82  			if err != nil {
    83  				t.Fatal(err)
    84  			}
    85  			if !bytes.Equal(decom, data) {
    86  				t.Errorf("output mismatch")
    87  			}
    88  
    89  			out2 := Encode(s2Dst[:0], data)
    90  			sz2 := len(out2) - hdr
    91  			t.Log("s2 (default) size:", sz2)
    92  
    93  			out2 = EncodeBetter(s2Dst[:0], data)
    94  			sz3 := len(out2) - hdr
    95  			t.Log("s2 (better) size:", sz3)
    96  
    97  			t.Log("lz4 -> s2 bytes saved:", len(lz4Data)-sz)
    98  			t.Log("lz4 -> snappy bytes saved:", len(lz4Data)-szS)
    99  			t.Log("data -> s2 (default) bytes saved:", len(lz4Data)-sz2)
   100  			t.Log("data -> s2 (better) bytes saved:", len(lz4Data)-sz3)
   101  			t.Log("direct data -> s2 (default) compared to converted from lz4:", sz-sz2)
   102  			t.Log("direct data -> s2 (better) compared to converted from lz4:", sz-sz3)
   103  		})
   104  	}
   105  }
   106  
   107  func TestLZ4Converter_ConvertBlockSingle(t *testing.T) {
   108  	// Mainly for analyzing fuzz failures.
   109  	lz4Data := []byte{0x6f, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x1, 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x30, 0xf, 0x30, 0x30, 0xe4, 0x1f, 0x30, 0x30, 0x30, 0xff, 0xff, 0x30, 0x2f, 0x30, 0x30, 0x30, 0x30, 0xcf, 0x7f, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0xaf, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0xff, 0xff, 0x30, 0xf, 0x30, 0x30, 0x30, 0x1f, 0x30, 0x30, 0x30, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30}
   110  	lz4Decoded := make([]byte, 4<<20)
   111  	lzN := lz4ref.UncompressBlock(lz4Decoded, lz4Data)
   112  	data := lz4Decoded
   113  	if lzN < 0 {
   114  		t.Skip(lzN)
   115  	} else {
   116  		data = data[:lzN]
   117  	}
   118  	t.Log("uncompressed size:", lzN)
   119  	t.Log("lz4 size:", len(lz4Data))
   120  	s2Dst := make([]byte, binary.MaxVarintLen32, MaxEncodedLen(len(data)))
   121  	s2Dst = s2Dst[:binary.PutUvarint(s2Dst, uint64(len(data)))]
   122  	hdr := len(s2Dst)
   123  
   124  	conv := LZ4Converter{}
   125  
   126  	szS := 0
   127  	out, n, err := conv.ConvertBlockSnappy(s2Dst, lz4Data)
   128  	if err != nil {
   129  		t.Fatal(err)
   130  	}
   131  	if n != len(data) {
   132  		t.Fatalf("length mismatch: want %d, got %d", len(data), n)
   133  	}
   134  	szS = len(out) - hdr
   135  	t.Log("lz4->snappy size:", szS)
   136  
   137  	decom, err := snapref.Decode(nil, out)
   138  	if err != nil {
   139  		t.Fatal(err)
   140  	}
   141  	if !bytes.Equal(decom, data) {
   142  		t.Errorf("output mismatch")
   143  	}
   144  
   145  	sz := 0
   146  	out, n, err = conv.ConvertBlock(s2Dst, lz4Data)
   147  	if err != nil {
   148  		t.Fatal(err)
   149  	}
   150  	if n != len(data) {
   151  		t.Fatalf("length mismatch: want %d, got %d", len(data), n)
   152  	}
   153  	sz = len(out) - hdr
   154  	t.Log("lz4->s2 size:", sz)
   155  
   156  	decom, err = Decode(nil, out)
   157  	if err != nil {
   158  		t.Fatal(err)
   159  	}
   160  	if !bytes.Equal(decom, data) {
   161  		t.Errorf("output mismatch")
   162  	}
   163  
   164  	out2 := Encode(s2Dst[:0], data)
   165  	sz2 := len(out2) - hdr
   166  	t.Log("s2 (default) size:", sz2)
   167  
   168  	out2 = EncodeBetter(s2Dst[:0], data)
   169  	sz3 := len(out2) - hdr
   170  	t.Log("s2 (better) size:", sz3)
   171  
   172  	t.Log("lz4 -> s2 bytes saved:", len(lz4Data)-sz)
   173  	t.Log("lz4 -> snappy bytes saved:", len(lz4Data)-szS)
   174  	t.Log("data -> s2 (default) bytes saved:", len(lz4Data)-sz2)
   175  	t.Log("data -> s2 (better) bytes saved:", len(lz4Data)-sz3)
   176  	t.Log("direct data -> s2 (default) compared to converted from lz4:", sz-sz2)
   177  	t.Log("direct data -> s2 (better) compared to converted from lz4:", sz-sz3)
   178  }
   179  
   180  func BenchmarkLZ4Converter_ConvertBlock(b *testing.B) {
   181  	for _, tf := range testFiles {
   182  		b.Run(tf.label, func(b *testing.B) {
   183  			if err := downloadBenchmarkFiles(b, tf.filename); err != nil {
   184  				b.Fatalf("failed to download testdata: %s", err)
   185  			}
   186  
   187  			bDir := filepath.FromSlash(*benchdataDir)
   188  			data := readFile(b, filepath.Join(bDir, tf.filename))
   189  			if n := tf.sizeLimit; 0 < n && n < len(data) {
   190  				data = data[:n]
   191  			}
   192  
   193  			lz4Data := make([]byte, lz4ref.CompressBlockBound(len(data)))
   194  			n, err := lz4ref.CompressBlock(data, lz4Data)
   195  			if err != nil {
   196  				b.Fatal(err)
   197  			}
   198  			if n == 0 {
   199  				b.Skip("incompressible")
   200  				return
   201  			}
   202  			lz4Data = lz4Data[:n]
   203  			s2Dst := make([]byte, MaxEncodedLen(len(data)))
   204  			conv := LZ4Converter{}
   205  			b.ReportAllocs()
   206  			b.ResetTimer()
   207  			b.SetBytes(int64(len(data)))
   208  			sz := 0
   209  			for i := 0; i < b.N; i++ {
   210  				out, n, err := conv.ConvertBlock(s2Dst[:0], lz4Data)
   211  				if err != nil {
   212  					b.Fatal(err)
   213  				}
   214  				if n != len(data) {
   215  					b.Fatalf("length mismatch: want %d, got %d", len(data), n)
   216  				}
   217  				sz = len(out)
   218  			}
   219  			b.ReportMetric(float64(len(lz4Data)-sz), "b_saved")
   220  		})
   221  	}
   222  }
   223  
   224  func BenchmarkLZ4Converter_ConvertBlockSnappy(b *testing.B) {
   225  	for _, tf := range testFiles {
   226  		b.Run(tf.label, func(b *testing.B) {
   227  			if err := downloadBenchmarkFiles(b, tf.filename); err != nil {
   228  				b.Fatalf("failed to download testdata: %s", err)
   229  			}
   230  
   231  			bDir := filepath.FromSlash(*benchdataDir)
   232  			data := readFile(b, filepath.Join(bDir, tf.filename))
   233  			if n := tf.sizeLimit; 0 < n && n < len(data) {
   234  				data = data[:n]
   235  			}
   236  
   237  			lz4Data := make([]byte, lz4ref.CompressBlockBound(len(data)))
   238  			n, err := lz4ref.CompressBlock(data, lz4Data)
   239  			if err != nil {
   240  				b.Fatal(err)
   241  			}
   242  			if n == 0 {
   243  				b.Skip("incompressible")
   244  				return
   245  			}
   246  			lz4Data = lz4Data[:n]
   247  			s2Dst := make([]byte, MaxEncodedLen(len(data)))
   248  			conv := LZ4Converter{}
   249  			b.ReportAllocs()
   250  			b.ResetTimer()
   251  			b.SetBytes(int64(len(data)))
   252  			sz := 0
   253  			for i := 0; i < b.N; i++ {
   254  				out, n, err := conv.ConvertBlockSnappy(s2Dst[:0], lz4Data)
   255  				if err != nil {
   256  					b.Fatal(err)
   257  				}
   258  				if n != len(data) {
   259  					b.Fatalf("length mismatch: want %d, got %d", len(data), n)
   260  				}
   261  				sz = len(out)
   262  			}
   263  			b.ReportMetric(float64(len(lz4Data)-sz), "b_saved")
   264  		})
   265  	}
   266  }
   267  
   268  func BenchmarkLZ4Converter_ConvertBlockParallel(b *testing.B) {
   269  	sort.Slice(testFiles, func(i, j int) bool {
   270  		return testFiles[i].filename < testFiles[j].filename
   271  	})
   272  	for _, tf := range testFiles {
   273  		b.Run(tf.filename, func(b *testing.B) {
   274  			if err := downloadBenchmarkFiles(b, tf.filename); err != nil {
   275  				b.Fatalf("failed to download testdata: %s", err)
   276  			}
   277  
   278  			bDir := filepath.FromSlash(*benchdataDir)
   279  			data := readFile(b, filepath.Join(bDir, tf.filename))
   280  
   281  			lz4Data := make([]byte, lz4ref.CompressBlockBound(len(data)))
   282  			n, err := lz4ref.CompressBlock(data, lz4Data)
   283  			if err != nil {
   284  				b.Fatal(err)
   285  			}
   286  			if n == 0 {
   287  				b.Skip("incompressible")
   288  				return
   289  			}
   290  			lz4Data = lz4Data[:n]
   291  			conv := LZ4Converter{}
   292  			b.ReportAllocs()
   293  			b.ResetTimer()
   294  			b.SetBytes(int64(len(data)))
   295  			b.RunParallel(func(pb *testing.PB) {
   296  				s2Dst := make([]byte, MaxEncodedLen(len(data)))
   297  				for pb.Next() {
   298  					_, n, err := conv.ConvertBlock(s2Dst[:0], lz4Data)
   299  					if err != nil {
   300  						b.Fatal(err)
   301  					}
   302  					if n != len(data) {
   303  						b.Fatalf("length mismatch: want %d, got %d", len(data), n)
   304  					}
   305  				}
   306  			})
   307  		})
   308  	}
   309  }
   310  func BenchmarkCompressBlockReference(b *testing.B) {
   311  	b.Skip("Only reference for BenchmarkLZ4Converter_ConvertBlock")
   312  	for _, tf := range testFiles {
   313  		b.Run(tf.label, func(b *testing.B) {
   314  			if err := downloadBenchmarkFiles(b, tf.filename); err != nil {
   315  				b.Fatalf("failed to download testdata: %s", err)
   316  			}
   317  			bDir := filepath.FromSlash(*benchdataDir)
   318  			data := readFile(b, filepath.Join(bDir, tf.filename))
   319  			if n := tf.sizeLimit; 0 < n && n < len(data) {
   320  				data = data[:n]
   321  			}
   322  
   323  			lz4Data := make([]byte, lz4ref.CompressBlockBound(len(data)))
   324  			n, err := lz4ref.CompressBlock(data, lz4Data)
   325  			if err != nil {
   326  				b.Fatal(err)
   327  			}
   328  			if n == 0 {
   329  				b.Skip("incompressible")
   330  				return
   331  			}
   332  			s2Dst := make([]byte, MaxEncodedLen(len(data)))
   333  
   334  			b.Run("default", func(b *testing.B) {
   335  				b.ReportAllocs()
   336  				b.ResetTimer()
   337  				b.SetBytes(int64(len(data)))
   338  				for i := 0; i < b.N; i++ {
   339  					_ = Encode(s2Dst, data)
   340  				}
   341  			})
   342  			b.Run("better", func(b *testing.B) {
   343  				b.ReportAllocs()
   344  				b.ResetTimer()
   345  				b.SetBytes(int64(len(data)))
   346  				for i := 0; i < b.N; i++ {
   347  					_ = EncodeBetter(s2Dst, data)
   348  				}
   349  			})
   350  		})
   351  	}
   352  }
   353  
// FuzzLZ4Block feeds the fuzz input as an LZ4 block to both
// lz4ref.UncompressBlock and LZ4Converter, and panics when the two disagree
// about whether the block is valid, about the uncompressed size, or about the
// decoded content. The corpus is seeded from two zip files under testdata/fuzz.
func FuzzLZ4Block(f *testing.F) {
	fuzz.AddFromZip(f, "testdata/fuzz/lz4-convert-corpus-raw.zip", fuzz.TypeRaw, false)
	fuzz.AddFromZip(f, "testdata/fuzz/FuzzLZ4Block.zip", fuzz.TypeGoFuzz, false)
	// Fuzzing tweaks:
	const (
		// Max input size:
		maxSize = 1 << 20
	)

	conv := LZ4Converter{}

	f.Fuzz(func(t *testing.T, data []byte) {
		// Ignore empty inputs and inputs above the size limit.
		if len(data) > maxSize || len(data) == 0 {
			return
		}

		// Decode with lz4ref first; a negative lzN is treated as failure below.
		lz4Decoded := make([]byte, len(data)*2+65536)
		lzN := lz4ref.UncompressBlock(lz4Decoded, data)
		converted := make([]byte, len(data)*2+4096)
		// Only write the uvarint length header when lz4ref produced a size;
		// otherwise the conversion output starts at offset 0.
		hdr := 0
		if lzN >= 0 {
			hdr = binary.PutUvarint(converted, uint64(lzN))
		}

		cV, cN, cErr := conv.ConvertBlock(converted[:hdr], data)
		if lzN >= 0 && cErr == nil {
			if cN != lzN {
				panic(fmt.Sprintf("uncompressed lz4 size: %d, s2 size: %d", lzN, cN))
			}
			lz4Decoded = lz4Decoded[:lzN]
			// Both success
			s2Dec, err := Decode(nil, cV)
			if err != nil {
				panic(fmt.Sprintf("block: %#v: %v", cV, err))
			}
			if !bytes.Equal(lz4Decoded, s2Dec) {
				panic("output mismatch")
			}
			// NOTE(review): returning here means the Snappy conversion below is
			// never run for inputs where lz4ref and ConvertBlock both succeed on
			// the first attempt — confirm whether that is intended.
			return
		}
		if lzN >= 0 && cErr != nil {
			panic(fmt.Sprintf("lz4 returned %d, conversion returned %v\n lz4 block: %#v", lzN, cErr, data))
		}
		if lzN < 0 && cErr == nil {
			// We might get an error if there isn't enough space to decompress the LZ4 content.
			// Try with the decompressed size from conversion.
			lz4Decoded = make([]byte, cN)
			lzN = lz4ref.UncompressBlock(lz4Decoded, data)
			if lzN < 0 {
				panic(fmt.Sprintf("lz4 returned %d, conversion returned %v, input: %#v", lzN, cErr, data))
			}
			// Compare now that we have success...
			lz4Decoded = lz4Decoded[:lzN]

			// Re-add correct header.
			// (hdr was 0 for this conversion, so cV carries no length prefix yet.)
			tmp := make([]byte, binary.MaxVarintLen32+len(cV))
			hdr = binary.PutUvarint(tmp, uint64(cN))
			cV = append(tmp[:hdr], cV...)

			// Both success
			s2Dec, err := Decode(nil, cV)
			if err != nil {
				panic(fmt.Sprintf("block: %#v: %v\ninput: %#v\n", cV, err, data))
			}
			if !bytes.Equal(lz4Decoded, s2Dec) {
				panic("output mismatch")
			}
		}
		// Snappy....
		// Reached only when the first lz4ref decode failed: lzN is either still
		// negative (S2 conversion failed too) or was set by the retry above.
		// NOTE(review): with a negative lzN, uint64(lzN) encodes a very large
		// length in the header; in that case only cErr is inspected below.
		hdr = binary.PutUvarint(converted, uint64(lzN))
		cV, cN, cErr = conv.ConvertBlockSnappy(converted[:hdr], data)
		if lzN >= 0 && cErr == nil {
			if cN != lzN {
				panic(fmt.Sprintf("uncompressed lz4 size: %d, s2 size: %d", lzN, cN))
			}
			lz4Decoded = lz4Decoded[:lzN]
			// Both success
			s2Dec, err := snapref.Decode(nil, cV)
			if err != nil {
				panic(fmt.Sprintf("block: %#v: %v", cV, err))
			}
			if !bytes.Equal(lz4Decoded, s2Dec) {
				panic("output mismatch")
			}
			return
		}
		// Snappy can expand a lot due to 64 byte match length limit
		// so ErrDstTooSmall is the only conversion error tolerated here.
		if lzN >= 0 && cErr != ErrDstTooSmall {
			panic(fmt.Sprintf("lz4 returned %d, conversion returned %v\n lz4 block: %#v", lzN, cErr, data))
		}
		if lzN < 0 && cErr == nil {
			panic(fmt.Sprintf("lz4 returned %d, conversion returned %v, input: %#v", lzN, cErr, data))
		}
	})
}
   449  

View as plain text