...

Source file src/github.com/klauspost/compress/fse/fse_test.go

Documentation: github.com/klauspost/compress/fse

     1  // Copyright 2018 Klaus Post. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  // Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
     5  
     6  package fse
     7  
     8  import (
     9  	"bytes"
    10  	"fmt"
    11  	"os"
    12  	"reflect"
    13  	"strings"
    14  	"testing"
    15  )
    16  
    17  type inputFn func() ([]byte, error)
    18  
    19  var testfiles = []struct {
    20  	name string
    21  	fn   inputFn
    22  	err  error
    23  }{
    24  	// gettysburg.txt is a small plain text.
    25  	{name: "gettysburg", fn: func() ([]byte, error) { return os.ReadFile("../testdata/gettysburg.txt") }},
    26  	// Digits is the digits of the irrational number e. Its decimal representation
    27  	// does not repeat, but there are only 10 possible digits, so it should be
    28  	// reasonably compressible.
    29  	{name: "digits", fn: func() ([]byte, error) { return os.ReadFile("../testdata/e.txt") }},
    30  	// Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
    31  	{name: "twain", fn: func() ([]byte, error) { return os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt") }},
    32  	// Random bytes
    33  	{name: "random", fn: func() ([]byte, error) { return os.ReadFile("../testdata/sharnd.out") }, err: ErrIncompressible},
    34  	// Low entropy
    35  	{name: "low-ent", fn: func() ([]byte, error) { return []byte(strings.Repeat("1221", 10000)), nil }},
    36  	// Super Low entropy
    37  	{name: "superlow-ent", fn: func() ([]byte, error) { return []byte(strings.Repeat("1", 10000) + strings.Repeat("2", 500)), nil }},
    38  	// Zero bytes
    39  	{name: "zeroes", fn: func() ([]byte, error) { return make([]byte, 10000), nil }, err: ErrUseRLE},
    40  	{name: "crash1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash1.bin") }, err: ErrIncompressible},
    41  	{name: "crash2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash2.bin") }, err: ErrIncompressible},
    42  	{name: "crash3", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash3.bin") }, err: ErrIncompressible},
    43  	{name: "endzerobits", fn: func() ([]byte, error) { return os.ReadFile("../testdata/endzerobits.bin") }, err: nil},
    44  	{name: "endnonzero", fn: func() ([]byte, error) { return os.ReadFile("../testdata/endnonzero.bin") }, err: ErrIncompressible},
    45  	{name: "case1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/case1.bin") }, err: ErrIncompressible},
    46  	{name: "case2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/case2.bin") }, err: ErrIncompressible},
    47  	{name: "case3", fn: func() ([]byte, error) { return os.ReadFile("../testdata/case3.bin") }, err: ErrIncompressible},
    48  	{name: "pngdata.001", fn: func() ([]byte, error) { return os.ReadFile("../testdata/pngdata.bin") }, err: nil},
    49  	{name: "normcount2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/normcount2.bin") }, err: nil},
    50  }
    51  
    52  var decTestfiles = []struct {
    53  	name string
    54  	fn   inputFn
    55  	err  string
    56  }{
    57  	// gettysburg.txt is a small plain text.
    58  	{name: "hang1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-hang1.bin") }, err: "corruption detected (bitCount 252 > 32)"},
    59  	{name: "hang2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-hang2.bin") }, err: "newState (0) == oldState (0) and no bits"},
    60  	{name: "hang3", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-hang3.bin") }, err: "maxSymbolValue too small"},
    61  	{name: "symlen1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-symlen1.bin") }, err: "symbolLen (257) too big"},
    62  	{name: "crash4", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash4.bin") }, err: "symbolLen (1) too small"},
    63  	{name: "crash5", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash5.bin") }, err: "symbolLen (1) too small"},
    64  	{name: "crash6", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-crash6.bin") }, err: "newState (32768) outside table size (32768)"},
    65  	{name: "something", fn: func() ([]byte, error) { return os.ReadFile("../testdata/fse-artifact3.bin") }, err: "corrupt stream, did not find end of stream"},
    66  }
    67  
    68  func TestCompress(t *testing.T) {
    69  	for _, test := range testfiles {
    70  		t.Run(test.name, func(t *testing.T) {
    71  			var s Scratch
    72  			buf0, err := test.fn()
    73  			if err != nil {
    74  				t.Fatal(err)
    75  			}
    76  			b, err := Compress(buf0, &s)
    77  			if err != test.err {
    78  				t.Errorf("want error %v (%T), got %v (%T)", test.err, test.err, err, err)
    79  			}
    80  			if b == nil {
    81  				t.Log(test.name + ": not compressible")
    82  				return
    83  			}
    84  			t.Logf("%s: %d -> %d bytes (%.2f:1)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)))
    85  		})
    86  	}
    87  }
    88  
    89  func ExampleCompress() {
    90  	// Read data
    91  	data, err := os.ReadFile("../testdata/e.txt")
    92  	if err != nil {
    93  		panic(err)
    94  	}
    95  
    96  	// Create re-usable scratch buffer.
    97  	var s Scratch
    98  	b, err := Compress(data, &s)
    99  	if err != nil {
   100  		panic(err)
   101  	}
   102  	fmt.Printf("Compress: %d -> %d bytes (%.2f:1)\n", len(data), len(b), float64(len(data))/float64(len(b)))
   103  	// OUTPUT: Compress: 100003 -> 41564 bytes (2.41:1)
   104  }
   105  
   106  func TestDecompress(t *testing.T) {
   107  	for _, test := range decTestfiles {
   108  		t.Run(test.name, func(t *testing.T) {
   109  			var s Scratch
   110  			s.DecompressLimit = 1 << 20
   111  			buf0, err := test.fn()
   112  			if err != nil {
   113  				t.Fatal(err)
   114  			}
   115  			b, err := Decompress(buf0, &s)
   116  			if fmt.Sprint(err) != test.err {
   117  				t.Errorf("want error %q, got %q (%T)", test.err, err, err)
   118  				return
   119  			}
   120  			if err != nil {
   121  				return
   122  			}
   123  			if len(b) == 0 {
   124  				t.Error(test.name + ": no output")
   125  				return
   126  			}
   127  			t.Logf("%s: %d -> %d bytes (1:%.2f)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)))
   128  		})
   129  	}
   130  }
   131  
   132  func ExampleDecompress() {
   133  	// Read data
   134  	data, err := os.ReadFile("../testdata/e.txt")
   135  	if err != nil {
   136  		panic(err)
   137  	}
   138  
   139  	// Create re-usable scratch buffer.
   140  	var s Scratch
   141  	b, err := Compress(data, &s)
   142  	if err != nil {
   143  		panic(err)
   144  	}
   145  
   146  	// Since we use the output of compression, it cannot be used as output for decompression.
   147  	s.Out = make([]byte, 0, len(data))
   148  	d, err := Decompress(b, &s)
   149  	if err != nil {
   150  		panic(err)
   151  	}
   152  	fmt.Printf("Input matches: %t\n", bytes.Equal(d, data))
   153  	// OUTPUT: Input matches: true
   154  }
   155  
   156  func BenchmarkCompress(b *testing.B) {
   157  	for _, tt := range testfiles {
   158  		test := tt
   159  		b.Run(test.name, func(b *testing.B) {
   160  			var s Scratch
   161  			buf0, err := test.fn()
   162  			if err != nil {
   163  				b.Fatal(err)
   164  			}
   165  			_, err = Compress(buf0, &s)
   166  			if err != test.err {
   167  				b.Fatal("unexpected error:", err)
   168  			}
   169  			if err != nil {
   170  				b.Skip("skipping benchmark: ", err)
   171  				return
   172  			}
   173  			b.ResetTimer()
   174  			b.ReportAllocs()
   175  			b.SetBytes(int64(len(buf0)))
   176  			for i := 0; i < b.N; i++ {
   177  				_, _ = Compress(buf0, &s)
   178  			}
   179  		})
   180  	}
   181  }
   182  
   183  func TestReadNCount(t *testing.T) {
   184  	for i := range testfiles {
   185  		var s Scratch
   186  		test := testfiles[i]
   187  		t.Run(test.name, func(t *testing.T) {
   188  			name := test.name + ": "
   189  			buf0, err := testfiles[i].fn()
   190  			if err != nil {
   191  				t.Fatal(err)
   192  			}
   193  			b, err := Compress(buf0, &s)
   194  			if err != test.err {
   195  				t.Error(err)
   196  				return
   197  			}
   198  			if err != nil {
   199  				t.Skip(name + err.Error())
   200  				return
   201  			}
   202  			t.Logf("%s: %d -> %d bytes (%.2f:1)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)))
   203  			//t.Logf("%v", b)
   204  			var s2 Scratch
   205  			dc, err := Decompress(b, &s2)
   206  			if err != nil {
   207  				t.Fatal(err)
   208  			}
   209  			want := s.norm[:s.symbolLen]
   210  			got := s2.norm[:s2.symbolLen]
   211  			if !reflect.DeepEqual(want, got) {
   212  				if s.actualTableLog != s2.actualTableLog {
   213  					t.Errorf(name+"norm table, want tablelog: %d, got %d", s.actualTableLog, s2.actualTableLog)
   214  				}
   215  				if s.symbolLen != s2.symbolLen {
   216  					t.Errorf(name+"norm table, want size: %d, got %d", s.symbolLen, s2.symbolLen)
   217  				}
   218  				t.Errorf(name + "norm table, got delta: \n")
   219  				return
   220  			}
   221  			for i, dec := range s2.decTable {
   222  				dd := dec.symbol
   223  				ee := s.ct.tableSymbol[i]
   224  				if dd != ee {
   225  					t.Errorf("table symbol mismatch. idx %d, enc: %v, dec:%v", i, ee, dd)
   226  					break
   227  				}
   228  			}
   229  			if dc != nil {
   230  				if len(buf0) != len(dc) {
   231  					t.Errorf(name+"decompressed, want size: %d, got %d", len(buf0), len(dc))
   232  					if len(buf0) > len(dc) {
   233  						buf0 = buf0[:len(dc)]
   234  					} else {
   235  						dc = dc[:len(buf0)]
   236  					}
   237  					if !bytes.Equal(buf0, dc) {
   238  						t.Errorf(name+"decompressed, got delta: (in) %v != (out) %v\n", buf0, dc)
   239  					}
   240  					return
   241  				}
   242  				if !bytes.Equal(buf0, dc) {
   243  					t.Errorf(name + "decompressed, got delta.")
   244  				}
   245  				if !t.Failed() {
   246  					t.Log("... roundtrip ok!")
   247  				}
   248  			}
   249  		})
   250  	}
   251  }
   252  
   253  func BenchmarkDecompress(b *testing.B) {
   254  	for _, tt := range testfiles {
   255  		test := tt
   256  		b.Run(test.name, func(b *testing.B) {
   257  			var s, s2 Scratch
   258  			buf0, err := test.fn()
   259  			if err != nil {
   260  				b.Fatal(err)
   261  			}
   262  			out, err := Compress(buf0, &s)
   263  			if err != test.err {
   264  				b.Fatal(err)
   265  			}
   266  			if err != nil {
   267  				b.Skip(test.name + ": " + err.Error())
   268  				return
   269  			}
   270  			got, err := Decompress(out, &s2)
   271  			if err != nil {
   272  				b.Fatal(err)
   273  			}
   274  			if !bytes.Equal(buf0, got) {
   275  				b.Fatal("output mismatch")
   276  			}
   277  			b.ResetTimer()
   278  			b.ReportAllocs()
   279  			b.SetBytes(int64(len(buf0)))
   280  			for i := 0; i < b.N; i++ {
   281  				_, err = Decompress(out, &s2)
   282  				if err != nil {
   283  					b.Fatal(err)
   284  				}
   285  			}
   286  		})
   287  	}
   288  }
   289  

View as plain text