1
2
3
4
5 package zstd
6
7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "math/rand"
12 "os"
13 "runtime"
14 "strings"
15 "sync"
16 "testing"
17 "time"
18
19 "github.com/klauspost/compress/zip"
20 "github.com/klauspost/compress/zstd/internal/xxhash"
21 )
22
23 var testWindowSizes = []int{MinWindowSize, 1 << 16, 1 << 22, 1 << 24}
24
25 type testEncOpt struct {
26 name string
27 o []EOption
28 }
29
30 func getEncOpts(cMax int) []testEncOpt {
31 var o []testEncOpt
32 for level := speedNotSet + 1; level < speedLast; level++ {
33 if isRaceTest && level >= SpeedBestCompression {
34 break
35 }
36 for conc := 1; conc <= 4; conc *= 2 {
37 for _, wind := range testWindowSizes {
38 addOpt := func(name string, options ...EOption) {
39 opts := append([]EOption(nil), WithEncoderLevel(level), WithEncoderConcurrency(conc), WithWindowSize(wind))
40 name = fmt.Sprintf("%s-c%d-w%dk-%s", level.String(), conc, wind/1024, name)
41 o = append(o, testEncOpt{name: name, o: append(opts, options...)})
42 }
43 addOpt("default")
44 if testing.Short() {
45 break
46 }
47 addOpt("nocrc", WithEncoderCRC(false))
48 addOpt("lowmem", WithLowerEncoderMem(true))
49 addOpt("alllit", WithAllLitEntropyCompression(true))
50 addOpt("nolit", WithNoEntropyCompression(true))
51 addOpt("pad1k", WithEncoderPadding(1024))
52 addOpt("zerof", WithZeroFrames(true))
53 addOpt("1seg", WithSingleSegment(true))
54 }
55 if testing.Short() && conc == 2 {
56 break
57 }
58 if conc >= cMax {
59 break
60 }
61 }
62 }
63 return o
64 }
65
66 func TestEncoder_EncodeAllSimple(t *testing.T) {
67 in, err := os.ReadFile("testdata/z000028")
68 if err != nil {
69 t.Fatal(err)
70 }
71 dec, err := NewReader(nil)
72 if err != nil {
73 t.Fatal(err)
74 }
75 defer dec.Close()
76
77 in = append(in, in...)
78 for _, opts := range getEncOpts(4) {
79 t.Run(opts.name, func(t *testing.T) {
80 runtime.GC()
81 e, err := NewWriter(nil, opts.o...)
82 if err != nil {
83 t.Fatal(err)
84 }
85 defer e.Close()
86 start := time.Now()
87 dst := e.EncodeAll(in, nil)
88
89 mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
90 t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
91
92 decoded, err := dec.DecodeAll(dst, nil)
93 if err != nil {
94 t.Error(err, len(decoded))
95 }
96 if !bytes.Equal(decoded, in) {
97 os.WriteFile("testdata/"+t.Name()+"-z000028.got", decoded, os.ModePerm)
98 os.WriteFile("testdata/"+t.Name()+"-z000028.want", in, os.ModePerm)
99 t.Fatal("Decoded does not match")
100 }
101
102 })
103 }
104 }
105
106 func TestEncoder_EncodeAllConcurrent(t *testing.T) {
107 in, err := os.ReadFile("testdata/z000028")
108 if err != nil {
109 t.Fatal(err)
110 }
111 in = append(in, in...)
112
113
114 n := 400 / runtime.GOMAXPROCS(0)
115 if testing.Short() {
116 n = 20 / runtime.GOMAXPROCS(0)
117 }
118 dec, err := NewReader(nil)
119 if err != nil {
120 t.Fatal(err)
121 }
122 defer dec.Close()
123 for _, opts := range getEncOpts(2) {
124 t.Run(opts.name, func(t *testing.T) {
125 rng := rand.New(rand.NewSource(0x1337))
126 e, err := NewWriter(nil, opts.o...)
127 if err != nil {
128 t.Fatal(err)
129 }
130 defer e.Close()
131 var wg sync.WaitGroup
132 wg.Add(n)
133 for i := 0; i < n; i++ {
134 in := in[rng.Int()&1023:]
135 in = in[:rng.Intn(len(in))]
136 go func() {
137 defer wg.Done()
138 dst := e.EncodeAll(in, nil)
139 if len(dst) > e.MaxEncodedSize(len(in)) {
140 t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(dst), e.MaxEncodedSize(len(in)))
141 }
142
143 decoded, err := dec.DecodeAll(dst, nil)
144 if err != nil {
145 t.Error(err, len(decoded))
146 }
147 if !bytes.Equal(decoded, in) {
148
149
150 t.Error("Decoded does not match")
151 return
152 }
153 }()
154 }
155 wg.Wait()
156
157 })
158 }
159 }
160
161 func TestEncoder_EncodeAllEncodeXML(t *testing.T) {
162 f, err := os.Open("testdata/xml.zst")
163 if err != nil {
164 t.Fatal(err)
165 }
166 defer f.Close()
167
168 dec, err := NewReader(f)
169 if err != nil {
170 t.Fatal(err)
171 }
172 defer dec.Close()
173 in, err := io.ReadAll(dec)
174 if err != nil {
175 t.Fatal(err)
176 }
177 if testing.Short() {
178 in = in[:10000]
179 }
180
181 for level := speedNotSet + 1; level < speedLast; level++ {
182 t.Run(level.String(), func(t *testing.T) {
183 if isRaceTest && level >= SpeedBestCompression {
184 t.SkipNow()
185 }
186 e, err := NewWriter(nil, WithEncoderLevel(level))
187 if err != nil {
188 t.Fatal(err)
189 }
190 defer e.Close()
191 start := time.Now()
192 dst := e.EncodeAll(in, nil)
193 if len(dst) > e.MaxEncodedSize(len(in)) {
194 t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(dst), e.MaxEncodedSize(len(in)))
195 }
196
197 mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
198 t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
199
200 decoded, err := dec.DecodeAll(dst, nil)
201 if err != nil {
202 t.Error(err, len(decoded))
203 }
204 if !bytes.Equal(decoded, in) {
205 os.WriteFile("testdata/"+t.Name()+"-xml.got", decoded, os.ModePerm)
206 t.Error("Decoded does not match")
207 return
208 }
209
210 })
211 }
212 }
213
214 func TestEncoderRegression(t *testing.T) {
215 defer timeout(4 * time.Minute)()
216 data, err := os.ReadFile("testdata/comp-crashers.zip")
217 if err != nil {
218 t.Fatal(err)
219 }
220
221 dec, err := NewReader(nil)
222 if err != nil {
223 t.Error(err)
224 return
225 }
226 defer dec.Close()
227 for _, opts := range getEncOpts(2) {
228 t.Run(opts.name, func(t *testing.T) {
229 zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
230 if err != nil {
231 t.Fatal(err)
232 }
233 enc, err := NewWriter(
234 nil,
235 opts.o...,
236 )
237 if err != nil {
238 t.Fatal(err)
239 }
240 defer enc.Close()
241
242 for i, tt := range zr.File {
243 if !strings.HasSuffix(t.Name(), "") {
244 continue
245 }
246 if testing.Short() && i > 10 {
247 break
248 }
249
250 t.Run(tt.Name, func(t *testing.T) {
251 r, err := tt.Open()
252 if err != nil {
253 t.Error(err)
254 return
255 }
256 in, err := io.ReadAll(r)
257 if err != nil {
258 t.Error(err)
259 }
260 encoded := enc.EncodeAll(in, nil)
261 if len(encoded) > enc.MaxEncodedSize(len(in)) {
262 t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(encoded), enc.MaxEncodedSize(len(in)))
263 }
264
265 got, err := dec.DecodeAll(encoded, make([]byte, 0, len(in)))
266 if err != nil {
267 t.Logf("error: %v\nwant: %v\ngot: %v", err, len(in), len(got))
268 t.Fatal(err)
269 }
270
271 var dst bytes.Buffer
272 enc.ResetContentSize(&dst, int64(len(in)))
273 _, err = enc.Write(in)
274 if err != nil {
275 t.Error(err)
276 }
277 err = enc.Close()
278 if err != nil {
279 t.Error(err)
280 }
281 encoded = dst.Bytes()
282 if len(encoded) > enc.MaxEncodedSize(len(in)) {
283 t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(encoded), enc.MaxEncodedSize(len(in)))
284 }
285 got, err = dec.DecodeAll(encoded, make([]byte, 0, len(in)/2))
286 if err != nil {
287 t.Logf("error: %v\nwant: %v\ngot: %v", err, in, got)
288 t.Error(err)
289 }
290 })
291 }
292 })
293 }
294 }
295
296 func TestEncoder_EncodeAllTwain(t *testing.T) {
297 in, err := os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt")
298 if err != nil {
299 t.Fatal(err)
300 }
301 testWindowSizes := testWindowSizes
302 if testing.Short() {
303 testWindowSizes = []int{1 << 20}
304 }
305
306 dec, err := NewReader(nil)
307 if err != nil {
308 t.Fatal(err)
309 }
310 defer dec.Close()
311
312 for level := speedNotSet + 1; level < speedLast; level++ {
313 t.Run(level.String(), func(t *testing.T) {
314 if isRaceTest && level >= SpeedBestCompression {
315 t.SkipNow()
316 }
317 for _, windowSize := range testWindowSizes {
318 t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
319 e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
320 if err != nil {
321 t.Fatal(err)
322 }
323 defer e.Close()
324 start := time.Now()
325 dst := e.EncodeAll(in, nil)
326 t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
327 mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
328 t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
329
330 decoded, err := dec.DecodeAll(dst, nil)
331 if err != nil {
332 t.Error(err, len(decoded))
333 }
334 if !bytes.Equal(decoded, in) {
335 os.WriteFile("testdata/"+t.Name()+"-Mark.Twain-Tom.Sawyer.txt.got", decoded, os.ModePerm)
336 t.Fatal("Decoded does not match")
337 }
338 t.Log("Encoded content matched")
339 })
340 }
341 })
342 }
343 }
344
345 func TestEncoder_EncodeRLE(t *testing.T) {
346 in := make([]byte, 1<<20)
347 testWindowSizes := testWindowSizes
348 if testing.Short() {
349 testWindowSizes = []int{1 << 20}
350 }
351
352 dec, err := NewReader(nil)
353 if err != nil {
354 t.Fatal(err)
355 }
356 defer dec.Close()
357
358 for level := speedNotSet + 1; level < speedLast; level++ {
359 t.Run(level.String(), func(t *testing.T) {
360 if isRaceTest && level >= SpeedBestCompression {
361 t.SkipNow()
362 }
363 for _, windowSize := range testWindowSizes {
364 t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
365 e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
366 if err != nil {
367 t.Fatal(err)
368 }
369 defer e.Close()
370 start := time.Now()
371 dst := e.EncodeAll(in, nil)
372 t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
373 mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
374 t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
375
376 decoded, err := dec.DecodeAll(dst, nil)
377 if err != nil {
378 t.Error(err, len(decoded))
379 }
380 if !bytes.Equal(decoded, in) {
381 os.WriteFile("testdata/"+t.Name()+"-RLE.got", decoded, os.ModePerm)
382 t.Fatal("Decoded does not match")
383 }
384 t.Log("Encoded content matched")
385 })
386 }
387 })
388 }
389 }
390
391 func TestEncoder_EncodeAllPi(t *testing.T) {
392 in, err := os.ReadFile("../testdata/pi.txt")
393 if err != nil {
394 t.Fatal(err)
395 }
396 testWindowSizes := testWindowSizes
397 if testing.Short() {
398 testWindowSizes = []int{1 << 20}
399 }
400
401 dec, err := NewReader(nil)
402 if err != nil {
403 t.Fatal(err)
404 }
405 defer dec.Close()
406
407 for level := speedNotSet + 1; level < speedLast; level++ {
408 t.Run(level.String(), func(t *testing.T) {
409 if isRaceTest && level >= SpeedBestCompression {
410 t.SkipNow()
411 }
412 for _, windowSize := range testWindowSizes {
413 t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
414 e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
415 if err != nil {
416 t.Fatal(err)
417 }
418 defer e.Close()
419 start := time.Now()
420 dst := e.EncodeAll(in, nil)
421 t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
422 mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
423 t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
424
425 decoded, err := dec.DecodeAll(dst, nil)
426 if err != nil {
427 t.Error(err, len(decoded))
428 }
429 if !bytes.Equal(decoded, in) {
430 os.WriteFile("testdata/"+t.Name()+"-pi.txt.got", decoded, os.ModePerm)
431 t.Fatal("Decoded does not match")
432 }
433 t.Log("Encoded content matched")
434 })
435 }
436 })
437 }
438 }
439
440 func TestWithEncoderPadding(t *testing.T) {
441 n := 100
442 if testing.Short() {
443 n = 2
444 }
445 rng := rand.New(rand.NewSource(0x1337))
446 d, err := NewReader(nil)
447 if err != nil {
448 t.Fatal(err)
449 }
450 defer d.Close()
451
452 for i := 0; i < n; i++ {
453 padding := (rng.Int() & 0xfff) + 1
454 src := make([]byte, (rng.Int()&0xfffff)+1)
455 for i := range src {
456 src[i] = uint8(rng.Uint32()) & 7
457 }
458 e, err := NewWriter(nil, WithEncoderPadding(padding), WithEncoderCRC(rng.Uint32()&1 == 0))
459 if err != nil {
460 t.Fatal(err)
461 }
462
463 dst := e.EncodeAll(src, nil)
464 if len(dst)%padding != 0 {
465 t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
466 }
467 got, err := d.DecodeAll(dst, nil)
468 if err != nil {
469 t.Fatal(err)
470 }
471 if !bytes.Equal(src, got) {
472 t.Fatal("output mismatch")
473 }
474
475 dst = e.EncodeAll(src, make([]byte, rng.Int()&255))
476 if len(dst)%padding != 0 {
477 t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
478 }
479
480
481 var buf bytes.Buffer
482 e.ResetContentSize(&buf, int64(len(src)))
483 _, err = io.Copy(e, bytes.NewBuffer(src))
484 if err != nil {
485 t.Fatal(err)
486 }
487 err = e.Close()
488 if err != nil {
489 t.Fatal(err)
490 }
491 dst = buf.Bytes()
492 if len(dst)%padding != 0 {
493 t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
494 }
495
496 got, err = d.DecodeAll(dst, nil)
497 if err != nil {
498 t.Fatal(err)
499 }
500 if !bytes.Equal(src, got) {
501 t.Fatal("output mismatch")
502 }
503
504 buf.Reset()
505 e.Reset(&buf)
506 _, err = io.Copy(e, bytes.NewBuffer(src))
507 if err != nil {
508 t.Fatal(err)
509 }
510 err = e.Close()
511 if err != nil {
512 t.Fatal(err)
513 }
514 dst = buf.Bytes()
515 if len(dst)%padding != 0 {
516 t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
517 }
518
519 got, err = d.DecodeAll(dst, nil)
520 if err != nil {
521 t.Fatal(err)
522 }
523 if !bytes.Equal(src, got) {
524 t.Fatal("output mismatch")
525 }
526 }
527 }
528 func TestEncoder_EncoderXML(t *testing.T) {
529 testEncoderRoundtrip(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe})
530 testEncoderRoundtripWriter(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe})
531 }
532
533 func TestEncoder_EncoderTwain(t *testing.T) {
534 testEncoderRoundtrip(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6})
535 testEncoderRoundtripWriter(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6})
536 }
537
538 func TestEncoder_EncoderPi(t *testing.T) {
539 testEncoderRoundtrip(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb})
540 testEncoderRoundtripWriter(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb})
541 }
542
543 func TestEncoder_EncoderSilesia(t *testing.T) {
544 testEncoderRoundtrip(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b})
545 testEncoderRoundtripWriter(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b})
546 }
547
548 func TestEncoder_EncoderSimple(t *testing.T) {
549 testEncoderRoundtrip(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95})
550 testEncoderRoundtripWriter(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95})
551 }
552
553 func TestEncoder_EncoderHTML(t *testing.T) {
554 testEncoderRoundtrip(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37})
555 testEncoderRoundtripWriter(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37})
556 }
557
// TestEncoder_EncoderEnwik9 has no body in this file and therefore always
// passes without exercising the encoder.
// NOTE(review): presumably a placeholder for an enwik9 round-trip test like
// the sibling TestEncoder_Encoder* tests; confirm before relying on it.
func TestEncoder_EncoderEnwik9(t *testing.T) {}
562
563
564 func testEncoderRoundtrip(t *testing.T, file string, wantCRC []byte) {
565 for _, opt := range getEncOpts(1) {
566 t.Run(opt.name, func(t *testing.T) {
567 opt := opt
568
569 f, err := os.Open(file)
570 if err != nil {
571 if os.IsNotExist(err) {
572 t.Skip("No input file:", file)
573 return
574 }
575 t.Fatal(err)
576 }
577 defer f.Close()
578 if stat, err := f.Stat(); testing.Short() && err == nil {
579 if stat.Size() > 10000 {
580 t.SkipNow()
581 }
582 }
583 input := io.Reader(f)
584 if strings.HasSuffix(file, ".zst") {
585 dec, err := NewReader(f)
586 if err != nil {
587 t.Fatal(err)
588 }
589 input = dec
590 defer dec.Close()
591 }
592
593 pr, pw := io.Pipe()
594 dec2, err := NewReader(pr)
595 if err != nil {
596 t.Fatal(err)
597 }
598 defer dec2.Close()
599
600 enc, err := NewWriter(pw, opt.o...)
601 if err != nil {
602 t.Fatal(err)
603 }
604 defer enc.Close()
605 var wantSize int64
606 start := time.Now()
607 go func() {
608 n, err := enc.ReadFrom(input)
609 if err != nil {
610 t.Error(err)
611 return
612 }
613 wantSize = n
614 err = enc.Close()
615 if err != nil {
616 t.Error(err)
617 return
618 }
619 pw.Close()
620 }()
621 var gotSize int64
622
623
624 d := xxhash.New()
625 if true {
626 gotSize, err = io.Copy(d, dec2)
627 } else {
628 fout, err := os.Create(file + ".got")
629 if err != nil {
630 t.Fatal(err)
631 }
632 gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2)
633 if err != nil {
634 t.Fatal(err)
635 }
636 }
637 if wantSize != gotSize {
638 t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize)
639 }
640 if err != nil {
641 t.Fatal(err)
642 }
643 if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) {
644 t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC)
645 } else if len(wantCRC) != 8 {
646 t.Logf("Unable to verify CRC: %#v", gotCRC)
647 } else {
648 t.Logf("CRC Verified: %#v", gotCRC)
649 }
650 t.Log("Encoder len", wantSize)
651 mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
652 t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec)
653 })
654 }
655 }
656
// writerWrapper exposes only the Write method of the wrapped writer. Any
// optional interface the wrapped value implements (such as io.ReaderFrom) is
// hidden, so helpers like io.CopyBuffer have to go through Write.
type writerWrapper struct {
	w io.Writer
}

// Write forwards p unchanged to the wrapped writer and returns its result.
func (ww writerWrapper) Write(p []byte) (int, error) {
	written, err := ww.w.Write(p)
	return written, err
}
664
665
666 func testEncoderRoundtripWriter(t *testing.T, file string, wantCRC []byte) {
667 f, err := os.Open(file)
668 if err != nil {
669 if os.IsNotExist(err) {
670 t.Skip("No input file:", file)
671 return
672 }
673 t.Fatal(err)
674 }
675 defer f.Close()
676 if stat, err := f.Stat(); testing.Short() && err == nil {
677 if stat.Size() > 10000 {
678 t.SkipNow()
679 }
680 }
681 input := io.Reader(f)
682 if strings.HasSuffix(file, ".zst") {
683 dec, err := NewReader(f)
684 if err != nil {
685 t.Fatal(err)
686 }
687 input = dec
688 defer dec.Close()
689 }
690
691 pr, pw := io.Pipe()
692 dec2, err := NewReader(pr)
693 if err != nil {
694 t.Fatal(err)
695 }
696 defer dec2.Close()
697
698 enc, err := NewWriter(pw, WithEncoderCRC(true))
699 if err != nil {
700 t.Fatal(err)
701 }
702 defer enc.Close()
703 encW := writerWrapper{w: enc}
704 var wantSize int64
705 start := time.Now()
706 go func() {
707 n, err := io.CopyBuffer(encW, input, make([]byte, 1337))
708 if err != nil {
709 t.Error(err)
710 return
711 }
712 wantSize = n
713 err = enc.Close()
714 if err != nil {
715 t.Error(err)
716 return
717 }
718 pw.Close()
719 }()
720 var gotSize int64
721
722
723 d := xxhash.New()
724 if true {
725 gotSize, err = io.Copy(d, dec2)
726 } else {
727 fout, err := os.Create(file + ".got")
728 if err != nil {
729 t.Fatal(err)
730 }
731 gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2)
732 if err != nil {
733 t.Fatal(err)
734 }
735 }
736 if wantSize != gotSize {
737 t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize)
738 }
739 if err != nil {
740 t.Fatal(err)
741 }
742 if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) {
743 t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC)
744 } else if len(wantCRC) != 8 {
745 t.Logf("Unable to verify CRC: %#v", gotCRC)
746 } else {
747 t.Logf("CRC Verified: %#v", gotCRC)
748 }
749 t.Log("Fast Encoder len", wantSize)
750 mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
751 t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec)
752 }
753
754 func TestEncoder_EncodeAllSilesia(t *testing.T) {
755 if testing.Short() {
756 t.SkipNow()
757 }
758 in, err := os.ReadFile("testdata/silesia.tar")
759 if err != nil {
760 if os.IsNotExist(err) {
761 t.Skip("Missing testdata/silesia.tar")
762 return
763 }
764 t.Fatal(err)
765 }
766
767 var e Encoder
768 start := time.Now()
769 dst := e.EncodeAll(in, nil)
770 t.Log("Fast Encoder len", len(in), "-> zstd len", len(dst))
771 mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
772 t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
773
774 dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20))
775 if err != nil {
776 t.Fatal(err)
777 }
778 defer dec.Close()
779 decoded, err := dec.DecodeAll(dst, nil)
780 if err != nil {
781 t.Error(err, len(decoded))
782 }
783 if !bytes.Equal(decoded, in) {
784 os.WriteFile("testdata/"+t.Name()+"-silesia.tar.got", decoded, os.ModePerm)
785 t.Fatal("Decoded does not match")
786 }
787 t.Log("Encoded content matched")
788 }
789
790 func TestEncoderReadFrom(t *testing.T) {
791 buffer := bytes.NewBuffer(nil)
792 encoder, err := NewWriter(buffer)
793 if err != nil {
794 t.Fatal(err)
795 }
796 if _, err := encoder.ReadFrom(strings.NewReader("0")); err != nil {
797 t.Fatal(err)
798 }
799 if err := encoder.Close(); err != nil {
800 t.Fatal(err)
801 }
802
803 dec, _ := NewReader(nil)
804 toDec := buffer.Bytes()
805 toDec = append(toDec, toDec...)
806 decoded, err := dec.DecodeAll(toDec, nil)
807 if err != nil {
808 t.Fatal(err)
809 }
810
811 if !bytes.Equal([]byte("00"), decoded) {
812 t.Logf("encoded: % x\n", buffer.Bytes())
813 t.Fatalf("output mismatch, got %s", string(decoded))
814 }
815 dec.Close()
816 }
817
818 func TestInterleavedWriteReadFrom(t *testing.T) {
819 var encoded bytes.Buffer
820
821 enc, err := NewWriter(&encoded)
822 if err != nil {
823 t.Fatal(err)
824 }
825
826 if _, err := enc.Write([]byte("write1")); err != nil {
827 t.Fatal(err)
828 }
829 if _, err := enc.Write([]byte("write2")); err != nil {
830 t.Fatal(err)
831 }
832 if _, err := enc.ReadFrom(strings.NewReader("readfrom1")); err != nil {
833 t.Fatal(err)
834 }
835 if _, err := enc.Write([]byte("write3")); err != nil {
836 t.Fatal(err)
837 }
838
839 if err := enc.Close(); err != nil {
840 t.Fatal(err)
841 }
842
843 dec, err := NewReader(&encoded)
844 if err != nil {
845 t.Fatal(err)
846 }
847 defer dec.Close()
848
849 gotb, err := io.ReadAll(dec)
850 if err != nil {
851 t.Fatal(err)
852 }
853 got := string(gotb)
854
855 if want := "write1write2readfrom1write3"; got != want {
856 t.Errorf("got decoded %q, want %q", got, want)
857 }
858 }
859
860 func TestEncoder_EncodeAllEmpty(t *testing.T) {
861 if testing.Short() {
862 t.SkipNow()
863 }
864 var in []byte
865
866 for _, opt := range getEncOpts(1) {
867 t.Run(opt.name, func(t *testing.T) {
868 e, err := NewWriter(nil, opt.o...)
869 if err != nil {
870 t.Fatal(err)
871 }
872 defer e.Close()
873 dst := e.EncodeAll(in, nil)
874 t.Log("Block Encoder len", len(in), "-> zstd len", len(dst), dst)
875
876 dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20))
877 if err != nil {
878 t.Fatal(err)
879 }
880 defer dec.Close()
881 decoded, err := dec.DecodeAll(dst, nil)
882 if err != nil {
883 t.Error(err, len(decoded))
884 }
885 if !bytes.Equal(decoded, in) {
886 t.Fatal("Decoded does not match")
887 }
888
889
890 var buf bytes.Buffer
891 e.Reset(&buf)
892 err = e.Close()
893 if err != nil {
894 t.Fatal(err)
895 }
896 dst = buf.Bytes()
897 t.Log("Buffer Encoder len", len(in), "-> zstd len", len(dst))
898
899 decoded, err = dec.DecodeAll(dst, nil)
900 if err != nil {
901 t.Error(err, len(decoded))
902 }
903 if !bytes.Equal(decoded, in) {
904 t.Fatal("Decoded does not match")
905 }
906
907 t.Log("Encoded content matched")
908 })
909 }
910 }
911
912 func TestEncoder_EncodeAllEnwik9(t *testing.T) {
913 if testing.Short() {
914 t.SkipNow()
915 }
916 file := "testdata/enwik9.zst"
917 f, err := os.Open(file)
918 if err != nil {
919 if os.IsNotExist(err) {
920 t.Skip("To run extended tests, download http://mattmahoney.net/dc/enwik9.zip unzip it \n" +
921 "compress it with 'zstd -15 -T0 enwik9' and place it in " + file)
922 }
923 }
924 defer f.Close()
925
926 dec, err := NewReader(f)
927 if err != nil {
928 t.Fatal(err)
929 }
930 defer dec.Close()
931 in, err := io.ReadAll(dec)
932 if err != nil {
933 t.Fatal(err)
934 }
935
936 start := time.Now()
937 e, err := NewWriter(nil)
938 dst := e.EncodeAll(in, nil)
939 if err != nil {
940 t.Fatal(err)
941 }
942 t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
943 mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
944 t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
945 decoded, err := dec.DecodeAll(dst, nil)
946 if err != nil {
947 t.Error(err, len(decoded))
948 }
949 if !bytes.Equal(decoded, in) {
950 os.WriteFile("testdata/"+t.Name()+"-enwik9.got", decoded, os.ModePerm)
951 t.Fatal("Decoded does not match")
952 }
953 t.Log("Encoded content matched")
954 }
955
956 func TestEncoder_EncoderStreamEnwik9(t *testing.T) {
957 if testing.Short() {
958 t.SkipNow()
959 }
960 file := "testdata/enwik9.zst"
961 f, err := os.Open(file)
962 if err != nil {
963 if os.IsNotExist(err) {
964 t.Skip("To run extended tests, download http://mattmahoney.net/dc/enwik9.zip unzip it \n" +
965 "compress it with 'zstd -15 -T0 enwik9' and place it in " + file)
966 }
967 }
968 defer f.Close()
969
970 dec, err := NewReader(f)
971 if err != nil {
972 t.Fatal(err)
973 }
974 defer dec.Close()
975 in, err := io.ReadAll(dec)
976 if err != nil {
977 t.Fatal(err)
978 }
979
980 start := time.Now()
981 var dst bytes.Buffer
982 e, err := NewWriter(&dst)
983 if err != nil {
984 t.Fatal(err)
985 }
986 _, err = io.Copy(e, bytes.NewBuffer(in))
987 if err != nil {
988 t.Fatal(err)
989 }
990 e.Close()
991 t.Log("Full Encoder len", len(in), "-> zstd len", dst.Len())
992 mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
993 t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
994 if false {
995 decoded, err := dec.DecodeAll(dst.Bytes(), nil)
996 if err != nil {
997 t.Error(err, len(decoded))
998 }
999 if !bytes.Equal(decoded, in) {
1000 os.WriteFile("testdata/"+t.Name()+"-enwik9.got", decoded, os.ModePerm)
1001 t.Fatal("Decoded does not match")
1002 }
1003 t.Log("Encoded content matched")
1004 }
1005 }
1006
1007 func BenchmarkEncoder_EncodeAllXML(b *testing.B) {
1008 f, err := os.Open("testdata/xml.zst")
1009 if err != nil {
1010 b.Fatal(err)
1011 }
1012 defer f.Close()
1013
1014 dec, err := NewReader(f)
1015 if err != nil {
1016 b.Fatal(err)
1017 }
1018 in, err := io.ReadAll(dec)
1019 if err != nil {
1020 b.Fatal(err)
1021 }
1022 dec.Close()
1023
1024 enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
1025 dst := enc.EncodeAll(in, nil)
1026 wantSize := len(dst)
1027
1028 b.ResetTimer()
1029 b.ReportAllocs()
1030 b.SetBytes(int64(len(in)))
1031 for i := 0; i < b.N; i++ {
1032 dst := enc.EncodeAll(in, dst[:0])
1033 if len(dst) != wantSize {
1034 b.Fatal(len(dst), "!=", wantSize)
1035 }
1036 }
1037 }
1038
1039 func BenchmarkEncoder_EncodeAllSimple(b *testing.B) {
1040 in, err := os.ReadFile("testdata/z000028")
1041 if err != nil {
1042 b.Fatal(err)
1043 }
1044
1045 for level := speedNotSet + 1; level < speedLast; level++ {
1046 b.Run(level.String(), func(b *testing.B) {
1047 enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level))
1048 if err != nil {
1049 b.Fatal(err)
1050 }
1051 defer enc.Close()
1052 dst := enc.EncodeAll(in, nil)
1053 wantSize := len(dst)
1054 b.ResetTimer()
1055 b.ReportAllocs()
1056 b.SetBytes(int64(len(in)))
1057 for i := 0; i < b.N; i++ {
1058 dst := enc.EncodeAll(in, dst[:0])
1059 if len(dst) != wantSize {
1060 b.Fatal(len(dst), "!=", wantSize)
1061 }
1062 }
1063 })
1064 }
1065 }
1066
1067 func BenchmarkEncoder_EncodeAllSimple4K(b *testing.B) {
1068 in, err := os.ReadFile("testdata/z000028")
1069 if err != nil {
1070 b.Fatal(err)
1071 }
1072 in = in[:4096]
1073
1074 for level := speedNotSet + 1; level < speedLast; level++ {
1075 b.Run(level.String(), func(b *testing.B) {
1076 enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level))
1077 if err != nil {
1078 b.Fatal(err)
1079 }
1080 defer enc.Close()
1081 dst := enc.EncodeAll(in, nil)
1082 wantSize := len(dst)
1083 b.ResetTimer()
1084 b.ReportAllocs()
1085 b.SetBytes(int64(len(in)))
1086 for i := 0; i < b.N; i++ {
1087 dst := enc.EncodeAll(in, dst[:0])
1088 if len(dst) != wantSize {
1089 b.Fatal(len(dst), "!=", wantSize)
1090 }
1091 }
1092 })
1093 }
1094 }
1095
1096 func BenchmarkEncoder_EncodeAllHTML(b *testing.B) {
1097 in, err := os.ReadFile("../testdata/html.txt")
1098 if err != nil {
1099 b.Fatal(err)
1100 }
1101
1102 enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
1103 dst := enc.EncodeAll(in, nil)
1104 wantSize := len(dst)
1105 b.ResetTimer()
1106 b.ReportAllocs()
1107 b.SetBytes(int64(len(in)))
1108 for i := 0; i < b.N; i++ {
1109 dst := enc.EncodeAll(in, dst[:0])
1110 if len(dst) != wantSize {
1111 b.Fatal(len(dst), "!=", wantSize)
1112 }
1113 }
1114 }
1115
1116 func BenchmarkEncoder_EncodeAllTwain(b *testing.B) {
1117 in, err := os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt")
1118 if err != nil {
1119 b.Fatal(err)
1120 }
1121
1122 enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
1123 dst := enc.EncodeAll(in, nil)
1124 wantSize := len(dst)
1125 b.ResetTimer()
1126 b.ReportAllocs()
1127 b.SetBytes(int64(len(in)))
1128 for i := 0; i < b.N; i++ {
1129 dst := enc.EncodeAll(in, dst[:0])
1130 if len(dst) != wantSize {
1131 b.Fatal(len(dst), "!=", wantSize)
1132 }
1133 }
1134 }
1135
1136 func BenchmarkEncoder_EncodeAllPi(b *testing.B) {
1137 in, err := os.ReadFile("../testdata/pi.txt")
1138 if err != nil {
1139 b.Fatal(err)
1140 }
1141
1142 enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
1143 dst := enc.EncodeAll(in, nil)
1144 wantSize := len(dst)
1145 b.ResetTimer()
1146 b.ReportAllocs()
1147 b.SetBytes(int64(len(in)))
1148 for i := 0; i < b.N; i++ {
1149 dst := enc.EncodeAll(in, dst[:0])
1150 if len(dst) != wantSize {
1151 b.Fatal(len(dst), "!=", wantSize)
1152 }
1153 }
1154 }
1155
1156 func BenchmarkRandom4KEncodeAllFastest(b *testing.B) {
1157 rng := rand.New(rand.NewSource(1))
1158 data := make([]byte, 4<<10)
1159 for i := range data {
1160 data[i] = uint8(rng.Intn(256))
1161 }
1162 enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(1))
1163 defer enc.Close()
1164 dst := enc.EncodeAll(data, nil)
1165 wantSize := len(dst)
1166 b.ResetTimer()
1167 b.ReportAllocs()
1168 b.SetBytes(int64(len(data)))
1169 for i := 0; i < b.N; i++ {
1170 dst := enc.EncodeAll(data, dst[:0])
1171 if len(dst) != wantSize {
1172 b.Fatal(len(dst), "!=", wantSize)
1173 }
1174 }
1175 }
1176
1177 func BenchmarkRandom10MBEncodeAllFastest(b *testing.B) {
1178 rng := rand.New(rand.NewSource(1))
1179 data := make([]byte, 10<<20)
1180 rng.Read(data)
1181 enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(2))
1182 defer enc.Close()
1183 dst := enc.EncodeAll(data, nil)
1184 wantSize := len(dst)
1185 b.ResetTimer()
1186 b.ReportAllocs()
1187 b.SetBytes(int64(len(data)))
1188 for i := 0; i < b.N; i++ {
1189 dst := enc.EncodeAll(data, dst[:0])
1190 if len(dst) != wantSize {
1191 b.Fatal(len(dst), "!=", wantSize)
1192 }
1193 }
1194 }
1195
1196 func BenchmarkRandom4KEncodeAllDefault(b *testing.B) {
1197 rng := rand.New(rand.NewSource(1))
1198 data := make([]byte, 4<<10)
1199 rng.Read(data)
1200 enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1))
1201 defer enc.Close()
1202 dst := enc.EncodeAll(data, nil)
1203 wantSize := len(dst)
1204 b.ResetTimer()
1205 b.ReportAllocs()
1206 b.SetBytes(int64(len(data)))
1207 for i := 0; i < b.N; i++ {
1208 dst := enc.EncodeAll(data, dst[:0])
1209 if len(dst) != wantSize {
1210 b.Fatal(len(dst), "!=", wantSize)
1211 }
1212 }
1213 }
1214
1215 func BenchmarkRandomEncodeAllDefault(b *testing.B) {
1216 rng := rand.New(rand.NewSource(1))
1217 data := make([]byte, 10<<20)
1218 rng.Read(data)
1219 enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1))
1220 defer enc.Close()
1221 dst := enc.EncodeAll(data, nil)
1222 wantSize := len(dst)
1223 b.ResetTimer()
1224 b.ReportAllocs()
1225 b.SetBytes(int64(len(data)))
1226 for i := 0; i < b.N; i++ {
1227 dst := enc.EncodeAll(data, dst[:0])
1228 if len(dst) != wantSize {
1229 b.Fatal(len(dst), "!=", wantSize)
1230 }
1231 }
1232 }
1233
1234 func BenchmarkRandom10MBEncoderFastest(b *testing.B) {
1235 rng := rand.New(rand.NewSource(1))
1236 data := make([]byte, 10<<20)
1237 rng.Read(data)
1238 wantSize := int64(len(data))
1239 enc, _ := NewWriter(io.Discard, WithEncoderLevel(SpeedFastest))
1240 defer enc.Close()
1241 n, err := io.Copy(enc, bytes.NewBuffer(data))
1242 if err != nil {
1243 b.Fatal(err)
1244 }
1245 if n != wantSize {
1246 b.Fatal(n, "!=", wantSize)
1247 }
1248 b.ResetTimer()
1249 b.ReportAllocs()
1250 b.SetBytes(wantSize)
1251 for i := 0; i < b.N; i++ {
1252 enc.Reset(io.Discard)
1253 n, err := io.Copy(enc, bytes.NewBuffer(data))
1254 if err != nil {
1255 b.Fatal(err)
1256 }
1257 if n != wantSize {
1258 b.Fatal(n, "!=", wantSize)
1259 }
1260 }
1261 }
1262
1263 func BenchmarkRandomEncoderDefault(b *testing.B) {
1264 rng := rand.New(rand.NewSource(1))
1265 data := make([]byte, 10<<20)
1266 rng.Read(data)
1267 wantSize := int64(len(data))
1268 enc, _ := NewWriter(io.Discard, WithEncoderLevel(SpeedDefault))
1269 defer enc.Close()
1270 n, err := io.Copy(enc, bytes.NewBuffer(data))
1271 if err != nil {
1272 b.Fatal(err)
1273 }
1274 if n != wantSize {
1275 b.Fatal(n, "!=", wantSize)
1276 }
1277 b.ResetTimer()
1278 b.ReportAllocs()
1279 b.SetBytes(wantSize)
1280 for i := 0; i < b.N; i++ {
1281 enc.Reset(io.Discard)
1282 n, err := io.Copy(enc, bytes.NewBuffer(data))
1283 if err != nil {
1284 b.Fatal(err)
1285 }
1286 if n != wantSize {
1287 b.Fatal(n, "!=", wantSize)
1288 }
1289 }
1290 }
1291
View as plain text