1
2
3
4
5 package s2
6
7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "math/rand"
12 "os"
13 "runtime"
14 "strings"
15 "testing"
16
17 "github.com/klauspost/compress/internal/snapref"
18 "github.com/klauspost/compress/zip"
19 )
20
21 func testOptions(_ testing.TB) map[string][]WriterOption {
22 var testOptions = map[string][]WriterOption{
23 "default": {WriterAddIndex()},
24 "better": {WriterBetterCompression()},
25 "best": {WriterBestCompression()},
26 "none": {WriterUncompressed()},
27 }
28
29 x := make(map[string][]WriterOption)
30 cloneAdd := func(org []WriterOption, add ...WriterOption) []WriterOption {
31 y := make([]WriterOption, len(org)+len(add))
32 copy(y, org)
33 copy(y[len(org):], add)
34 return y
35 }
36 for name, opt := range testOptions {
37 x[name] = opt
38 x[name+"-c1"] = cloneAdd(opt, WriterConcurrency(1))
39 }
40 testOptions = x
41 x = make(map[string][]WriterOption)
42 for name, opt := range testOptions {
43 x[name] = opt
44 if !testing.Short() {
45 x[name+"-4k-win"] = cloneAdd(opt, WriterBlockSize(4<<10))
46 x[name+"-4M-win"] = cloneAdd(opt, WriterBlockSize(4<<20))
47 }
48 }
49 testOptions = x
50 x = make(map[string][]WriterOption)
51 for name, opt := range testOptions {
52 x[name] = opt
53 x[name+"-pad-min"] = cloneAdd(opt, WriterPadding(2), WriterPaddingSrc(zeroReader{}))
54 if !testing.Short() {
55 x[name+"-pad-8000"] = cloneAdd(opt, WriterPadding(8000), WriterPaddingSrc(zeroReader{}))
56 x[name+"-pad-max"] = cloneAdd(opt, WriterPadding(4<<20), WriterPaddingSrc(zeroReader{}))
57 }
58 }
59 for name, opt := range testOptions {
60 x[name] = opt
61 x[name+"-snappy"] = cloneAdd(opt, WriterSnappyCompat())
62 x[name+"-custom"] = cloneAdd(opt, WriterCustomEncoder(snapref.EncodeBlockInto))
63 }
64 testOptions = x
65 return testOptions
66 }
67
// zeroReader is a reader that fills every buffer it is given with zero bytes.
type zeroReader struct{}

// Read overwrites all of p with zeros and reports len(p) bytes read.
// The returned error is always nil.
func (zeroReader) Read(p []byte) (int, error) {
	filled := len(p)
	for idx := range p {
		p[idx] = 0
	}
	return filled, nil
}
76
77 func TestEncoderRegression(t *testing.T) {
78 data, err := os.ReadFile("testdata/enc_regressions.zip")
79 if err != nil {
80 t.Fatal(err)
81 }
82 zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
83 if err != nil {
84 t.Fatal(err)
85 }
86
87 test := func(t *testing.T, data []byte) {
88 if testing.Short() && len(data) > 10000 {
89 t.SkipNow()
90 }
91 var blocksTested bool
92 for name, opts := range testOptions(t) {
93 t.Run(name, func(t *testing.T) {
94 var buf bytes.Buffer
95 dec := NewReader(nil)
96 enc := NewWriter(&buf, opts...)
97
98 if !blocksTested {
99 comp := Encode(make([]byte, MaxEncodedLen(len(data))), data)
100 decoded, err := Decode(nil, comp)
101 if err != nil {
102 t.Error(err)
103 return
104 }
105 if !bytes.Equal(data, decoded) {
106 t.Error("block decoder mismatch")
107 return
108 }
109 if mel := MaxEncodedLen(len(data)); len(comp) > mel {
110 t.Error(fmt.Errorf("MaxEncodedLen Exceed: input: %d, mel: %d, got %d", len(data), mel, len(comp)))
111 return
112 }
113 comp = EncodeBetter(make([]byte, MaxEncodedLen(len(data))), data)
114 decoded, err = Decode(nil, comp)
115 if err != nil {
116 t.Error(err)
117 return
118 }
119 if !bytes.Equal(data, decoded) {
120 t.Error("block decoder mismatch")
121 return
122 }
123 if mel := MaxEncodedLen(len(data)); len(comp) > mel {
124 t.Error(fmt.Errorf("MaxEncodedLen Exceed: input: %d, mel: %d, got %d", len(data), mel, len(comp)))
125 return
126 }
127
128 comp = EncodeBest(make([]byte, MaxEncodedLen(len(data))), data)
129 decoded, err = Decode(nil, comp)
130 if err != nil {
131 t.Error(err)
132 return
133 }
134 if !bytes.Equal(data, decoded) {
135 t.Error("block decoder mismatch")
136 return
137 }
138 if mel := MaxEncodedLen(len(data)); len(comp) > mel {
139 t.Error(fmt.Errorf("MaxEncodedLen Exceed: input: %d, mel: %d, got %d", len(data), mel, len(comp)))
140 return
141 }
142 blocksTested = true
143 }
144
145
146 n, err := enc.Write(data)
147 if err != nil {
148 t.Error(err)
149 return
150 }
151 if n != len(data) {
152 t.Error(fmt.Errorf("Write: Short write, want %d, got %d", len(data), n))
153 return
154 }
155 err = enc.Close()
156 if err != nil {
157 t.Error(err)
158 return
159 }
160
161 err = enc.Close()
162 if err != nil {
163 t.Error(err)
164 return
165 }
166 comp := buf.Bytes()
167 if enc.pad > 0 && len(comp)%enc.pad != 0 {
168 t.Error(fmt.Errorf("wanted size to be mutiple of %d, got size %d with remainder %d", enc.pad, len(comp), len(comp)%enc.pad))
169 return
170 }
171 var got []byte
172 if !strings.Contains(name, "-snappy") {
173 dec.Reset(&buf)
174 got, err = io.ReadAll(dec)
175 } else {
176 sdec := snapref.NewReader(&buf)
177 got, err = io.ReadAll(sdec)
178 }
179 if err != nil {
180 t.Error(err)
181 return
182 }
183 if !bytes.Equal(data, got) {
184 t.Error("block (reset) decoder mismatch")
185 return
186 }
187
188
189 buf.Reset()
190 enc.Reset(&buf)
191 n2, err := enc.ReadFrom(bytes.NewBuffer(data))
192 if err != nil {
193 t.Error(err)
194 return
195 }
196 if n2 != int64(len(data)) {
197 t.Error(fmt.Errorf("ReadFrom: Short read, want %d, got %d", len(data), n2))
198 return
199 }
200 err = enc.Close()
201 if err != nil {
202 t.Error(err)
203 return
204 }
205 if enc.pad > 0 && buf.Len()%enc.pad != 0 {
206 t.Error(fmt.Errorf("wanted size to be mutiple of %d, got size %d with remainder %d", enc.pad, buf.Len(), buf.Len()%enc.pad))
207 return
208 }
209 if !strings.Contains(name, "-snappy") {
210 dec.Reset(&buf)
211 got, err = io.ReadAll(dec)
212 } else {
213 sdec := snapref.NewReader(&buf)
214 got, err = io.ReadAll(sdec)
215 }
216 if err != nil {
217 t.Error(err)
218 return
219 }
220 if !bytes.Equal(data, got) {
221 t.Error("frame (reset) decoder mismatch")
222 return
223 }
224 })
225 }
226 }
227 for _, tt := range zr.File {
228 if !strings.HasSuffix(t.Name(), "") {
229 continue
230 }
231 t.Run(tt.Name, func(t *testing.T) {
232 r, err := tt.Open()
233 if err != nil {
234 t.Error(err)
235 return
236 }
237 b, err := io.ReadAll(r)
238 if err != nil {
239 t.Error(err)
240 return
241 }
242 test(t, b[:len(b):len(b)])
243 })
244 }
245 }
246
247 func TestIndex(t *testing.T) {
248 fatalErr := func(t testing.TB, err error) {
249 if err != nil {
250 t.Fatal(err)
251 }
252 }
253
254
255 var input []byte
256 if !testing.Short() {
257 input = make([]byte, 10<<20)
258 } else {
259 input = make([]byte, 500<<10)
260 }
261 rng := rand.New(rand.NewSource(0xabeefcafe))
262 rng.Read(input)
263
264 for i, v := range input {
265 input[i] = '0' + v&3
266 }
267
268 var buf bytes.Buffer
269
270 enc := NewWriter(&buf, WriterBlockSize(100<<10), WriterAddIndex(), WriterBetterCompression(), WriterConcurrency(runtime.GOMAXPROCS(0)))
271 todo := input
272 for len(todo) > 0 {
273
274 x := todo[:rng.Intn(1+len(todo)&65535)]
275 if len(x) == 0 {
276 x = todo[:1]
277 }
278 _, err := enc.Write(x)
279 fatalErr(t, err)
280
281 if rng.Intn(8) == 0 {
282 err = enc.Flush()
283 fatalErr(t, err)
284 }
285 todo = todo[len(x):]
286 }
287
288
289 idxBytes, err := enc.CloseIndex()
290 fatalErr(t, err)
291 if false {
292
293 var index Index
294 _, err = index.Load(idxBytes)
295 fatalErr(t, err)
296 t.Log(string(index.JSON()))
297 }
298
299 compressed := buf.Bytes()
300 for wantOffset := int64(0); wantOffset < int64(len(input)); wantOffset += 65531 {
301 t.Run(fmt.Sprintf("offset-%d", wantOffset), func(t *testing.T) {
302
303
304 want := input[wantOffset:]
305
306
307 var index Index
308 _, err = index.Load(idxBytes)
309 fatalErr(t, err)
310
311
312 compressedOffset, uncompressedOffset, err := index.Find(wantOffset)
313 fatalErr(t, err)
314
315
316
317 in := io.Reader(bytes.NewBuffer(compressed[compressedOffset:]))
318
319
320
321 dec := NewReader(in, ReaderIgnoreStreamIdentifier())
322
323
324
325 toSkip := wantOffset - uncompressedOffset
326 err = dec.Skip(toSkip)
327 fatalErr(t, err)
328
329
330 got, err := io.ReadAll(dec)
331 fatalErr(t, err)
332 if !bytes.Equal(got, want) {
333 t.Error("Result mismatch", wantOffset)
334 }
335
336
337 for i := io.SeekStart; i <= io.SeekEnd; i++ {
338 t.Run(fmt.Sprintf("seek-%d", i), func(t *testing.T) {
339
340 dec = NewReader(bytes.NewReader(compressed))
341
342 rs, err := dec.ReadSeeker(true, nil)
343 fatalErr(t, err)
344
345
346 var tmp = make([]byte, len(input)/2)
347 _, err = io.ReadFull(rs, tmp[:])
348 fatalErr(t, err)
349
350 toSkip := wantOffset
351 switch i {
352 case io.SeekStart:
353 case io.SeekCurrent:
354 toSkip = wantOffset - int64(len(input)/2)
355 case io.SeekEnd:
356 toSkip = -(int64(len(input)) - wantOffset)
357 }
358 gotOffset, err := rs.Seek(toSkip, i)
359 if gotOffset != wantOffset {
360 t.Errorf("got offset %d, want %d", gotOffset, wantOffset)
361 }
362
363 got, err := io.ReadAll(dec)
364 fatalErr(t, err)
365 if !bytes.Equal(got, want) {
366 t.Error("Result mismatch", wantOffset)
367 }
368 })
369 }
370 t.Run("ReadAt", func(t *testing.T) {
371
372 dec = NewReader(bytes.NewReader(compressed))
373
374 rs, err := dec.ReadSeeker(true, nil)
375 fatalErr(t, err)
376
377
378 var tmp = make([]byte, len(input)/2)
379 _, err = io.ReadFull(rs, tmp[:])
380 fatalErr(t, err)
381 wantLen := len(tmp)
382 if wantLen+int(wantOffset) > len(input) {
383 wantLen = len(input) - int(wantOffset)
384 }
385
386 n, err := rs.ReadAt(tmp, wantOffset)
387 if n != wantLen {
388 t.Errorf("got length %d, want %d", n, wantLen)
389 }
390 if err != io.EOF {
391 fatalErr(t, err)
392 }
393 want := want[:n]
394 got := tmp[:n]
395
396
397 if !bytes.Equal(got, want) {
398 t.Error("Result mismatch", wantOffset)
399 }
400 })
401 })
402 }
403 }
404
405 func TestWriterPadding(t *testing.T) {
406 n := 100
407 if testing.Short() {
408 n = 5
409 }
410 rng := rand.New(rand.NewSource(0x1337))
411 d := NewReader(nil)
412
413 for i := 0; i < n; i++ {
414 padding := (rng.Int() & 0xffff) + 1
415 src := make([]byte, (rng.Int()&0xfffff)+1)
416 for i := range src {
417 src[i] = uint8(rng.Uint32()) & 3
418 }
419 var dst bytes.Buffer
420 e := NewWriter(&dst, WriterPadding(padding))
421
422 _, err := io.Copy(e, bytes.NewBuffer(src))
423 if err != nil {
424 t.Fatal(err)
425 }
426 err = e.Close()
427 if err != nil {
428 t.Fatal(err)
429 }
430 err = e.Close()
431 if err != nil {
432 t.Fatal(err)
433 }
434
435 if dst.Len()%padding != 0 {
436 t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, dst.Len(), dst.Len()%padding)
437 }
438 var got bytes.Buffer
439 d.Reset(&dst)
440 _, err = io.Copy(&got, d)
441 if err != nil {
442 t.Fatal(err)
443 }
444 if !bytes.Equal(src, got.Bytes()) {
445 t.Fatal("output mismatch")
446 }
447
448
449 dst.Reset()
450 e.Reset(&dst)
451 _, err = io.Copy(e, bytes.NewBuffer(src))
452 if err != nil {
453 t.Fatal(err)
454 }
455 err = e.Close()
456 if err != nil {
457 t.Fatal(err)
458 }
459 if dst.Len()%padding != 0 {
460 t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, dst.Len(), dst.Len()%padding)
461 }
462
463 got.Reset()
464 d.Reset(&dst)
465 _, err = io.Copy(&got, d)
466 if err != nil {
467 t.Fatal(err)
468 }
469 if !bytes.Equal(src, got.Bytes()) {
470 t.Fatal("output mismatch after reset")
471 }
472 }
473 }
474
475 func TestBigRegularWrites(t *testing.T) {
476 var buf [maxBlockSize * 2]byte
477 dst := bytes.NewBuffer(nil)
478 enc := NewWriter(dst, WriterBestCompression())
479 max := uint8(10)
480 if testing.Short() {
481 max = 4
482 }
483 for n := uint8(0); n < max; n++ {
484 for i := range buf[:] {
485 buf[i] = n
486 }
487
488 _, err := enc.Write(buf[:])
489 if err != nil {
490 t.Fatal(err)
491 }
492 }
493 err := enc.Close()
494 if err != nil {
495 t.Fatal(err)
496 }
497
498 dec := NewReader(dst)
499 _, err = io.Copy(io.Discard, dec)
500 if err != nil {
501 t.Fatal(err)
502 }
503 }
504
505 func TestBigEncodeBuffer(t *testing.T) {
506 const blockSize = 1 << 20
507 var buf [blockSize * 2]byte
508 dst := bytes.NewBuffer(nil)
509 enc := NewWriter(dst, WriterBlockSize(blockSize), WriterBestCompression())
510 max := uint8(10)
511 if testing.Short() {
512 max = 4
513 }
514 for n := uint8(0); n < max; n++ {
515
516 for i := range buf[:] {
517 buf[i] = n
518 }
519 err := enc.EncodeBuffer(buf[:])
520 if err != nil {
521 t.Fatal(err)
522 }
523
524 err = enc.EncodeBuffer(buf[:])
525 if err != nil {
526 t.Fatal(err)
527 }
528 err = enc.Flush()
529 if err != nil {
530 t.Fatal(err)
531 }
532 }
533 err := enc.Close()
534 if err != nil {
535 t.Fatal(err)
536 }
537
538 dec := NewReader(dst)
539 n, err := io.Copy(io.Discard, dec)
540 if err != nil {
541 t.Fatal(err)
542 }
543 t.Log(n)
544 }
545
546 func TestBigEncodeBufferSync(t *testing.T) {
547 const blockSize = 1 << 20
548 var buf [blockSize * 2]byte
549 dst := bytes.NewBuffer(nil)
550 enc := NewWriter(dst, WriterBlockSize(blockSize), WriterConcurrency(1), WriterBestCompression())
551 max := uint8(10)
552 if testing.Short() {
553 max = 2
554 }
555 for n := uint8(0); n < max; n++ {
556
557 for i := range buf[:] {
558 buf[i] = n
559 }
560
561 err := enc.EncodeBuffer(buf[:])
562 if err != nil {
563 t.Fatal(err)
564 }
565 }
566 err := enc.Close()
567 if err != nil {
568 t.Fatal(err)
569 }
570
571 dec := NewReader(dst)
572 n, err := io.Copy(io.Discard, dec)
573 if err != nil {
574 t.Fatal(err)
575 }
576 t.Log(n)
577 }
578
579 func BenchmarkWriterRandom(b *testing.B) {
580 rng := rand.New(rand.NewSource(1))
581
582 data := make([]byte, 4<<20)
583 for i := range data {
584 data[i] = uint8(rng.Intn(256))
585 }
586
587 for name, opts := range testOptions(b) {
588 w := NewWriter(io.Discard, opts...)
589 b.Run(name, func(b *testing.B) {
590 b.ResetTimer()
591 b.ReportAllocs()
592 b.SetBytes(int64(len(data)))
593 for i := 0; i < b.N; i++ {
594 err := w.EncodeBuffer(data)
595 if err != nil {
596 b.Fatal(err)
597 }
598 }
599
600 w.Flush()
601 })
602 w.Close()
603 }
604 }
605
606 func BenchmarkIndexFind(b *testing.B) {
607 fatalErr := func(t testing.TB, err error) {
608 if err != nil {
609 t.Fatal(err)
610 }
611 }
612 for blocks := 1; blocks <= 65536; blocks *= 2 {
613 if blocks == 65536 {
614 blocks = 65535
615 }
616
617 var index Index
618 index.reset(100)
619 index.TotalUncompressed = int64(blocks) * 100
620 index.TotalCompressed = int64(blocks) * 100
621 for i := 0; i < blocks; i++ {
622 err := index.add(int64(i*100), int64(i*100))
623 fatalErr(b, err)
624 }
625
626 rng := rand.New(rand.NewSource(0xabeefcafe))
627 b.Run(fmt.Sprintf("blocks-%d", len(index.info)), func(b *testing.B) {
628 b.ResetTimer()
629 b.ReportAllocs()
630 const prime4bytes = 2654435761
631 rng2 := rng.Int63()
632 for i := 0; i < b.N; i++ {
633 rng2 = ((rng2 + prime4bytes) * prime4bytes) >> 32
634
635 _, _, err := index.Find(rng2 % (int64(blocks) * 100))
636 fatalErr(b, err)
637 }
638 })
639 }
640 }
641
View as plain text