1
2
3
4
5
6 package s2
7
8 import (
9 "bytes"
10 "encoding/binary"
11 "flag"
12 "fmt"
13 "io"
14 "math"
15 "math/rand"
16 "net/http"
17 "os"
18 "path/filepath"
19 "runtime"
20 "strings"
21 "testing"
22
23 "github.com/klauspost/compress/internal/snapref"
24 "github.com/klauspost/compress/zip"
25 "github.com/klauspost/compress/zstd"
26 )
27
28 const maxUint = ^uint(0)
29 const maxInt = int(maxUint >> 1)
30
31 var (
32 download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
33 testdataDir = flag.String("testdataDir", "testdata", "Directory containing the test data")
34 benchdataDir = flag.String("benchdataDir", "testdata/bench", "Directory containing the benchmark data")
35 )
36
37 func TestMaxEncodedLen(t *testing.T) {
38 testSet := []struct {
39 in, out int64
40 }{
41 0: {in: 0, out: 1},
42 1: {in: 1 << 24, out: 1<<24 + int64(binary.PutVarint([]byte{binary.MaxVarintLen32: 0}, int64(1<<24))) + literalExtraSize(1<<24)},
43 2: {in: MaxBlockSize, out: math.MaxUint32},
44 3: {in: math.MaxUint32 - binary.MaxVarintLen32 - literalExtraSize(math.MaxUint32), out: math.MaxUint32},
45 4: {in: math.MaxUint32 - 9, out: -1},
46 5: {in: math.MaxUint32 - 8, out: -1},
47 6: {in: math.MaxUint32 - 7, out: -1},
48 7: {in: math.MaxUint32 - 6, out: -1},
49 8: {in: math.MaxUint32 - 5, out: -1},
50 9: {in: math.MaxUint32 - 4, out: -1},
51 10: {in: math.MaxUint32 - 3, out: -1},
52 11: {in: math.MaxUint32 - 2, out: -1},
53 12: {in: math.MaxUint32 - 1, out: -1},
54 13: {in: math.MaxUint32, out: -1},
55 14: {in: -1, out: -1},
56 15: {in: -2, out: -1},
57 }
58
59 if maxInt == math.MaxInt32 {
60 testSet[2].out = math.MaxInt32
61 testSet[3].out = -1
62 }
63 t.Log("Maxblock:", MaxBlockSize, "reduction:", intReduction)
64
65 for i := int64(0); i < maxBlockSize; i++ {
66 testSet = append(testSet, struct{ in, out int64 }{in: i, out: i + int64(binary.PutVarint([]byte{binary.MaxVarintLen32: 0}, i)) + literalExtraSize(i)})
67 }
68 for i := range testSet {
69 tt := testSet[i]
70 want := tt.out
71 got := int64(MaxEncodedLen(int(tt.in)))
72 if got != want {
73 t.Errorf("test %d: input: %d, want: %d, got: %d", i, tt.in, want, got)
74 }
75 }
76 }
77
// cmp compares got with want and describes the first difference found.
// It returns nil when both slices hold the same bytes, a length error when
// their sizes differ, and otherwise an error naming the first differing index.
func cmp(got, want []byte) error {
	switch {
	case bytes.Equal(got, want):
		return nil
	case len(got) != len(want):
		return fmt.Errorf("got %d bytes, want %d", len(got), len(want))
	}
	for idx, g := range got {
		if w := want[idx]; g != w {
			return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", idx, g, w)
		}
	}
	return nil
}
92
93 func roundtrip(b, ebuf, dbuf []byte) error {
94 bOrg := make([]byte, len(b))
95 copy(bOrg, b)
96 asmEnc := Encode(nil, b)
97 if err := cmp(bOrg, b); err != nil {
98 return fmt.Errorf("src was changed: %v", err)
99 }
100 goEnc := encodeGo(nil, b)
101 if err := cmp(bOrg, b); err != nil {
102 return fmt.Errorf("src was changed: %v", err)
103 }
104
105
106 dGo, err := Decode(nil, goEnc)
107 if err != nil {
108 return fmt.Errorf("decoding error: %v", err)
109 }
110
111 if err := cmp(dGo, b); err != nil {
112 return fmt.Errorf("roundtrip mismatch: %v", err)
113 }
114
115
116 d, err := Decode(nil, asmEnc)
117 if err != nil {
118 return fmt.Errorf("decoding error: %v", err)
119 }
120 if err := cmp(d, b); err != nil {
121 return fmt.Errorf("roundtrip mismatch: %v", err)
122 }
123 d, err = Decode(dbuf, EncodeBetter(ebuf, b))
124 if err != nil {
125 return fmt.Errorf("decoding better error: %v", err)
126 }
127 if err := cmp(d, b); err != nil {
128 return fmt.Errorf("roundtrip better mismatch: %v", err)
129 }
130
131
132 dst := []byte("existing")
133
134 concat, err := ConcatBlocks(dst, Encode(nil, b), EncodeBetter(nil, b), []byte{0}, EncodeSnappy(nil, b))
135 if err != nil {
136 return fmt.Errorf("concat error: %v", err)
137 }
138 if err := cmp(concat[:len(dst)], dst); err != nil {
139 return fmt.Errorf("concat existing mismatch: %v", err)
140 }
141 concat = concat[len(dst):]
142
143 d, _ = Decode(nil, concat)
144 want := append(make([]byte, 0, len(b)*3), b...)
145 want = append(want, b...)
146 want = append(want, b...)
147
148 if err := cmp(d, want); err != nil {
149 return fmt.Errorf("roundtrip concat mismatch: %v", err)
150 }
151
152 return nil
153 }
154
155 func TestEmpty(t *testing.T) {
156 if err := roundtrip(nil, nil, nil); err != nil {
157 t.Fatal(err)
158 }
159 }
160
161 func TestSmallCopy(t *testing.T) {
162 for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
163 for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
164 for i := 0; i < 32; i++ {
165 s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
166 if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
167 t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
168 }
169 }
170 }
171 }
172 }
173
174 func TestSmallRand(t *testing.T) {
175 rng := rand.New(rand.NewSource(1))
176 for n := 1; n < 20000; n += 23 {
177 b := make([]byte, n)
178 for i := range b {
179 b[i] = uint8(rng.Intn(256))
180 }
181 if err := roundtrip(b, nil, nil); err != nil {
182 t.Fatal(err)
183 }
184 }
185 }
186
187 func TestSmallRegular(t *testing.T) {
188 for n := 1; n < 20000; n += 23 {
189 b := make([]byte, n)
190 for i := range b {
191 b[i] = uint8(i%10 + 'a')
192 }
193 if err := roundtrip(b, nil, nil); err != nil {
194 t.Fatal(err)
195 }
196 }
197 }
198
199 func TestSmallRepeat(t *testing.T) {
200 for n := 1; n < 20000; n += 23 {
201 b := make([]byte, n)
202 for i := range b[:n/2] {
203 b[i] = uint8(i * 255 / n)
204 }
205 for i := range b[n/2:] {
206 b[i+n/2] = uint8(i%10 + 'a')
207 }
208 if err := roundtrip(b, nil, nil); err != nil {
209 t.Fatal(err)
210 }
211 }
212 }
213
214 func TestInvalidVarint(t *testing.T) {
215 testCases := []struct {
216 desc string
217 input string
218 }{{
219 "invalid varint, final byte has continuation bit set",
220 "\xff",
221 }, {
222 "invalid varint, value overflows uint64",
223 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
224 }, {
225
226
227
228 "valid varint (as uint64), but value overflows uint32",
229 "\x80\x80\x80\x80\x10",
230 }}
231
232 for _, tc := range testCases {
233 input := []byte(tc.input)
234 if _, err := DecodedLen(input); err != ErrCorrupt {
235 t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
236 }
237 if _, err := Decode(nil, input); err != ErrCorrupt {
238 t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
239 }
240 }
241 }
242
// TestDecode runs Decode over a table of hand-built blocks and checks both
// the decoded bytes and the returned error for each one. It also verifies
// that Decode does not modify the destination buffer at or beyond the
// decoded length announced by the block.
func TestDecode(t *testing.T) {
	// lit40 is the 40-byte string "\x00\x01...\x27", used as a literal payload.
	lit40Bytes := make([]byte, 40)
	for i := range lit40Bytes {
		lit40Bytes[i] = byte(i)
	}
	lit40 := string(lit40Bytes)

	// Each case lists: description, raw block (leading varint decodedLen
	// followed by the tagged elements), expected output, expected error.
	testCases := []struct {
		desc    string
		input   string
		want    string
		wantErr error
	}{{
		`decodedLen=0; valid input`,
		"\x00",
		"",
		nil,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
		"\x03" + "\x08\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
		"\x02" + "\x08\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
		"\x03" + "\x08\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
		"\x28" + "\x9c" + lit40,
		lit40,
		nil,
	}, {
		`decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
		"\x01" + "\xf0",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
		"\x03" + "\xf0\x02\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
		"\x01" + "\xf4\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
		"\x03" + "\xf4\x02\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
		"\x01" + "\xf8\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
		"\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
		"\x01" + "\xfc\x00\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
		"\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
		"\x04" + "\xfc\x02\x00\x00\x00\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
		"\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
		"\x04" + "\x01",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
		"\x04" + "\x02\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy4, 4 extra length|offset bytes; not enough extra bytes`,
		"\x04" + "\x03\x00\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
		"\x04" + "\x0cabcd",
		"abcd",
		nil,
	}, {
		`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
		"\x0d" + "\x0cabcd" + "\x15\x04",
		"abcdabcdabcda",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x04",
		"abcdabcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x02",
		"abcdcdcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x01",
		"abcddddd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; repeat offset as first match`,
		"\x08" + "\x0cabcd" + "\x01\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; literal: 'z'; tagCopy1; length=4 offset=0; repeat offset as second match`,
		"\x0d" + "\x0cabcd" + "\x01\x01" + "\x00z" + "\x01\x00",
		"abcdddddzzzzz",
		nil,
	}, {
		`decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
		"\x09" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
		"\x08" + "\x0cabcd" + "\x01\x05",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
		"\x07" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
		"\x06" + "\x0cabcd" + "\x06\x03\x00",
		"abcdbc",
		nil,
	}, {
		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy4; length=2 offset=3; valid input`,
		"\x06" + "\x0cabcd" + "\x07\x03\x00\x00\x00",
		"abcdbc",
		nil,
	}}

	const (
		// notPresentBase and notPresentLen describe the sentinel byte range
		// [0xa0, 0xa0+37) = [0xa0, 0xc5). dBuf is pre-filled with a repeating
		// pattern of these values before every Decode call; afterwards every
		// position at or beyond dLen must still hold its sentinel, otherwise
		// Decode wrote past the announced length. The test rejects inputs
		// that contain a sentinel byte.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)

	var dBuf [100]byte
loop:
	for i, tc := range testCases {
		input := []byte(tc.input)
		// Reject table entries that themselves contain sentinel bytes.
		for _, x := range input {
			if notPresentBase <= x && x < notPresentBase+notPresentLen {
				t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
				continue loop
			}
		}

		// dLen is the decoded length announced by the block's leading uvarint.
		dLen, n := binary.Uvarint(input)
		if n <= 0 {
			t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
			continue
		}
		if dLen > uint64(len(dBuf)) {
			t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
			continue
		}

		// Fill the whole destination with the sentinel pattern.
		for j := range dBuf {
			dBuf[j] = byte(notPresentBase + j%notPresentLen)
		}
		g, gotErr := Decode(dBuf[:], input)
		if got := string(g); got != tc.want || gotErr != tc.wantErr {
			t.Errorf("#%d (%s):\ngot %q, %v\nwant %q, %v",
				i, tc.desc, got, gotErr, tc.want, tc.wantErr)
			continue
		}
		// Everything from index dLen onwards must still be the sentinel.
		for j, x := range dBuf {
			if uint64(j) < dLen {
				continue
			}
			if w := byte(notPresentBase + j%notPresentLen); x != w {
				t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
					i, tc.desc, j, x, w, dBuf)
				continue loop
			}
		}
	}
}
462
463 func TestDecodeCopy4(t *testing.T) {
464 dots := strings.Repeat(".", 65536)
465
466 input := strings.Join([]string{
467 "\x89\x80\x04",
468 "\x0cpqrs",
469 "\xf4\xff\xff" + dots,
470 "\x13\x04\x00\x01\x00",
471 }, "")
472
473 gotBytes, err := Decode(nil, []byte(input))
474 if err != nil {
475 t.Fatal(err)
476 }
477 got := string(gotBytes)
478 want := "pqrs" + dots + "pqrs."
479 if len(got) != len(want) {
480 t.Fatalf("got %d bytes, want %d", len(got), len(want))
481 }
482 if got != want {
483 for i := 0; i < len(got); i++ {
484 if g, w := got[i], want[i]; g != w {
485 t.Fatalf("byte #%d: got %#02x, want %#02x", i, g, w)
486 }
487 }
488 }
489 }
490
491
492
// TestDecodeLengthOffset decodes blocks of the shape
//
//	literal(prefix) + tagCopy2(length, offset) + optional literal(suffix)
//
// for every length and offset in [1, 18] and every suffixLen in [0, 18], and
// compares the output with a byte-by-byte reference expansion. It also checks
// that Decode leaves the destination untouched at and beyond totalLen.
func TestDecodeLengthOffset(t *testing.T) {
	const (
		prefix = "abcdefghijklmnopqr"
		suffix = "ABCDEFGHIJKLMNOPQR"

		// notPresentBase and notPresentLen describe the sentinel byte range
		// [0xa0, 0xa0+37) = [0xa0, 0xc5). gotBuf is pre-filled with a
		// repeating pattern of these values; positions at or beyond totalLen
		// must still hold them after Decode. The test rejects any input or
		// expected output that contains a sentinel byte.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)
	var gotBuf, wantBuf, inputBuf [128]byte
	for length := 1; length <= 18; length++ {
		for offset := 1; offset <= 18; offset++ {
		loop:
			for suffixLen := 0; suffixLen <= 18; suffixLen++ {
				totalLen := len(prefix) + length + suffixLen

				// Assemble the block: uvarint total length, prefix literal,
				// a three-byte tagCopy2 element (tag, offset low byte, 0x00)
				// and, when requested, the suffix literal.
				inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
				inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
				inputLen++
				inputLen += copy(inputBuf[inputLen:], prefix)
				inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
				inputBuf[inputLen+1] = byte(offset)
				inputBuf[inputLen+2] = 0x00
				inputLen += 3
				if suffixLen > 0 {
					inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
					inputLen++
					inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
				}
				input := inputBuf[:inputLen]

				// Pre-fill the destination with the sentinel pattern.
				for i := range gotBuf {
					gotBuf[i] = byte(notPresentBase + i%notPresentLen)
				}
				got, err := Decode(gotBuf[:], input)
				if err != nil {
					t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
					continue
				}

				// Reference expansion. The copy is done one byte at a time so
				// that a copy overlapping its own output (offset < length)
				// reads bytes it has just written.
				wantLen := 0
				wantLen += copy(wantBuf[wantLen:], prefix)
				for i := 0; i < length; i++ {
					wantBuf[wantLen] = wantBuf[wantLen-offset]
					wantLen++
				}
				wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
				want := wantBuf[:wantLen]

				// The encoded input must not contain sentinel bytes.
				for _, x := range input {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
							length, offset, suffixLen, x, input)
						continue loop
					}
				}
				// Nothing at or beyond totalLen may have been overwritten.
				for i, x := range gotBuf {
					if i < totalLen {
						continue
					}
					if w := byte(notPresentBase + i%notPresentLen); x != w {
						t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
							"Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
							length, offset, suffixLen, totalLen, i, x, w, gotBuf)
						continue loop
					}
				}
				// The expected output must not contain sentinel bytes either.
				for _, x := range want {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
							length, offset, suffixLen, x, want)
						continue loop
					}
				}

				if !bytes.Equal(got, want) {
					t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot % x\nwant % x",
						length, offset, suffixLen, input, got, want)
					continue
				}
			}
		}
	}
}
584
// File names of the golden fixture pair.
const (
	// goldenText is the name of the uncompressed reference text.
	goldenText = "Mark.Twain-Tom.Sawyer.txt"
	// goldenCompressed is goldenText with the ".rawsnappy" suffix appended;
	// TestDecodeGoldenInput expects it to decode to the bytes of goldenText.
	goldenCompressed = goldenText + ".rawsnappy"
)
589
590 func TestDecodeGoldenInput(t *testing.T) {
591 tDir := filepath.FromSlash(*testdataDir)
592 src, err := os.ReadFile(filepath.Join(tDir, goldenCompressed))
593 if err != nil {
594 t.Fatalf("ReadFile: %v", err)
595 }
596 got, err := Decode(nil, src)
597 if err != nil {
598 t.Fatalf("Decode: %v", err)
599 }
600 want, err := os.ReadFile(filepath.Join(tDir, goldenText))
601 if err != nil {
602 t.Fatalf("ReadFile: %v", err)
603 }
604 if err := cmp(got, want); err != nil {
605 t.Fatal(err)
606 }
607 }
608
609
610
// TestSlowForwardCopyOverrun simulates, with plain integer arithmetic, a
// forward copy that proceeds in 8-byte steps. For every length and offset in
// [1, 18) it checks that the write cursor ends at base+length and that the
// highest position touched overshoots that point by at most 10 bytes.
func TestSlowForwardCopyOverrun(t *testing.T) {
	const base = 100

	for length := 1; length < 18; length++ {
		for offset := 1; offset < 18; offset++ {
			var (
				peak      = base // highest end position of any 8-byte step so far
				cursor    = base
				remaining = length
				dist      = offset
			)
			// touch records an 8-byte step starting at pos.
			touch := func(pos int) {
				if end := pos + 8; peak < end {
					peak = end
				}
			}

			// Phase 1: keep doubling the distance until it is at least 8.
			for dist < 8 {
				touch(cursor)
				remaining -= dist
				cursor += dist
				dist += dist
			}

			// Phase 2: the cursor jumps straight to its final position
			// (remaining may be negative here, moving it backwards).
			step := cursor
			cursor += remaining

			// Phase 3: cover what is left in 8-byte steps.
			for ; remaining > 0; remaining -= 8 {
				touch(step)
				step += 8
			}

			dWant := base + length
			overrun := peak - dWant
			if d := cursor; d != dWant || overrun < 0 || 10 < overrun {
				t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
					length, offset, d, overrun, dWant)
			}
		}
	}
}
653
654
655 func TestEncoderSkip(t *testing.T) {
656 for ti, origLen := range []int{10 << 10, 256 << 10, 2 << 20, 8 << 20} {
657 if testing.Short() && ti > 1 {
658 break
659 }
660 t.Run(fmt.Sprint(origLen), func(t *testing.T) {
661 src := make([]byte, origLen)
662 rng := rand.New(rand.NewSource(1))
663 firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
664 bonus := secondHalf[len(secondHalf)-origLen/10:]
665 for i := range firstHalf {
666
667 firstHalf[i] = uint8(rng.Intn(256))
668 }
669 for i := range secondHalf {
670
671 secondHalf[i] = uint8(i & 32)
672 }
673 for i := range bonus {
674
675 bonus[i] = uint8(rng.Intn(256))
676 }
677 var dst bytes.Buffer
678 enc := NewWriter(&dst, WriterBlockSize(64<<10))
679 _, err := io.Copy(enc, bytes.NewBuffer(src))
680 if err != nil {
681 t.Fatal(err)
682 }
683 err = enc.Close()
684 if err != nil {
685 t.Fatal(err)
686 }
687 compressed := dst.Bytes()
688 dec := NewReader(nil)
689 for i := 0; i < len(src); i += len(src)/20 - 17 {
690 t.Run(fmt.Sprint("skip-", i), func(t *testing.T) {
691 want := src[i:]
692 dec.Reset(bytes.NewBuffer(compressed))
693
694 read, err := io.CopyN(io.Discard, dec, int64(len(want)/10))
695 if err != nil {
696 t.Fatal(err)
697 }
698
699 want = want[read:]
700 err = dec.Skip(int64(i))
701 if err != nil {
702 t.Fatal(err)
703 }
704 got, err := io.ReadAll(dec)
705 if err != nil {
706 t.Errorf("Skipping %d returned error: %v", i, err)
707 return
708 }
709 if !bytes.Equal(want, got) {
710 t.Log("got len:", len(got))
711 t.Log("want len:", len(want))
712 t.Errorf("Skipping %d did not return correct data (content mismatch)", i)
713 return
714 }
715 })
716 if testing.Short() && i > 0 {
717 return
718 }
719 }
720 })
721 }
722 }
723
724
725
726
727 func TestEncodeNoiseThenRepeats(t *testing.T) {
728 for _, origLen := range []int{256 * 1024, 2048 * 1024} {
729 src := make([]byte, origLen)
730 rng := rand.New(rand.NewSource(1))
731 firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
732 for i := range firstHalf {
733 firstHalf[i] = uint8(rng.Intn(256))
734 }
735 for i := range secondHalf {
736 secondHalf[i] = uint8(i >> 8)
737 }
738 dst := Encode(nil, src)
739 if got, want := len(dst), origLen*3/4; got >= want {
740 t.Fatalf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
741 }
742 t.Log(len(dst))
743 }
744 }
745
746 func TestFramingFormat(t *testing.T) {
747
748
749
750 src := make([]byte, 1e6)
751 rng := rand.New(rand.NewSource(1))
752 for i := 0; i < 10; i++ {
753 if i%2 == 0 {
754 for j := 0; j < 1e5; j++ {
755 src[1e5*i+j] = uint8(rng.Intn(256))
756 }
757 } else {
758 for j := 0; j < 1e5; j++ {
759 src[1e5*i+j] = uint8(i)
760 }
761 }
762 }
763
764 buf := new(bytes.Buffer)
765 bw := NewWriter(buf)
766 if _, err := bw.Write(src); err != nil {
767 t.Fatalf("Write: encoding: %v", err)
768 }
769 err := bw.Close()
770 if err != nil {
771 t.Fatal(err)
772 }
773 dst, err := io.ReadAll(NewReader(buf))
774 if err != nil {
775 t.Fatalf("ReadAll: decoding: %v", err)
776 }
777 if err := cmp(dst, src); err != nil {
778 t.Fatal(err)
779 }
780 }
781
782 func TestFramingFormatBetter(t *testing.T) {
783
784
785
786 src := make([]byte, 1e6)
787 rng := rand.New(rand.NewSource(1))
788 for i := 0; i < 10; i++ {
789 if i%2 == 0 {
790 for j := 0; j < 1e5; j++ {
791 src[1e5*i+j] = uint8(rng.Intn(256))
792 }
793 } else {
794 for j := 0; j < 1e5; j++ {
795 src[1e5*i+j] = uint8(i)
796 }
797 }
798 }
799
800 buf := new(bytes.Buffer)
801 bw := NewWriter(buf, WriterBetterCompression())
802 if _, err := bw.Write(src); err != nil {
803 t.Fatalf("Write: encoding: %v", err)
804 }
805 err := bw.Close()
806 if err != nil {
807 t.Fatal(err)
808 }
809 dst, err := io.ReadAll(NewReader(buf))
810 if err != nil {
811 t.Fatalf("ReadAll: decoding: %v", err)
812 }
813 if err := cmp(dst, src); err != nil {
814 t.Fatal(err)
815 }
816 }
817
818 func TestEmitLiteral(t *testing.T) {
819 testCases := []struct {
820 length int
821 want string
822 }{
823 {1, "\x00"},
824 {2, "\x04"},
825 {59, "\xe8"},
826 {60, "\xec"},
827 {61, "\xf0\x3c"},
828 {62, "\xf0\x3d"},
829 {254, "\xf0\xfd"},
830 {255, "\xf0\xfe"},
831 {256, "\xf0\xff"},
832 {257, "\xf4\x00\x01"},
833 {65534, "\xf4\xfd\xff"},
834 {65535, "\xf4\xfe\xff"},
835 {65536, "\xf4\xff\xff"},
836 }
837
838 dst := make([]byte, 70000)
839 nines := bytes.Repeat([]byte{0x99}, 65536)
840 for _, tc := range testCases {
841 lit := nines[:tc.length]
842 n := emitLiteral(dst, lit)
843 if !bytes.HasSuffix(dst[:n], lit) {
844 t.Errorf("length=%d: did not end with that many literal bytes", tc.length)
845 continue
846 }
847 got := string(dst[:n-tc.length])
848 if got != tc.want {
849 t.Errorf("length=%d:\ngot % x\nwant % x", tc.length, got, tc.want)
850 continue
851 }
852 }
853 }
854
// TestEmitCopy checks the exact bytes emitCopy writes into dst for a table of
// (offset, length) pairs. The table is grouped by offset; within each group
// the lengths walk across the points where the expected encoding changes size.
func TestEmitCopy(t *testing.T) {
	testCases := []struct {
		offset int
		length int
		want   string // exact bytes expected in dst[:n]
	}{
		// offset 8. (The literal 04 is octal notation for 4.)
		{8, 04, "\x01\x08"},
		{8, 11, "\x1d\x08"},
		{8, 12, "\x2e\x08\x00"},
		{8, 13, "\x32\x08\x00"},
		{8, 59, "\xea\x08\x00"},
		{8, 60, "\xee\x08\x00"},
		{8, 61, "\xf2\x08\x00"},
		{8, 62, "\xf6\x08\x00"},
		{8, 63, "\xfa\x08\x00"},
		{8, 64, "\xfe\x08\x00"},
		{8, 65, "\x11\b\x15\x001"},
		{8, 66, "\x11\b\x15\x002"},
		{8, 67, "\x11\b\x15\x003"},
		{8, 68, "\x11\b\x15\x004"},
		{8, 69, "\x11\b\x15\x005"},
		{8, 80, "\x11\b\x15\x00@"},
		{8, 800, "\x11\b\x19\x00\x14\x02"},
		{8, 800000, "\x11\b\x1d\x00\xf44\v"},

		// offset 256.
		{256, 04, "\x21\x00"},
		{256, 11, "\x3d\x00"},
		{256, 12, "\x2e\x00\x01"},
		{256, 13, "\x32\x00\x01"},
		{256, 59, "\xea\x00\x01"},
		{256, 60, "\xee\x00\x01"},
		{256, 61, "\xf2\x00\x01"},
		{256, 62, "\xf6\x00\x01"},
		{256, 63, "\xfa\x00\x01"},
		{256, 64, "\xfe\x00\x01"},
		{256, 65, "1\x00\x15\x001"},
		{256, 66, "1\x00\x15\x002"},
		{256, 67, "1\x00\x15\x003"},
		{256, 68, "1\x00\x15\x004"},
		{256, 69, "1\x00\x15\x005"},
		{256, 80, "1\x00\x15\x00@"},
		{256, 800, "1\x00\x19\x00\x14\x02"},
		{256, 80000, "1\x00\x1d\x00t8\x00"},

		// offset 2048.
		{2048, 04, "\x0e\x00\x08"},
		{2048, 11, "\x2a\x00\x08"},
		{2048, 12, "\x2e\x00\x08"},
		{2048, 13, "\x32\x00\x08"},
		{2048, 59, "\xea\x00\x08"},
		{2048, 60, "\xee\x00\x08"},
		{2048, 61, "\xf2\x00\x08"},
		{2048, 62, "\xf6\x00\x08"},
		{2048, 63, "\xfa\x00\x08"},
		{2048, 64, "\xfe\x00\x08"},
		{2048, 65, "\xee\x00\x08\x05\x00"},
		{2048, 66, "\xee\x00\x08\x09\x00"},
		{2048, 67, "\xee\x00\x08\x0d\x00"},
		{2048, 68, "\xee\x00\x08\x11\x00"},
		{2048, 69, "\xee\x00\x08\x15\x00\x01"},
		{2048, 80, "\xee\x00\x08\x15\x00\x0c"},
		{2048, 800, "\xee\x00\x08\x19\x00\xe0\x01"},
		{2048, 80000, "\xee\x00\x08\x1d\x00\x40\x38\x00"},

		// offset 204800, which does not fit in 16 bits.
		{204800, 04, "\x0f\x00\x20\x03\x00"},
		{204800, 65, "\xff\x00\x20\x03\x00\x03\x00\x20\x03\x00"},
		{204800, 69, "\xff\x00\x20\x03\x00\x05\x00"},
		{204800, 800, "\xff\x00\x20\x03\x00\x19\x00\xdc\x01"},
		{204800, 80000, "\xff\x00\x20\x03\x00\x1d\x00\x3c\x38\x00"},
	}

	// dst is reused for every case; only the first n bytes are inspected.
	dst := make([]byte, 1024)
	for _, tc := range testCases {
		n := emitCopy(dst, tc.offset, tc.length)
		got := string(dst[:n])
		if got != tc.want {
			t.Errorf("offset=%d, length=%d:\ngot %q\nwant %q", tc.offset, tc.length, got, tc.want)
		}
	}
}
934
935 func TestNewWriter(t *testing.T) {
936
937
938
939
940 inputs := [][]byte{
941 bytes.Repeat([]byte{'a'}, 40000),
942 bytes.Repeat([]byte{'b'}, 150000),
943 bytes.Repeat([]byte{'c'}, 60000),
944 bytes.Repeat([]byte{'d'}, 120000),
945 bytes.Repeat([]byte{'e'}, 30000),
946 }
947 loop:
948 for i := 0; i < 1<<uint(len(inputs)); i++ {
949 var want []byte
950 buf := new(bytes.Buffer)
951 w := NewWriter(buf)
952 for j, input := range inputs {
953 if i&(1<<uint(j)) == 0 {
954 continue
955 }
956 if _, err := w.Write(input); err != nil {
957 t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
958 continue loop
959 }
960 want = append(want, input...)
961 }
962 if err := w.Close(); err != nil {
963 t.Errorf("i=%#02x: Close: %v", i, err)
964 continue
965 }
966 got, err := io.ReadAll(NewReader(buf))
967 if err != nil {
968 t.Errorf("i=%#02x: ReadAll: %v", i, err)
969 continue
970 }
971 if err := cmp(got, want); err != nil {
972 t.Errorf("i=%#02x: %v", i, err)
973 continue
974 }
975 }
976 }
977
978 func TestFlush(t *testing.T) {
979 buf := new(bytes.Buffer)
980 w := NewWriter(buf)
981 defer w.Close()
982 if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
983 t.Fatalf("Write: %v", err)
984 }
985 if n := buf.Len(); n != 0 {
986 t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
987 }
988 if err := w.Flush(); err != nil {
989 t.Fatalf("Flush: %v", err)
990 }
991 if n := buf.Len(); n == 0 {
992 t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
993 }
994 }
995
996 func TestReaderUncompressedDataOK(t *testing.T) {
997 r := NewReader(strings.NewReader(magicChunk +
998 "\x01\x08\x00\x00" +
999 "\x68\x10\xe6\xb6" +
1000 "\x61\x62\x63\x64",
1001 ))
1002 g, err := io.ReadAll(r)
1003 if err != nil {
1004 t.Fatal(err)
1005 }
1006 if got, want := string(g), "abcd"; got != want {
1007 t.Fatalf("got %q, want %q", got, want)
1008 }
1009 }
1010
1011 func TestReaderUncompressedDataNoPayload(t *testing.T) {
1012 r := NewReader(strings.NewReader(magicChunk +
1013 "\x01\x04\x00\x00" +
1014 "",
1015 ))
1016 if _, err := io.ReadAll(r); err != ErrCorrupt {
1017 t.Fatalf("got %v, want %v", err, ErrCorrupt)
1018 }
1019 }
1020
1021 func TestReaderUncompressedDataTooLong(t *testing.T) {
1022
1023 n := maxBlockSize + checksumSize
1024 n32 := uint32(n)
1025 r := NewReader(strings.NewReader(magicChunk +
1026
1027 string([]byte{chunkTypeUncompressedData, uint8(n32), uint8(n32 >> 8), uint8(n32 >> 16)}) +
1028 strings.Repeat("\x00", n),
1029 ))
1030
1031 if _, err := io.ReadAll(r); err != ErrCRC {
1032 t.Fatalf("got %v, want %v", err, ErrCRC)
1033 }
1034
1035
1036 n++
1037 n32 = uint32(n)
1038 r = NewReader(strings.NewReader(magicChunk +
1039
1040 string([]byte{chunkTypeUncompressedData, uint8(n32), uint8(n32 >> 8), uint8(n32 >> 16)}) +
1041 strings.Repeat("\x00", n),
1042 ))
1043 if _, err := io.ReadAll(r); err != ErrCorrupt {
1044 t.Fatalf("got %v, want %v", err, ErrCorrupt)
1045 }
1046 }
1047
1048 func TestReaderReset(t *testing.T) {
1049 gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
1050 buf := new(bytes.Buffer)
1051 w := NewWriter(buf)
1052 _, err := w.Write(gold)
1053 if err != nil {
1054 t.Fatalf("Write: %v", err)
1055 }
1056 err = w.Close()
1057 if err != nil {
1058 t.Fatalf("Close: %v", err)
1059 }
1060
1061 encoded, invalid, partial := buf.String(), "invalid", "partial"
1062 r := NewReader(nil)
1063 for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
1064 if s == partial {
1065 r.Reset(strings.NewReader(encoded))
1066 if _, err := r.Read(make([]byte, 101)); err != nil {
1067 t.Errorf("#%d: %v", i, err)
1068 continue
1069 }
1070 continue
1071 }
1072 r.Reset(strings.NewReader(s))
1073 got, err := io.ReadAll(r)
1074 switch s {
1075 case encoded:
1076 if err != nil {
1077 t.Errorf("#%d: %v", i, err)
1078 continue
1079 }
1080 if err := cmp(got, gold); err != nil {
1081 t.Errorf("#%d: %v", i, err)
1082 continue
1083 }
1084 case invalid:
1085 if err == nil {
1086 t.Errorf("#%d: got nil error, want non-nil", i)
1087 continue
1088 }
1089 }
1090 }
1091 }
1092
1093 func TestWriterReset(t *testing.T) {
1094 gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
1095 const n = 20
1096 w := NewWriter(nil)
1097 defer w.Close()
1098
1099 var gots, wants [][]byte
1100 failed := false
1101 for i := 0; i <= n; i++ {
1102 buf := new(bytes.Buffer)
1103 w.Reset(buf)
1104 want := gold[:len(gold)*i/n]
1105 if _, err := w.Write(want); err != nil {
1106 t.Errorf("#%d: Write: %v", i, err)
1107 failed = true
1108 continue
1109 }
1110 if err := w.Flush(); err != nil {
1111 t.Errorf("#%d: Flush: %v", i, err)
1112 failed = true
1113 got, err := io.ReadAll(NewReader(buf))
1114 if err != nil {
1115 t.Errorf("#%d: ReadAll: %v", i, err)
1116 failed = true
1117 continue
1118 }
1119 gots = append(gots, got)
1120 wants = append(wants, want)
1121 }
1122 if failed {
1123 continue
1124 }
1125 for i := range gots {
1126 if err := cmp(gots[i], wants[i]); err != nil {
1127 t.Errorf("#%d: %v", i, err)
1128 }
1129 }
1130 }
1131 }
1132
1133 func TestWriterResetWithoutFlush(t *testing.T) {
1134 buf0 := new(bytes.Buffer)
1135 buf1 := new(bytes.Buffer)
1136 w := NewWriter(buf0)
1137 if _, err := w.Write([]byte("xxx")); err != nil {
1138 t.Fatalf("Write #0: %v", err)
1139 }
1140
1141 w.Reset(buf1)
1142 if _, err := w.Write([]byte("yyy")); err != nil {
1143 t.Fatalf("Write #1: %v", err)
1144 }
1145 if err := w.Flush(); err != nil {
1146 t.Fatalf("Flush: %v", err)
1147 }
1148 got, err := io.ReadAll(NewReader(buf1))
1149 if err != nil {
1150 t.Fatalf("ReadAll: %v", err)
1151 }
1152 if err := cmp(got, []byte("yyy")); err != nil {
1153 t.Fatal(err)
1154 }
1155 }
1156
// writeCounter is an io.Writer that discards its input and counts how many
// times Write has been called.
type writeCounter int

// Write increments the counter and reports all of p as written.
func (c *writeCounter) Write(p []byte) (int, error) {
	*c += 1
	n := len(p)
	return n, nil
}
1163
1164
1165
1166
1167 func TestNumUnderlyingWrites(t *testing.T) {
1168 testCases := []struct {
1169 input []byte
1170 want int
1171 }{
1172
1173 {bytes.Repeat([]byte{'x'}, 100), 2},
1174
1175 {bytes.Repeat([]byte{'y'}, 100), 1},
1176 {[]byte("ABCDEFGHIJKLMNOPQRST"), 1},
1177 }
1178
1179
1180 if runtime.GOMAXPROCS(0) == 1 {
1181 testCases[2].want++
1182 }
1183 var c writeCounter
1184 w := NewWriter(&c)
1185 defer w.Close()
1186 for i, tc := range testCases {
1187 c = 0
1188 if _, err := w.Write(tc.input); err != nil {
1189 t.Errorf("#%d: Write: %v", i, err)
1190 continue
1191 }
1192 if err := w.Flush(); err != nil {
1193 t.Errorf("#%d: Flush: %v", i, err)
1194 continue
1195 }
1196 if int(c) != tc.want {
1197 t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
1198 continue
1199 }
1200 }
1201 }
1202
1203 func testWriterRoundtrip(t *testing.T, src []byte, opts ...WriterOption) {
1204 var buf bytes.Buffer
1205 enc := NewWriter(&buf, opts...)
1206 n, err := enc.Write(src)
1207 if err != nil {
1208 t.Error(err)
1209 return
1210 }
1211 if n != len(src) {
1212 t.Error(io.ErrShortWrite)
1213 return
1214 }
1215 err = enc.Flush()
1216 if err != nil {
1217 t.Error(err)
1218 return
1219 }
1220
1221 err = enc.Flush()
1222 if err != nil {
1223 t.Error(err)
1224 return
1225 }
1226 err = enc.Close()
1227 if err != nil {
1228 t.Error(err)
1229 return
1230 }
1231
1232 t.Logf("encoded to %d -> %d bytes", len(src), buf.Len())
1233 dec := NewReader(&buf)
1234 decoded, err := io.ReadAll(dec)
1235 if err != nil {
1236 t.Error(err)
1237 return
1238 }
1239 if len(decoded) != len(src) {
1240 t.Error("decoded len:", len(decoded), "!=", len(src))
1241 return
1242 }
1243 err = cmp(src, decoded)
1244 if err != nil {
1245 t.Error(err)
1246 }
1247 }
1248
1249 func testBlockRoundtrip(t *testing.T, src []byte) {
1250 dst := Encode(nil, src)
1251 t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
1252 decoded, err := Decode(nil, dst)
1253 if err != nil {
1254 t.Error(err)
1255 return
1256 }
1257 if len(decoded) != len(src) {
1258 t.Error("decoded len:", len(decoded), "!=", len(src))
1259 return
1260 }
1261 err = cmp(decoded, src)
1262 if err != nil {
1263 t.Error(err)
1264 }
1265 }
1266
1267 func testBetterBlockRoundtrip(t *testing.T, src []byte) {
1268 dst := EncodeBetter(nil, src)
1269 t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
1270 decoded, err := Decode(nil, dst)
1271 if err != nil {
1272 t.Error(err)
1273 return
1274 }
1275 if len(decoded) != len(src) {
1276 t.Error("decoded len:", len(decoded), "!=", len(src))
1277 return
1278 }
1279 err = cmp(src, decoded)
1280 if err != nil {
1281 t.Error(err)
1282 }
1283 }
1284
1285 func testBestBlockRoundtrip(t *testing.T, src []byte) {
1286 dst := EncodeBest(nil, src)
1287 t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
1288 decoded, err := Decode(nil, dst)
1289 if err != nil {
1290 t.Error(err)
1291 return
1292 }
1293 if len(decoded) != len(src) {
1294 t.Error("decoded len:", len(decoded), "!=", len(src))
1295 return
1296 }
1297 err = cmp(src, decoded)
1298 if err != nil {
1299 t.Error(err)
1300 }
1301 }
1302
1303 func testSnappyBlockRoundtrip(t *testing.T, src []byte) {
1304
1305 t.Run("regular", func(t *testing.T) {
1306 dst := EncodeSnappy(nil, src)
1307 t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
1308 decoded, err := snapref.Decode(nil, dst)
1309 if err != nil {
1310 t.Error(err)
1311 return
1312 }
1313 if len(decoded) != len(src) {
1314 t.Error("decoded len:", len(decoded), "!=", len(src))
1315 return
1316 }
1317 err = cmp(src, decoded)
1318 if err != nil {
1319 t.Error(err)
1320 }
1321 })
1322 t.Run("better", func(t *testing.T) {
1323 dst := EncodeSnappyBetter(nil, src)
1324 t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
1325 decoded, err := snapref.Decode(nil, dst)
1326 if err != nil {
1327 t.Error(err)
1328 return
1329 }
1330 if len(decoded) != len(src) {
1331 t.Error("decoded len:", len(decoded), "!=", len(src))
1332 return
1333 }
1334 err = cmp(src, decoded)
1335 if err != nil {
1336 t.Error(err)
1337 }
1338 })
1339 t.Run("best", func(t *testing.T) {
1340 dst := EncodeSnappyBest(nil, src)
1341 t.Logf("encoded to %d -> %d bytes", len(src), len(dst))
1342 decoded, err := snapref.Decode(nil, dst)
1343 if err != nil {
1344 t.Error(err)
1345 return
1346 }
1347 if len(decoded) != len(src) {
1348 t.Error("decoded len:", len(decoded), "!=", len(src))
1349 return
1350 }
1351 err = cmp(src, decoded)
1352 if err != nil {
1353 t.Error(err)
1354 }
1355 })
1356 }
1357
1358 func testSnappyDecode(t *testing.T, src []byte) {
1359 var buf bytes.Buffer
1360 enc := snapref.NewBufferedWriter(&buf)
1361 n, err := enc.Write(src)
1362 if err != nil {
1363 t.Error(err)
1364 return
1365 }
1366 if n != len(src) {
1367 t.Error(io.ErrShortWrite)
1368 return
1369 }
1370 enc.Close()
1371 t.Logf("encoded to %d -> %d bytes", len(src), buf.Len())
1372 dec := NewReader(&buf)
1373 decoded, err := io.ReadAll(dec)
1374 if err != nil {
1375 t.Error(err)
1376 return
1377 }
1378 if len(decoded) != len(src) {
1379 t.Error("decoded len:", len(decoded), "!=", len(src))
1380 return
1381 }
1382 err = cmp(src, decoded)
1383 if err != nil {
1384 t.Error(err)
1385 }
1386 }
1387
1388 func benchDecode(b *testing.B, src []byte) {
1389 b.Run("default", func(b *testing.B) {
1390 encoded := Encode(nil, src)
1391 b.SetBytes(int64(len(src)))
1392 b.ReportAllocs()
1393 b.ResetTimer()
1394 for i := 0; i < b.N; i++ {
1395 _, err := Decode(src[:0], encoded)
1396 if err != nil {
1397 b.Fatal(err)
1398 }
1399 }
1400 b.ReportMetric(100*float64(len(encoded))/float64(len(src)), "pct")
1401 })
1402 b.Run("better", func(b *testing.B) {
1403 encoded := EncodeBetter(nil, src)
1404 b.SetBytes(int64(len(src)))
1405 b.ReportAllocs()
1406 b.ResetTimer()
1407 for i := 0; i < b.N; i++ {
1408 _, err := Decode(src[:0], encoded)
1409 if err != nil {
1410 b.Fatal(err)
1411 }
1412 }
1413 b.ReportMetric(100*float64(len(encoded))/float64(len(src)), "pct")
1414 })
1415 b.Run("best", func(b *testing.B) {
1416 encoded := EncodeBest(nil, src)
1417 b.SetBytes(int64(len(src)))
1418 b.ReportAllocs()
1419 b.ResetTimer()
1420 for i := 0; i < b.N; i++ {
1421 _, err := Decode(src[:0], encoded)
1422 if err != nil {
1423 b.Fatal(err)
1424 }
1425 }
1426 b.ReportMetric(100*float64(len(encoded))/float64(len(src)), "pct")
1427 })
1428 b.Run("snappy-input", func(b *testing.B) {
1429 encoded := snapref.Encode(nil, src)
1430 b.SetBytes(int64(len(src)))
1431 b.ReportAllocs()
1432 b.ResetTimer()
1433 for i := 0; i < b.N; i++ {
1434 _, err := Decode(src[:0], encoded)
1435 if err != nil {
1436 b.Fatal(err)
1437 }
1438 }
1439 b.ReportMetric(100*float64(len(encoded))/float64(len(src)), "pct")
1440 })
1441 }
1442
1443 func benchEncode(b *testing.B, src []byte) {
1444
1445 dst := make([]byte, snapref.MaxEncodedLen(len(src)))
1446 b.ResetTimer()
1447 b.Run("default", func(b *testing.B) {
1448 b.SetBytes(int64(len(src)))
1449 b.ReportAllocs()
1450 for i := 0; i < b.N; i++ {
1451 Encode(dst, src)
1452 }
1453 b.ReportMetric(100*float64(len(Encode(dst, src)))/float64(len(src)), "pct")
1454 })
1455 b.Run("better", func(b *testing.B) {
1456 b.SetBytes(int64(len(src)))
1457 b.ReportAllocs()
1458 for i := 0; i < b.N; i++ {
1459 EncodeBetter(dst, src)
1460 }
1461 b.ReportMetric(100*float64(len(EncodeBetter(dst, src)))/float64(len(src)), "pct")
1462 })
1463 b.Run("best", func(b *testing.B) {
1464 b.SetBytes(int64(len(src)))
1465 b.ReportAllocs()
1466 for i := 0; i < b.N; i++ {
1467 EncodeBest(dst, src)
1468 }
1469 b.ReportMetric(100*float64(len(EncodeBest(dst, src)))/float64(len(src)), "pct")
1470 })
1471 b.Run("snappy-default", func(b *testing.B) {
1472 b.SetBytes(int64(len(src)))
1473 b.ReportAllocs()
1474 for i := 0; i < b.N; i++ {
1475 EncodeSnappy(dst, src)
1476 }
1477 b.ReportMetric(100*float64(len(EncodeSnappy(dst, src)))/float64(len(src)), "pct")
1478 })
1479 b.Run("snappy-better", func(b *testing.B) {
1480 b.SetBytes(int64(len(src)))
1481 b.ReportAllocs()
1482 for i := 0; i < b.N; i++ {
1483 EncodeSnappyBetter(dst, src)
1484 }
1485 b.ReportMetric(100*float64(len(EncodeSnappyBetter(dst, src)))/float64(len(src)), "pct")
1486 })
1487 b.Run("snappy-best", func(b *testing.B) {
1488 b.SetBytes(int64(len(src)))
1489 b.ReportAllocs()
1490 for i := 0; i < b.N; i++ {
1491 EncodeSnappyBest(dst, src)
1492 }
1493 b.ReportMetric(100*float64(len(EncodeSnappyBest(dst, src)))/float64(len(src)), "pct")
1494 })
1495 b.Run("snappy-ref-noasm", func(b *testing.B) {
1496 b.SetBytes(int64(len(src)))
1497 b.ReportAllocs()
1498 for i := 0; i < b.N; i++ {
1499 snapref.Encode(dst, src)
1500 }
1501 b.ReportMetric(100*float64(len(snapref.Encode(dst, src)))/float64(len(src)), "pct")
1502 })
1503 }
1504
// testOrBenchmark returns "benchmark" when b is a *testing.B and "test" for
// any other testing.TB; the word is used in skip messages.
func testOrBenchmark(b testing.TB) string {
	switch b.(type) {
	case *testing.B:
		return "benchmark"
	default:
		return "test"
	}
}
1511
1512 func readFile(b testing.TB, filename string) []byte {
1513 src, err := os.ReadFile(filename)
1514 if err != nil {
1515 b.Skipf("skipping %s: %v", testOrBenchmark(b), err)
1516 }
1517 if len(src) == 0 {
1518 b.Fatalf("%s has zero length", filename)
1519 }
1520 return src
1521 }
1522
1523
// expand returns a slice of length n filled with consecutive copies of src.
// Every byte of the k-th copy (counting from zero) is XORed with uint8(k),
// so successive copies differ; the counter wraps after 256 copies. The last
// copy is truncated to fit.
//
// If src is empty there is nothing to replicate and the returned slice is
// all zeros. Without that guard the fill loop would never advance.
func expand(src []byte, n int) []byte {
	dst := make([]byte, n)
	if len(src) == 0 {
		return dst
	}
	cnt := uint8(0)
	for rest := dst; len(rest) > 0; cnt++ {
		// chunk is the part of rest that this copy of src fills.
		chunk := rest
		if len(chunk) > len(src) {
			chunk = chunk[:len(src)]
		}
		for i := range chunk {
			chunk[i] = src[i] ^ cnt
		}
		rest = rest[len(chunk):]
	}
	return dst
}
1539
1540 func benchTwain(b *testing.B, n int, decode bool) {
1541 data := expand(readFile(b, "../testdata/Mark.Twain-Tom.Sawyer.txt"), n)
1542 if decode {
1543 benchDecode(b, data)
1544 } else {
1545 benchEncode(b, data)
1546 }
1547 }
1548
1549 func BenchmarkTwainDecode1e1(b *testing.B) { benchTwain(b, 1e1, true) }
1550 func BenchmarkTwainDecode1e2(b *testing.B) { benchTwain(b, 1e2, true) }
1551 func BenchmarkTwainDecode1e3(b *testing.B) { benchTwain(b, 1e3, true) }
1552 func BenchmarkTwainDecode1e4(b *testing.B) { benchTwain(b, 1e4, true) }
1553 func BenchmarkTwainDecode1e5(b *testing.B) { benchTwain(b, 1e5, true) }
1554 func BenchmarkTwainDecode1e6(b *testing.B) { benchTwain(b, 1e6, true) }
1555 func BenchmarkTwainDecode1e7(b *testing.B) { benchTwain(b, 1e7, true) }
1556 func BenchmarkTwainEncode1e1(b *testing.B) { benchTwain(b, 1e1, false) }
1557 func BenchmarkTwainEncode1e2(b *testing.B) { benchTwain(b, 1e2, false) }
1558 func BenchmarkTwainEncode1e3(b *testing.B) { benchTwain(b, 1e3, false) }
1559 func BenchmarkTwainEncode1e4(b *testing.B) { benchTwain(b, 1e4, false) }
1560 func BenchmarkTwainEncode1e5(b *testing.B) { benchTwain(b, 1e5, false) }
1561 func BenchmarkTwainEncode1e6(b *testing.B) { benchTwain(b, 1e6, false) }
1562 func BenchmarkTwainEncode1e7(b *testing.B) { benchTwain(b, 1e7, false) }
1563
1564 func BenchmarkRandomEncodeBlock1MB(b *testing.B) {
1565 rng := rand.New(rand.NewSource(1))
1566 data := make([]byte, 1<<20)
1567 for i := range data {
1568 data[i] = uint8(rng.Intn(256))
1569 }
1570 benchEncode(b, data)
1571 }
1572
1573 func BenchmarkRandomEncodeBetterBlock16MB(b *testing.B) {
1574 rng := rand.New(rand.NewSource(1))
1575 data := make([]byte, 16<<20)
1576 for i := range data {
1577 data[i] = uint8(rng.Intn(256))
1578 }
1579 benchEncode(b, data)
1580 }
1581
1582
1583
1584
// testFiles lists the data files used by the file-based tests and benchmarks.
//
// label is used to build sub-test and sub-benchmark names, filename is the
// file's base name inside the benchmark data directory (and the name appended
// to the download URL), and a positive sizeLimit makes the benchmarks use only
// the first sizeLimit bytes of the file.
var testFiles = []struct {
	label     string
	filename  string
	sizeLimit int
}{
	{label: "html", filename: "html"},
	{label: "urls", filename: "urls.10K"},
	{label: "jpg", filename: "fireworks.jpeg"},
	{label: "jpg_200b", filename: "fireworks.jpeg", sizeLimit: 200},
	{label: "pdf", filename: "paper-100k.pdf"},
	{label: "html4", filename: "html_x_4"},
	{label: "txt1", filename: "alice29.txt"},
	{label: "txt2", filename: "asyoulik.txt"},
	{label: "txt3", filename: "lcet10.txt"},
	{label: "txt4", filename: "plrabn12.txt"},
	{label: "pb", filename: "geo.protodata"},
	{label: "gaviota", filename: "kppkn.gtb"},
	{label: "txt1_128b", filename: "alice29.txt", sizeLimit: 128},
	{label: "txt1_1000b", filename: "alice29.txt", sizeLimit: 1000},
	{label: "txt1_10000b", filename: "alice29.txt", sizeLimit: 10000},
	{label: "txt1_20000b", filename: "alice29.txt", sizeLimit: 20000},
}
1607
// benchURL is the base URL that a data file's base name is appended to when a
// missing benchmark file has to be downloaded.
const benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
1612
1613 func downloadBenchmarkFiles(b testing.TB, basename string) (errRet error) {
1614 bDir := filepath.FromSlash(*benchdataDir)
1615 filename := filepath.Join(bDir, basename)
1616 if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
1617 return nil
1618 }
1619
1620 if !*download {
1621 b.Skipf("test data not found; skipping %s without the -download flag", testOrBenchmark(b))
1622 }
1623
1624
1625 if err := os.MkdirAll(bDir, 0777); err != nil && !os.IsExist(err) {
1626 return fmt.Errorf("failed to create %s: %s", bDir, err)
1627 }
1628
1629 f, err := os.Create(filename)
1630 if err != nil {
1631 return fmt.Errorf("failed to create %s: %s", filename, err)
1632 }
1633 defer f.Close()
1634 defer func() {
1635 if errRet != nil {
1636 os.Remove(filename)
1637 }
1638 }()
1639 url := benchURL + basename
1640 resp, err := http.Get(url)
1641 if err != nil {
1642 return fmt.Errorf("failed to download %s: %s", url, err)
1643 }
1644 defer resp.Body.Close()
1645 if s := resp.StatusCode; s != http.StatusOK {
1646 return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
1647 }
1648 _, err = io.Copy(f, resp.Body)
1649 if err != nil {
1650 return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
1651 }
1652 return nil
1653 }
1654
1655 func TestEstimateBlockSize(t *testing.T) {
1656 var input []byte
1657 for i := 0; i < 100; i++ {
1658 EstimateBlockSize(input)
1659 input = append(input, 0)
1660 }
1661 }
1662
1663 func benchFile(b *testing.B, i int, decode bool) {
1664 if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil {
1665 b.Fatalf("failed to download testdata: %s", err)
1666 }
1667 bDir := filepath.FromSlash(*benchdataDir)
1668 data := readFile(b, filepath.Join(bDir, testFiles[i].filename))
1669 if !decode {
1670 b.Run("est-size", func(b *testing.B) {
1671 if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
1672 data = data[:n]
1673 }
1674 b.SetBytes(int64(len(data)))
1675 b.ReportAllocs()
1676 b.ResetTimer()
1677 b.RunParallel(func(pb *testing.PB) {
1678 for pb.Next() {
1679 _ = EstimateBlockSize(data)
1680 }
1681 })
1682 sz := float64(EstimateBlockSize(data))
1683 if sz > 0 {
1684 b.ReportMetric(100*sz/float64(len(data)), "pct")
1685 b.ReportMetric(sz, "B")
1686 }
1687 })
1688 }
1689 b.Run("block", func(b *testing.B) {
1690 if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
1691 data = data[:n]
1692 }
1693 if decode {
1694 b.SetBytes(int64(len(data)))
1695 b.ReportAllocs()
1696 b.ResetTimer()
1697 b.RunParallel(func(pb *testing.PB) {
1698 encoded := Encode(nil, data)
1699 tmp := make([]byte, len(data))
1700 for pb.Next() {
1701 var err error
1702 tmp, err = Decode(tmp, encoded)
1703 if err != nil {
1704 b.Fatal(err)
1705 }
1706 }
1707 })
1708 } else {
1709 b.SetBytes(int64(len(data)))
1710 b.ReportAllocs()
1711 b.ResetTimer()
1712 b.RunParallel(func(pb *testing.PB) {
1713 dst := make([]byte, MaxEncodedLen(len(data)))
1714 tmp := make([]byte, len(data))
1715 for pb.Next() {
1716 res := Encode(dst, data)
1717 if len(res) == 0 {
1718 panic(0)
1719 }
1720 if false {
1721 tmp, _ = Decode(tmp, res)
1722 if !bytes.Equal(tmp, data) {
1723 panic("wrong")
1724 }
1725 }
1726 }
1727 })
1728 }
1729 b.ReportMetric(100*float64(len(Encode(nil, data)))/float64(len(data)), "pct")
1730 b.ReportMetric(float64(len(Encode(nil, data))), "B")
1731 })
1732 b.Run("block-better", func(b *testing.B) {
1733 if decode {
1734 b.SetBytes(int64(len(data)))
1735 b.ReportAllocs()
1736 b.ResetTimer()
1737 b.RunParallel(func(pb *testing.PB) {
1738 encoded := EncodeBetter(nil, data)
1739 tmp := make([]byte, len(data))
1740 for pb.Next() {
1741 var err error
1742 tmp, err = Decode(tmp, encoded)
1743 if err != nil {
1744 b.Fatal(err)
1745 }
1746 }
1747 })
1748 } else {
1749 b.SetBytes(int64(len(data)))
1750 b.ReportAllocs()
1751 b.ResetTimer()
1752 b.RunParallel(func(pb *testing.PB) {
1753 dst := make([]byte, MaxEncodedLen(len(data)))
1754 tmp := make([]byte, len(data))
1755 for pb.Next() {
1756 res := EncodeBetter(dst, data)
1757 if len(res) == 0 {
1758 panic(0)
1759 }
1760 if false {
1761 tmp, _ = Decode(tmp, res)
1762 if !bytes.Equal(tmp, data) {
1763 panic("wrong")
1764 }
1765 }
1766 }
1767 })
1768 }
1769 b.ReportMetric(100*float64(len(EncodeBetter(nil, data)))/float64(len(data)), "pct")
1770 b.ReportMetric(float64(len(EncodeBetter(nil, data))), "B")
1771 })
1772
1773 b.Run("block-best", func(b *testing.B) {
1774 if decode {
1775 b.SetBytes(int64(len(data)))
1776 b.ReportAllocs()
1777 b.ResetTimer()
1778 b.RunParallel(func(pb *testing.PB) {
1779 encoded := EncodeBest(nil, data)
1780 tmp := make([]byte, len(data))
1781 for pb.Next() {
1782 var err error
1783 tmp, err = Decode(tmp, encoded)
1784 if err != nil {
1785 b.Fatal(err)
1786 }
1787 }
1788 })
1789 b.ReportMetric(100*float64(len(EncodeBest(nil, data)))/float64(len(data)), "pct")
1790 } else {
1791 b.SetBytes(int64(len(data)))
1792 b.ReportAllocs()
1793 b.ResetTimer()
1794 b.RunParallel(func(pb *testing.PB) {
1795 dst := make([]byte, MaxEncodedLen(len(data)))
1796 tmp := make([]byte, len(data))
1797 for pb.Next() {
1798 res := EncodeBest(dst, data)
1799 if len(res) == 0 {
1800 panic(0)
1801 }
1802 if false {
1803 tmp, _ = Decode(tmp, res)
1804 if !bytes.Equal(tmp, data) {
1805 panic("wrong")
1806 }
1807 }
1808 }
1809 })
1810 b.ReportMetric(100*float64(len(EncodeBest(nil, data)))/float64(len(data)), "pct")
1811 }
1812 b.ReportMetric(float64(len(EncodeBest(nil, data))), "B")
1813 })
1814 }
1815
1816 func benchFileSnappy(b *testing.B, i int, decode bool) {
1817 if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil {
1818 b.Fatalf("failed to download testdata: %s", err)
1819 }
1820 bDir := filepath.FromSlash(*benchdataDir)
1821 data := readFile(b, filepath.Join(bDir, testFiles[i].filename))
1822 if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
1823 data = data[:n]
1824 }
1825
1826 b.Run("s2-snappy", func(b *testing.B) {
1827 if decode {
1828 b.SetBytes(int64(len(data)))
1829 b.ResetTimer()
1830 b.ReportAllocs()
1831 b.RunParallel(func(pb *testing.PB) {
1832 encoded := EncodeSnappy(nil, data)
1833 tmp := make([]byte, len(data))
1834 for pb.Next() {
1835 var err error
1836 tmp, err = Decode(tmp, encoded)
1837 if err != nil {
1838 b.Fatal(err)
1839 }
1840 }
1841 })
1842 b.ReportMetric(100*float64(len(EncodeSnappy(nil, data)))/float64(len(data)), "pct")
1843 } else {
1844 b.SetBytes(int64(len(data)))
1845 b.ReportAllocs()
1846 b.ResetTimer()
1847
1848 b.RunParallel(func(pb *testing.PB) {
1849 dst := make([]byte, MaxEncodedLen(len(data)))
1850 for pb.Next() {
1851 res := EncodeSnappy(dst, data)
1852 if len(res) == 0 {
1853 panic(0)
1854 }
1855 }
1856 })
1857 b.ReportMetric(100*float64(len(EncodeSnappy(nil, data)))/float64(len(data)), "pct")
1858 }
1859 b.ReportMetric(float64(len(EncodeSnappy(nil, data))), "B")
1860 })
1861
1862 b.Run("s2-snappy-better", func(b *testing.B) {
1863 if decode {
1864 b.SetBytes(int64(len(data)))
1865 b.ResetTimer()
1866 b.RunParallel(func(pb *testing.PB) {
1867 encoded := EncodeSnappyBetter(nil, data)
1868 tmp := make([]byte, len(data))
1869 b.ReportAllocs()
1870 b.ResetTimer()
1871
1872 for pb.Next() {
1873 var err error
1874 tmp, err = Decode(tmp, encoded)
1875 if err != nil {
1876 b.Fatal(err)
1877 }
1878 }
1879 })
1880 b.ReportMetric(100*float64(len(EncodeSnappyBetter(nil, data)))/float64(len(data)), "pct")
1881 } else {
1882 b.SetBytes(int64(len(data)))
1883 b.ReportAllocs()
1884 b.ResetTimer()
1885 b.RunParallel(func(pb *testing.PB) {
1886 dst := make([]byte, MaxEncodedLen(len(data)))
1887 tmp := make([]byte, len(data))
1888 for pb.Next() {
1889 res := EncodeSnappyBetter(dst, data)
1890 if len(res) == 0 {
1891 panic(0)
1892 }
1893 if false {
1894 tmp, _ = Decode(tmp, res)
1895 if !bytes.Equal(tmp, data) {
1896 panic("wrong")
1897 }
1898 }
1899 }
1900 })
1901 b.ReportMetric(100*float64(len(EncodeSnappyBetter(nil, data)))/float64(len(data)), "pct")
1902 }
1903 b.ReportMetric(float64(len(EncodeSnappyBetter(nil, data))), "B")
1904 })
1905
1906 b.Run("s2-snappy-best", func(b *testing.B) {
1907 if decode {
1908 b.SetBytes(int64(len(data)))
1909 b.ReportAllocs()
1910 b.ResetTimer()
1911 b.RunParallel(func(pb *testing.PB) {
1912 encoded := EncodeSnappyBest(nil, data)
1913 tmp := make([]byte, len(data))
1914 for pb.Next() {
1915 var err error
1916 tmp, err = Decode(tmp, encoded)
1917 if err != nil {
1918 b.Fatal(err)
1919 }
1920 }
1921 })
1922 b.ReportMetric(100*float64(len(EncodeSnappyBest(nil, data)))/float64(len(data)), "pct")
1923 } else {
1924 b.SetBytes(int64(len(data)))
1925 b.ReportAllocs()
1926 b.ResetTimer()
1927 b.RunParallel(func(pb *testing.PB) {
1928 dst := make([]byte, MaxEncodedLen(len(data)))
1929 tmp := make([]byte, len(data))
1930 for pb.Next() {
1931 res := EncodeSnappyBest(dst, data)
1932 if len(res) == 0 {
1933 panic(0)
1934 }
1935 if false {
1936 tmp, _ = snapref.Decode(tmp, res)
1937 if !bytes.Equal(tmp, data) {
1938 panic("wrong")
1939 }
1940 }
1941 }
1942 })
1943 b.ReportMetric(100*float64(len(EncodeSnappyBest(nil, data)))/float64(len(data)), "pct")
1944 }
1945 b.ReportMetric(float64(len(EncodeSnappyBest(nil, data))), "B")
1946 })
1947 b.Run("snappy-noasm", func(b *testing.B) {
1948 if decode {
1949 b.SetBytes(int64(len(data)))
1950 b.ReportAllocs()
1951 b.ResetTimer()
1952 b.RunParallel(func(pb *testing.PB) {
1953 encoded := snapref.Encode(nil, data)
1954 tmp := make([]byte, len(data))
1955 for pb.Next() {
1956 var err error
1957 tmp, err = snapref.Decode(tmp, encoded)
1958 if err != nil {
1959 b.Fatal(err)
1960 }
1961 }
1962 })
1963 b.ReportMetric(100*float64(len(snapref.Encode(nil, data)))/float64(len(data)), "pct")
1964 } else {
1965 b.SetBytes(int64(len(data)))
1966 b.ReportAllocs()
1967 b.ResetTimer()
1968 b.RunParallel(func(pb *testing.PB) {
1969 dst := make([]byte, snapref.MaxEncodedLen(len(data)))
1970 tmp := make([]byte, len(data))
1971 for pb.Next() {
1972 res := snapref.Encode(dst, data)
1973 if len(res) == 0 {
1974 panic(0)
1975 }
1976 if false {
1977 tmp, _ = snapref.Decode(tmp, res)
1978 if !bytes.Equal(tmp, data) {
1979 panic("wrong")
1980 }
1981 }
1982 }
1983 })
1984 b.ReportMetric(100*float64(len(snapref.Encode(nil, data)))/float64(len(data)), "pct")
1985 }
1986 })
1987 }
1988
1989 func TestRoundtrips(t *testing.T) {
1990 testFile(t, 0, 10)
1991 testFile(t, 1, 10)
1992 testFile(t, 2, 10)
1993 testFile(t, 3, 10)
1994 testFile(t, 4, 10)
1995 testFile(t, 5, 10)
1996 testFile(t, 6, 10)
1997 testFile(t, 7, 10)
1998 testFile(t, 8, 10)
1999 testFile(t, 9, 10)
2000 testFile(t, 10, 10)
2001 testFile(t, 11, 10)
2002 testFile(t, 12, 0)
2003 testFile(t, 13, 0)
2004 testFile(t, 14, 0)
2005 testFile(t, 15, 0)
2006 }
2007
2008 func testFile(t *testing.T, i, repeat int) {
2009 if err := downloadBenchmarkFiles(t, testFiles[i].filename); err != nil {
2010 t.Skipf("failed to download testdata: %s", err)
2011 }
2012
2013 if testing.Short() {
2014 repeat = 0
2015 }
2016 t.Run(fmt.Sprint(i, "-", testFiles[i].label), func(t *testing.T) {
2017 bDir := filepath.FromSlash(*benchdataDir)
2018 data := readFile(t, filepath.Join(bDir, testFiles[i].filename))
2019 if testing.Short() && len(data) > 10000 {
2020 t.SkipNow()
2021 }
2022 oSize := len(data)
2023 for i := 0; i < repeat; i++ {
2024 data = append(data, data[:oSize]...)
2025 }
2026 t.Run("s2", func(t *testing.T) {
2027 testWriterRoundtrip(t, data)
2028 })
2029 t.Run("s2-better", func(t *testing.T) {
2030 testWriterRoundtrip(t, data, WriterBetterCompression())
2031 })
2032 t.Run("s2-best", func(t *testing.T) {
2033 testWriterRoundtrip(t, data, WriterBestCompression())
2034 })
2035 t.Run("s2-uncompressed", func(t *testing.T) {
2036 testWriterRoundtrip(t, data, WriterUncompressed())
2037 })
2038 t.Run("block", func(t *testing.T) {
2039 d := data
2040 testBlockRoundtrip(t, d)
2041 })
2042 t.Run("block-better", func(t *testing.T) {
2043 d := data
2044 testBetterBlockRoundtrip(t, d)
2045 })
2046 t.Run("block-best", func(t *testing.T) {
2047 d := data
2048 testBestBlockRoundtrip(t, d)
2049 })
2050 t.Run("s2-snappy", func(t *testing.T) {
2051 d := data
2052 testSnappyBlockRoundtrip(t, d)
2053 })
2054 t.Run("snappy", func(t *testing.T) {
2055 testSnappyDecode(t, data)
2056 })
2057 })
2058 }
2059
2060 func TestDataRoundtrips(t *testing.T) {
2061 test := func(t *testing.T, data []byte) {
2062 t.Run("s2", func(t *testing.T) {
2063 testWriterRoundtrip(t, data)
2064 })
2065 t.Run("s2-better", func(t *testing.T) {
2066 testWriterRoundtrip(t, data, WriterBetterCompression())
2067 })
2068 t.Run("s2-best", func(t *testing.T) {
2069 testWriterRoundtrip(t, data, WriterBestCompression())
2070 })
2071 t.Run("block", func(t *testing.T) {
2072 d := data
2073 testBlockRoundtrip(t, d)
2074 })
2075 t.Run("block-better", func(t *testing.T) {
2076 d := data
2077 testBetterBlockRoundtrip(t, d)
2078 })
2079 t.Run("block-best", func(t *testing.T) {
2080 d := data
2081 testBestBlockRoundtrip(t, d)
2082 })
2083 t.Run("s2-snappy", func(t *testing.T) {
2084 d := data
2085 testSnappyBlockRoundtrip(t, d)
2086 })
2087 t.Run("snappy", func(t *testing.T) {
2088 testSnappyDecode(t, data)
2089 })
2090 }
2091 t.Run("longblock", func(t *testing.T) {
2092 data := make([]byte, 1<<25)
2093 if testing.Short() {
2094 data = data[:1<<20]
2095 }
2096 test(t, data)
2097 })
2098 t.Run("4f9e1a0", func(t *testing.T) {
2099 comp, _ := os.ReadFile("testdata/4f9e1a0da7915a3d69632f5613ed78bc998a8a23.zst")
2100 dec, _ := zstd.NewReader(bytes.NewBuffer(comp))
2101 data, _ := io.ReadAll(dec)
2102 test(t, data)
2103 })
2104 data, err := os.ReadFile("testdata/enc_regressions.zip")
2105 if err != nil {
2106 t.Fatal(err)
2107 }
2108 zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
2109 if err != nil {
2110 t.Fatal(err)
2111 }
2112 for _, tt := range zr.File {
2113 if !strings.HasSuffix(t.Name(), "") {
2114 continue
2115 }
2116 t.Run(tt.Name, func(t *testing.T) {
2117 r, err := tt.Open()
2118 if err != nil {
2119 t.Error(err)
2120 return
2121 }
2122 b, err := io.ReadAll(r)
2123 if err != nil {
2124 t.Error(err)
2125 return
2126 }
2127 test(t, b[:len(b):len(b)])
2128 })
2129 }
2130
2131 }
2132
2133 func BenchmarkDecodeS2BlockParallel(b *testing.B) {
2134 for i := range testFiles {
2135 b.Run(fmt.Sprint(i, "-", testFiles[i].label), func(b *testing.B) {
2136 benchFile(b, i, true)
2137 })
2138 }
2139 }
2140
2141 func BenchmarkEncodeS2BlockParallel(b *testing.B) {
2142 for i := range testFiles {
2143 b.Run(fmt.Sprint(i, "-", testFiles[i].label), func(b *testing.B) {
2144 benchFile(b, i, false)
2145 })
2146 }
2147 }
2148
2149 func BenchmarkDecodeSnappyBlockParallel(b *testing.B) {
2150 for i := range testFiles {
2151 b.Run(fmt.Sprint(i, "-", testFiles[i].label), func(b *testing.B) {
2152 benchFileSnappy(b, i, true)
2153 })
2154 }
2155 }
2156
2157 func BenchmarkEncodeSnappyBlockParallel(b *testing.B) {
2158 for i := range testFiles {
2159 b.Run(fmt.Sprint(i, "-", testFiles[i].label), func(b *testing.B) {
2160 benchFileSnappy(b, i, false)
2161 })
2162 }
2163 }
2164
2165 func TestMatchLen(t *testing.T) {
2166
2167 ref := func(a, b []byte) int {
2168 n := 0
2169 for i := range a {
2170 if a[i] != b[i] {
2171 break
2172 }
2173 n++
2174 }
2175 return n
2176 }
2177
2178
2179 const maxBelow = 0
2180 nums := []int{0, 1, 2, 7, 8, 9, 16, 20, 29, 30, 31, 32, 33, 34, 38, 39, 40}
2181 for yIndex := 40; yIndex > 30; yIndex-- {
2182 xxx := bytes.Repeat([]byte("x"), 40)
2183 if yIndex < len(xxx) {
2184 xxx[yIndex] = 'y'
2185 }
2186 for _, i := range nums {
2187 for _, j := range nums {
2188 if i >= j {
2189 continue
2190 }
2191 got := matchLen(xxx[j:], xxx[i:])
2192 want := ref(xxx[j:], xxx[i:])
2193 if got > want {
2194 t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
2195 continue
2196 }
2197 if got < want-maxBelow {
2198 t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
2199 }
2200 }
2201 }
2202 }
2203 }
2204
View as plain text