1 package zstd
2
3 import (
4 "bytes"
5 "encoding/csv"
6 "fmt"
7 "io"
8 "os"
9 "reflect"
10 "strconv"
11 "testing"
12 "time"
13
14 "github.com/klauspost/compress/zip"
15 )
16
17 func TestSequenceDecsAdjustOffset(t *testing.T) {
18 type result struct {
19 offset int
20 prevOffset [3]int
21 }
22
23 tc := []struct {
24 offset int
25 litLen int
26 offsetB uint8
27 prevOffset [3]int
28
29 res result
30 }{{
31 offset: 444,
32 litLen: 0,
33 offsetB: 42,
34 prevOffset: [3]int{111, 222, 333},
35
36 res: result{
37 offset: 444,
38 prevOffset: [3]int{444, 111, 222},
39 },
40 }, {
41 offset: 0,
42 litLen: 1,
43 offsetB: 0,
44 prevOffset: [3]int{111, 222, 333},
45
46 res: result{
47 offset: 111,
48 prevOffset: [3]int{111, 222, 333},
49 },
50 }, {
51 offset: -1,
52 litLen: 0,
53 offsetB: 0,
54 prevOffset: [3]int{111, 222, 333},
55
56 res: result{
57 offset: 111,
58 prevOffset: [3]int{111, 222, 333},
59 },
60 }, {
61 offset: 1,
62 litLen: 1,
63 offsetB: 0,
64 prevOffset: [3]int{111, 222, 333},
65
66 res: result{
67 offset: 222,
68 prevOffset: [3]int{222, 111, 333},
69 },
70 }, {
71 offset: 2,
72 litLen: 1,
73 offsetB: 0,
74 prevOffset: [3]int{111, 222, 333},
75
76 res: result{
77 offset: 333,
78 prevOffset: [3]int{333, 111, 222},
79 },
80 }, {
81 offset: 3,
82 litLen: 1,
83 offsetB: 0,
84 prevOffset: [3]int{111, 222, 333},
85
86 res: result{
87 offset: 110,
88 prevOffset: [3]int{110, 111, 222},
89 },
90 }, {
91 offset: 3,
92 litLen: 1,
93 offsetB: 0,
94 prevOffset: [3]int{1, 222, 333},
95
96 res: result{
97 offset: 1,
98 prevOffset: [3]int{1, 1, 222},
99 },
100 },
101 }
102
103 for i := range tc {
104
105 var sd sequenceDecs
106 for j := 0; j < 3; j++ {
107 sd.prevOffset[j] = tc[i].prevOffset[j]
108 }
109
110
111 offset := sd.adjustOffset(tc[i].offset, tc[i].litLen, tc[i].offsetB)
112
113
114 if offset != tc[i].res.offset {
115 t.Logf("result: %d", offset)
116 t.Logf("expected: %d", tc[i].res.offset)
117 t.Errorf("testcase #%d: wrong function result", i)
118 }
119
120 for j := 0; j < 3; j++ {
121 if sd.prevOffset[j] != tc[i].res.prevOffset[j] {
122 t.Logf("result: %v", sd.prevOffset)
123 t.Logf("expected: %v", tc[i].res.prevOffset)
124 t.Errorf("testcase #%d: sd.prevOffset got wrongly updated", i)
125 break
126 }
127 }
128 }
129 }
130
// testSequence holds the parameters encoded in the file name of a sequence
// test block ("n-<n>-lits-<lits>-prev-<p0>-<p1>-<p2>-win-<win>.blk").
type testSequence struct {
	// n is the value after "n-"; readDecoders uses it as the sequence count.
	n int
	// lits is the value after "lits-"; readDecoders uses it as the literal
	// buffer length.
	lits int
	// win is the value after "win-"; readDecoders uses it as the window size.
	win int
	// prevOffsets are the three values after "prev-"; readDecoders uses them
	// as the initial offset history.
	prevOffsets [3]int
}

// parse fills s from the block file name fn and reports whether fn matched
// the expected pattern with all six numbers present. On failure the scan
// error and the number of fields scanned so far are printed to standard
// output; fields scanned before the failure remain set in s.
func (s *testSequence) parse(fn string) (ok bool) {
	const pattern = "n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk"

	// Destinations in the order the numbers appear in the file name.
	targets := []interface{}{
		&s.n,
		&s.lits,
		&s.prevOffsets[0],
		&s.prevOffsets[1],
		&s.prevOffsets[2],
		&s.win,
	}

	scanned, err := fmt.Sscanf(fn, pattern, targets...)
	if err == nil && scanned == len(targets) {
		return true
	}
	fmt.Println("Unable to parse:", err, scanned)
	return false
}
144
145 func readDecoders(tb testing.TB, buf *bytes.Buffer, ref testSequence) sequenceDecs {
146 s := sequenceDecs{
147 litLengths: sequenceDec{fse: &fseDecoder{}},
148 offsets: sequenceDec{fse: &fseDecoder{}},
149 matchLengths: sequenceDec{fse: &fseDecoder{}},
150 prevOffset: ref.prevOffsets,
151 dict: nil,
152 literals: make([]byte, ref.lits, ref.lits+compressedBlockOverAlloc),
153 out: nil,
154 nSeqs: ref.n,
155 br: nil,
156 seqSize: 0,
157 windowSize: ref.win,
158 maxBits: 0,
159 }
160
161 s.litLengths.fse.mustReadFrom(buf)
162 s.matchLengths.fse.mustReadFrom(buf)
163 s.offsets.fse.mustReadFrom(buf)
164
165 s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
166 s.br = &bitReader{}
167 return s
168 }
169
170 func Test_seqdec_decode_regression(t *testing.T) {
171 zr := testCreateZipReader("testdata/decode-regression.zip", t)
172
173 for _, tt := range zr.File {
174 t.Run(tt.Name, func(t *testing.T) {
175 f, err := tt.Open()
176 if err != nil {
177 t.Error(err)
178 return
179 }
180 defer f.Close()
181
182
183 dec, err := NewReader(f, WithDecoderConcurrency(4))
184 if err != nil {
185 t.Error(err)
186 return
187 }
188
189 var buf []byte
190 _, err = io.ReadFull(dec, buf)
191 if err != nil {
192 t.Error(err)
193 return
194 }
195 })
196 }
197 }
198
199 func Test_seqdec_decoder(t *testing.T) {
200 const writeWant = false
201 var buf bytes.Buffer
202 zw := zip.NewWriter(&buf)
203
204 want := map[string][]seqVals{}
205 var wantOffsets = map[string][3]int{}
206 if !writeWant {
207 zr := testCreateZipReader("testdata/seqs-want.zip", t)
208 tb := t
209 for _, tt := range zr.File {
210 var ref testSequence
211 if !ref.parse(tt.Name) {
212 tb.Skip("unable to parse:", tt.Name)
213 }
214 o, err := tt.Open()
215 if err != nil {
216 t.Fatal(err)
217 }
218 r := csv.NewReader(o)
219 recs, err := r.ReadAll()
220 if err != nil {
221 t.Fatal(err)
222 }
223 for i, rec := range recs {
224 if i == 0 {
225 var o [3]int
226 o[0], _ = strconv.Atoi(rec[0])
227 o[1], _ = strconv.Atoi(rec[1])
228 o[2], _ = strconv.Atoi(rec[2])
229 wantOffsets[tt.Name] = o
230 continue
231 }
232 s := seqVals{}
233 s.mo, _ = strconv.Atoi(rec[0])
234 s.ml, _ = strconv.Atoi(rec[1])
235 s.ll, _ = strconv.Atoi(rec[2])
236 want[tt.Name] = append(want[tt.Name], s)
237 }
238 o.Close()
239 }
240 }
241 zr := testCreateZipReader("testdata/seqs.zip", t)
242 tb := t
243 for _, tt := range zr.File {
244 var ref testSequence
245 if !ref.parse(tt.Name) {
246 tb.Skip("unable to parse:", tt.Name)
247 }
248 r, err := tt.Open()
249 if err != nil {
250 tb.Error(err)
251 return
252 }
253
254 seqData, err := io.ReadAll(r)
255 if err != nil {
256 tb.Error(err)
257 return
258 }
259 var buf = bytes.NewBuffer(seqData)
260 s := readDecoders(tb, buf, ref)
261 seqs := make([]seqVals, ref.n)
262
263 t.Run(tt.Name, func(t *testing.T) {
264 fatalIf := func(err error) {
265 if err != nil {
266 t.Fatal(err)
267 }
268 }
269 fatalIf(s.br.init(buf.Bytes()))
270 fatalIf(s.litLengths.init(s.br))
271 fatalIf(s.offsets.init(s.br))
272 fatalIf(s.matchLengths.init(s.br))
273
274 err := s.decode(seqs)
275 if err != nil {
276 t.Error(err)
277 }
278 if writeWant {
279 w, err := zw.Create(tt.Name)
280 fatalIf(err)
281 c := csv.NewWriter(w)
282 w.Write([]byte(fmt.Sprintf("%d,%d,%d\n", s.prevOffset[0], s.prevOffset[1], s.prevOffset[2])))
283 for _, seq := range seqs {
284 c.Write([]string{strconv.Itoa(seq.mo), strconv.Itoa(seq.ml), strconv.Itoa(seq.ll)})
285 }
286 c.Flush()
287 } else {
288 if s.prevOffset != wantOffsets[tt.Name] {
289 t.Errorf("want offsets %v, got %v", wantOffsets[tt.Name], s.prevOffset)
290 }
291
292 if !reflect.DeepEqual(want[tt.Name], seqs) {
293 t.Errorf("got %v\nwant %v", seqs, want[tt.Name])
294 }
295 }
296 })
297 }
298 if writeWant {
299 zw.Close()
300 os.WriteFile("testdata/seqs-want.zip", buf.Bytes(), os.ModePerm)
301 }
302 }
303
304 func Test_seqdec_execute(t *testing.T) {
305 zr := testCreateZipReader("testdata/seqs.zip", t)
306 tb := t
307 for _, tt := range zr.File {
308 var ref testSequence
309 if !ref.parse(tt.Name) {
310 tb.Skip("unable to parse:", tt.Name)
311 }
312 r, err := tt.Open()
313 if err != nil {
314 tb.Error(err)
315 return
316 }
317
318 seqData, err := io.ReadAll(r)
319 if err != nil {
320 tb.Error(err)
321 return
322 }
323 var buf = bytes.NewBuffer(seqData)
324 s := readDecoders(tb, buf, ref)
325 seqs := make([]seqVals, ref.n)
326
327 fatalIf := func(err error) {
328 if err != nil {
329 tb.Fatal(err)
330 }
331 }
332 fatalIf(s.br.init(buf.Bytes()))
333 fatalIf(s.litLengths.init(s.br))
334 fatalIf(s.offsets.init(s.br))
335 fatalIf(s.matchLengths.init(s.br))
336
337 fatalIf(s.decode(seqs))
338 hist := make([]byte, ref.win)
339 lits := s.literals
340
341 t.Run(tt.Name, func(t *testing.T) {
342 s.literals = lits
343 if len(s.out) > 0 {
344 s.out = s.out[:0]
345 }
346 err := s.execute(seqs, hist)
347 if err != nil {
348 t.Fatal(err)
349 }
350 if len(s.out) != s.seqSize {
351 t.Errorf("want %d != got %d", s.seqSize, len(s.out))
352 }
353 })
354 }
355 }
356
357 func Test_seqdec_decodeSync(t *testing.T) {
358 zr := testCreateZipReader("testdata/seqs.zip", t)
359 tb := t
360 for _, tt := range zr.File {
361 var ref testSequence
362 if !ref.parse(tt.Name) {
363 tb.Skip("unable to parse:", tt.Name)
364 }
365 r, err := tt.Open()
366 if err != nil {
367 tb.Error(err)
368 return
369 }
370
371 seqData, err := io.ReadAll(r)
372 if err != nil {
373 tb.Error(err)
374 return
375 }
376 var buf = bytes.NewBuffer(seqData)
377 s := readDecoders(tb, buf, ref)
378
379 lits := s.literals
380 hist := make([]byte, ref.win)
381 t.Run(tt.Name, func(t *testing.T) {
382 fatalIf := func(err error) {
383 if err != nil {
384 t.Fatal(err)
385 }
386 }
387 fatalIf(s.br.init(buf.Bytes()))
388 fatalIf(s.litLengths.init(s.br))
389 fatalIf(s.offsets.init(s.br))
390 fatalIf(s.matchLengths.init(s.br))
391 s.literals = lits
392 if len(s.out) > 0 {
393 s.out = s.out[:0]
394 }
395 err := s.decodeSync(hist)
396 if err != nil {
397 t.Fatal(err)
398 }
399 })
400 }
401 }
402
// Benchmark_seqdec_decode is the benchmark entry point for sequence decoding;
// it passes b straight to benchmark_seqdec_decode, which does all the work.
func Benchmark_seqdec_decode(b *testing.B) {
	benchmark_seqdec_decode(b)
}
406
407 func benchmark_seqdec_decode(b *testing.B) {
408 zr := testCreateZipReader("testdata/seqs.zip", b)
409 tb := b
410 for _, tt := range zr.File {
411 var ref testSequence
412 if !ref.parse(tt.Name) {
413 tb.Skip("unable to parse:", tt.Name)
414 }
415 r, err := tt.Open()
416 if err != nil {
417 tb.Error(err)
418 return
419 }
420
421 seqData, err := io.ReadAll(r)
422 if err != nil {
423 tb.Error(err)
424 return
425 }
426 var buf = bytes.NewBuffer(seqData)
427 s := readDecoders(tb, buf, ref)
428 seqs := make([]seqVals, ref.n)
429
430 b.Run(tt.Name, func(b *testing.B) {
431 fatalIf := func(err error) {
432 if err != nil {
433 b.Fatal(err)
434 }
435 }
436 b.ReportAllocs()
437 b.ResetTimer()
438 t := time.Now()
439 decoded := 0
440 remain := uint(0)
441 for i := 0; i < b.N; i++ {
442 fatalIf(s.br.init(buf.Bytes()))
443 fatalIf(s.litLengths.init(s.br))
444 fatalIf(s.offsets.init(s.br))
445 fatalIf(s.matchLengths.init(s.br))
446 remain = s.br.remain()
447 err := s.decode(seqs)
448 if err != nil {
449 b.Fatal(err)
450 }
451 decoded += ref.n
452 }
453 b.ReportMetric(float64(decoded)/time.Since(t).Seconds(), "seq/s")
454 b.ReportMetric(float64(remain)/float64(s.nSeqs), "b/seq")
455 })
456 }
457 }
458
459 func Benchmark_seqdec_execute(b *testing.B) {
460 zr := testCreateZipReader("testdata/seqs.zip", b)
461 tb := b
462 for _, tt := range zr.File {
463 var ref testSequence
464 if !ref.parse(tt.Name) {
465 tb.Skip("unable to parse:", tt.Name)
466 }
467 r, err := tt.Open()
468 if err != nil {
469 tb.Error(err)
470 return
471 }
472
473 seqData, err := io.ReadAll(r)
474 if err != nil {
475 tb.Error(err)
476 return
477 }
478 var buf = bytes.NewBuffer(seqData)
479 s := readDecoders(tb, buf, ref)
480 seqs := make([]seqVals, ref.n)
481
482 fatalIf := func(err error) {
483 if err != nil {
484 b.Fatal(err)
485 }
486 }
487 fatalIf(s.br.init(buf.Bytes()))
488 fatalIf(s.litLengths.init(s.br))
489 fatalIf(s.offsets.init(s.br))
490 fatalIf(s.matchLengths.init(s.br))
491
492 fatalIf(s.decode(seqs))
493 hist := make([]byte, ref.win)
494 lits := s.literals
495
496 b.Run(tt.Name, func(b *testing.B) {
497 b.ReportAllocs()
498 b.SetBytes(int64(s.seqSize))
499 b.ResetTimer()
500 t := time.Now()
501 decoded := 0
502 for i := 0; i < b.N; i++ {
503 s.literals = lits
504 if len(s.out) > 0 {
505 s.out = s.out[:0]
506 }
507 fatalIf(s.execute(seqs, hist))
508 decoded += ref.n
509 }
510 b.ReportMetric(float64(decoded)/time.Since(t).Seconds(), "seq/s")
511 })
512 }
513 }
514
515 func Benchmark_seqdec_decodeSync(b *testing.B) {
516 zr := testCreateZipReader("testdata/seqs.zip", b)
517 tb := b
518 for _, tt := range zr.File {
519 var ref testSequence
520 if !ref.parse(tt.Name) {
521 tb.Skip("unable to parse:", tt.Name)
522 }
523 r, err := tt.Open()
524 if err != nil {
525 tb.Error(err)
526 return
527 }
528
529 seqData, err := io.ReadAll(r)
530 if err != nil {
531 tb.Error(err)
532 return
533 }
534 var buf = bytes.NewBuffer(seqData)
535 s := readDecoders(tb, buf, ref)
536
537 lits := s.literals
538 hist := make([]byte, ref.win)
539 b.Run(tt.Name, func(b *testing.B) {
540 fatalIf := func(err error) {
541 if err != nil {
542 b.Fatal(err)
543 }
544 }
545 decoded := 0
546 b.ReportAllocs()
547 b.ResetTimer()
548 t := time.Now()
549
550 for i := 0; i < b.N; i++ {
551 fatalIf(s.br.init(buf.Bytes()))
552 fatalIf(s.litLengths.init(s.br))
553 fatalIf(s.offsets.init(s.br))
554 fatalIf(s.matchLengths.init(s.br))
555 s.literals = lits
556 if len(s.out) > 0 {
557 s.out = s.out[:0]
558 }
559 err := s.decodeSync(hist)
560 if err != nil {
561 b.Fatal(err)
562 }
563 b.SetBytes(int64(len(s.out)))
564 decoded += ref.n
565 }
566 b.ReportMetric(float64(decoded)/time.Since(t).Seconds(), "seq/s")
567 })
568 }
569 }
570
571 func testCreateZipReader(path string, tb testing.TB) *zip.Reader {
572 failOnError := func(err error) {
573 if err != nil {
574 tb.Fatal(err)
575 }
576 }
577
578 data, err := os.ReadFile(path)
579 failOnError(err)
580
581 zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
582 failOnError(err)
583
584 return zr
585 }
586
View as plain text