// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT.

//go:build amd64 && !appengine && !noasm && gc

// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
//
// Each loop iteration decodes two symbols from each of four bit readers and
// stores every pair with one 16-bit write into four output streams that are
// R8 bytes apart.
//
// Context fields, identified only by the offsets this routine touches:
//    0(ctx)  pointer to four consecutive bit-reader records, 48 bytes apart
//    8(ctx)  one byte: shift count used for peekTopBits ("peekBits")
//   16(ctx)  output pointer for stream 0
//   24(ctx)  byte distance between output streams ("dstEvery")
//   32(ctx)  pointer to the table of 16-bit entries
//   40(ctx)  written on return: (final BX - 16(ctx)) * 4
//   48(ctx)  output pointer limit; reaching it ends the loop
//
// Bit-reader record fields used: +0 input base pointer, +24 off,
// +32 value (64 bits), +40 bitsRead (one byte).
//
// A table entry is read as CX: CL is the bit count to consume and CH is the
// decoded byte.
//
// Register roles inside the loop:
//   DI  peekBits shift count      BX  output cursor (stream 0)
//   SI  output limit              R8  dstEvery
//   R9  table base                R10 bit-reader records
//   R11 value of current reader   R12 bitsRead of current reader
//   DL  "exhausted" indicator     AL/AH the two decoded bytes
//   CX, R13 scratch (AX is also scratch during a refill)
TEXT ·decompress4x_main_loop_amd64(SB), $0-8
	// Preload values
	MOVQ ctx+0(FP), AX
	MOVBQZX 8(AX), DI
	MOVQ 16(AX), BX
	MOVQ 48(AX), SI
	MOVQ 24(AX), R8
	MOVQ 32(AX), R9
	MOVQ (AX), R10

	// Main loop
main_loop:
	// DL = 1 when the output cursor has reached the limit (signed compare).
	// The flag is only tested at the bottom, so this iteration still runs.
	XORL DX, DX
	CMPQ BX, SI
	SETGE DL

	// br0.fillFast32()
	MOVQ 32(R10), R11
	MOVBQZX 40(R10), R12
	CMPQ R12, $0x20
	JBE skip_fill0
	MOVQ 24(R10), AX
	SUBQ $0x20, R12
	SUBQ $0x04, AX
	MOVQ (R10), R13

	// b.value |= uint64(low) << (b.bitsRead & 63)
	MOVL (AX)(R13*1), R13
	MOVQ R12, CX
	SHLQ CL, R13
	MOVQ AX, 24(R10)
	ORQ R13, R11

	// exhausted += (br0.off < 4)
	// CMPQ sets the carry flag when off < 4 (unsigned); ADCB folds it into DL.
	CMPQ AX, $0x04
	ADCB $+0, DL

skip_fill0:
	// val0 := br0.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v0 := table[val0&mask]
	MOVW (R9)(R13*2), CX

	// br0.advance(uint8(v0.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12

	// val1 := br0.peekTopBits(peekBits)
	MOVQ DI, CX
	MOVQ R11, R13
	SHRQ CL, R13

	// v1 := table[val1&mask]
	MOVW (R9)(R13*2), CX

	// br0.advance(uint8(v1.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12

	// these two writes get coalesced
	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
	MOVW AX, (BX)

	// update the bitreader structure
	MOVQ R11, 32(R10)
	MOVB R12, 40(R10)

	// br1.fillFast32()
	MOVQ 80(R10), R11
	MOVBQZX 88(R10), R12
	CMPQ R12, $0x20
	JBE skip_fill1
	MOVQ 72(R10), AX
	SUBQ $0x20, R12
	SUBQ $0x04, AX
	MOVQ 48(R10), R13

	// b.value |= uint64(low) << (b.bitsRead & 63)
	MOVL (AX)(R13*1), R13
	MOVQ R12, CX
	SHLQ CL, R13
	MOVQ AX, 72(R10)
	ORQ R13, R11

	// exhausted += (br1.off < 4)
	CMPQ AX, $0x04
	ADCB $+0, DL

skip_fill1:
	// val0 := br1.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v0 := table[val0&mask]
	MOVW (R9)(R13*2), CX

	// br1.advance(uint8(v0.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12

	// val1 := br1.peekTopBits(peekBits)
	MOVQ DI, CX
	MOVQ R11, R13
	SHRQ CL, R13

	// v1 := table[val1&mask]
	MOVW (R9)(R13*2), CX

	// br1.advance(uint8(v1.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12

	// these two writes get coalesced
	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
	MOVW AX, (BX)(R8*1)

	// update the bitreader structure
	MOVQ R11, 80(R10)
	MOVB R12, 88(R10)

	// br2.fillFast32()
	MOVQ 128(R10), R11
	MOVBQZX 136(R10), R12
	CMPQ R12, $0x20
	JBE skip_fill2
	MOVQ 120(R10), AX
	SUBQ $0x20, R12
	SUBQ $0x04, AX
	MOVQ 96(R10), R13

	// b.value |= uint64(low) << (b.bitsRead & 63)
	MOVL (AX)(R13*1), R13
	MOVQ R12, CX
	SHLQ CL, R13
	MOVQ AX, 120(R10)
	ORQ R13, R11

	// exhausted += (br2.off < 4)
	CMPQ AX, $0x04
	ADCB $+0, DL

skip_fill2:
	// val0 := br2.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v0 := table[val0&mask]
	MOVW (R9)(R13*2), CX

	// br2.advance(uint8(v0.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12

	// val1 := br2.peekTopBits(peekBits)
	MOVQ DI, CX
	MOVQ R11, R13
	SHRQ CL, R13

	// v1 := table[val1&mask]
	MOVW (R9)(R13*2), CX

	// br2.advance(uint8(v1.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12

	// these two writes get coalesced
	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
	MOVW AX, (BX)(R8*2)

	// update the bitreader structure
	MOVQ R11, 128(R10)
	MOVB R12, 136(R10)

	// br3.fillFast32()
	MOVQ 176(R10), R11
	MOVBQZX 184(R10), R12
	CMPQ R12, $0x20
	JBE skip_fill3
	MOVQ 168(R10), AX
	SUBQ $0x20, R12
	SUBQ $0x04, AX
	MOVQ 144(R10), R13

	// b.value |= uint64(low) << (b.bitsRead & 63)
	MOVL (AX)(R13*1), R13
	MOVQ R12, CX
	SHLQ CL, R13
	MOVQ AX, 168(R10)
	ORQ R13, R11

	// exhausted += (br3.off < 4)
	CMPQ AX, $0x04
	ADCB $+0, DL

skip_fill3:
	// val0 := br3.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v0 := table[val0&mask]
	MOVW (R9)(R13*2), CX

	// br3.advance(uint8(v0.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12

	// val1 := br3.peekTopBits(peekBits)
	MOVQ DI, CX
	MOVQ R11, R13
	SHRQ CL, R13

	// v1 := table[val1&mask]
	MOVW (R9)(R13*2), CX

	// br3.advance(uint8(v1.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12

	// these two writes get coalesced
	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
	// CX = 3 * dstEvery, since a scale factor of 3 cannot be encoded directly.
	LEAQ (R8)(R8*2), CX
	MOVW AX, (BX)(CX*1)

	// update the bitreader structure
	MOVQ R11, 176(R10)
	MOVB R12, 184(R10)

	// Two bytes were written to every stream; repeat while DL is still zero.
	ADDQ $0x02, BX
	TESTB DL, DL
	JZ main_loop

	// Store (BX - 16(ctx)) * 4 at 40(ctx): bytes written across all four streams.
	MOVQ ctx+0(FP), AX
	SUBQ 16(AX), BX
	SHLQ $0x02, BX
	MOVQ BX, 40(AX)
	RET

// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
//
// Each loop iteration decodes four symbols from each of four bit readers and
// stores every group with one 32-bit write into four output streams that are
// R8 bytes apart.
//
// Context fields, identified only by the offsets this routine touches:
//    0(ctx)  pointer to four consecutive bit-reader records, 48 bytes apart
//    8(ctx)  one byte: shift count used for peekTopBits ("peekBits")
//   16(ctx)  output pointer for stream 0
//   24(ctx)  byte distance between output streams ("dstEvery")
//   32(ctx)  pointer to the table of 16-bit entries
//   40(ctx)  written on return: (final BX - 16(ctx)) * 4
//   48(ctx)  output pointer limit; reaching it ends the loop
//
// Bit-reader record fields used: +0 input base pointer, +24 off,
// +32 value (64 bits), +40 bitsRead (one byte).
//
// A table entry is read as CX: CL is the bit count to consume and CH is the
// decoded byte.
//
// Byte packing: v0 goes to AL and v1 to AH, then BSWAPL moves them to the top
// two bytes of EAX. v2 goes to AH and v3 to AL, and a second BSWAPL leaves
// v0, v1, v2, v3 in ascending memory order for the 32-bit store.
//
// Register roles inside the loop:
//   DI  peekBits shift count      BX  output cursor (stream 0)
//   SI  output limit              R8  dstEvery
//   R9  table base                R10 bit-reader records
//   R11 value of current reader   R12 bitsRead of current reader
//   DL  "exhausted" indicator     AX  packed decoded bytes
//   CX, R13, R14 scratch
TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
	// Preload values
	MOVQ ctx+0(FP), CX
	MOVBQZX 8(CX), DI
	MOVQ 16(CX), BX
	MOVQ 48(CX), SI
	MOVQ 24(CX), R8
	MOVQ 32(CX), R9
	MOVQ (CX), R10

	// Main loop
main_loop:
	// DL = 1 when the output cursor has reached the limit (signed compare).
	// The flag is only tested at the bottom, so this iteration still runs.
	XORL DX, DX
	CMPQ BX, SI
	SETGE DL

	// br0.fillFast32()
	MOVQ 32(R10), R11
	MOVBQZX 40(R10), R12
	CMPQ R12, $0x20
	JBE skip_fill0
	MOVQ 24(R10), R13
	SUBQ $0x20, R12
	SUBQ $0x04, R13
	MOVQ (R10), R14

	// b.value |= uint64(low) << (b.bitsRead & 63)
	MOVL (R13)(R14*1), R14
	MOVQ R12, CX
	SHLQ CL, R14
	MOVQ R13, 24(R10)
	ORQ R14, R11

	// exhausted += (br0.off < 4)
	// CMPQ sets the carry flag when off < 4 (unsigned); ADCB folds it into DL.
	CMPQ R13, $0x04
	ADCB $+0, DL

skip_fill0:
	// val0 := br0.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v0 := table[val0&mask]
	MOVW (R9)(R13*2), CX

	// br0.advance(uint8(v0.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12

	// val1 := br0.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v1 := table[val1&mask]
	MOVW (R9)(R13*2), CX

	// br0.advance(uint8(v1.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12
	BSWAPL AX

	// val2 := br0.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v2 := table[val2&mask]
	MOVW (R9)(R13*2), CX

	// br0.advance(uint8(v2.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12

	// val3 := br0.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v3 := table[val3&mask]
	MOVW (R9)(R13*2), CX

	// br0.advance(uint8(v3.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12
	BSWAPL AX

	// these four writes get coalesced
	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
	// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
	// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
	MOVL AX, (BX)

	// update the bitreader structure
	MOVQ R11, 32(R10)
	MOVB R12, 40(R10)

	// br1.fillFast32()
	MOVQ 80(R10), R11
	MOVBQZX 88(R10), R12
	CMPQ R12, $0x20
	JBE skip_fill1
	MOVQ 72(R10), R13
	SUBQ $0x20, R12
	SUBQ $0x04, R13
	MOVQ 48(R10), R14

	// b.value |= uint64(low) << (b.bitsRead & 63)
	MOVL (R13)(R14*1), R14
	MOVQ R12, CX
	SHLQ CL, R14
	MOVQ R13, 72(R10)
	ORQ R14, R11

	// exhausted += (br1.off < 4)
	CMPQ R13, $0x04
	ADCB $+0, DL

skip_fill1:
	// val0 := br1.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v0 := table[val0&mask]
	MOVW (R9)(R13*2), CX

	// br1.advance(uint8(v0.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12

	// val1 := br1.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v1 := table[val1&mask]
	MOVW (R9)(R13*2), CX

	// br1.advance(uint8(v1.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12
	BSWAPL AX

	// val2 := br1.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v2 := table[val2&mask]
	MOVW (R9)(R13*2), CX

	// br1.advance(uint8(v2.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12

	// val3 := br1.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v3 := table[val3&mask]
	MOVW (R9)(R13*2), CX

	// br1.advance(uint8(v3.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12
	BSWAPL AX

	// these four writes get coalesced
	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
	// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
	// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
	MOVL AX, (BX)(R8*1)

	// update the bitreader structure
	MOVQ R11, 80(R10)
	MOVB R12, 88(R10)

	// br2.fillFast32()
	MOVQ 128(R10), R11
	MOVBQZX 136(R10), R12
	CMPQ R12, $0x20
	JBE skip_fill2
	MOVQ 120(R10), R13
	SUBQ $0x20, R12
	SUBQ $0x04, R13
	MOVQ 96(R10), R14

	// b.value |= uint64(low) << (b.bitsRead & 63)
	MOVL (R13)(R14*1), R14
	MOVQ R12, CX
	SHLQ CL, R14
	MOVQ R13, 120(R10)
	ORQ R14, R11

	// exhausted += (br2.off < 4)
	CMPQ R13, $0x04
	ADCB $+0, DL

skip_fill2:
	// val0 := br2.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v0 := table[val0&mask]
	MOVW (R9)(R13*2), CX

	// br2.advance(uint8(v0.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12

	// val1 := br2.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v1 := table[val1&mask]
	MOVW (R9)(R13*2), CX

	// br2.advance(uint8(v1.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12
	BSWAPL AX

	// val2 := br2.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v2 := table[val2&mask]
	MOVW (R9)(R13*2), CX

	// br2.advance(uint8(v2.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12

	// val3 := br2.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v3 := table[val3&mask]
	MOVW (R9)(R13*2), CX

	// br2.advance(uint8(v3.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12
	BSWAPL AX

	// these four writes get coalesced
	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
	// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
	// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
	MOVL AX, (BX)(R8*2)

	// update the bitreader structure
	MOVQ R11, 128(R10)
	MOVB R12, 136(R10)

	// br3.fillFast32()
	MOVQ 176(R10), R11
	MOVBQZX 184(R10), R12
	CMPQ R12, $0x20
	JBE skip_fill3
	MOVQ 168(R10), R13
	SUBQ $0x20, R12
	SUBQ $0x04, R13
	MOVQ 144(R10), R14

	// b.value |= uint64(low) << (b.bitsRead & 63)
	MOVL (R13)(R14*1), R14
	MOVQ R12, CX
	SHLQ CL, R14
	MOVQ R13, 168(R10)
	ORQ R14, R11

	// exhausted += (br3.off < 4)
	CMPQ R13, $0x04
	ADCB $+0, DL

skip_fill3:
	// val0 := br3.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v0 := table[val0&mask]
	MOVW (R9)(R13*2), CX

	// br3.advance(uint8(v0.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12

	// val1 := br3.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v1 := table[val1&mask]
	MOVW (R9)(R13*2), CX

	// br3.advance(uint8(v1.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12
	BSWAPL AX

	// val2 := br3.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v2 := table[val2&mask]
	MOVW (R9)(R13*2), CX

	// br3.advance(uint8(v2.entry))
	MOVB CH, AH
	SHLQ CL, R11
	ADDB CL, R12

	// val3 := br3.peekTopBits(peekBits)
	MOVQ R11, R13
	MOVQ DI, CX
	SHRQ CL, R13

	// v3 := table[val3&mask]
	MOVW (R9)(R13*2), CX

	// br3.advance(uint8(v3.entry))
	MOVB CH, AL
	SHLQ CL, R11
	ADDB CL, R12
	BSWAPL AX

	// these four writes get coalesced
	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
	// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
	// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
	// CX = 3 * dstEvery, since a scale factor of 3 cannot be encoded directly.
	LEAQ (R8)(R8*2), CX
	MOVL AX, (BX)(CX*1)

	// update the bitreader structure
	MOVQ R11, 176(R10)
	MOVB R12, 184(R10)

	// Four bytes were written to every stream; repeat while DL is still zero.
	ADDQ $0x04, BX
	TESTB DL, DL
	JZ main_loop

	// Store (BX - 16(ctx)) * 4 at 40(ctx): bytes written across all four streams.
	MOVQ ctx+0(FP), AX
	SUBQ 16(AX), BX
	SHLQ $0x02, BX
	MOVQ BX, 40(AX)
	RET

// func decompress1x_main_loop_amd64(ctx *decompress1xContext)
//
// Decodes four symbols per iteration from a single bit reader and stores them
// with one 32-bit write, for as long as the reader's off field is >= 8.
//
// Context fields, identified only by the offsets this routine touches:
//    0(ctx)  pointer to the bit-reader record
//    8(ctx)  one byte: shift count used to extract the table index
//   16(ctx)  output pointer
//   24(ctx)  output length in bytes (added to the output pointer to form the end)
//   32(ctx)  pointer to the table of 16-bit entries
//   40(ctx)  written on return: bytes produced, or -1 on error
//
// Bit-reader record fields used: +0 input base pointer, +24 off,
// +32 value (64 bits), +40 bitsRead (one byte). The last three are written
// back on the normal exit path only.
//
// A table entry is read as CX: CL is the bit count to consume and CH is the
// decoded byte.
//
// Register roles inside the loop:
//   DX  output cursor             BX  output end pointer
//   SI  table base                DI  index shift count
//   R8  input base pointer        R9  off
//   R10 value                     R11 bitsRead
//   AX  packed decoded bytes      CX, R12 scratch
TEXT ·decompress1x_main_loop_amd64(SB), $0-8
	MOVQ ctx+0(FP), CX
	MOVQ 16(CX), DX
	MOVQ 24(CX), BX

	// An output length below 4 (unsigned) is reported as an error up front.
	CMPQ BX, $0x04
	JB error_max_decoded_size_exceeded

	// BX = output pointer + output length, i.e. one past the end of the output.
	LEAQ (DX)(BX*1), BX

	// Load the bit-reader state into registers.
	MOVQ (CX), SI
	MOVQ (SI), R8
	MOVQ 24(SI), R9
	MOVQ 32(SI), R10
	MOVBQZX 40(SI), R11

	// SI is reused for the table pointer from here on.
	MOVQ 32(CX), SI
	MOVBQZX 8(CX), DI
	JMP loop_condition

main_loop:
	// Check if we have room for 4 bytes in the output buffer
	// The compare is signed and inclusive: the error path is taken when
	// DX + 4 >= BX, so exactly 4 remaining bytes is also treated as an error.
	LEAQ 4(DX), CX
	CMPQ CX, BX
	JGE error_max_decoded_size_exceeded

	// Decode 4 values
	// Refill when bitsRead >= 32: step off back by 4 and OR the 32 bits at
	// in[off] into value, shifted left by the reduced bitsRead.
	CMPQ R11, $0x20
	JL bitReader_fillFast_1_end
	SUBQ $0x20, R11
	SUBQ $0x04, R9
	MOVL (R8)(R9*1), R12
	MOVQ R11, CX
	SHLQ CL, R12
	ORQ R12, R10

bitReader_fillFast_1_end:
	// Symbol 0 -> AL: index = value >> DI, then consume CL bits.
	MOVQ DI, CX
	MOVQ R10, R12
	SHRQ CL, R12
	MOVW (SI)(R12*2), CX
	MOVB CH, AL
	MOVBQZX CL, CX
	ADDQ CX, R11
	SHLQ CL, R10

	// Symbol 1 -> AH.
	MOVQ DI, CX
	MOVQ R10, R12
	SHRQ CL, R12
	MOVW (SI)(R12*2), CX
	MOVB CH, AH
	MOVBQZX CL, CX
	ADDQ CX, R11
	SHLQ CL, R10

	// Move symbols 0 and 1 into the top two bytes of EAX.
	BSWAPL AX

	// Second refill, same rule as above.
	CMPQ R11, $0x20
	JL bitReader_fillFast_2_end
	SUBQ $0x20, R11
	SUBQ $0x04, R9
	MOVL (R8)(R9*1), R12
	MOVQ R11, CX
	SHLQ CL, R12
	ORQ R12, R10

bitReader_fillFast_2_end:
	// Symbol 2 -> AH.
	MOVQ DI, CX
	MOVQ R10, R12
	SHRQ CL, R12
	MOVW (SI)(R12*2), CX
	MOVB CH, AH
	MOVBQZX CL, CX
	ADDQ CX, R11
	SHLQ CL, R10

	// Symbol 3 -> AL.
	MOVQ DI, CX
	MOVQ R10, R12
	SHRQ CL, R12
	MOVW (SI)(R12*2), CX
	MOVB CH, AL
	MOVBQZX CL, CX
	ADDQ CX, R11
	SHLQ CL, R10

	// After this swap EAX holds symbols 0..3 in ascending memory order.
	BSWAPL AX

	// Store the decoded values
	MOVL AX, (DX)
	ADDQ $0x04, DX

loop_condition:
	// Keep decoding while off >= 8 (signed compare).
	CMPQ R9, $0x08
	JGE main_loop

	// Update ctx structure
	// 40(ctx) = bytes written; then write back off, value and bitsRead.
	MOVQ ctx+0(FP), AX
	SUBQ 16(AX), DX
	MOVQ DX, 40(AX)
	MOVQ (AX), AX
	MOVQ R9, 24(AX)
	MOVQ R10, 32(AX)
	MOVB R11, 40(AX)
	RET

	// Report error
	// Only 40(ctx) is set (to -1); the bit-reader record is not written back.
error_max_decoded_size_exceeded:
	MOVQ ctx+0(FP), AX
	MOVQ $-1, CX
	MOVQ CX, 40(AX)
	RET

// func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
// Requires: BMI2
//
// Decodes four symbols per iteration from a single bit reader and stores them
// with one 32-bit write, for as long as the reader's off field is >= 8.
// Shifts use SHLXQ/SHRXQ, which take the count from any register, so the
// count does not have to be moved into CL first.
//
// Context fields, identified only by the offsets this routine touches:
//    0(ctx)  pointer to the bit-reader record
//    8(ctx)  one byte: shift count used to extract the table index
//   16(ctx)  output pointer
//   24(ctx)  output length in bytes (added to the output pointer to form the end)
//   32(ctx)  pointer to the table of 16-bit entries
//   40(ctx)  written on return: bytes produced, or -1 on error
//
// Bit-reader record fields used: +0 input base pointer, +24 off,
// +32 value (64 bits), +40 bitsRead (one byte). The last three are written
// back on the normal exit path only.
//
// A table entry is read as CX: CL is the bit count to consume and CH is the
// decoded byte.
//
// Register roles inside the loop:
//   DX  output cursor             BX  output end pointer
//   SI  table base                DI  index shift count
//   R8  input base pointer        R9  off
//   R10 value                     R11 bitsRead
//   AX  packed decoded bytes      CX  scratch
TEXT ·decompress1x_main_loop_bmi2(SB), $0-8
	MOVQ ctx+0(FP), CX
	MOVQ 16(CX), DX
	MOVQ 24(CX), BX

	// An output length below 4 (unsigned) is reported as an error up front.
	CMPQ BX, $0x04
	JB error_max_decoded_size_exceeded

	// BX = output pointer + output length, i.e. one past the end of the output.
	LEAQ (DX)(BX*1), BX

	// Load the bit-reader state into registers.
	MOVQ (CX), SI
	MOVQ (SI), R8
	MOVQ 24(SI), R9
	MOVQ 32(SI), R10
	MOVBQZX 40(SI), R11

	// SI is reused for the table pointer from here on.
	MOVQ 32(CX), SI
	MOVBQZX 8(CX), DI
	JMP loop_condition

main_loop:
	// Check if we have room for 4 bytes in the output buffer
	// The compare is signed and inclusive: the error path is taken when
	// DX + 4 >= BX, so exactly 4 remaining bytes is also treated as an error.
	LEAQ 4(DX), CX
	CMPQ CX, BX
	JGE error_max_decoded_size_exceeded

	// Decode 4 values
	// Refill when bitsRead >= 32: step off back by 4 and OR the 32 bits at
	// in[off] into value, shifted left by the reduced bitsRead.
	CMPQ R11, $0x20
	JL bitReader_fillFast_1_end
	SUBQ $0x20, R11
	SUBQ $0x04, R9
	MOVL (R8)(R9*1), CX
	SHLXQ R11, CX, CX
	ORQ CX, R10

bitReader_fillFast_1_end:
	// Symbol 0 -> AL: index = value >> DI, then consume CL bits.
	SHRXQ DI, R10, CX
	MOVW (SI)(CX*2), CX
	MOVB CH, AL
	MOVBQZX CL, CX
	ADDQ CX, R11
	SHLXQ CX, R10, R10

	// Symbol 1 -> AH.
	SHRXQ DI, R10, CX
	MOVW (SI)(CX*2), CX
	MOVB CH, AH
	MOVBQZX CL, CX
	ADDQ CX, R11
	SHLXQ CX, R10, R10

	// Move symbols 0 and 1 into the top two bytes of EAX.
	BSWAPL AX

	// Second refill, same rule as above.
	CMPQ R11, $0x20
	JL bitReader_fillFast_2_end
	SUBQ $0x20, R11
	SUBQ $0x04, R9
	MOVL (R8)(R9*1), CX
	SHLXQ R11, CX, CX
	ORQ CX, R10

bitReader_fillFast_2_end:
	// Symbol 2 -> AH.
	SHRXQ DI, R10, CX
	MOVW (SI)(CX*2), CX
	MOVB CH, AH
	MOVBQZX CL, CX
	ADDQ CX, R11
	SHLXQ CX, R10, R10

	// Symbol 3 -> AL.
	SHRXQ DI, R10, CX
	MOVW (SI)(CX*2), CX
	MOVB CH, AL
	MOVBQZX CL, CX
	ADDQ CX, R11
	SHLXQ CX, R10, R10

	// After this swap EAX holds symbols 0..3 in ascending memory order.
	BSWAPL AX

	// Store the decoded values
	MOVL AX, (DX)
	ADDQ $0x04, DX

loop_condition:
	// Keep decoding while off >= 8 (signed compare).
	CMPQ R9, $0x08
	JGE main_loop

	// Update ctx structure
	// 40(ctx) = bytes written; then write back off, value and bitsRead.
	MOVQ ctx+0(FP), AX
	SUBQ 16(AX), DX
	MOVQ DX, 40(AX)
	MOVQ (AX), AX
	MOVQ R9, 24(AX)
	MOVQ R10, 32(AX)
	MOVB R11, 40(AX)
	RET

	// Report error
	// Only 40(ctx) is set (to -1); the bit-reader record is not written back.
error_max_decoded_size_exceeded:
	MOVQ ctx+0(FP), AX
	MOVQ $-1, CX
	MOVQ CX, 40(AX)
	RET