// Code generated by command: go run src.go -out ../amd64.s -stubs ../stubs_amd64.go -pkg common. DO NOT EDIT. //go:build amd64 #include "textflag.h" // func nttAVX2(p *[256]uint32) // Requires: AVX, AVX2 TEXT ·nttAVX2(SB), $2080-8 MOVQ p+0(FP), AX LEAQ ·Zetas+0(SB), CX LEAQ (SP), DX MOVQ $0xffffffffffffffe0, BX ANDQ BX, DX MOVL $0x007fe001, BX VMOVD BX, X0 VPBROADCASTD X0, Y0 MOVL $0x00ffc002, BX VMOVD BX, X1 VPBROADCASTD X1, Y1 MOVL $0xfc7fdfff, BX VMOVD BX, X2 VPBROADCASTD X2, Y2 VPMOVZXDQ (AX), Y7 VPMOVZXDQ 128(AX), Y8 VPMOVZXDQ 256(AX), Y9 VPMOVZXDQ 384(AX), Y10 VPMOVZXDQ 512(AX), Y11 VPMOVZXDQ 640(AX), Y12 VPMOVZXDQ 768(AX), Y13 VPMOVZXDQ 896(AX), Y14 VPBROADCASTD 4(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 8(CX), Y3 VPBROADCASTD 12(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 
VPSUBD Y6, Y14, Y14 VPBROADCASTD 16(CX), Y3 VPBROADCASTD 20(CX), Y4 VPBROADCASTD 24(CX), Y5 VPBROADCASTD 28(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VMOVDQA Y7, (DX) VMOVDQA Y8, 256(DX) VMOVDQA Y9, 512(DX) VMOVDQA Y10, 768(DX) VMOVDQA Y11, 1024(DX) VMOVDQA Y12, 1280(DX) VMOVDQA Y13, 1536(DX) VMOVDQA Y14, 1792(DX) VPMOVZXDQ 16(AX), Y7 VPMOVZXDQ 144(AX), Y8 VPMOVZXDQ 272(AX), Y9 VPMOVZXDQ 400(AX), Y10 VPMOVZXDQ 528(AX), Y11 VPMOVZXDQ 656(AX), Y12 VPMOVZXDQ 784(AX), Y13 VPMOVZXDQ 912(AX), Y14 VPBROADCASTD 4(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 8(CX), Y3 VPBROADCASTD 12(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, 
Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 16(CX), Y3 VPBROADCASTD 20(CX), Y4 VPBROADCASTD 24(CX), Y5 VPBROADCASTD 28(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VMOVDQA Y7, 32(DX) VMOVDQA Y8, 288(DX) VMOVDQA Y9, 544(DX) VMOVDQA Y10, 800(DX) VMOVDQA Y11, 1056(DX) VMOVDQA Y12, 1312(DX) VMOVDQA Y13, 1568(DX) VMOVDQA Y14, 1824(DX) VPMOVZXDQ 32(AX), Y7 VPMOVZXDQ 160(AX), Y8 VPMOVZXDQ 288(AX), Y9 VPMOVZXDQ 416(AX), Y10 VPMOVZXDQ 544(AX), Y11 VPMOVZXDQ 672(AX), Y12 VPMOVZXDQ 800(AX), Y13 VPMOVZXDQ 928(AX), Y14 VPBROADCASTD 4(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD 
Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 8(CX), Y3 VPBROADCASTD 12(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 16(CX), Y3 VPBROADCASTD 20(CX), Y4 VPBROADCASTD 24(CX), Y5 VPBROADCASTD 28(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VMOVDQA Y7, 64(DX) VMOVDQA Y8, 320(DX) VMOVDQA Y9, 576(DX) VMOVDQA Y10, 832(DX) VMOVDQA Y11, 1088(DX) VMOVDQA Y12, 1344(DX) VMOVDQA Y13, 1600(DX) VMOVDQA Y14, 1856(DX) VPMOVZXDQ 48(AX), Y7 VPMOVZXDQ 176(AX), Y8 VPMOVZXDQ 304(AX), Y9 VPMOVZXDQ 432(AX), Y10 VPMOVZXDQ 560(AX), Y11 VPMOVZXDQ 688(AX), Y12 VPMOVZXDQ 816(AX), Y13 VPMOVZXDQ 944(AX), Y14 VPBROADCASTD 4(CX), Y3 
VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 8(CX), Y3 VPBROADCASTD 12(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 16(CX), Y3 VPBROADCASTD 20(CX), Y4 VPBROADCASTD 24(CX), Y5 VPBROADCASTD 28(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, 
Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VMOVDQA Y7, 96(DX) VMOVDQA Y8, 352(DX) VMOVDQA Y9, 608(DX) VMOVDQA Y10, 864(DX) VMOVDQA Y11, 1120(DX) VMOVDQA Y12, 1376(DX) VMOVDQA Y13, 1632(DX) VMOVDQA Y14, 1888(DX) VPMOVZXDQ 64(AX), Y7 VPMOVZXDQ 192(AX), Y8 VPMOVZXDQ 320(AX), Y9 VPMOVZXDQ 448(AX), Y10 VPMOVZXDQ 576(AX), Y11 VPMOVZXDQ 704(AX), Y12 VPMOVZXDQ 832(AX), Y13 VPMOVZXDQ 960(AX), Y14 VPBROADCASTD 4(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 8(CX), Y3 VPBROADCASTD 12(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 16(CX), Y3 VPBROADCASTD 20(CX), Y4 VPBROADCASTD 24(CX), Y5 VPBROADCASTD 28(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ 
Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VMOVDQA Y7, 128(DX) VMOVDQA Y8, 384(DX) VMOVDQA Y9, 640(DX) VMOVDQA Y10, 896(DX) VMOVDQA Y11, 1152(DX) VMOVDQA Y12, 1408(DX) VMOVDQA Y13, 1664(DX) VMOVDQA Y14, 1920(DX) VPMOVZXDQ 80(AX), Y7 VPMOVZXDQ 208(AX), Y8 VPMOVZXDQ 336(AX), Y9 VPMOVZXDQ 464(AX), Y10 VPMOVZXDQ 592(AX), Y11 VPMOVZXDQ 720(AX), Y12 VPMOVZXDQ 848(AX), Y13 VPMOVZXDQ 976(AX), Y14 VPBROADCASTD 4(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 8(CX), Y3 VPBROADCASTD 12(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD 
Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 16(CX), Y3 VPBROADCASTD 20(CX), Y4 VPBROADCASTD 24(CX), Y5 VPBROADCASTD 28(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VMOVDQA Y7, 160(DX) VMOVDQA Y8, 416(DX) VMOVDQA Y9, 672(DX) VMOVDQA Y10, 928(DX) VMOVDQA Y11, 1184(DX) VMOVDQA Y12, 1440(DX) VMOVDQA Y13, 1696(DX) VMOVDQA Y14, 1952(DX) VPMOVZXDQ 96(AX), Y7 VPMOVZXDQ 224(AX), Y8 VPMOVZXDQ 352(AX), Y9 VPMOVZXDQ 480(AX), Y10 VPMOVZXDQ 608(AX), Y11 VPMOVZXDQ 736(AX), Y12 VPMOVZXDQ 864(AX), Y13 VPMOVZXDQ 992(AX), Y14 VPBROADCASTD 4(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 8(CX), Y3 VPBROADCASTD 12(CX), 
Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 16(CX), Y3 VPBROADCASTD 20(CX), Y4 VPBROADCASTD 24(CX), Y5 VPBROADCASTD 28(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VMOVDQA Y7, 192(DX) VMOVDQA Y8, 448(DX) VMOVDQA Y9, 704(DX) VMOVDQA Y10, 960(DX) VMOVDQA Y11, 1216(DX) VMOVDQA Y12, 1472(DX) VMOVDQA Y13, 1728(DX) VMOVDQA Y14, 1984(DX) VPMOVZXDQ 112(AX), Y7 VPMOVZXDQ 240(AX), Y8 VPMOVZXDQ 368(AX), Y9 VPMOVZXDQ 496(AX), Y10 VPMOVZXDQ 624(AX), Y11 VPMOVZXDQ 752(AX), Y12 VPMOVZXDQ 880(AX), Y13 VPMOVZXDQ 1008(AX), Y14 VPBROADCASTD 4(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, 
Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 8(CX), Y3 VPBROADCASTD 12(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 16(CX), Y3 VPBROADCASTD 20(CX), Y4 VPBROADCASTD 24(CX), Y5 VPBROADCASTD 28(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VMOVDQA Y7, 224(DX) VMOVDQA Y8, 480(DX) VMOVDQA Y9, 736(DX) VMOVDQA Y10, 992(DX) VMOVDQA Y11, 1248(DX) VMOVDQA Y12, 1504(DX) VMOVDQA Y13, 1760(DX) VMOVDQA Y14, 2016(DX) VMOVDQA (DX), 
Y7 VMOVDQA 32(DX), Y8 VMOVDQA 64(DX), Y9 VMOVDQA 96(DX), Y10 VMOVDQA 128(DX), Y11 VMOVDQA 160(DX), Y12 VMOVDQA 192(DX), Y13 VMOVDQA 224(DX), Y14 VPBROADCASTD 32(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 64(CX), Y3 VPBROADCASTD 68(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 128(CX), Y3 VPBROADCASTD 132(CX), Y4 VPBROADCASTD 136(CX), Y5 VPBROADCASTD 140(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, 
Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPBROADCASTD 256(CX), Y15 VPBROADCASTD 260(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 264(CX), Y15 VPBROADCASTD 268(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 272(CX), Y15 VPBROADCASTD 276(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 280(CX), Y15 VPBROADCASTD 284(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPERM2I128 $0x20, Y8, Y7, Y15 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y15, Y7 VPERM2I128 $0x20, Y10, Y9, Y15 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y15, Y9 VPERM2I128 $0x20, Y12, Y11, Y15 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y15, Y11 VPERM2I128 $0x20, Y14, Y13, Y15 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y15, Y13 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPMOVZXDQ 512(CX), Y3 VPMOVZXDQ 528(CX), Y4 VPMOVZXDQ 544(CX), Y5 VPMOVZXDQ 560(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, 
Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, (AX) VMOVDQU Y9, 32(AX) VMOVDQU Y11, 64(AX) VMOVDQU Y13, 96(AX) VMOVDQA 256(DX), Y7 VMOVDQA 288(DX), Y8 VMOVDQA 320(DX), Y9 VMOVDQA 352(DX), Y10 VMOVDQA 384(DX), Y11 VMOVDQA 416(DX), Y12 VMOVDQA 448(DX), Y13 VMOVDQA 480(DX), Y14 VPBROADCASTD 36(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 72(CX), Y3 VPBROADCASTD 76(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 
VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 144(CX), Y3 VPBROADCASTD 148(CX), Y4 VPBROADCASTD 152(CX), Y5 VPBROADCASTD 156(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPBROADCASTD 288(CX), Y15 VPBROADCASTD 292(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 296(CX), Y15 VPBROADCASTD 300(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 304(CX), Y15 VPBROADCASTD 308(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 312(CX), Y15 VPBROADCASTD 316(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPERM2I128 $0x20, Y8, Y7, Y15 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y15, Y7 VPERM2I128 $0x20, Y10, Y9, Y15 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y15, Y9 VPERM2I128 $0x20, Y12, Y11, Y15 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y15, Y11 VPERM2I128 $0x20, Y14, Y13, Y15 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y15, Y13 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ 
$0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPMOVZXDQ 576(CX), Y3 VPMOVZXDQ 592(CX), Y4 VPMOVZXDQ 608(CX), Y5 VPMOVZXDQ 624(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 128(AX) VMOVDQU Y9, 160(AX) VMOVDQU Y11, 192(AX) VMOVDQU Y13, 224(AX) VMOVDQA 512(DX), Y7 VMOVDQA 544(DX), Y8 VMOVDQA 576(DX), Y9 VMOVDQA 608(DX), Y10 VMOVDQA 640(DX), Y11 VMOVDQA 672(DX), Y12 VMOVDQA 704(DX), Y13 VMOVDQA 736(DX), Y14 VPBROADCASTD 40(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, 
Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 80(CX), Y3 VPBROADCASTD 84(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 160(CX), Y3 VPBROADCASTD 164(CX), Y4 VPBROADCASTD 168(CX), Y5 VPBROADCASTD 172(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPBROADCASTD 320(CX), Y15 VPBROADCASTD 324(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 328(CX), Y15 VPBROADCASTD 332(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 
VPBROADCASTD 336(CX), Y15 VPBROADCASTD 340(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 344(CX), Y15 VPBROADCASTD 348(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPERM2I128 $0x20, Y8, Y7, Y15 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y15, Y7 VPERM2I128 $0x20, Y10, Y9, Y15 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y15, Y9 VPERM2I128 $0x20, Y12, Y11, Y15 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y15, Y11 VPERM2I128 $0x20, Y14, Y13, Y15 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y15, Y13 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPMOVZXDQ 640(CX), Y3 VPMOVZXDQ 656(CX), Y4 VPMOVZXDQ 672(CX), Y5 VPMOVZXDQ 688(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 
VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 256(AX) VMOVDQU Y9, 288(AX) VMOVDQU Y11, 320(AX) VMOVDQU Y13, 352(AX) VMOVDQA 768(DX), Y7 VMOVDQA 800(DX), Y8 VMOVDQA 832(DX), Y9 VMOVDQA 864(DX), Y10 VMOVDQA 896(DX), Y11 VMOVDQA 928(DX), Y12 VMOVDQA 960(DX), Y13 VMOVDQA 992(DX), Y14 VPBROADCASTD 44(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 88(CX), Y3 VPBROADCASTD 92(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 176(CX), Y3 VPBROADCASTD 180(CX), Y4 VPBROADCASTD 184(CX), Y5 VPBROADCASTD 
188(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPBROADCASTD 352(CX), Y15 VPBROADCASTD 356(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 360(CX), Y15 VPBROADCASTD 364(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 368(CX), Y15 VPBROADCASTD 372(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 376(CX), Y15 VPBROADCASTD 380(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPERM2I128 $0x20, Y8, Y7, Y15 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y15, Y7 VPERM2I128 $0x20, Y10, Y9, Y15 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y15, Y9 VPERM2I128 $0x20, Y12, Y11, Y15 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y15, Y11 VPERM2I128 $0x20, Y14, Y13, Y15 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y15, Y13 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, 
Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPMOVZXDQ 704(CX), Y3 VPMOVZXDQ 720(CX), Y4 VPMOVZXDQ 736(CX), Y5 VPMOVZXDQ 752(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 384(AX) VMOVDQU Y9, 416(AX) VMOVDQU Y11, 448(AX) VMOVDQU Y13, 480(AX) VMOVDQA 1024(DX), Y7 VMOVDQA 1056(DX), Y8 VMOVDQA 1088(DX), Y9 VMOVDQA 1120(DX), Y10 VMOVDQA 1152(DX), Y11 VMOVDQA 1184(DX), Y12 VMOVDQA 1216(DX), Y13 VMOVDQA 1248(DX), Y14 VPBROADCASTD 48(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 
VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 96(CX), Y3 VPBROADCASTD 100(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 192(CX), Y3 VPBROADCASTD 196(CX), Y4 VPBROADCASTD 200(CX), Y5 VPBROADCASTD 204(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPBROADCASTD 384(CX), Y15 VPBROADCASTD 388(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 392(CX), Y15 VPBROADCASTD 396(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 400(CX), Y15 VPBROADCASTD 404(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 408(CX), Y15 VPBROADCASTD 412(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPERM2I128 $0x20, Y8, Y7, Y15 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y15, Y7 VPERM2I128 $0x20, Y10, Y9, Y15 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y15, Y9 VPERM2I128 $0x20, Y12, Y11, Y15 VPERM2I128 $0x31, Y12, Y11, Y12 
VMOVDQA Y15, Y11 VPERM2I128 $0x20, Y14, Y13, Y15 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y15, Y13 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPMOVZXDQ 768(CX), Y3 VPMOVZXDQ 784(CX), Y4 VPMOVZXDQ 800(CX), Y5 VPMOVZXDQ 816(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 512(AX) VMOVDQU Y9, 544(AX) VMOVDQU Y11, 576(AX) VMOVDQU 
Y13, 608(AX) VMOVDQA 1280(DX), Y7 VMOVDQA 1312(DX), Y8 VMOVDQA 1344(DX), Y9 VMOVDQA 1376(DX), Y10 VMOVDQA 1408(DX), Y11 VMOVDQA 1440(DX), Y12 VMOVDQA 1472(DX), Y13 VMOVDQA 1504(DX), Y14 VPBROADCASTD 52(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 104(CX), Y3 VPBROADCASTD 108(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 208(CX), Y3 VPBROADCASTD 212(CX), Y4 VPBROADCASTD 216(CX), Y5 VPBROADCASTD 220(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ 
$0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPBROADCASTD 416(CX), Y15 VPBROADCASTD 420(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 424(CX), Y15 VPBROADCASTD 428(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 432(CX), Y15 VPBROADCASTD 436(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 440(CX), Y15 VPBROADCASTD 444(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPERM2I128 $0x20, Y8, Y7, Y15 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y15, Y7 VPERM2I128 $0x20, Y10, Y9, Y15 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y15, Y9 VPERM2I128 $0x20, Y12, Y11, Y15 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y15, Y11 VPERM2I128 $0x20, Y14, Y13, Y15 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y15, Y13 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPMOVZXDQ 832(CX), Y3 VPMOVZXDQ 848(CX), Y4 VPMOVZXDQ 864(CX), Y5 VPMOVZXDQ 880(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ 
Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 640(AX) VMOVDQU Y9, 672(AX) VMOVDQU Y11, 704(AX) VMOVDQU Y13, 736(AX) VMOVDQA 1536(DX), Y7 VMOVDQA 1568(DX), Y8 VMOVDQA 1600(DX), Y9 VMOVDQA 1632(DX), Y10 VMOVDQA 1664(DX), Y11 VMOVDQA 1696(DX), Y12 VMOVDQA 1728(DX), Y13 VMOVDQA 1760(DX), Y14 VPBROADCASTD 56(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 112(CX), Y3 VPBROADCASTD 116(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, 
Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 224(CX), Y3 VPBROADCASTD 228(CX), Y4 VPBROADCASTD 232(CX), Y5 VPBROADCASTD 236(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPBROADCASTD 448(CX), Y15 VPBROADCASTD 452(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 456(CX), Y15 VPBROADCASTD 460(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 464(CX), Y15 VPBROADCASTD 468(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 472(CX), Y15 VPBROADCASTD 476(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPERM2I128 $0x20, Y8, Y7, Y15 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y15, Y7 VPERM2I128 $0x20, Y10, Y9, Y15 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y15, Y9 VPERM2I128 $0x20, Y12, Y11, Y15 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y15, Y11 VPERM2I128 $0x20, Y14, Y13, Y15 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y15, Y13 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 
VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPMOVZXDQ 896(CX), Y3 VPMOVZXDQ 912(CX), Y4 VPMOVZXDQ 928(CX), Y5 VPMOVZXDQ 944(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 768(AX) VMOVDQU Y9, 800(AX) VMOVDQU Y11, 832(AX) VMOVDQU Y13, 864(AX) VMOVDQA 1792(DX), Y7 VMOVDQA 1824(DX), Y8 VMOVDQA 1856(DX), Y9 VMOVDQA 1888(DX), Y10 VMOVDQA 1920(DX), Y11 VMOVDQA 1952(DX), Y12 VMOVDQA 1984(DX), Y13 VMOVDQA 2016(DX), Y14 VPBROADCASTD 60(CX), Y3 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 
VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y11 VPADDD Y8, Y1, Y12 VPADDD Y9, Y1, Y13 VPADDD Y10, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y9, Y9 VPADDD Y6, Y10, Y10 VPSUBD Y3, Y11, Y11 VPSUBD Y4, Y12, Y12 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 120(CX), Y3 VPBROADCASTD 124(CX), Y4 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y13, Y4, Y13 VPMULUDQ Y14, Y4, Y14 VPMULUDQ Y2, Y9, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y9, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y9 VPADDD Y8, Y1, Y10 VPADDD Y11, Y1, Y13 VPADDD Y12, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y8, Y8 VPADDD Y5, Y11, Y11 VPADDD Y6, Y12, Y12 VPSUBD Y3, Y9, Y9 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y13, Y13 VPSUBD Y6, Y14, Y14 VPBROADCASTD 240(CX), Y3 VPBROADCASTD 244(CX), Y4 VPBROADCASTD 248(CX), Y5 VPBROADCASTD 252(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPBROADCASTD 480(CX), Y15 VPBROADCASTD 484(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 
VPBROADCASTD 488(CX), Y15 VPBROADCASTD 492(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 496(CX), Y15 VPBROADCASTD 500(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 504(CX), Y15 VPBROADCASTD 508(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPERM2I128 $0x20, Y8, Y7, Y15 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y15, Y7 VPERM2I128 $0x20, Y10, Y9, Y15 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y15, Y9 VPERM2I128 $0x20, Y12, Y11, Y15 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y15, Y11 VPERM2I128 $0x20, Y14, Y13, Y15 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y15, Y13 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y6, Y6 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y6, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y6, Y14, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPMOVZXDQ 960(CX), Y3 VPMOVZXDQ 976(CX), Y4 VPMOVZXDQ 992(CX), Y5 VPMOVZXDQ 1008(CX), Y6 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y10, Y4, Y10 VPMULUDQ Y12, Y5, Y12 VPMULUDQ Y14, Y6, Y14 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y10, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y14, Y2 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y2, Y2 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y10, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y2, Y14, Y2 VPSRLQ $0x20, Y3, Y3 VPSRLQ $0x20, Y4, Y4 VPSRLQ $0x20, Y5, Y5 VPSRLQ $0x20, Y2, Y2 VPADDD Y7, Y1, Y8 VPADDD Y9, Y1, Y10 VPADDD Y11, Y1, 
Y12 VPADDD Y13, Y1, Y14 VPADDD Y3, Y7, Y7 VPADDD Y4, Y9, Y9 VPADDD Y5, Y11, Y11 VPADDD Y2, Y13, Y13 VPSUBD Y3, Y8, Y8 VPSUBD Y4, Y10, Y10 VPSUBD Y5, Y12, Y12 VPSUBD Y2, Y14, Y14 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 896(AX) VMOVDQU Y9, 928(AX) VMOVDQU Y11, 960(AX) VMOVDQU Y13, 992(AX) RET // func invNttAVX2(p *[256]uint32) // Requires: AVX, AVX2 TEXT ·invNttAVX2(SB), $2080-8 MOVQ p+0(FP), AX LEAQ ·InvZetas+0(SB), CX LEAQ (SP), DX MOVQ $0xffffffffffffffe0, BX ANDQ BX, DX MOVL $0x007fe001, BX VMOVD BX, X0 VPBROADCASTD X0, Y0 MOVL $0x7fe00100, BX VMOVD BX, X1 VPBROADCASTD X1, Y1 MOVL $0xfc7fdfff, BX VMOVD BX, X2 VPBROADCASTD X2, Y2 VMOVDQU (AX), Y7 VMOVDQU 32(AX), Y9 VMOVDQU 64(AX), Y11 VMOVDQU 96(AX), Y13 VPSRLQ $0x20, Y7, Y8 VPSRLQ $0x20, Y9, Y10 VPSRLQ $0x20, Y11, Y12 VPSRLQ $0x20, Y13, Y14 VPMOVZXDQ (CX), Y3 VPMOVZXDQ 16(CX), Y4 VPMOVZXDQ 32(CX), Y5 VPMOVZXDQ 48(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 512(CX), Y15 
VPBROADCASTD 516(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 520(CX), Y15 VPBROADCASTD 524(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 528(CX), Y15 VPBROADCASTD 532(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 536(CX), Y15 VPBROADCASTD 540(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPERM2I128 $0x20, Y8, Y7, Y3 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y3, Y7 VPERM2I128 $0x20, Y10, Y9, Y3 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y3, Y9 VPERM2I128 $0x20, Y12, Y11, Y3 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y3, Y11 VPERM2I128 $0x20, Y14, Y13, Y3 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 768(CX), Y3 VPBROADCASTD 772(CX), Y4 VPBROADCASTD 776(CX), Y5 VPBROADCASTD 780(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 
VPBROADCASTD 896(CX), Y3 VPBROADCASTD 900(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 960(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 VMOVDQA Y7, (DX) VMOVDQA Y8, 32(DX) VMOVDQA Y9, 64(DX) VMOVDQA Y10, 96(DX) VMOVDQA Y11, 128(DX) VMOVDQA Y12, 160(DX) VMOVDQA Y13, 192(DX) VMOVDQA Y14, 224(DX) VMOVDQU 128(AX), Y7 VMOVDQU 160(AX), Y9 VMOVDQU 192(AX), Y11 VMOVDQU 224(AX), Y13 VPSRLQ $0x20, Y7, Y8 VPSRLQ $0x20, Y9, Y10 VPSRLQ $0x20, Y11, Y12 VPSRLQ $0x20, Y13, Y14 VPMOVZXDQ 64(CX), Y3 VPMOVZXDQ 80(CX), Y4 VPMOVZXDQ 96(CX), Y5 VPMOVZXDQ 112(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 
VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 544(CX), Y15 VPBROADCASTD 548(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 552(CX), Y15 VPBROADCASTD 556(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 560(CX), Y15 VPBROADCASTD 564(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 568(CX), Y15 VPBROADCASTD 572(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPERM2I128 $0x20, Y8, Y7, Y3 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y3, Y7 VPERM2I128 $0x20, Y10, Y9, Y3 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y3, Y9 VPERM2I128 $0x20, Y12, Y11, Y3 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y3, Y11 VPERM2I128 $0x20, Y14, Y13, Y3 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 784(CX), Y3 VPBROADCASTD 
788(CX), Y4 VPBROADCASTD 792(CX), Y5 VPBROADCASTD 796(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 904(CX), Y3 VPBROADCASTD 908(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 964(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ 
$0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 VMOVDQA Y7, 256(DX) VMOVDQA Y8, 288(DX) VMOVDQA Y9, 320(DX) VMOVDQA Y10, 352(DX) VMOVDQA Y11, 384(DX) VMOVDQA Y12, 416(DX) VMOVDQA Y13, 448(DX) VMOVDQA Y14, 480(DX) VMOVDQU 256(AX), Y7 VMOVDQU 288(AX), Y9 VMOVDQU 320(AX), Y11 VMOVDQU 352(AX), Y13 VPSRLQ $0x20, Y7, Y8 VPSRLQ $0x20, Y9, Y10 VPSRLQ $0x20, Y11, Y12 VPSRLQ $0x20, Y13, Y14 VPMOVZXDQ 128(CX), Y3 VPMOVZXDQ 144(CX), Y4 VPMOVZXDQ 160(CX), Y5 VPMOVZXDQ 176(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 576(CX), Y15 VPBROADCASTD 580(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 584(CX), Y15 VPBROADCASTD 588(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 592(CX), Y15 VPBROADCASTD 596(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 600(CX), Y15 VPBROADCASTD 604(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 
VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPERM2I128 $0x20, Y8, Y7, Y3 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y3, Y7 VPERM2I128 $0x20, Y10, Y9, Y3 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y3, Y9 VPERM2I128 $0x20, Y12, Y11, Y3 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y3, Y11 VPERM2I128 $0x20, Y14, Y13, Y3 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 800(CX), Y3 VPBROADCASTD 804(CX), Y4 VPBROADCASTD 808(CX), Y5 VPBROADCASTD 812(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 912(CX), Y3 VPBROADCASTD 916(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 
VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 968(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 VMOVDQA Y7, 512(DX) VMOVDQA Y8, 544(DX) VMOVDQA Y9, 576(DX) VMOVDQA Y10, 608(DX) VMOVDQA Y11, 640(DX) VMOVDQA Y12, 672(DX) VMOVDQA Y13, 704(DX) VMOVDQA Y14, 736(DX) VMOVDQU 384(AX), Y7 VMOVDQU 416(AX), Y9 VMOVDQU 448(AX), Y11 VMOVDQU 480(AX), Y13 VPSRLQ $0x20, Y7, Y8 VPSRLQ $0x20, Y9, Y10 VPSRLQ $0x20, Y11, Y12 VPSRLQ $0x20, Y13, Y14 VPMOVZXDQ 192(CX), Y3 VPMOVZXDQ 208(CX), Y4 VPMOVZXDQ 224(CX), Y5 VPMOVZXDQ 240(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 
VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 608(CX), Y15 VPBROADCASTD 612(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 616(CX), Y15 VPBROADCASTD 620(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 624(CX), Y15 VPBROADCASTD 628(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 632(CX), Y15 VPBROADCASTD 636(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPERM2I128 $0x20, Y8, Y7, Y3 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y3, Y7 VPERM2I128 $0x20, Y10, Y9, Y3 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y3, Y9 VPERM2I128 $0x20, Y12, Y11, Y3 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y3, Y11 VPERM2I128 $0x20, Y14, Y13, Y3 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 816(CX), Y3 VPBROADCASTD 820(CX), Y4 VPBROADCASTD 824(CX), Y5 VPBROADCASTD 828(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, 
Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 920(CX), Y3 VPBROADCASTD 924(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 972(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 VMOVDQA Y7, 768(DX) VMOVDQA Y8, 800(DX) VMOVDQA Y9, 832(DX) VMOVDQA Y10, 864(DX) VMOVDQA Y11, 896(DX) VMOVDQA Y12, 928(DX) VMOVDQA Y13, 960(DX) VMOVDQA Y14, 992(DX) VMOVDQU 512(AX), Y7 VMOVDQU 544(AX), Y9 VMOVDQU 576(AX), Y11 VMOVDQU 608(AX), Y13 VPSRLQ $0x20, Y7, Y8 VPSRLQ $0x20, Y9, Y10 VPSRLQ $0x20, Y11, Y12 VPSRLQ $0x20, Y13, Y14 VPMOVZXDQ 256(CX), Y3 VPMOVZXDQ 272(CX), Y4 VPMOVZXDQ 288(CX), Y5 VPMOVZXDQ 304(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 
VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 640(CX), Y15 VPBROADCASTD 644(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 648(CX), Y15 VPBROADCASTD 652(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 656(CX), Y15 VPBROADCASTD 660(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 664(CX), Y15 VPBROADCASTD 668(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPERM2I128 $0x20, Y8, Y7, Y3 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y3, Y7 VPERM2I128 $0x20, Y10, Y9, Y3 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y3, Y9 VPERM2I128 $0x20, Y12, Y11, Y3 
VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y3, Y11 VPERM2I128 $0x20, Y14, Y13, Y3 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 832(CX), Y3 VPBROADCASTD 836(CX), Y4 VPBROADCASTD 840(CX), Y5 VPBROADCASTD 844(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 928(CX), Y3 VPBROADCASTD 932(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 976(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 
VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 VMOVDQA Y7, 1024(DX) VMOVDQA Y8, 1056(DX) VMOVDQA Y9, 1088(DX) VMOVDQA Y10, 1120(DX) VMOVDQA Y11, 1152(DX) VMOVDQA Y12, 1184(DX) VMOVDQA Y13, 1216(DX) VMOVDQA Y14, 1248(DX) VMOVDQU 640(AX), Y7 VMOVDQU 672(AX), Y9 VMOVDQU 704(AX), Y11 VMOVDQU 736(AX), Y13 VPSRLQ $0x20, Y7, Y8 VPSRLQ $0x20, Y9, Y10 VPSRLQ $0x20, Y11, Y12 VPSRLQ $0x20, Y13, Y14 VPMOVZXDQ 320(CX), Y3 VPMOVZXDQ 336(CX), Y4 VPMOVZXDQ 352(CX), Y5 VPMOVZXDQ 368(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 672(CX), Y15 VPBROADCASTD 676(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 680(CX), Y15 VPBROADCASTD 684(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 688(CX), Y15 VPBROADCASTD 692(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 696(CX), Y15 VPBROADCASTD 700(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 
VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPERM2I128 $0x20, Y8, Y7, Y3 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y3, Y7 VPERM2I128 $0x20, Y10, Y9, Y3 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y3, Y9 VPERM2I128 $0x20, Y12, Y11, Y3 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y3, Y11 VPERM2I128 $0x20, Y14, Y13, Y3 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 848(CX), Y3 VPBROADCASTD 852(CX), Y4 VPBROADCASTD 856(CX), Y5 VPBROADCASTD 860(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 936(CX), Y3 VPBROADCASTD 940(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ 
Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 980(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 VMOVDQA Y7, 1280(DX) VMOVDQA Y8, 1312(DX) VMOVDQA Y9, 1344(DX) VMOVDQA Y10, 1376(DX) VMOVDQA Y11, 1408(DX) VMOVDQA Y12, 1440(DX) VMOVDQA Y13, 1472(DX) VMOVDQA Y14, 1504(DX) VMOVDQU 768(AX), Y7 VMOVDQU 800(AX), Y9 VMOVDQU 832(AX), Y11 VMOVDQU 864(AX), Y13 VPSRLQ $0x20, Y7, Y8 VPSRLQ $0x20, Y9, Y10 VPSRLQ $0x20, Y11, Y12 VPSRLQ $0x20, Y13, Y14 VPMOVZXDQ 384(CX), Y3 VPMOVZXDQ 400(CX), Y4 VPMOVZXDQ 416(CX), Y5 VPMOVZXDQ 432(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ 
$0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 704(CX), Y15 VPBROADCASTD 708(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 712(CX), Y15 VPBROADCASTD 716(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 720(CX), Y15 VPBROADCASTD 724(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 728(CX), Y15 VPBROADCASTD 732(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPERM2I128 $0x20, Y8, Y7, Y3 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y3, Y7 VPERM2I128 $0x20, Y10, Y9, Y3 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y3, Y9 VPERM2I128 $0x20, Y12, Y11, Y3 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y3, Y11 VPERM2I128 $0x20, Y14, Y13, Y3 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 864(CX), Y3 VPBROADCASTD 868(CX), Y4 VPBROADCASTD 872(CX), Y5 VPBROADCASTD 876(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, 
Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 944(CX), Y3 VPBROADCASTD 948(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 984(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 VMOVDQA Y7, 1536(DX) VMOVDQA Y8, 1568(DX) VMOVDQA Y9, 1600(DX) VMOVDQA Y10, 1632(DX) VMOVDQA Y11, 1664(DX) VMOVDQA Y12, 1696(DX) VMOVDQA Y13, 1728(DX) VMOVDQA Y14, 1760(DX) VMOVDQU 896(AX), Y7 VMOVDQU 928(AX), Y9 VMOVDQU 960(AX), Y11 VMOVDQU 992(AX), Y13 VPSRLQ $0x20, Y7, Y8 VPSRLQ $0x20, Y9, 
Y10 VPSRLQ $0x20, Y11, Y12 VPSRLQ $0x20, Y13, Y14 VPMOVZXDQ 448(CX), Y3 VPMOVZXDQ 464(CX), Y4 VPMOVZXDQ 480(CX), Y5 VPMOVZXDQ 496(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPUNPCKLQDQ Y8, Y7, Y3 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y3, Y7 VPUNPCKLQDQ Y10, Y9, Y3 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y3, Y9 VPUNPCKLQDQ Y12, Y11, Y3 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y3, Y11 VPUNPCKLQDQ Y14, Y13, Y3 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 736(CX), Y15 VPBROADCASTD 740(CX), Y3 VPBLENDD $0xf0, Y3, Y15, Y3 VPBROADCASTD 744(CX), Y15 VPBROADCASTD 748(CX), Y4 VPBLENDD $0xf0, Y4, Y15, Y4 VPBROADCASTD 752(CX), Y15 VPBROADCASTD 756(CX), Y5 VPBLENDD $0xf0, Y5, Y15, Y5 VPBROADCASTD 760(CX), Y15 VPBROADCASTD 764(CX), Y6 VPBLENDD $0xf0, Y6, Y15, Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, 
Y5, Y14 VPERM2I128 $0x20, Y8, Y7, Y3 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y3, Y7 VPERM2I128 $0x20, Y10, Y9, Y3 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y3, Y9 VPERM2I128 $0x20, Y12, Y11, Y3 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y3, Y11 VPERM2I128 $0x20, Y14, Y13, Y3 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y3, Y13 VPBROADCASTD 880(CX), Y3 VPBROADCASTD 884(CX), Y4 VPBROADCASTD 888(CX), Y5 VPBROADCASTD 892(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 952(CX), Y3 VPBROADCASTD 956(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 988(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 
VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 VMOVDQA Y7, 1792(DX) VMOVDQA Y8, 1824(DX) VMOVDQA Y9, 1856(DX) VMOVDQA Y10, 1888(DX) VMOVDQA Y11, 1920(DX) VMOVDQA Y12, 1952(DX) VMOVDQA Y13, 1984(DX) VMOVDQA Y14, 2016(DX) VMOVDQA (DX), Y7 VMOVDQA 256(DX), Y8 VMOVDQA 512(DX), Y9 VMOVDQA 768(DX), Y10 VMOVDQA 1024(DX), Y11 VMOVDQA 1280(DX), Y12 VMOVDQA 1536(DX), Y13 VMOVDQA 1792(DX), Y14 VPBROADCASTD 992(CX), Y3 VPBROADCASTD 996(CX), Y4 VPBROADCASTD 1000(CX), Y5 VPBROADCASTD 1004(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 1008(CX), Y3 VPBROADCASTD 1012(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 
VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 1016(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 MOVL $0x0000a3fa, BX VMOVD BX, X3 VPBROADCASTD X3, Y3 VPMULUDQ Y7, Y3, Y7 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y7, Y3 VPMULUDQ Y2, Y8, Y4 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y7, Y3 VPADDQ Y4, Y8, Y4 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPSRLQ $0x20, Y3, Y7 VPSRLQ $0x20, Y4, Y8 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y11 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y13 VPSRLQ $0x20, Y6, Y14 VMOVDQA Y7, (DX) VMOVDQA Y8, 256(DX) VMOVDQA Y9, 512(DX) VMOVDQA Y10, 768(DX) VMOVDQA Y11, 1024(DX) VMOVDQA Y12, 1280(DX) VMOVDQA Y13, 1536(DX) VMOVDQA Y14, 1792(DX) VMOVDQA 32(DX), Y7 VMOVDQA 288(DX), Y8 VMOVDQA 544(DX), Y9 
VMOVDQA 800(DX), Y10 VMOVDQA 1056(DX), Y11 VMOVDQA 1312(DX), Y12 VMOVDQA 1568(DX), Y13 VMOVDQA 1824(DX), Y14 VPBROADCASTD 992(CX), Y3 VPBROADCASTD 996(CX), Y4 VPBROADCASTD 1000(CX), Y5 VPBROADCASTD 1004(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 1008(CX), Y3 VPBROADCASTD 1012(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 1016(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, 
Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 MOVL $0x0000a3fa, BX VMOVD BX, X3 VPBROADCASTD X3, Y3 VPMULUDQ Y7, Y3, Y7 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y7, Y3 VPMULUDQ Y2, Y8, Y4 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y7, Y3 VPADDQ Y4, Y8, Y4 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPSRLQ $0x20, Y3, Y7 VPSRLQ $0x20, Y4, Y8 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y11 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y13 VPSRLQ $0x20, Y6, Y14 VMOVDQA Y7, 32(DX) VMOVDQA Y8, 288(DX) VMOVDQA Y9, 544(DX) VMOVDQA Y10, 800(DX) VMOVDQA Y11, 1056(DX) VMOVDQA Y12, 1312(DX) VMOVDQA Y13, 1568(DX) VMOVDQA Y14, 1824(DX) VMOVDQA 64(DX), Y7 VMOVDQA 320(DX), Y8 VMOVDQA 576(DX), Y9 VMOVDQA 832(DX), Y10 VMOVDQA 1088(DX), Y11 VMOVDQA 1344(DX), Y12 VMOVDQA 1600(DX), Y13 VMOVDQA 1856(DX), Y14 VPBROADCASTD 992(CX), Y3 VPBROADCASTD 996(CX), Y4 VPBROADCASTD 1000(CX), Y5 VPBROADCASTD 1004(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, 
Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 1008(CX), Y3 VPBROADCASTD 1012(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 1016(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 MOVL $0x0000a3fa, BX VMOVD BX, X3 VPBROADCASTD X3, Y3 VPMULUDQ Y7, Y3, Y7 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y7, Y3 VPMULUDQ Y2, Y8, Y4 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y7, Y3 VPADDQ Y4, Y8, Y4 VPADDQ Y5, Y9, Y5 
VPADDQ Y6, Y10, Y6 VPSRLQ $0x20, Y3, Y7 VPSRLQ $0x20, Y4, Y8 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y11 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y13 VPSRLQ $0x20, Y6, Y14 VMOVDQA Y7, 64(DX) VMOVDQA Y8, 320(DX) VMOVDQA Y9, 576(DX) VMOVDQA Y10, 832(DX) VMOVDQA Y11, 1088(DX) VMOVDQA Y12, 1344(DX) VMOVDQA Y13, 1600(DX) VMOVDQA Y14, 1856(DX) VMOVDQA 96(DX), Y7 VMOVDQA 352(DX), Y8 VMOVDQA 608(DX), Y9 VMOVDQA 864(DX), Y10 VMOVDQA 1120(DX), Y11 VMOVDQA 1376(DX), Y12 VMOVDQA 1632(DX), Y13 VMOVDQA 1888(DX), Y14 VPBROADCASTD 992(CX), Y3 VPBROADCASTD 996(CX), Y4 VPBROADCASTD 1000(CX), Y5 VPBROADCASTD 1004(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 1008(CX), Y3 VPBROADCASTD 1012(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, 
Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 1016(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 MOVL $0x0000a3fa, BX VMOVD BX, X3 VPBROADCASTD X3, Y3 VPMULUDQ Y7, Y3, Y7 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y7, Y3 VPMULUDQ Y2, Y8, Y4 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y7, Y3 VPADDQ Y4, Y8, Y4 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPSRLQ $0x20, Y3, Y7 VPSRLQ $0x20, Y4, Y8 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y11 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y13 VPSRLQ $0x20, Y6, Y14 VMOVDQA Y7, 96(DX) VMOVDQA Y8, 352(DX) VMOVDQA Y9, 608(DX) VMOVDQA Y10, 864(DX) VMOVDQA Y11, 1120(DX) VMOVDQA Y12, 1376(DX) VMOVDQA Y13, 1632(DX) VMOVDQA Y14, 1888(DX) VMOVDQA 128(DX), Y7 VMOVDQA 384(DX), Y8 VMOVDQA 640(DX), Y9 VMOVDQA 
896(DX), Y10 VMOVDQA 1152(DX), Y11 VMOVDQA 1408(DX), Y12 VMOVDQA 1664(DX), Y13 VMOVDQA 1920(DX), Y14 VPBROADCASTD 992(CX), Y3 VPBROADCASTD 996(CX), Y4 VPBROADCASTD 1000(CX), Y5 VPBROADCASTD 1004(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 1008(CX), Y3 VPBROADCASTD 1012(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 1016(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 
VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 MOVL $0x0000a3fa, BX VMOVD BX, X3 VPBROADCASTD X3, Y3 VPMULUDQ Y7, Y3, Y7 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y7, Y3 VPMULUDQ Y2, Y8, Y4 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y7, Y3 VPADDQ Y4, Y8, Y4 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPSRLQ $0x20, Y3, Y7 VPSRLQ $0x20, Y4, Y8 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y11 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y13 VPSRLQ $0x20, Y6, Y14 VMOVDQA Y7, 128(DX) VMOVDQA Y8, 384(DX) VMOVDQA Y9, 640(DX) VMOVDQA Y10, 896(DX) VMOVDQA Y11, 1152(DX) VMOVDQA Y12, 1408(DX) VMOVDQA Y13, 1664(DX) VMOVDQA Y14, 1920(DX) VMOVDQA 160(DX), Y7 VMOVDQA 416(DX), Y8 VMOVDQA 672(DX), Y9 VMOVDQA 928(DX), Y10 VMOVDQA 1184(DX), Y11 VMOVDQA 1440(DX), Y12 VMOVDQA 1696(DX), Y13 VMOVDQA 1952(DX), Y14 VPBROADCASTD 992(CX), Y3 VPBROADCASTD 996(CX), Y4 VPBROADCASTD 1000(CX), Y5 VPBROADCASTD 1004(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, 
Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 1008(CX), Y3 VPBROADCASTD 1012(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 1016(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 MOVL $0x0000a3fa, BX VMOVD BX, X3 VPBROADCASTD X3, Y3 VPMULUDQ Y7, Y3, Y7 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y7, Y3 VPMULUDQ Y2, Y8, Y4 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y7, Y3 VPADDQ Y4, Y8, Y4 VPADDQ Y5, Y9, Y5 
VPADDQ Y6, Y10, Y6 VPSRLQ $0x20, Y3, Y7 VPSRLQ $0x20, Y4, Y8 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y11 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y13 VPSRLQ $0x20, Y6, Y14 VMOVDQA Y7, 160(DX) VMOVDQA Y8, 416(DX) VMOVDQA Y9, 672(DX) VMOVDQA Y10, 928(DX) VMOVDQA Y11, 1184(DX) VMOVDQA Y12, 1440(DX) VMOVDQA Y13, 1696(DX) VMOVDQA Y14, 1952(DX) VMOVDQA 192(DX), Y7 VMOVDQA 448(DX), Y8 VMOVDQA 704(DX), Y9 VMOVDQA 960(DX), Y10 VMOVDQA 1216(DX), Y11 VMOVDQA 1472(DX), Y12 VMOVDQA 1728(DX), Y13 VMOVDQA 1984(DX), Y14 VPBROADCASTD 992(CX), Y3 VPBROADCASTD 996(CX), Y4 VPBROADCASTD 1000(CX), Y5 VPBROADCASTD 1004(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 1008(CX), Y3 VPBROADCASTD 1012(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ 
Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 1016(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y10, Y14, Y10 VPMULUDQ Y15, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y15, Y15 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y15, Y14 MOVL $0x0000a3fa, BX VMOVD BX, X3 VPBROADCASTD X3, Y3 VPMULUDQ Y7, Y3, Y7 VPMULUDQ Y8, Y3, Y8 VPMULUDQ Y9, Y3, Y9 VPMULUDQ Y10, Y3, Y10 VPMULUDQ Y11, Y3, Y11 VPMULUDQ Y12, Y3, Y12 VPMULUDQ Y13, Y3, Y13 VPMULUDQ Y14, Y3, Y14 VPMULUDQ Y2, Y7, Y3 VPMULUDQ Y2, Y8, Y4 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y7, Y3 VPADDQ Y4, Y8, Y4 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPSRLQ $0x20, Y3, Y7 VPSRLQ $0x20, Y4, Y8 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPMULUDQ Y2, Y11, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y13, Y5 VPMULUDQ Y2, Y14, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPADDQ Y3, Y11, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y13, Y5 VPADDQ Y6, Y14, Y6 VPSRLQ $0x20, Y3, Y11 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y13 VPSRLQ $0x20, Y6, Y14 VMOVDQA Y7, 192(DX) VMOVDQA Y8, 448(DX) VMOVDQA Y9, 704(DX) VMOVDQA Y10, 960(DX) VMOVDQA Y11, 1216(DX) VMOVDQA Y12, 1472(DX) VMOVDQA Y13, 1728(DX) VMOVDQA Y14, 1984(DX) VMOVDQA 224(DX), Y7 VMOVDQA 480(DX), Y8 VMOVDQA 736(DX), Y9 
VMOVDQA 992(DX), Y10 VMOVDQA 1248(DX), Y11 VMOVDQA 1504(DX), Y12 VMOVDQA 1760(DX), Y13 VMOVDQA 2016(DX), Y14 VPBROADCASTD 992(CX), Y3 VPBROADCASTD 996(CX), Y4 VPBROADCASTD 1000(CX), Y5 VPBROADCASTD 1004(CX), Y6 VPADDD Y7, Y1, Y15 VPSUBD Y8, Y15, Y15 VPADDD Y7, Y8, Y7 VPMULUDQ Y15, Y3, Y8 VPADDD Y9, Y1, Y3 VPSUBD Y10, Y3, Y3 VPADDD Y9, Y10, Y9 VPMULUDQ Y3, Y4, Y10 VPADDD Y11, Y1, Y4 VPSUBD Y12, Y4, Y4 VPADDD Y11, Y12, Y11 VPMULUDQ Y4, Y5, Y12 VPADDD Y13, Y1, Y5 VPSUBD Y14, Y5, Y5 VPADDD Y13, Y14, Y13 VPMULUDQ Y5, Y6, Y14 VPMULUDQ Y2, Y8, Y15 VPMULUDQ Y2, Y10, Y3 VPMULUDQ Y2, Y12, Y4 VPMULUDQ Y2, Y14, Y5 VPMULUDQ Y0, Y15, Y15 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y15, Y8, Y15 VPADDQ Y3, Y10, Y3 VPADDQ Y4, Y12, Y4 VPADDQ Y5, Y14, Y5 VPSRLQ $0x20, Y15, Y8 VPSRLQ $0x20, Y3, Y10 VPSRLQ $0x20, Y4, Y12 VPSRLQ $0x20, Y5, Y14 VPBROADCASTD 1008(CX), Y3 VPBROADCASTD 1012(CX), Y4 VPADDD Y7, Y1, Y5 VPSUBD Y9, Y5, Y5 VPADDD Y7, Y9, Y7 VPMULUDQ Y5, Y3, Y9 VPADDD Y8, Y1, Y6 VPSUBD Y10, Y6, Y6 VPADDD Y8, Y10, Y8 VPMULUDQ Y6, Y3, Y10 VPADDD Y11, Y1, Y3 VPSUBD Y13, Y3, Y3 VPADDD Y11, Y13, Y11 VPMULUDQ Y3, Y4, Y13 VPADDD Y12, Y1, Y15 VPSUBD Y14, Y15, Y15 VPADDD Y12, Y14, Y12 VPMULUDQ Y15, Y4, Y14 VPMULUDQ Y2, Y9, Y5 VPMULUDQ Y2, Y10, Y6 VPMULUDQ Y2, Y13, Y3 VPMULUDQ Y2, Y14, Y15 VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y15, Y15 VPADDQ Y5, Y9, Y5 VPADDQ Y6, Y10, Y6 VPADDQ Y3, Y13, Y3 VPADDQ Y15, Y14, Y15 VPSRLQ $0x20, Y5, Y9 VPSRLQ $0x20, Y6, Y10 VPSRLQ $0x20, Y3, Y13 VPSRLQ $0x20, Y15, Y14 VPBROADCASTD 1016(CX), Y3 VPADDD Y7, Y1, Y4 VPSUBD Y11, Y4, Y4 VPADDD Y7, Y11, Y7 VPMULUDQ Y4, Y3, Y11 VPADDD Y8, Y1, Y5 VPSUBD Y12, Y5, Y5 VPADDD Y8, Y12, Y8 VPMULUDQ Y5, Y3, Y12 VPADDD Y9, Y1, Y6 VPSUBD Y13, Y6, Y6 VPADDD Y9, Y13, Y9 VPMULUDQ Y6, Y3, Y13 VPADDD Y10, Y1, Y1 VPSUBD Y14, Y1, Y1 VPADDD Y10, Y14, Y10 VPMULUDQ Y1, Y3, Y14 VPMULUDQ Y2, Y11, Y4 VPMULUDQ Y2, Y12, Y5 VPMULUDQ Y2, Y13, Y6 VPMULUDQ Y2, Y14, Y1 VPMULUDQ Y0, Y4, Y4 
VPMULUDQ Y0, Y5, Y5 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y1, Y1 VPADDQ Y4, Y11, Y4 VPADDQ Y5, Y12, Y5 VPADDQ Y6, Y13, Y6 VPADDQ Y1, Y14, Y1 VPSRLQ $0x20, Y4, Y11 VPSRLQ $0x20, Y5, Y12 VPSRLQ $0x20, Y6, Y13 VPSRLQ $0x20, Y1, Y14 MOVL $0x0000a3fa, CX VMOVD CX, X1 VPBROADCASTD X1, Y1 VPMULUDQ Y7, Y1, Y7 VPMULUDQ Y8, Y1, Y8 VPMULUDQ Y9, Y1, Y9 VPMULUDQ Y10, Y1, Y10 VPMULUDQ Y11, Y1, Y11 VPMULUDQ Y12, Y1, Y12 VPMULUDQ Y13, Y1, Y13 VPMULUDQ Y14, Y1, Y14 VPMULUDQ Y2, Y7, Y1 VPMULUDQ Y2, Y8, Y3 VPMULUDQ Y2, Y9, Y4 VPMULUDQ Y2, Y10, Y5 VPMULUDQ Y0, Y1, Y1 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y5, Y5 VPADDQ Y1, Y7, Y1 VPADDQ Y3, Y8, Y3 VPADDQ Y4, Y9, Y4 VPADDQ Y5, Y10, Y5 VPSRLQ $0x20, Y1, Y7 VPSRLQ $0x20, Y3, Y8 VPSRLQ $0x20, Y4, Y9 VPSRLQ $0x20, Y5, Y10 VPMULUDQ Y2, Y11, Y1 VPMULUDQ Y2, Y12, Y3 VPMULUDQ Y2, Y13, Y4 VPMULUDQ Y2, Y14, Y2 VPMULUDQ Y0, Y1, Y1 VPMULUDQ Y0, Y3, Y3 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y2, Y2 VPADDQ Y1, Y11, Y1 VPADDQ Y3, Y12, Y3 VPADDQ Y4, Y13, Y4 VPADDQ Y2, Y14, Y2 VPSRLQ $0x20, Y1, Y11 VPSRLQ $0x20, Y3, Y12 VPSRLQ $0x20, Y4, Y13 VPSRLQ $0x20, Y2, Y14 VMOVDQA Y7, 224(DX) VMOVDQA Y8, 480(DX) VMOVDQA Y9, 736(DX) VMOVDQA Y10, 992(DX) VMOVDQA Y11, 1248(DX) VMOVDQA Y12, 1504(DX) VMOVDQA Y13, 1760(DX) VMOVDQA Y14, 2016(DX) VMOVDQA (DX), Y7 VMOVDQA 32(DX), Y8 VMOVDQA 64(DX), Y9 VMOVDQA 96(DX), Y10 VMOVDQA 128(DX), Y11 VMOVDQA 160(DX), Y12 VMOVDQA 192(DX), Y13 VMOVDQA 224(DX), Y14 VPERM2I128 $0x20, Y8, Y7, Y0 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y0, Y7 VPERM2I128 $0x20, Y10, Y9, Y0 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y0, Y9 VPERM2I128 $0x20, Y12, Y11, Y0 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y0, Y11 VPERM2I128 $0x20, Y14, Y13, Y0 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y0, Y13 VPUNPCKLQDQ Y8, Y7, Y0 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y0, Y7 VPUNPCKLQDQ Y10, Y9, Y0 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y0, Y9 VPUNPCKLQDQ Y12, Y11, Y0 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y0, Y11 VPUNPCKLQDQ Y14, Y13, Y0 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y0, Y13 VPSLLQ 
$0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, (AX) VMOVDQU Y9, 32(AX) VMOVDQU Y11, 64(AX) VMOVDQU Y13, 96(AX) VMOVDQA 256(DX), Y7 VMOVDQA 288(DX), Y8 VMOVDQA 320(DX), Y9 VMOVDQA 352(DX), Y10 VMOVDQA 384(DX), Y11 VMOVDQA 416(DX), Y12 VMOVDQA 448(DX), Y13 VMOVDQA 480(DX), Y14 VPERM2I128 $0x20, Y8, Y7, Y0 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y0, Y7 VPERM2I128 $0x20, Y10, Y9, Y0 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y0, Y9 VPERM2I128 $0x20, Y12, Y11, Y0 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y0, Y11 VPERM2I128 $0x20, Y14, Y13, Y0 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y0, Y13 VPUNPCKLQDQ Y8, Y7, Y0 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y0, Y7 VPUNPCKLQDQ Y10, Y9, Y0 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y0, Y9 VPUNPCKLQDQ Y12, Y11, Y0 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y0, Y11 VPUNPCKLQDQ Y14, Y13, Y0 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y0, Y13 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 128(AX) VMOVDQU Y9, 160(AX) VMOVDQU Y11, 192(AX) VMOVDQU Y13, 224(AX) VMOVDQA 512(DX), Y7 VMOVDQA 544(DX), Y8 VMOVDQA 576(DX), Y9 VMOVDQA 608(DX), Y10 VMOVDQA 640(DX), Y11 VMOVDQA 672(DX), Y12 VMOVDQA 704(DX), Y13 VMOVDQA 736(DX), Y14 VPERM2I128 $0x20, Y8, Y7, Y0 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y0, Y7 VPERM2I128 $0x20, Y10, Y9, Y0 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y0, Y9 VPERM2I128 $0x20, Y12, Y11, Y0 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y0, Y11 VPERM2I128 $0x20, Y14, Y13, Y0 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y0, Y13 VPUNPCKLQDQ Y8, Y7, Y0 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y0, Y7 VPUNPCKLQDQ Y10, Y9, Y0 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y0, Y9 VPUNPCKLQDQ Y12, Y11, Y0 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y0, Y11 VPUNPCKLQDQ Y14, Y13, Y0 VPUNPCKHQDQ Y14, 
Y13, Y14 VMOVDQA Y0, Y13 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 256(AX) VMOVDQU Y9, 288(AX) VMOVDQU Y11, 320(AX) VMOVDQU Y13, 352(AX) VMOVDQA 768(DX), Y7 VMOVDQA 800(DX), Y8 VMOVDQA 832(DX), Y9 VMOVDQA 864(DX), Y10 VMOVDQA 896(DX), Y11 VMOVDQA 928(DX), Y12 VMOVDQA 960(DX), Y13 VMOVDQA 992(DX), Y14 VPERM2I128 $0x20, Y8, Y7, Y0 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y0, Y7 VPERM2I128 $0x20, Y10, Y9, Y0 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y0, Y9 VPERM2I128 $0x20, Y12, Y11, Y0 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y0, Y11 VPERM2I128 $0x20, Y14, Y13, Y0 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y0, Y13 VPUNPCKLQDQ Y8, Y7, Y0 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y0, Y7 VPUNPCKLQDQ Y10, Y9, Y0 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y0, Y9 VPUNPCKLQDQ Y12, Y11, Y0 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y0, Y11 VPUNPCKLQDQ Y14, Y13, Y0 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y0, Y13 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 384(AX) VMOVDQU Y9, 416(AX) VMOVDQU Y11, 448(AX) VMOVDQU Y13, 480(AX) VMOVDQA 1024(DX), Y7 VMOVDQA 1056(DX), Y8 VMOVDQA 1088(DX), Y9 VMOVDQA 1120(DX), Y10 VMOVDQA 1152(DX), Y11 VMOVDQA 1184(DX), Y12 VMOVDQA 1216(DX), Y13 VMOVDQA 1248(DX), Y14 VPERM2I128 $0x20, Y8, Y7, Y0 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y0, Y7 VPERM2I128 $0x20, Y10, Y9, Y0 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y0, Y9 VPERM2I128 $0x20, Y12, Y11, Y0 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y0, Y11 VPERM2I128 $0x20, Y14, Y13, Y0 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y0, Y13 VPUNPCKLQDQ Y8, Y7, Y0 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y0, Y7 VPUNPCKLQDQ Y10, Y9, Y0 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y0, Y9 VPUNPCKLQDQ Y12, Y11, Y0 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y0, 
Y11 VPUNPCKLQDQ Y14, Y13, Y0 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y0, Y13 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 512(AX) VMOVDQU Y9, 544(AX) VMOVDQU Y11, 576(AX) VMOVDQU Y13, 608(AX) VMOVDQA 1280(DX), Y7 VMOVDQA 1312(DX), Y8 VMOVDQA 1344(DX), Y9 VMOVDQA 1376(DX), Y10 VMOVDQA 1408(DX), Y11 VMOVDQA 1440(DX), Y12 VMOVDQA 1472(DX), Y13 VMOVDQA 1504(DX), Y14 VPERM2I128 $0x20, Y8, Y7, Y0 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y0, Y7 VPERM2I128 $0x20, Y10, Y9, Y0 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y0, Y9 VPERM2I128 $0x20, Y12, Y11, Y0 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y0, Y11 VPERM2I128 $0x20, Y14, Y13, Y0 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y0, Y13 VPUNPCKLQDQ Y8, Y7, Y0 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y0, Y7 VPUNPCKLQDQ Y10, Y9, Y0 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y0, Y9 VPUNPCKLQDQ Y12, Y11, Y0 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y0, Y11 VPUNPCKLQDQ Y14, Y13, Y0 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y0, Y13 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 640(AX) VMOVDQU Y9, 672(AX) VMOVDQU Y11, 704(AX) VMOVDQU Y13, 736(AX) VMOVDQA 1536(DX), Y7 VMOVDQA 1568(DX), Y8 VMOVDQA 1600(DX), Y9 VMOVDQA 1632(DX), Y10 VMOVDQA 1664(DX), Y11 VMOVDQA 1696(DX), Y12 VMOVDQA 1728(DX), Y13 VMOVDQA 1760(DX), Y14 VPERM2I128 $0x20, Y8, Y7, Y0 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y0, Y7 VPERM2I128 $0x20, Y10, Y9, Y0 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y0, Y9 VPERM2I128 $0x20, Y12, Y11, Y0 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y0, Y11 VPERM2I128 $0x20, Y14, Y13, Y0 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y0, Y13 VPUNPCKLQDQ Y8, Y7, Y0 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y0, Y7 VPUNPCKLQDQ Y10, Y9, Y0 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y0, Y9 
VPUNPCKLQDQ Y12, Y11, Y0 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y0, Y11 VPUNPCKLQDQ Y14, Y13, Y0 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y0, Y13 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 768(AX) VMOVDQU Y9, 800(AX) VMOVDQU Y11, 832(AX) VMOVDQU Y13, 864(AX) VMOVDQA 1792(DX), Y7 VMOVDQA 1824(DX), Y8 VMOVDQA 1856(DX), Y9 VMOVDQA 1888(DX), Y10 VMOVDQA 1920(DX), Y11 VMOVDQA 1952(DX), Y12 VMOVDQA 1984(DX), Y13 VMOVDQA 2016(DX), Y14 VPERM2I128 $0x20, Y8, Y7, Y0 VPERM2I128 $0x31, Y8, Y7, Y8 VMOVDQA Y0, Y7 VPERM2I128 $0x20, Y10, Y9, Y0 VPERM2I128 $0x31, Y10, Y9, Y10 VMOVDQA Y0, Y9 VPERM2I128 $0x20, Y12, Y11, Y0 VPERM2I128 $0x31, Y12, Y11, Y12 VMOVDQA Y0, Y11 VPERM2I128 $0x20, Y14, Y13, Y0 VPERM2I128 $0x31, Y14, Y13, Y14 VMOVDQA Y0, Y13 VPUNPCKLQDQ Y8, Y7, Y0 VPUNPCKHQDQ Y8, Y7, Y8 VMOVDQA Y0, Y7 VPUNPCKLQDQ Y10, Y9, Y0 VPUNPCKHQDQ Y10, Y9, Y10 VMOVDQA Y0, Y9 VPUNPCKLQDQ Y12, Y11, Y0 VPUNPCKHQDQ Y12, Y11, Y12 VMOVDQA Y0, Y11 VPUNPCKLQDQ Y14, Y13, Y0 VPUNPCKHQDQ Y14, Y13, Y14 VMOVDQA Y0, Y13 VPSLLQ $0x20, Y8, Y8 VPSLLQ $0x20, Y10, Y10 VPSLLQ $0x20, Y12, Y12 VPSLLQ $0x20, Y14, Y14 VPBLENDD $0xaa, Y8, Y7, Y7 VPBLENDD $0xaa, Y10, Y9, Y9 VPBLENDD $0xaa, Y12, Y11, Y11 VPBLENDD $0xaa, Y14, Y13, Y13 VMOVDQU Y7, 896(AX) VMOVDQU Y9, 928(AX) VMOVDQU Y11, 960(AX) VMOVDQU Y13, 992(AX) RET // func mulHatAVX2(p *[256]uint32, a *[256]uint32, b *[256]uint32) // Requires: AVX, AVX2 TEXT ·mulHatAVX2(SB), NOSPLIT, $0-24 MOVQ p+0(FP), AX MOVQ a+8(FP), CX MOVQ b+16(FP), DX MOVL $0x007fe001, BX VMOVD BX, X0 VPBROADCASTD X0, Y0 MOVL $0xfc7fdfff, BX VMOVD BX, X1 VPBROADCASTD X1, Y1 VPMOVZXDQ (CX), Y2 VPMOVZXDQ 16(CX), Y4 VPMOVZXDQ 32(CX), Y6 VPMOVZXDQ 48(CX), Y8 VPMOVZXDQ (DX), Y3 VPMOVZXDQ 16(DX), Y5 VPMOVZXDQ 32(DX), Y7 VPMOVZXDQ 48(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, 
Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, (AX) VMOVDQU Y7, 32(AX) VPMOVZXDQ 64(CX), Y2 VPMOVZXDQ 80(CX), Y4 VPMOVZXDQ 96(CX), Y6 VPMOVZXDQ 112(CX), Y8 VPMOVZXDQ 64(DX), Y3 VPMOVZXDQ 80(DX), Y5 VPMOVZXDQ 96(DX), Y7 VPMOVZXDQ 112(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 64(AX) VMOVDQU Y7, 96(AX) VPMOVZXDQ 128(CX), Y2 VPMOVZXDQ 144(CX), Y4 VPMOVZXDQ 160(CX), Y6 VPMOVZXDQ 176(CX), Y8 VPMOVZXDQ 128(DX), Y3 VPMOVZXDQ 144(DX), Y5 VPMOVZXDQ 160(DX), Y7 VPMOVZXDQ 176(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, 
Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 128(AX) VMOVDQU Y7, 160(AX) VPMOVZXDQ 192(CX), Y2 VPMOVZXDQ 208(CX), Y4 VPMOVZXDQ 224(CX), Y6 VPMOVZXDQ 240(CX), Y8 VPMOVZXDQ 192(DX), Y3 VPMOVZXDQ 208(DX), Y5 VPMOVZXDQ 224(DX), Y7 VPMOVZXDQ 240(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 192(AX) VMOVDQU Y7, 224(AX) VPMOVZXDQ 256(CX), Y2 VPMOVZXDQ 272(CX), Y4 VPMOVZXDQ 288(CX), Y6 VPMOVZXDQ 304(CX), Y8 VPMOVZXDQ 256(DX), Y3 VPMOVZXDQ 272(DX), Y5 VPMOVZXDQ 288(DX), Y7 VPMOVZXDQ 304(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 
VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 256(AX) VMOVDQU Y7, 288(AX) VPMOVZXDQ 320(CX), Y2 VPMOVZXDQ 336(CX), Y4 VPMOVZXDQ 352(CX), Y6 VPMOVZXDQ 368(CX), Y8 VPMOVZXDQ 320(DX), Y3 VPMOVZXDQ 336(DX), Y5 VPMOVZXDQ 352(DX), Y7 VPMOVZXDQ 368(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 320(AX) VMOVDQU Y7, 352(AX) VPMOVZXDQ 384(CX), Y2 VPMOVZXDQ 400(CX), Y4 VPMOVZXDQ 416(CX), Y6 VPMOVZXDQ 432(CX), Y8 VPMOVZXDQ 384(DX), Y3 VPMOVZXDQ 400(DX), Y5 VPMOVZXDQ 416(DX), Y7 VPMOVZXDQ 432(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ 
Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 384(AX) VMOVDQU Y7, 416(AX) VPMOVZXDQ 448(CX), Y2 VPMOVZXDQ 464(CX), Y4 VPMOVZXDQ 480(CX), Y6 VPMOVZXDQ 496(CX), Y8 VPMOVZXDQ 448(DX), Y3 VPMOVZXDQ 464(DX), Y5 VPMOVZXDQ 480(DX), Y7 VPMOVZXDQ 496(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 448(AX) VMOVDQU Y7, 480(AX) VPMOVZXDQ 512(CX), Y2 VPMOVZXDQ 528(CX), Y4 VPMOVZXDQ 544(CX), Y6 VPMOVZXDQ 560(CX), Y8 VPMOVZXDQ 512(DX), Y3 VPMOVZXDQ 528(DX), Y5 VPMOVZXDQ 544(DX), Y7 VPMOVZXDQ 560(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 
VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 512(AX) VMOVDQU Y7, 544(AX) VPMOVZXDQ 576(CX), Y2 VPMOVZXDQ 592(CX), Y4 VPMOVZXDQ 608(CX), Y6 VPMOVZXDQ 624(CX), Y8 VPMOVZXDQ 576(DX), Y3 VPMOVZXDQ 592(DX), Y5 VPMOVZXDQ 608(DX), Y7 VPMOVZXDQ 624(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 576(AX) VMOVDQU Y7, 608(AX) VPMOVZXDQ 640(CX), Y2 VPMOVZXDQ 656(CX), Y4 VPMOVZXDQ 672(CX), Y6 VPMOVZXDQ 688(CX), Y8 VPMOVZXDQ 640(DX), Y3 VPMOVZXDQ 656(DX), Y5 VPMOVZXDQ 672(DX), Y7 VPMOVZXDQ 688(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ 
$0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 640(AX) VMOVDQU Y7, 672(AX) VPMOVZXDQ 704(CX), Y2 VPMOVZXDQ 720(CX), Y4 VPMOVZXDQ 736(CX), Y6 VPMOVZXDQ 752(CX), Y8 VPMOVZXDQ 704(DX), Y3 VPMOVZXDQ 720(DX), Y5 VPMOVZXDQ 736(DX), Y7 VPMOVZXDQ 752(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 704(AX) VMOVDQU Y7, 736(AX) VPMOVZXDQ 768(CX), Y2 VPMOVZXDQ 784(CX), Y4 VPMOVZXDQ 800(CX), Y6 VPMOVZXDQ 816(CX), Y8 VPMOVZXDQ 768(DX), Y3 VPMOVZXDQ 784(DX), Y5 VPMOVZXDQ 800(DX), Y7 VPMOVZXDQ 816(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ 
$0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 768(AX) VMOVDQU Y7, 800(AX) VPMOVZXDQ 832(CX), Y2 VPMOVZXDQ 848(CX), Y4 VPMOVZXDQ 864(CX), Y6 VPMOVZXDQ 880(CX), Y8 VPMOVZXDQ 832(DX), Y3 VPMOVZXDQ 848(DX), Y5 VPMOVZXDQ 864(DX), Y7 VPMOVZXDQ 880(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 VPERM2I128 $0x31, Y5, Y3, Y5 VMOVDQA Y2, Y3 VPERM2I128 $0x20, Y9, Y7, Y2 VPERM2I128 $0x31, Y9, Y7, Y9 VMOVDQA Y2, Y7 VPUNPCKLQDQ Y5, Y3, Y2 VPUNPCKHQDQ Y5, Y3, Y5 VMOVDQA Y2, Y3 VPUNPCKLQDQ Y9, Y7, Y2 VPUNPCKHQDQ Y9, Y7, Y9 VMOVDQA Y2, Y7 VPSLLQ $0x20, Y5, Y5 VPSLLQ $0x20, Y9, Y9 VPBLENDD $0xaa, Y5, Y3, Y3 VPBLENDD $0xaa, Y9, Y7, Y7 VMOVDQU Y3, 832(AX) VMOVDQU Y7, 864(AX) VPMOVZXDQ 896(CX), Y2 VPMOVZXDQ 912(CX), Y4 VPMOVZXDQ 928(CX), Y6 VPMOVZXDQ 944(CX), Y8 VPMOVZXDQ 896(DX), Y3 VPMOVZXDQ 912(DX), Y5 VPMOVZXDQ 928(DX), Y7 VPMOVZXDQ 944(DX), Y9 VPMULUDQ Y2, Y3, Y3 VPMULUDQ Y4, Y5, Y5 VPMULUDQ Y6, Y7, Y7 VPMULUDQ Y8, Y9, Y9 VPMULUDQ Y1, Y3, Y2 VPMULUDQ Y1, Y5, Y4 VPMULUDQ Y1, Y7, Y6 VPMULUDQ Y1, Y9, Y8 VPMULUDQ Y0, Y2, Y2 VPMULUDQ Y0, Y4, Y4 VPMULUDQ Y0, Y6, Y6 VPMULUDQ Y0, Y8, Y8 VPADDQ Y2, Y3, Y2 VPADDQ Y4, Y5, Y4 VPADDQ Y6, Y7, Y6 VPADDQ Y8, Y9, Y8 VPSRLQ $0x20, Y2, Y3 VPSRLQ $0x20, Y4, Y5 VPSRLQ $0x20, Y6, Y7 VPSRLQ $0x20, Y8, Y9 VPERM2I128 $0x20, Y5, Y3, Y2 
VPERM2I128 $0x31, Y5, Y3, Y5
	VMOVDQA Y2, Y3
	VPERM2I128 $0x20, Y9, Y7, Y2
	VPERM2I128 $0x31, Y9, Y7, Y9
	VMOVDQA Y2, Y7
	VPUNPCKLQDQ Y5, Y3, Y2
	VPUNPCKHQDQ Y5, Y3, Y5
	VMOVDQA Y2, Y3
	VPUNPCKLQDQ Y9, Y7, Y2
	VPUNPCKHQDQ Y9, Y7, Y9
	VMOVDQA Y2, Y7
	VPSLLQ $0x20, Y5, Y5
	VPSLLQ $0x20, Y9, Y9
	VPBLENDD $0xaa, Y5, Y3, Y3
	VPBLENDD $0xaa, Y9, Y7, Y7
	VMOVDQU Y3, 896(AX)
	VMOVDQU Y7, 928(AX)
	VPMOVZXDQ 960(CX), Y2
	VPMOVZXDQ 976(CX), Y4
	VPMOVZXDQ 992(CX), Y6
	VPMOVZXDQ 1008(CX), Y8
	VPMOVZXDQ 960(DX), Y3
	VPMOVZXDQ 976(DX), Y5
	VPMOVZXDQ 992(DX), Y7
	VPMOVZXDQ 1008(DX), Y9
	VPMULUDQ Y2, Y3, Y3
	VPMULUDQ Y4, Y5, Y5
	VPMULUDQ Y6, Y7, Y7
	VPMULUDQ Y8, Y9, Y9
	VPMULUDQ Y1, Y3, Y2
	VPMULUDQ Y1, Y5, Y4
	VPMULUDQ Y1, Y7, Y6
	VPMULUDQ Y1, Y9, Y8
	VPMULUDQ Y0, Y2, Y2
	VPMULUDQ Y0, Y4, Y4
	VPMULUDQ Y0, Y6, Y6
	VPMULUDQ Y0, Y8, Y8
	VPADDQ Y2, Y3, Y2
	VPADDQ Y4, Y5, Y4
	VPADDQ Y6, Y7, Y6
	VPADDQ Y8, Y9, Y8
	VPSRLQ $0x20, Y2, Y3
	VPSRLQ $0x20, Y4, Y5
	VPSRLQ $0x20, Y6, Y7
	VPSRLQ $0x20, Y8, Y9
	VPERM2I128 $0x20, Y5, Y3, Y0
	VPERM2I128 $0x31, Y5, Y3, Y5
	VMOVDQA Y0, Y3
	VPERM2I128 $0x20, Y9, Y7, Y0
	VPERM2I128 $0x31, Y9, Y7, Y9
	VMOVDQA Y0, Y7
	VPUNPCKLQDQ Y5, Y3, Y0
	VPUNPCKHQDQ Y5, Y3, Y5
	VMOVDQA Y0, Y3
	VPUNPCKLQDQ Y9, Y7, Y0
	VPUNPCKHQDQ Y9, Y7, Y9
	VMOVDQA Y0, Y7
	VPSLLQ $0x20, Y5, Y5
	VPSLLQ $0x20, Y9, Y9
	VPBLENDD $0xaa, Y5, Y3, Y3
	VPBLENDD $0xaa, Y9, Y7, Y7
	VMOVDQU Y3, 960(AX)
	VMOVDQU Y7, 992(AX)
	RET

// func addAVX2(p *[256]uint32, a *[256]uint32, b *[256]uint32)
// Requires: AVX, AVX2
//
// addAVX2 sets p[i] = a[i] + b[i] for all 256 coefficients. The sum is a
// plain 32-bit lane add (VPADDD): no modular reduction is applied and an
// overflowing sum wraps around.
//
// The body is fully unrolled into four rounds of 64 coefficients. Each round
// loads a into the even registers Y0..Y14 and b into the odd registers
// Y1..Y15, adds pairwise into the odd registers and stores them to p. All
// memory accesses are unaligned (VMOVDQU). A round finishes its loads before
// it stores, so p may be the same array as a or b.
//
// NOTE(review): the function returns without VZEROUPPER - confirm that this
// is intended; any change belongs in the generator, not in this file.
TEXT ·addAVX2(SB), NOSPLIT, $0-24
	// AX = p (destination), CX = a, DX = b.
	MOVQ p+0(FP), AX
	MOVQ a+8(FP), CX
	MOVQ b+16(FP), DX

	// Round 0: coefficients 0..63 (byte offsets 0..255).
	VMOVDQU (CX), Y0
	VMOVDQU 32(CX), Y2
	VMOVDQU 64(CX), Y4
	VMOVDQU 96(CX), Y6
	VMOVDQU 128(CX), Y8
	VMOVDQU 160(CX), Y10
	VMOVDQU 192(CX), Y12
	VMOVDQU 224(CX), Y14
	VMOVDQU (DX), Y1
	VMOVDQU 32(DX), Y3
	VMOVDQU 64(DX), Y5
	VMOVDQU 96(DX), Y7
	VMOVDQU 128(DX), Y9
	VMOVDQU 160(DX), Y11
	VMOVDQU 192(DX), Y13
	VMOVDQU 224(DX), Y15
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VPADDD Y8, Y9, Y9
	VPADDD Y10, Y11, Y11
	VPADDD Y12, Y13, Y13
	VPADDD Y14, Y15, Y15
	VMOVDQU Y1, (AX)
	VMOVDQU Y3, 32(AX)
	VMOVDQU Y5, 64(AX)
	VMOVDQU Y7, 96(AX)
	VMOVDQU Y9, 128(AX)
	VMOVDQU Y11, 160(AX)
	VMOVDQU Y13, 192(AX)
	VMOVDQU Y15, 224(AX)

	// Round 1: coefficients 64..127 (byte offsets 256..511).
	VMOVDQU 256(CX), Y0
	VMOVDQU 288(CX), Y2
	VMOVDQU 320(CX), Y4
	VMOVDQU 352(CX), Y6
	VMOVDQU 384(CX), Y8
	VMOVDQU 416(CX), Y10
	VMOVDQU 448(CX), Y12
	VMOVDQU 480(CX), Y14
	VMOVDQU 256(DX), Y1
	VMOVDQU 288(DX), Y3
	VMOVDQU 320(DX), Y5
	VMOVDQU 352(DX), Y7
	VMOVDQU 384(DX), Y9
	VMOVDQU 416(DX), Y11
	VMOVDQU 448(DX), Y13
	VMOVDQU 480(DX), Y15
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VPADDD Y8, Y9, Y9
	VPADDD Y10, Y11, Y11
	VPADDD Y12, Y13, Y13
	VPADDD Y14, Y15, Y15
	VMOVDQU Y1, 256(AX)
	VMOVDQU Y3, 288(AX)
	VMOVDQU Y5, 320(AX)
	VMOVDQU Y7, 352(AX)
	VMOVDQU Y9, 384(AX)
	VMOVDQU Y11, 416(AX)
	VMOVDQU Y13, 448(AX)
	VMOVDQU Y15, 480(AX)

	// Round 2: coefficients 128..191 (byte offsets 512..767).
	VMOVDQU 512(CX), Y0
	VMOVDQU 544(CX), Y2
	VMOVDQU 576(CX), Y4
	VMOVDQU 608(CX), Y6
	VMOVDQU 640(CX), Y8
	VMOVDQU 672(CX), Y10
	VMOVDQU 704(CX), Y12
	VMOVDQU 736(CX), Y14
	VMOVDQU 512(DX), Y1
	VMOVDQU 544(DX), Y3
	VMOVDQU 576(DX), Y5
	VMOVDQU 608(DX), Y7
	VMOVDQU 640(DX), Y9
	VMOVDQU 672(DX), Y11
	VMOVDQU 704(DX), Y13
	VMOVDQU 736(DX), Y15
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VPADDD Y8, Y9, Y9
	VPADDD Y10, Y11, Y11
	VPADDD Y12, Y13, Y13
	VPADDD Y14, Y15, Y15
	VMOVDQU Y1, 512(AX)
	VMOVDQU Y3, 544(AX)
	VMOVDQU Y5, 576(AX)
	VMOVDQU Y7, 608(AX)
	VMOVDQU Y9, 640(AX)
	VMOVDQU Y11, 672(AX)
	VMOVDQU Y13, 704(AX)
	VMOVDQU Y15, 736(AX)

	// Round 3: coefficients 192..255 (byte offsets 768..1023).
	VMOVDQU 768(CX), Y0
	VMOVDQU 800(CX), Y2
	VMOVDQU 832(CX), Y4
	VMOVDQU 864(CX), Y6
	VMOVDQU 896(CX), Y8
	VMOVDQU 928(CX), Y10
	VMOVDQU 960(CX), Y12
	VMOVDQU 992(CX), Y14
	VMOVDQU 768(DX), Y1
	VMOVDQU 800(DX), Y3
	VMOVDQU 832(DX), Y5
	VMOVDQU 864(DX), Y7
	VMOVDQU 896(DX), Y9
	VMOVDQU 928(DX), Y11
	VMOVDQU 960(DX), Y13
	VMOVDQU 992(DX), Y15
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VPADDD Y8, Y9, Y9
	VPADDD Y10, Y11, Y11
	VPADDD Y12, Y13, Y13
	VPADDD Y14, Y15, Y15
	VMOVDQU Y1, 768(AX)
	VMOVDQU Y3, 800(AX)
	VMOVDQU Y5, 832(AX)
	VMOVDQU Y7, 864(AX)
VMOVDQU Y9, 896(AX)
	VMOVDQU Y11, 928(AX)
	VMOVDQU Y13, 960(AX)
	VMOVDQU Y15, 992(AX)
	RET

// func subAVX2(p *[256]uint32, a *[256]uint32, b *[256]uint32)
// Requires: AVX, AVX2
//
// subAVX2 sets p[i] = a[i] + (0x00ffc002 - b[i]) for all 256 coefficients,
// using 32-bit lane arithmetic. 0x00ffc002 equals 2*0x007fe001 (2*8380417);
// presumably that is twice the modulus q used elsewhere in this file -
// confirm against the package constants. Adding the constant keeps the
// difference from wrapping below zero as long as b[i] <= 0x00ffc002. The
// result is not reduced.
//
// The body is fully unrolled into eight rounds of 32 coefficients: a goes
// into Y0/Y2/Y4/Y6, b into Y1/Y3/Y5/Y7, and the result is built in the odd
// registers. A round finishes its loads before it stores, so p may be the
// same array as a or b.
//
// NOTE(review): the function returns without VZEROUPPER - confirm that this
// is intended; any change belongs in the generator, not in this file.
TEXT ·subAVX2(SB), NOSPLIT, $0-24
	// AX = p (destination), CX = a, DX = b.
	MOVQ p+0(FP), AX
	MOVQ a+8(FP), CX
	MOVQ b+16(FP), DX

	// Y8 = 0x00ffc002 broadcast to all eight 32-bit lanes.
	MOVL $0x00ffc002, BX
	VMOVD BX, X0
	VPBROADCASTD X0, Y8

	// Round 0: coefficients 0..31.
	VMOVDQU (CX), Y0
	VMOVDQU 32(CX), Y2
	VMOVDQU 64(CX), Y4
	VMOVDQU 96(CX), Y6
	VMOVDQU (DX), Y1
	VMOVDQU 32(DX), Y3
	VMOVDQU 64(DX), Y5
	VMOVDQU 96(DX), Y7

	// odd = Y8 - b (the Go assembler puts the destination last).
	VPSUBD Y1, Y8, Y1
	VPSUBD Y3, Y8, Y3
	VPSUBD Y5, Y8, Y5
	VPSUBD Y7, Y8, Y7

	// odd += a.
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VMOVDQU Y1, (AX)
	VMOVDQU Y3, 32(AX)
	VMOVDQU Y5, 64(AX)
	VMOVDQU Y7, 96(AX)

	// Round 1: coefficients 32..63.
	VMOVDQU 128(CX), Y0
	VMOVDQU 160(CX), Y2
	VMOVDQU 192(CX), Y4
	VMOVDQU 224(CX), Y6
	VMOVDQU 128(DX), Y1
	VMOVDQU 160(DX), Y3
	VMOVDQU 192(DX), Y5
	VMOVDQU 224(DX), Y7
	VPSUBD Y1, Y8, Y1
	VPSUBD Y3, Y8, Y3
	VPSUBD Y5, Y8, Y5
	VPSUBD Y7, Y8, Y7
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VMOVDQU Y1, 128(AX)
	VMOVDQU Y3, 160(AX)
	VMOVDQU Y5, 192(AX)
	VMOVDQU Y7, 224(AX)

	// Round 2: coefficients 64..95.
	VMOVDQU 256(CX), Y0
	VMOVDQU 288(CX), Y2
	VMOVDQU 320(CX), Y4
	VMOVDQU 352(CX), Y6
	VMOVDQU 256(DX), Y1
	VMOVDQU 288(DX), Y3
	VMOVDQU 320(DX), Y5
	VMOVDQU 352(DX), Y7
	VPSUBD Y1, Y8, Y1
	VPSUBD Y3, Y8, Y3
	VPSUBD Y5, Y8, Y5
	VPSUBD Y7, Y8, Y7
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VMOVDQU Y1, 256(AX)
	VMOVDQU Y3, 288(AX)
	VMOVDQU Y5, 320(AX)
	VMOVDQU Y7, 352(AX)

	// Round 3: coefficients 96..127.
	VMOVDQU 384(CX), Y0
	VMOVDQU 416(CX), Y2
	VMOVDQU 448(CX), Y4
	VMOVDQU 480(CX), Y6
	VMOVDQU 384(DX), Y1
	VMOVDQU 416(DX), Y3
	VMOVDQU 448(DX), Y5
	VMOVDQU 480(DX), Y7
	VPSUBD Y1, Y8, Y1
	VPSUBD Y3, Y8, Y3
	VPSUBD Y5, Y8, Y5
	VPSUBD Y7, Y8, Y7
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VMOVDQU Y1, 384(AX)
	VMOVDQU Y3, 416(AX)
	VMOVDQU Y5, 448(AX)
	VMOVDQU Y7, 480(AX)

	// Round 4: coefficients 128..159.
	VMOVDQU 512(CX), Y0
	VMOVDQU 544(CX), Y2
	VMOVDQU 576(CX), Y4
	VMOVDQU 608(CX), Y6
	VMOVDQU 512(DX), Y1
	VMOVDQU 544(DX), Y3
	VMOVDQU 576(DX), Y5
	VMOVDQU 608(DX), Y7

	// The operands of the next instruction are on the following line of the file.
	VPSUBD
Y1, Y8, Y1
	VPSUBD Y3, Y8, Y3
	VPSUBD Y5, Y8, Y5
	VPSUBD Y7, Y8, Y7
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VMOVDQU Y1, 512(AX)
	VMOVDQU Y3, 544(AX)
	VMOVDQU Y5, 576(AX)
	VMOVDQU Y7, 608(AX)

	// subAVX2, coefficients 160..191.
	VMOVDQU 640(CX), Y0
	VMOVDQU 672(CX), Y2
	VMOVDQU 704(CX), Y4
	VMOVDQU 736(CX), Y6
	VMOVDQU 640(DX), Y1
	VMOVDQU 672(DX), Y3
	VMOVDQU 704(DX), Y5
	VMOVDQU 736(DX), Y7
	VPSUBD Y1, Y8, Y1
	VPSUBD Y3, Y8, Y3
	VPSUBD Y5, Y8, Y5
	VPSUBD Y7, Y8, Y7
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VMOVDQU Y1, 640(AX)
	VMOVDQU Y3, 672(AX)
	VMOVDQU Y5, 704(AX)
	VMOVDQU Y7, 736(AX)

	// subAVX2, coefficients 192..223.
	VMOVDQU 768(CX), Y0
	VMOVDQU 800(CX), Y2
	VMOVDQU 832(CX), Y4
	VMOVDQU 864(CX), Y6
	VMOVDQU 768(DX), Y1
	VMOVDQU 800(DX), Y3
	VMOVDQU 832(DX), Y5
	VMOVDQU 864(DX), Y7
	VPSUBD Y1, Y8, Y1
	VPSUBD Y3, Y8, Y3
	VPSUBD Y5, Y8, Y5
	VPSUBD Y7, Y8, Y7
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VMOVDQU Y1, 768(AX)
	VMOVDQU Y3, 800(AX)
	VMOVDQU Y5, 832(AX)
	VMOVDQU Y7, 864(AX)

	// subAVX2, coefficients 224..255.
	VMOVDQU 896(CX), Y0
	VMOVDQU 928(CX), Y2
	VMOVDQU 960(CX), Y4
	VMOVDQU 992(CX), Y6
	VMOVDQU 896(DX), Y1
	VMOVDQU 928(DX), Y3
	VMOVDQU 960(DX), Y5
	VMOVDQU 992(DX), Y7
	VPSUBD Y1, Y8, Y1
	VPSUBD Y3, Y8, Y3
	VPSUBD Y5, Y8, Y5
	VPSUBD Y7, Y8, Y7
	VPADDD Y0, Y1, Y1
	VPADDD Y2, Y3, Y3
	VPADDD Y4, Y5, Y5
	VPADDD Y6, Y7, Y7
	VMOVDQU Y1, 896(AX)
	VMOVDQU Y3, 928(AX)
	VMOVDQU Y5, 960(AX)
	VMOVDQU Y7, 992(AX)
	RET

// func packLe16AVX2(p *[256]uint32, buf *byte)
// Requires: AVX, AVX2
//
// packLe16AVX2 packs the 256 coefficients of p into 128 bytes at buf, two
// coefficients per byte: buf[i] = p[2*i] | p[2*i+1]<<4. Equivalently, every
// 32-bit output word m holds p[8*m+j] in bits 4*j..4*j+3 for j = 0..7.
//
// The code never masks its inputs, so it assumes every coefficient already
// fits in 4 bits (< 16); wider values would spill into the neighbouring
// nibbles. buf must have room for 128 bytes (four 32-byte stores at offsets
// 0, 32, 64 and 96).
//
// The body is unrolled into four rounds of 64 coefficients (8 vectors of 8
// lanes). Each round transposes the 8x8 block of 32-bit lanes so that
// register k holds the k-th coefficient of every group of eight, shifts
// register k left by 4*k bits and ORs the eight registers together.
//
// NOTE(review): the function returns without VZEROUPPER - confirm that this
// is intended; any change belongs in the generator, not in this file.
TEXT ·packLe16AVX2(SB), NOSPLIT, $0-16
	// AX = p (source), CX = buf (destination).
	MOVQ p+0(FP), AX
	MOVQ buf+8(FP), CX

	// Round 0: coefficients 0..63 -> buf[0..31].
	// Transpose step 1: interleave 32-bit lanes of neighbouring vectors.
	VMOVDQU (AX), Y0
	VPUNPCKLDQ 32(AX), Y0, Y0
	VMOVDQU (AX), Y2
	VPUNPCKHDQ 32(AX), Y2, Y2
	VMOVDQU 64(AX), Y4
	VPUNPCKLDQ 96(AX), Y4, Y4
	VMOVDQU 64(AX), Y6
	VPUNPCKHDQ 96(AX), Y6, Y6
	VMOVDQU 128(AX), Y8
	VPUNPCKLDQ 160(AX), Y8, Y8
	VMOVDQU 128(AX), Y10
	VPUNPCKHDQ 160(AX), Y10, Y10
	VMOVDQU 192(AX), Y11
	VPUNPCKLDQ 224(AX), Y11, Y11
	VMOVDQU 192(AX), Y13
	VPUNPCKHDQ 224(AX), Y13, Y13

	// Transpose step 2: interleave 64-bit pairs.
	VPUNPCKLQDQ Y4, Y0, Y1
	VPUNPCKHQDQ Y4, Y0, Y3
	VPUNPCKLQDQ Y6, Y2, Y5
	VPUNPCKHQDQ Y6, Y2, Y7
	VPUNPCKLQDQ Y11, Y8, Y9
	VPUNPCKHQDQ Y11, Y8, Y11
	VPUNPCKLQDQ Y13, Y10, Y12
	VPUNPCKHQDQ Y13, Y10, Y13

	// Transpose step 3: combine 128-bit halves. Afterwards Y0, Y2, Y4, Y6,
	// Y8, Y10, Y11, Y13 hold coefficient 0, 1, ..., 7 of each group of eight.
	VPERM2I128 $0x20, Y9, Y1, Y0
	VPERM2I128 $0x20, Y11, Y3, Y2
	VPERM2I128 $0x20, Y12, Y5, Y4
	VPERM2I128 $0x20, Y13, Y7, Y6
	VPERM2I128 $0x31, Y9, Y1, Y8
	VPERM2I128 $0x31, Y11, Y3, Y10
	VPERM2I128 $0x31, Y12, Y5, Y11
	VPERM2I128 $0x31, Y13, Y7, Y13

	// Move coefficient k to bit position 4*k, then OR everything into Y13.
	VPSLLD $0x04, Y2, Y2
	VPSLLD $0x08, Y4, Y4
	VPSLLD $0x0c, Y6, Y6
	VPSLLD $0x10, Y8, Y8
	VPSLLD $0x14, Y10, Y10
	VPSLLD $0x18, Y11, Y11
	VPSLLD $0x1c, Y13, Y13
	VPOR Y0, Y2, Y2
	VPOR Y4, Y6, Y6
	VPOR Y8, Y10, Y10
	VPOR Y11, Y13, Y13
	VPOR Y2, Y6, Y6
	VPOR Y10, Y13, Y13
	VPOR Y6, Y13, Y13
	VMOVDQU Y13, (CX)

	// Round 1: coefficients 64..127 -> buf[32..63].
	VMOVDQU 256(AX), Y0
	VPUNPCKLDQ 288(AX), Y0, Y0
	VMOVDQU 256(AX), Y2
	VPUNPCKHDQ 288(AX), Y2, Y2
	VMOVDQU 320(AX), Y4
	VPUNPCKLDQ 352(AX), Y4, Y4
	VMOVDQU 320(AX), Y6
	VPUNPCKHDQ 352(AX), Y6, Y6
	VMOVDQU 384(AX), Y8
	VPUNPCKLDQ 416(AX), Y8, Y8
	VMOVDQU 384(AX), Y10
	VPUNPCKHDQ 416(AX), Y10, Y10
	VMOVDQU 448(AX), Y11
	VPUNPCKLDQ 480(AX), Y11, Y11
	VMOVDQU 448(AX), Y13
	VPUNPCKHDQ 480(AX), Y13, Y13
	VPUNPCKLQDQ Y4, Y0, Y1
	VPUNPCKHQDQ Y4, Y0, Y3
	VPUNPCKLQDQ Y6, Y2, Y5
	VPUNPCKHQDQ Y6, Y2, Y7
	VPUNPCKLQDQ Y11, Y8, Y9
	VPUNPCKHQDQ Y11, Y8, Y11
	VPUNPCKLQDQ Y13, Y10, Y12
	VPUNPCKHQDQ Y13, Y10, Y13
	VPERM2I128 $0x20, Y9, Y1, Y0
	VPERM2I128 $0x20, Y11, Y3, Y2
	VPERM2I128 $0x20, Y12, Y5, Y4
	VPERM2I128 $0x20, Y13, Y7, Y6
	VPERM2I128 $0x31, Y9, Y1, Y8
	VPERM2I128 $0x31, Y11, Y3, Y10
	VPERM2I128 $0x31, Y12, Y5, Y11
	VPERM2I128 $0x31, Y13, Y7, Y13
	VPSLLD $0x04, Y2, Y2
	VPSLLD $0x08, Y4, Y4
	VPSLLD $0x0c, Y6, Y6
	VPSLLD $0x10, Y8, Y8
	VPSLLD $0x14, Y10, Y10
	VPSLLD $0x18, Y11, Y11
	VPSLLD $0x1c, Y13, Y13
	VPOR Y0, Y2, Y2
	VPOR Y4, Y6, Y6
	VPOR Y8, Y10, Y10
	VPOR Y11, Y13, Y13
	VPOR Y2, Y6, Y6
	VPOR Y10, Y13, Y13
	VPOR Y6, Y13, Y13
	VMOVDQU Y13, 32(CX)

	// Round 2: coefficients 128..191 -> buf[64..95].
	VMOVDQU 512(AX), Y0
	VPUNPCKLDQ 544(AX), Y0, Y0
	VMOVDQU 512(AX), Y2
	VPUNPCKHDQ 544(AX), Y2, Y2
	VMOVDQU 576(AX), Y4
	VPUNPCKLDQ 608(AX), Y4, Y4
	VMOVDQU 576(AX), Y6
	VPUNPCKHDQ 608(AX), Y6, Y6
	VMOVDQU 640(AX), Y8
	VPUNPCKLDQ 672(AX), Y8, Y8
	VMOVDQU 640(AX), Y10
VPUNPCKHDQ 672(AX), Y10, Y10
	VMOVDQU 704(AX), Y11
	VPUNPCKLDQ 736(AX), Y11, Y11
	VMOVDQU 704(AX), Y13
	VPUNPCKHDQ 736(AX), Y13, Y13
	VPUNPCKLQDQ Y4, Y0, Y1
	VPUNPCKHQDQ Y4, Y0, Y3
	VPUNPCKLQDQ Y6, Y2, Y5
	VPUNPCKHQDQ Y6, Y2, Y7
	VPUNPCKLQDQ Y11, Y8, Y9
	VPUNPCKHQDQ Y11, Y8, Y11
	VPUNPCKLQDQ Y13, Y10, Y12
	VPUNPCKHQDQ Y13, Y10, Y13
	VPERM2I128 $0x20, Y9, Y1, Y0
	VPERM2I128 $0x20, Y11, Y3, Y2
	VPERM2I128 $0x20, Y12, Y5, Y4
	VPERM2I128 $0x20, Y13, Y7, Y6
	VPERM2I128 $0x31, Y9, Y1, Y8
	VPERM2I128 $0x31, Y11, Y3, Y10
	VPERM2I128 $0x31, Y12, Y5, Y11
	VPERM2I128 $0x31, Y13, Y7, Y13
	VPSLLD $0x04, Y2, Y2
	VPSLLD $0x08, Y4, Y4
	VPSLLD $0x0c, Y6, Y6
	VPSLLD $0x10, Y8, Y8
	VPSLLD $0x14, Y10, Y10
	VPSLLD $0x18, Y11, Y11
	VPSLLD $0x1c, Y13, Y13
	VPOR Y0, Y2, Y2
	VPOR Y4, Y6, Y6
	VPOR Y8, Y10, Y10
	VPOR Y11, Y13, Y13
	VPOR Y2, Y6, Y6
	VPOR Y10, Y13, Y13
	VPOR Y6, Y13, Y13
	VMOVDQU Y13, 64(CX)

	// packLe16AVX2, coefficients 192..255 -> buf[96..127].
	VMOVDQU 768(AX), Y0
	VPUNPCKLDQ 800(AX), Y0, Y0
	VMOVDQU 768(AX), Y2
	VPUNPCKHDQ 800(AX), Y2, Y2
	VMOVDQU 832(AX), Y4
	VPUNPCKLDQ 864(AX), Y4, Y4
	VMOVDQU 832(AX), Y6
	VPUNPCKHDQ 864(AX), Y6, Y6
	VMOVDQU 896(AX), Y8
	VPUNPCKLDQ 928(AX), Y8, Y8
	VMOVDQU 896(AX), Y10
	VPUNPCKHDQ 928(AX), Y10, Y10
	VMOVDQU 960(AX), Y11
	VPUNPCKLDQ 992(AX), Y11, Y11
	VMOVDQU 960(AX), Y13
	VPUNPCKHDQ 992(AX), Y13, Y13
	VPUNPCKLQDQ Y4, Y0, Y1
	VPUNPCKHQDQ Y4, Y0, Y3
	VPUNPCKLQDQ Y6, Y2, Y5
	VPUNPCKHQDQ Y6, Y2, Y7
	VPUNPCKLQDQ Y11, Y8, Y9
	VPUNPCKHQDQ Y11, Y8, Y11
	VPUNPCKLQDQ Y13, Y10, Y12
	VPUNPCKHQDQ Y13, Y10, Y13
	VPERM2I128 $0x20, Y9, Y1, Y0
	VPERM2I128 $0x20, Y11, Y3, Y2
	VPERM2I128 $0x20, Y12, Y5, Y4
	VPERM2I128 $0x20, Y13, Y7, Y6
	VPERM2I128 $0x31, Y9, Y1, Y8
	VPERM2I128 $0x31, Y11, Y3, Y10
	VPERM2I128 $0x31, Y12, Y5, Y11
	VPERM2I128 $0x31, Y13, Y7, Y13
	VPSLLD $0x04, Y2, Y2
	VPSLLD $0x08, Y4, Y4
	VPSLLD $0x0c, Y6, Y6
	VPSLLD $0x10, Y8, Y8
	VPSLLD $0x14, Y10, Y10
	VPSLLD $0x18, Y11, Y11
	VPSLLD $0x1c, Y13, Y13
	VPOR Y0, Y2, Y2
	VPOR Y4, Y6, Y6
	VPOR Y8, Y10, Y10
	VPOR Y11, Y13, Y13
	VPOR Y2, Y6, Y6
	VPOR Y10, Y13, Y13
	VPOR Y6, Y13, Y13
	VMOVDQU Y13, 96(CX)
	RET

// func reduceLe2QAVX2(p *[256]uint32)
// Requires: AVX, AVX2
//
// reduceLe2QAVX2 replaces every coefficient x of p, in place, by
//
//	(x & 0x7fffff) + ((x >> 23) << 13) - (x >> 23)
//
// that is, the low 23 bits of x plus (2^13 - 1) times the remaining high
// bits. Since 2^23 = 2^13 - 1 modulo 0x7fe001 (= 2^23 - 2^13 + 1 = 8380417),
// the result is congruent to x modulo 0x7fe001. For any 32-bit x the result
// is below 2^23 + 2^22, which is less than 2*0x7fe001. The value is not
// fully reduced.
//
// The body is fully unrolled into eight rounds of 32 coefficients. Register
// use per round: Y0/Y3/Y6/Y9 hold x and then the result, Y1/Y4/Y7/Y10 hold
// x >> 23, Y2/Y5/Y8/Y11 hold the scaled high part, Y12 holds the mask.
//
// NOTE(review): the function returns without VZEROUPPER - confirm that this
// is intended; any change belongs in the generator, not in this file.
TEXT ·reduceLe2QAVX2(SB), NOSPLIT, $0-8
	MOVQ p+0(FP), AX

	// Y12 = 0x007fffff (2^23 - 1) broadcast to all eight 32-bit lanes.
	MOVL $0x007fffff, CX
	VMOVD CX, X0
	VPBROADCASTD X0, Y12

	// Round 0: coefficients 0..31.
	VMOVDQU (AX), Y0
	VMOVDQU 32(AX), Y3
	VMOVDQU 64(AX), Y6
	VMOVDQU 96(AX), Y9

	// hi = x >> 23 (logical shift).
	VPSRLD $0x17, Y0, Y1
	VPSRLD $0x17, Y3, Y4
	VPSRLD $0x17, Y6, Y7
	VPSRLD $0x17, Y9, Y10

	// lo = x & (2^23 - 1).
	VPAND Y0, Y12, Y0
	VPAND Y3, Y12, Y3
	VPAND Y6, Y12, Y6
	VPAND Y9, Y12, Y9

	// t = (hi << 13) - hi.
	VPSLLD $0x0d, Y1, Y2
	VPSLLD $0x0d, Y4, Y5
	VPSLLD $0x0d, Y7, Y8
	VPSLLD $0x0d, Y10, Y11
	VPSUBD Y1, Y2, Y2
	VPSUBD Y4, Y5, Y5
	VPSUBD Y7, Y8, Y8
	VPSUBD Y10, Y11, Y11

	// result = lo + t.
	VPADDD Y0, Y2, Y0
	VPADDD Y3, Y5, Y3
	VPADDD Y6, Y8, Y6
	VPADDD Y9, Y11, Y9
	VMOVDQU Y0, (AX)
	VMOVDQU Y3, 32(AX)
	VMOVDQU Y6, 64(AX)
	VMOVDQU Y9, 96(AX)

	// Round 1: coefficients 32..63.
	VMOVDQU 128(AX), Y0
	VMOVDQU 160(AX), Y3
	VMOVDQU 192(AX), Y6
	VMOVDQU 224(AX), Y9
	VPSRLD $0x17, Y0, Y1
	VPSRLD $0x17, Y3, Y4
	VPSRLD $0x17, Y6, Y7
	VPSRLD $0x17, Y9, Y10
	VPAND Y0, Y12, Y0
	VPAND Y3, Y12, Y3
	VPAND Y6, Y12, Y6
	VPAND Y9, Y12, Y9
	VPSLLD $0x0d, Y1, Y2
	VPSLLD $0x0d, Y4, Y5
	VPSLLD $0x0d, Y7, Y8
	VPSLLD $0x0d, Y10, Y11
	VPSUBD Y1, Y2, Y2
	VPSUBD Y4, Y5, Y5
	VPSUBD Y7, Y8, Y8
	VPSUBD Y10, Y11, Y11
	VPADDD Y0, Y2, Y0
	VPADDD Y3, Y5, Y3
	VPADDD Y6, Y8, Y6
	VPADDD Y9, Y11, Y9
	VMOVDQU Y0, 128(AX)
	VMOVDQU Y3, 160(AX)
	VMOVDQU Y6, 192(AX)
	VMOVDQU Y9, 224(AX)

	// Round 2: coefficients 64..95.
	VMOVDQU 256(AX), Y0
	VMOVDQU 288(AX), Y3
	VMOVDQU 320(AX), Y6
	VMOVDQU 352(AX), Y9
	VPSRLD $0x17, Y0, Y1
	VPSRLD $0x17, Y3, Y4
	VPSRLD $0x17, Y6, Y7
	VPSRLD $0x17, Y9, Y10
	VPAND Y0, Y12, Y0
	VPAND Y3, Y12, Y3
	VPAND Y6, Y12, Y6
	VPAND Y9, Y12, Y9
	VPSLLD $0x0d, Y1, Y2
	VPSLLD $0x0d, Y4, Y5
	VPSLLD $0x0d, Y7, Y8
	VPSLLD $0x0d, Y10, Y11
	VPSUBD Y1, Y2, Y2
	VPSUBD Y4, Y5, Y5
	VPSUBD Y7, Y8, Y8
	VPSUBD Y10, Y11, Y11
	VPADDD Y0, Y2, Y0
	VPADDD Y3, Y5, Y3
	VPADDD Y6, Y8, Y6
	VPADDD Y9, Y11, Y9
	VMOVDQU Y0, 256(AX)
	VMOVDQU Y3, 288(AX)
	VMOVDQU Y6, 320(AX)
	VMOVDQU Y9, 352(AX)

	// Round 3: coefficients 96..127.
	VMOVDQU 384(AX), Y0
	VMOVDQU 416(AX), Y3
	VMOVDQU 448(AX), Y6
	VMOVDQU 480(AX), Y9
	VPSRLD $0x17, Y0, Y1
	VPSRLD $0x17, Y3, Y4
	VPSRLD $0x17, Y6, Y7
	VPSRLD $0x17, Y9, Y10
	VPAND Y0, Y12, Y0
	VPAND Y3, Y12, Y3
	VPAND Y6, Y12, Y6
	VPAND Y9, Y12, Y9
	VPSLLD $0x0d, Y1, Y2
	VPSLLD $0x0d, Y4, Y5
	VPSLLD $0x0d, Y7, Y8
	VPSLLD $0x0d, Y10, Y11
	VPSUBD Y1, Y2, Y2
	VPSUBD Y4, Y5, Y5
	VPSUBD Y7, Y8, Y8
	VPSUBD Y10, Y11, Y11
	VPADDD Y0, Y2, Y0
	VPADDD Y3, Y5, Y3
	VPADDD Y6, Y8, Y6
	VPADDD Y9, Y11, Y9
	VMOVDQU Y0, 384(AX)
	VMOVDQU Y3, 416(AX)
	VMOVDQU Y6, 448(AX)
	VMOVDQU Y9, 480(AX)

	// Round 4: coefficients 128..159.
	VMOVDQU 512(AX), Y0
	VMOVDQU 544(AX), Y3
	VMOVDQU 576(AX), Y6
	VMOVDQU 608(AX), Y9
	VPSRLD $0x17, Y0, Y1
	VPSRLD $0x17, Y3, Y4
	VPSRLD $0x17, Y6, Y7
	VPSRLD $0x17, Y9, Y10
	VPAND Y0, Y12, Y0
	VPAND Y3, Y12, Y3
	VPAND Y6, Y12, Y6
	VPAND Y9, Y12, Y9
	VPSLLD $0x0d, Y1, Y2
	VPSLLD $0x0d, Y4, Y5
	VPSLLD $0x0d, Y7, Y8
	VPSLLD $0x0d, Y10, Y11
	VPSUBD Y1, Y2, Y2
	VPSUBD Y4, Y5, Y5
	VPSUBD Y7, Y8, Y8
	VPSUBD Y10, Y11, Y11
	VPADDD Y0, Y2, Y0
	VPADDD Y3, Y5, Y3
	VPADDD Y6, Y8, Y6
	VPADDD Y9, Y11, Y9
	VMOVDQU Y0, 512(AX)
	VMOVDQU Y3, 544(AX)
	VMOVDQU Y6, 576(AX)
	VMOVDQU Y9, 608(AX)

	// Round 5: coefficients 160..191.
	VMOVDQU 640(AX), Y0
	VMOVDQU 672(AX), Y3
	VMOVDQU 704(AX), Y6
	VMOVDQU 736(AX), Y9
	VPSRLD $0x17, Y0, Y1
	VPSRLD $0x17, Y3, Y4
	VPSRLD $0x17, Y6, Y7
	VPSRLD $0x17, Y9, Y10
	VPAND Y0, Y12, Y0
	VPAND Y3, Y12, Y3
	VPAND Y6, Y12, Y6
	VPAND Y9, Y12, Y9
	VPSLLD $0x0d, Y1, Y2
	VPSLLD $0x0d, Y4, Y5
	VPSLLD $0x0d, Y7, Y8
	VPSLLD $0x0d, Y10, Y11
	VPSUBD Y1, Y2, Y2
	VPSUBD Y4, Y5, Y5
	VPSUBD Y7, Y8, Y8
	VPSUBD Y10, Y11, Y11
	VPADDD Y0, Y2, Y0
	VPADDD Y3, Y5, Y3
	VPADDD Y6, Y8, Y6
	VPADDD Y9, Y11, Y9
	VMOVDQU Y0, 640(AX)
	VMOVDQU Y3, 672(AX)
	VMOVDQU Y6, 704(AX)
	VMOVDQU Y9, 736(AX)

	// Round 6: coefficients 192..223.
	VMOVDQU 768(AX), Y0
	VMOVDQU 800(AX), Y3
	VMOVDQU 832(AX), Y6
	VMOVDQU 864(AX), Y9
	VPSRLD $0x17, Y0, Y1
	VPSRLD $0x17, Y3, Y4
	VPSRLD $0x17, Y6, Y7
	VPSRLD $0x17, Y9, Y10
	VPAND Y0, Y12, Y0
	VPAND Y3, Y12, Y3
	VPAND Y6, Y12, Y6
	VPAND Y9, Y12, Y9
	VPSLLD $0x0d, Y1, Y2
	VPSLLD $0x0d, Y4, Y5
	VPSLLD $0x0d, Y7, Y8
	VPSLLD $0x0d, Y10, Y11
	VPSUBD Y1, Y2, Y2
	VPSUBD Y4, Y5, Y5
	VPSUBD Y7, Y8, Y8
	VPSUBD Y10, Y11, Y11
	VPADDD Y0, Y2, Y0
	VPADDD Y3, Y5, Y3
	VPADDD Y6, Y8, Y6
	VPADDD Y9, Y11, Y9
	VMOVDQU Y0, 768(AX)
	VMOVDQU Y3, 800(AX)
	VMOVDQU Y6, 832(AX)

	// The operands of the next instruction are on the following line of the file.
	VMOVDQU
Y9, 864(AX)

	// reduceLe2QAVX2, coefficients 224..255.
	VMOVDQU 896(AX), Y0
	VMOVDQU 928(AX), Y3
	VMOVDQU 960(AX), Y6
	VMOVDQU 992(AX), Y9
	VPSRLD $0x17, Y0, Y1
	VPSRLD $0x17, Y3, Y4
	VPSRLD $0x17, Y6, Y7
	VPSRLD $0x17, Y9, Y10
	VPAND Y0, Y12, Y0
	VPAND Y3, Y12, Y3
	VPAND Y6, Y12, Y6
	VPAND Y9, Y12, Y9
	VPSLLD $0x0d, Y1, Y2
	VPSLLD $0x0d, Y4, Y5
	VPSLLD $0x0d, Y7, Y8
	VPSLLD $0x0d, Y10, Y11
	VPSUBD Y1, Y2, Y2
	VPSUBD Y4, Y5, Y5
	VPSUBD Y7, Y8, Y8
	VPSUBD Y10, Y11, Y11
	VPADDD Y0, Y2, Y0
	VPADDD Y3, Y5, Y3
	VPADDD Y6, Y8, Y6
	VPADDD Y9, Y11, Y9
	VMOVDQU Y0, 896(AX)
	VMOVDQU Y3, 928(AX)
	VMOVDQU Y6, 960(AX)
	VMOVDQU Y9, 992(AX)
	RET

// func le2qModQAVX2(p *[256]uint32)
// Requires: AVX, AVX2
//
// le2qModQAVX2 conditionally subtracts 0x007fe001 (8380417) from every
// coefficient of p, in place and without branches: it computes
// y = x - 0x007fe001 and adds 0x007fe001 back exactly when y is negative as
// a signed 32-bit value. For inputs in [0, 2*0x007fe001) the result is
// x mod 0x007fe001. Inputs outside that range are not fully reduced.
//
// The body is fully unrolled into eight rounds of 32 coefficients. Register
// use per round: Y0/Y2/Y4/Y6 hold the values, Y1/Y3/Y5/Y7 hold the
// correction term, Y8 holds the modulus.
//
// NOTE(review): the function returns without VZEROUPPER - confirm that this
// is intended; any change belongs in the generator, not in this file.
TEXT ·le2qModQAVX2(SB), NOSPLIT, $0-8
	MOVQ p+0(FP), AX

	// Y8 = 0x007fe001 broadcast to all eight 32-bit lanes.
	MOVL $0x007fe001, CX
	VMOVD CX, X0
	VPBROADCASTD X0, Y8

	// Round 0: coefficients 0..31.
	VMOVDQU (AX), Y0
	VMOVDQU 32(AX), Y2
	VMOVDQU 64(AX), Y4
	VMOVDQU 96(AX), Y6

	// y = x - modulus.
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6

	// mask = y >> 31 (arithmetic): all ones if y is negative, else zero.
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7

	// correction = mask & modulus.
	VPAND Y1, Y8, Y1
	VPAND Y3, Y8, Y3
	VPAND Y5, Y8, Y5
	VPAND Y7, Y8, Y7

	// result = y + correction.
	VPADDD Y0, Y1, Y0
	VPADDD Y2, Y3, Y2
	VPADDD Y4, Y5, Y4
	VPADDD Y6, Y7, Y6
	VMOVDQU Y0, (AX)
	VMOVDQU Y2, 32(AX)
	VMOVDQU Y4, 64(AX)
	VMOVDQU Y6, 96(AX)

	// Round 1: coefficients 32..63.
	VMOVDQU 128(AX), Y0
	VMOVDQU 160(AX), Y2
	VMOVDQU 192(AX), Y4
	VMOVDQU 224(AX), Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPAND Y1, Y8, Y1
	VPAND Y3, Y8, Y3
	VPAND Y5, Y8, Y5
	VPAND Y7, Y8, Y7
	VPADDD Y0, Y1, Y0
	VPADDD Y2, Y3, Y2
	VPADDD Y4, Y5, Y4
	VPADDD Y6, Y7, Y6
	VMOVDQU Y0, 128(AX)
	VMOVDQU Y2, 160(AX)
	VMOVDQU Y4, 192(AX)
	VMOVDQU Y6, 224(AX)

	// Round 2: coefficients 64..95.
	VMOVDQU 256(AX), Y0
	VMOVDQU 288(AX), Y2
	VMOVDQU 320(AX), Y4
	VMOVDQU 352(AX), Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPAND Y1, Y8, Y1
	VPAND Y3, Y8, Y3
	VPAND Y5, Y8, Y5
	VPAND Y7, Y8, Y7
	VPADDD Y0, Y1, Y0
	VPADDD Y2, Y3, Y2
	VPADDD Y4, Y5, Y4
	VPADDD Y6, Y7, Y6
	VMOVDQU Y0, 256(AX)
	VMOVDQU Y2, 288(AX)
	VMOVDQU Y4, 320(AX)
	VMOVDQU Y6, 352(AX)

	// Round 3: coefficients 96..127.
	VMOVDQU 384(AX), Y0
	VMOVDQU 416(AX), Y2
	VMOVDQU 448(AX), Y4
	VMOVDQU 480(AX), Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPAND Y1, Y8, Y1
	VPAND Y3, Y8, Y3
	VPAND Y5, Y8, Y5
	VPAND Y7, Y8, Y7
	VPADDD Y0, Y1, Y0
	VPADDD Y2, Y3, Y2
	VPADDD Y4, Y5, Y4
	VPADDD Y6, Y7, Y6
	VMOVDQU Y0, 384(AX)
	VMOVDQU Y2, 416(AX)
	VMOVDQU Y4, 448(AX)
	VMOVDQU Y6, 480(AX)

	// Round 4: coefficients 128..159.
	VMOVDQU 512(AX), Y0
	VMOVDQU 544(AX), Y2
	VMOVDQU 576(AX), Y4
	VMOVDQU 608(AX), Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPAND Y1, Y8, Y1
	VPAND Y3, Y8, Y3
	VPAND Y5, Y8, Y5
	VPAND Y7, Y8, Y7
	VPADDD Y0, Y1, Y0
	VPADDD Y2, Y3, Y2
	VPADDD Y4, Y5, Y4
	VPADDD Y6, Y7, Y6
	VMOVDQU Y0, 512(AX)
	VMOVDQU Y2, 544(AX)
	VMOVDQU Y4, 576(AX)
	VMOVDQU Y6, 608(AX)

	// Round 5: coefficients 160..191.
	VMOVDQU 640(AX), Y0
	VMOVDQU 672(AX), Y2
	VMOVDQU 704(AX), Y4
	VMOVDQU 736(AX), Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPAND Y1, Y8, Y1
	VPAND Y3, Y8, Y3
	VPAND Y5, Y8, Y5
	VPAND Y7, Y8, Y7
	VPADDD Y0, Y1, Y0
	VPADDD Y2, Y3, Y2
	VPADDD Y4, Y5, Y4
	VPADDD Y6, Y7, Y6
	VMOVDQU Y0, 640(AX)
	VMOVDQU Y2, 672(AX)
	VMOVDQU Y4, 704(AX)
	VMOVDQU Y6, 736(AX)

	// Round 6: coefficients 192..223.
	VMOVDQU 768(AX), Y0
	VMOVDQU 800(AX), Y2
	VMOVDQU 832(AX), Y4
	VMOVDQU 864(AX), Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPAND Y1, Y8, Y1
	VPAND Y3, Y8, Y3
	VPAND Y5, Y8, Y5
	VPAND Y7, Y8, Y7
	VPADDD Y0, Y1, Y0
	VPADDD Y2, Y3, Y2
	VPADDD Y4, Y5, Y4
	VPADDD Y6, Y7, Y6
	VMOVDQU Y0, 768(AX)
	VMOVDQU Y2, 800(AX)
	VMOVDQU Y4, 832(AX)
	VMOVDQU Y6, 864(AX)

	// Round 7: coefficients 224..255.
	VMOVDQU 896(AX), Y0
	VMOVDQU 928(AX), Y2
	VMOVDQU 960(AX), Y4

	// The destination operand of the next instruction is on the following
	// line of the file.
	VMOVDQU 992(AX),
Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPAND Y1, Y8, Y1
	VPAND Y3, Y8, Y3
	VPAND Y5, Y8, Y5
	VPAND Y7, Y8, Y7
	VPADDD Y0, Y1, Y0
	VPADDD Y2, Y3, Y2
	VPADDD Y4, Y5, Y4
	VPADDD Y6, Y7, Y6
	VMOVDQU Y0, 896(AX)
	VMOVDQU Y2, 928(AX)
	VMOVDQU Y4, 960(AX)
	VMOVDQU Y6, 992(AX)
	RET

// func exceedsAVX2(p *[256]uint32, bound uint32) uint8
// Requires: AVX, AVX2
//
// exceedsAVX2 returns 1 if some coefficient of p has a "centered magnitude"
// that is at least bound, and 0 otherwise. With h = 0x003ff000 (which is
// (0x007fe001 - 1)/2), each coefficient x is mapped to
//
//	t = h - x
//	t = t ^ (t >> 31)      // arithmetic shift: bitwise NOT of t when t < 0
//	m = h - t              // x when x <= h, otherwise 0x007fe001 - x
//
// and the test is whether m - bound is non-negative as a signed 32-bit
// value. For x in [0, 0x007fe001) m is the distance from x to the nearest
// multiple of 0x007fe001; presumably callers pass normalized coefficients -
// confirm at the call sites. p is only read.
//
// The body is fully unrolled into eight rounds of 32 coefficients and
// returns as soon as a vector of 8 coefficients contains an offending value.
// Each check keeps only the sign bit of m - bound in every lane, collects the
// byte sign bits with VPMOVMSKB (0x88888888 when all eight lanes are
// negative) and XORs with 0x88888888: a non-zero result means at least one
// lane has m >= bound.
//
// NOTE(review): the function returns without VZEROUPPER - confirm that this
// is intended; any change belongs in the generator, not in this file.
TEXT ·exceedsAVX2(SB), NOSPLIT, $0-17
	MOVQ p+0(FP), AX

	// Y8 = bound broadcast to all eight 32-bit lanes.
	MOVL bound+8(FP), CX
	VMOVD CX, X0
	VPBROADCASTD X0, Y8

	// Y9 = h = 0x003ff000 in every lane.
	MOVL $0x003ff000, CX
	VMOVD CX, X0
	VPBROADCASTD X0, Y9

	// Y10 = 0x80000000 in every lane (sign-bit mask).
	MOVL $0x80000000, CX
	VMOVD CX, X0
	VPBROADCASTD X0, Y10

	// CX = VPMOVMSKB pattern for "all eight lanes negative".
	MOVL $0x88888888, CX

	// Round 0: coefficients 0..31.
	VMOVDQU (AX), Y0
	VMOVDQU 32(AX), Y2
	VMOVDQU 64(AX), Y4
	VMOVDQU 96(AX), Y6

	// t = h - x.
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6

	// t ^= t >> 31 (arithmetic shift).
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPXOR Y0, Y1, Y0
	VPXOR Y2, Y3, Y2
	VPXOR Y4, Y5, Y4
	VPXOR Y6, Y7, Y6

	// m = h - t.
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6

	// d = m - bound; keep only the sign bit of d.
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPAND Y0, Y10, Y0
	VPAND Y2, Y10, Y2
	VPAND Y4, Y10, Y4
	VPAND Y6, Y10, Y6

	// Exit early unless all eight lanes of a vector are negative.
	VPMOVMSKB Y0, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y2, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y4, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y6, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded

	// Round 1: coefficients 32..63.
	VMOVDQU 128(AX), Y0
	VMOVDQU 160(AX), Y2
	VMOVDQU 192(AX), Y4
	VMOVDQU 224(AX), Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPXOR Y0, Y1, Y0
	VPXOR Y2, Y3, Y2
	VPXOR Y4, Y5, Y4
	VPXOR Y6, Y7, Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPAND Y0, Y10, Y0
	VPAND Y2, Y10, Y2
	VPAND Y4, Y10, Y4
	VPAND Y6, Y10, Y6
	VPMOVMSKB Y0, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y2, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y4, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y6, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded

	// Round 2: coefficients 64..95.
	VMOVDQU 256(AX), Y0
	VMOVDQU 288(AX), Y2
	VMOVDQU 320(AX), Y4
	VMOVDQU 352(AX), Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPXOR Y0, Y1, Y0
	VPXOR Y2, Y3, Y2
	VPXOR Y4, Y5, Y4
	VPXOR Y6, Y7, Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPAND Y0, Y10, Y0
	VPAND Y2, Y10, Y2
	VPAND Y4, Y10, Y4
	VPAND Y6, Y10, Y6
	VPMOVMSKB Y0, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y2, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y4, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y6, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded

	// Round 3: coefficients 96..127.
	VMOVDQU 384(AX), Y0
	VMOVDQU 416(AX), Y2
	VMOVDQU 448(AX), Y4
	VMOVDQU 480(AX), Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPXOR Y0, Y1, Y0
	VPXOR Y2, Y3, Y2
	VPXOR Y4, Y5, Y4
	VPXOR Y6, Y7, Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPAND Y0, Y10, Y0
	VPAND Y2, Y10, Y2
	VPAND Y4, Y10, Y4
	VPAND Y6, Y10, Y6
	VPMOVMSKB Y0, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y2, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y4, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y6, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded

	// Round 4: coefficients 128..159.
	VMOVDQU 512(AX), Y0
	VMOVDQU 544(AX), Y2
	VMOVDQU 576(AX), Y4
	VMOVDQU 608(AX), Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPXOR Y0, Y1, Y0
	VPXOR Y2, Y3, Y2
	VPXOR Y4, Y5, Y4
	VPXOR Y6, Y7, Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPAND Y0, Y10, Y0
	VPAND Y2, Y10, Y2
	VPAND Y4, Y10, Y4
	VPAND Y6, Y10, Y6
	VPMOVMSKB Y0, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y2, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y4, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y6, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded

	// Round 5: coefficients 160..191.
	VMOVDQU 640(AX), Y0
	VMOVDQU 672(AX), Y2
	VMOVDQU 704(AX), Y4
	VMOVDQU 736(AX), Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPXOR Y0, Y1, Y0
	VPXOR Y2, Y3, Y2
	VPXOR Y4, Y5, Y4
	VPXOR Y6, Y7, Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPAND Y0, Y10, Y0
	VPAND Y2, Y10, Y2
	VPAND Y4, Y10, Y4
	VPAND Y6, Y10, Y6
	VPMOVMSKB Y0, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y2, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y4, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y6, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded

	// Round 6: coefficients 192..223.
	VMOVDQU 768(AX), Y0
	VMOVDQU 800(AX), Y2
	VMOVDQU 832(AX), Y4
	VMOVDQU 864(AX), Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPXOR Y0, Y1, Y0
	VPXOR Y2, Y3, Y2
	VPXOR Y4, Y5, Y4
	VPXOR Y6, Y7, Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPAND Y0, Y10, Y0
	VPAND Y2, Y10, Y2
	VPAND Y4, Y10, Y4
	VPAND Y6, Y10, Y6
	VPMOVMSKB Y0, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y2, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y4, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded
	VPMOVMSKB Y6, DX
	XORL CX, DX
	TESTL DX, DX
	JNZ exceeded

	// Round 7: coefficients 224..255. AX is no longer needed as a pointer
	// after these loads and is reused below as the scratch mask register.
	VMOVDQU 896(AX), Y0
	VMOVDQU 928(AX), Y2
	VMOVDQU 960(AX), Y4
	VMOVDQU 992(AX), Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSRAD $0x1f, Y0, Y1
	VPSRAD $0x1f, Y2, Y3
	VPSRAD $0x1f, Y4, Y5
	VPSRAD $0x1f, Y6, Y7
	VPXOR Y0, Y1, Y0
	VPXOR Y2, Y3, Y2
	VPXOR Y4, Y5, Y4
	VPXOR Y6, Y7, Y6
	VPSUBD Y0, Y9, Y0
	VPSUBD Y2, Y9, Y2
	VPSUBD Y4, Y9, Y4
	VPSUBD Y6, Y9, Y6
	VPSUBD Y8, Y0, Y0
	VPSUBD Y8, Y2, Y2
	VPSUBD Y8, Y4, Y4
	VPSUBD Y8, Y6, Y6
	VPAND Y0, Y10, Y0
	VPAND Y2, Y10, Y2
	VPAND Y4, Y10, Y4
	VPAND Y6, Y10, Y6
	VPMOVMSKB Y0, AX
	XORL CX, AX
	TESTL AX, AX
	JNZ exceeded
	VPMOVMSKB Y2, AX
	XORL CX, AX
	TESTL AX, AX
	JNZ exceeded
	VPMOVMSKB Y4, AX
	XORL CX, AX
	TESTL AX, AX
	JNZ exceeded
	VPMOVMSKB Y6, AX
	XORL CX, AX
	TESTL AX, AX
	JNZ exceeded

	// No coefficient reached the bound: return 0.
	XORB AL, AL
	MOVB AL, ret+16(FP)
	RET

exceeded:
	// At least one coefficient reached the bound: return 1.
	MOVB $0x01, AL
	MOVB AL, ret+16(FP)
	RET

// func mulBy2toDAVX2(p *[256]uint32, q *[256]uint32)
// Requires: AVX, AVX2
TEXT ·mulBy2toDAVX2(SB), NOSPLIT, $0-16
	MOVQ p+0(FP), AX
	MOVQ q+8(FP), CX
	VMOVDQU (CX), Y0
	VMOVDQU 32(CX), Y1
	VMOVDQU 64(CX), Y2
	VMOVDQU 96(CX), Y3
	VMOVDQU 128(CX), Y4
	VMOVDQU 160(CX), Y5
	VMOVDQU 192(CX), Y6
	VMOVDQU 224(CX), Y7
	VPSLLD $0x0d, Y0, Y0
	VPSLLD $0x0d, Y1, Y1
	VPSLLD $0x0d, Y2, Y2
	VPSLLD $0x0d, Y3, Y3
	VPSLLD $0x0d, Y4, Y4
	VPSLLD $0x0d, Y5, Y5
	VPSLLD $0x0d, Y6, Y6
	VPSLLD $0x0d, Y7, Y7
	VMOVDQU Y0, (AX)
	VMOVDQU Y1, 32(AX)
	VMOVDQU Y2, 64(AX)
	VMOVDQU Y3, 96(AX)
	VMOVDQU Y4, 128(AX)
	VMOVDQU Y5, 160(AX)
	VMOVDQU Y6, 192(AX)
	VMOVDQU Y7, 224(AX)
	VMOVDQU 256(CX), Y0
	VMOVDQU 288(CX), Y1
	VMOVDQU 320(CX), Y2
	VMOVDQU 352(CX), Y3
	VMOVDQU 384(CX), Y4
	VMOVDQU 416(CX), Y5
	VMOVDQU 448(CX), Y6
	VMOVDQU 480(CX), Y7
	VPSLLD $0x0d, Y0, Y0
	VPSLLD $0x0d, Y1, Y1
	VPSLLD $0x0d, Y2, Y2
	VPSLLD $0x0d, Y3, Y3
	VPSLLD $0x0d, Y4, Y4
	VPSLLD $0x0d, Y5, Y5
	VPSLLD $0x0d, Y6, Y6
	VPSLLD $0x0d, Y7, Y7
	VMOVDQU Y0, 256(AX)
	VMOVDQU Y1, 288(AX)
	VMOVDQU Y2, 320(AX)
	VMOVDQU Y3, 352(AX)
	VMOVDQU Y4, 384(AX)
	VMOVDQU Y5, 416(AX)
	VMOVDQU Y6, 448(AX)
	VMOVDQU Y7, 480(AX)
	VMOVDQU 512(CX), Y0
	VMOVDQU 544(CX), Y1
	VMOVDQU 576(CX), Y2
	VMOVDQU 608(CX), Y3
	VMOVDQU 640(CX), Y4
	VMOVDQU 672(CX), Y5
	VMOVDQU 704(CX), Y6
	VMOVDQU 736(CX), Y7
	VPSLLD $0x0d,
Y0, Y0 VPSLLD $0x0d, Y1, Y1 VPSLLD $0x0d, Y2, Y2 VPSLLD $0x0d, Y3, Y3 VPSLLD $0x0d, Y4, Y4 VPSLLD $0x0d, Y5, Y5 VPSLLD $0x0d, Y6, Y6 VPSLLD $0x0d, Y7, Y7 VMOVDQU Y0, 512(AX) VMOVDQU Y1, 544(AX) VMOVDQU Y2, 576(AX) VMOVDQU Y3, 608(AX) VMOVDQU Y4, 640(AX) VMOVDQU Y5, 672(AX) VMOVDQU Y6, 704(AX) VMOVDQU Y7, 736(AX) VMOVDQU 768(CX), Y0 VMOVDQU 800(CX), Y1 VMOVDQU 832(CX), Y2 VMOVDQU 864(CX), Y3 VMOVDQU 896(CX), Y4 VMOVDQU 928(CX), Y5 VMOVDQU 960(CX), Y6 VMOVDQU 992(CX), Y7 VPSLLD $0x0d, Y0, Y0 VPSLLD $0x0d, Y1, Y1 VPSLLD $0x0d, Y2, Y2 VPSLLD $0x0d, Y3, Y3 VPSLLD $0x0d, Y4, Y4 VPSLLD $0x0d, Y5, Y5 VPSLLD $0x0d, Y6, Y6 VPSLLD $0x0d, Y7, Y7 VMOVDQU Y0, 768(AX) VMOVDQU Y1, 800(AX) VMOVDQU Y2, 832(AX) VMOVDQU Y3, 864(AX) VMOVDQU Y4, 896(AX) VMOVDQU Y5, 928(AX) VMOVDQU Y6, 960(AX) VMOVDQU Y7, 992(AX) RET