1// Code generated by command: go run gen.go -sse -out ../accum_vector_sse_amd64.s -pkg xxh3. DO NOT EDIT.
2
3#include "textflag.h"
4
// prime_sse is a 16-byte read-only, pointer-free constant holding the 32-bit
// value 0x9e3779b1 in each of its four 32-bit lanes. accumSSE and
// accumBlockSSE load it into X0 and use it as the multiplier when they
// scramble the accumulators.
5DATA prime_sse<>+0(SB)/4, $0x9e3779b1
6DATA prime_sse<>+4(SB)/4, $0x9e3779b1
7DATA prime_sse<>+8(SB)/4, $0x9e3779b1
8DATA prime_sse<>+12(SB)/4, $0x9e3779b1
9GLOBL prime_sse<>(SB), RODATA|NOPTR, $16
10
11// func accumSSE(acc *[8]uint64, data *byte, key *byte, len uint64)
12// Requires: SSE2
//
// accumSSE folds len bytes starting at data into the eight 64-bit
// accumulators stored at acc, mixing every 16 bytes of input with 16 bytes
// read from key. The accumulators are loaded once, kept in X1..X4 for the
// whole routine, and written back at the return label.
//
// Register use:
//   AX = acc              CX = data cursor        SI = bytes remaining
//   DX = key base         BX = key cursor used only by the 64-byte loop
//   X1..X4 = acc[0:2], acc[2:4], acc[4:6], acc[6:8]
//   X0 = prime_sse during accum_large; reused as data scratch afterwards
//   X5..X7 = scratch
//
// Phases:
//   accum_large: while SI > 1024, consume 1024 bytes as 16 stripes of 64
//                bytes (stripe k reads key at offset 8*k), then scramble the
//                accumulators with key bytes 128..191 and prime_sse.
//   accum:       while SI > 64, consume one 64-byte stripe using the key at
//                BX, then advance BX by 8.
//   finalize:    if SI != 0, consume the 64 bytes that end where the
//                remaining input ends, using key offsets 121..184.
//   return:      store X1..X4 back to acc.
//
// One 16-byte step, with d = data, k = key, a = accumulator register:
//   t = d ^ k
//   a += swap64(d) + lo32(t)*hi32(t)      (per 64-bit lane, mod 2^64)
// where swap64 exchanges the two 64-bit halves of the 128-bit register.
//
// NOTE(review): the loop conditions use JLE, a signed comparison, while len is
// declared uint64; lengths >= 2^63 would compare as negative. Presumably
// unreachable in practice - confirm.
// NOTE(review): finalize reads 64 bytes starting at data_cursor + SI - 64, so
// it assumes at least 64 readable bytes precede the end of the input, and the
// key accesses assume key is at least 192 bytes long - confirm against callers.
13TEXT ·accumSSE(SB), NOSPLIT, $0-32
14 MOVQ acc+0(FP), AX
15 MOVQ data+8(FP), CX
16 MOVQ key+16(FP), DX
17 MOVQ key+16(FP), BX // second copy of key; advanced in the accum loop
18 MOVQ len+24(FP), SI
19 MOVOU (AX), X1
20 MOVOU 16(AX), X2
21 MOVOU 32(AX), X3
22 MOVOU 48(AX), X4
23 MOVOU prime_sse<>+0(SB), X0
24

// 1024-byte block loop: taken only while more than 1024 bytes remain.
25accum_large:
26 CMPQ SI, $0x00000400
27 JLE accum
 // stripe 0: data+0, key+0
28 MOVOU (CX), X5 // X5 = d
29 MOVOU (DX), X6 // X6 = k
30 PXOR X5, X6 // X6 = t = d ^ k
31 PSHUFD $0x31, X6, X7 // low dword of each 64-bit lane of X7 = hi32(t)
32 PMULULQ X6, X7 // X7 = lo32(t) * hi32(t), 64-bit product per lane
33 PSHUFD $0x4e, X5, X5 // swap the two 64-bit halves of d
34 PADDQ X5, X1 // a += swap64(d)
35 PADDQ X7, X1 // a += product
36 MOVOU 16(CX), X5
37 MOVOU 16(DX), X6
38 PXOR X5, X6
39 PSHUFD $0x31, X6, X7
40 PMULULQ X6, X7
41 PSHUFD $0x4e, X5, X5
42 PADDQ X5, X2
43 PADDQ X7, X2
44 MOVOU 32(CX), X5
45 MOVOU 32(DX), X6
46 PXOR X5, X6
47 PSHUFD $0x31, X6, X7
48 PMULULQ X6, X7
49 PSHUFD $0x4e, X5, X5
50 PADDQ X5, X3
51 PADDQ X7, X3
52 MOVOU 48(CX), X5
53 MOVOU 48(DX), X6
54 PXOR X5, X6
55 PSHUFD $0x31, X6, X7
56 PMULULQ X6, X7
57 PSHUFD $0x4e, X5, X5
58 PADDQ X5, X4
59 PADDQ X7, X4
 // stripe 1: data+64, key+8
60 MOVOU 64(CX), X5
61 MOVOU 8(DX), X6
62 PXOR X5, X6
63 PSHUFD $0x31, X6, X7
64 PMULULQ X6, X7
65 PSHUFD $0x4e, X5, X5
66 PADDQ X5, X1
67 PADDQ X7, X1
68 MOVOU 80(CX), X5
69 MOVOU 24(DX), X6
70 PXOR X5, X6
71 PSHUFD $0x31, X6, X7
72 PMULULQ X6, X7
73 PSHUFD $0x4e, X5, X5
74 PADDQ X5, X2
75 PADDQ X7, X2
76 MOVOU 96(CX), X5
77 MOVOU 40(DX), X6
78 PXOR X5, X6
79 PSHUFD $0x31, X6, X7
80 PMULULQ X6, X7
81 PSHUFD $0x4e, X5, X5
82 PADDQ X5, X3
83 PADDQ X7, X3
84 MOVOU 112(CX), X5
85 MOVOU 56(DX), X6
86 PXOR X5, X6
87 PSHUFD $0x31, X6, X7
88 PMULULQ X6, X7
89 PSHUFD $0x4e, X5, X5
90 PADDQ X5, X4
91 PADDQ X7, X4
 // stripe 2: data+128, key+16
92 MOVOU 128(CX), X5
93 MOVOU 16(DX), X6
94 PXOR X5, X6
95 PSHUFD $0x31, X6, X7
96 PMULULQ X6, X7
97 PSHUFD $0x4e, X5, X5
98 PADDQ X5, X1
99 PADDQ X7, X1
100 MOVOU 144(CX), X5
101 MOVOU 32(DX), X6
102 PXOR X5, X6
103 PSHUFD $0x31, X6, X7
104 PMULULQ X6, X7
105 PSHUFD $0x4e, X5, X5
106 PADDQ X5, X2
107 PADDQ X7, X2
108 MOVOU 160(CX), X5
109 MOVOU 48(DX), X6
110 PXOR X5, X6
111 PSHUFD $0x31, X6, X7
112 PMULULQ X6, X7
113 PSHUFD $0x4e, X5, X5
114 PADDQ X5, X3
115 PADDQ X7, X3
116 MOVOU 176(CX), X5
117 MOVOU 64(DX), X6
118 PXOR X5, X6
119 PSHUFD $0x31, X6, X7
120 PMULULQ X6, X7
121 PSHUFD $0x4e, X5, X5
122 PADDQ X5, X4
123 PADDQ X7, X4
 // stripe 3: data+192, key+24
124 MOVOU 192(CX), X5
125 MOVOU 24(DX), X6
126 PXOR X5, X6
127 PSHUFD $0x31, X6, X7
128 PMULULQ X6, X7
129 PSHUFD $0x4e, X5, X5
130 PADDQ X5, X1
131 PADDQ X7, X1
132 MOVOU 208(CX), X5
133 MOVOU 40(DX), X6
134 PXOR X5, X6
135 PSHUFD $0x31, X6, X7
136 PMULULQ X6, X7
137 PSHUFD $0x4e, X5, X5
138 PADDQ X5, X2
139 PADDQ X7, X2
140 MOVOU 224(CX), X5
141 MOVOU 56(DX), X6
142 PXOR X5, X6
143 PSHUFD $0x31, X6, X7
144 PMULULQ X6, X7
145 PSHUFD $0x4e, X5, X5
146 PADDQ X5, X3
147 PADDQ X7, X3
148 MOVOU 240(CX), X5
149 MOVOU 72(DX), X6
150 PXOR X5, X6
151 PSHUFD $0x31, X6, X7
152 PMULULQ X6, X7
153 PSHUFD $0x4e, X5, X5
154 PADDQ X5, X4
155 PADDQ X7, X4
 // stripe 4: data+256, key+32
156 MOVOU 256(CX), X5
157 MOVOU 32(DX), X6
158 PXOR X5, X6
159 PSHUFD $0x31, X6, X7
160 PMULULQ X6, X7
161 PSHUFD $0x4e, X5, X5
162 PADDQ X5, X1
163 PADDQ X7, X1
164 MOVOU 272(CX), X5
165 MOVOU 48(DX), X6
166 PXOR X5, X6
167 PSHUFD $0x31, X6, X7
168 PMULULQ X6, X7
169 PSHUFD $0x4e, X5, X5
170 PADDQ X5, X2
171 PADDQ X7, X2
172 MOVOU 288(CX), X5
173 MOVOU 64(DX), X6
174 PXOR X5, X6
175 PSHUFD $0x31, X6, X7
176 PMULULQ X6, X7
177 PSHUFD $0x4e, X5, X5
178 PADDQ X5, X3
179 PADDQ X7, X3
180 MOVOU 304(CX), X5
181 MOVOU 80(DX), X6
182 PXOR X5, X6
183 PSHUFD $0x31, X6, X7
184 PMULULQ X6, X7
185 PSHUFD $0x4e, X5, X5
186 PADDQ X5, X4
187 PADDQ X7, X4
 // stripe 5: data+320, key+40
188 MOVOU 320(CX), X5
189 MOVOU 40(DX), X6
190 PXOR X5, X6
191 PSHUFD $0x31, X6, X7
192 PMULULQ X6, X7
193 PSHUFD $0x4e, X5, X5
194 PADDQ X5, X1
195 PADDQ X7, X1
196 MOVOU 336(CX), X5
197 MOVOU 56(DX), X6
198 PXOR X5, X6
199 PSHUFD $0x31, X6, X7
200 PMULULQ X6, X7
201 PSHUFD $0x4e, X5, X5
202 PADDQ X5, X2
203 PADDQ X7, X2
204 MOVOU 352(CX), X5
205 MOVOU 72(DX), X6
206 PXOR X5, X6
207 PSHUFD $0x31, X6, X7
208 PMULULQ X6, X7
209 PSHUFD $0x4e, X5, X5
210 PADDQ X5, X3
211 PADDQ X7, X3
212 MOVOU 368(CX), X5
213 MOVOU 88(DX), X6
214 PXOR X5, X6
215 PSHUFD $0x31, X6, X7
216 PMULULQ X6, X7
217 PSHUFD $0x4e, X5, X5
218 PADDQ X5, X4
219 PADDQ X7, X4
 // stripe 6: data+384, key+48
220 MOVOU 384(CX), X5
221 MOVOU 48(DX), X6
222 PXOR X5, X6
223 PSHUFD $0x31, X6, X7
224 PMULULQ X6, X7
225 PSHUFD $0x4e, X5, X5
226 PADDQ X5, X1
227 PADDQ X7, X1
228 MOVOU 400(CX), X5
229 MOVOU 64(DX), X6
230 PXOR X5, X6
231 PSHUFD $0x31, X6, X7
232 PMULULQ X6, X7
233 PSHUFD $0x4e, X5, X5
234 PADDQ X5, X2
235 PADDQ X7, X2
236 MOVOU 416(CX), X5
237 MOVOU 80(DX), X6
238 PXOR X5, X6
239 PSHUFD $0x31, X6, X7
240 PMULULQ X6, X7
241 PSHUFD $0x4e, X5, X5
242 PADDQ X5, X3
243 PADDQ X7, X3
244 MOVOU 432(CX), X5
245 MOVOU 96(DX), X6
246 PXOR X5, X6
247 PSHUFD $0x31, X6, X7
248 PMULULQ X6, X7
249 PSHUFD $0x4e, X5, X5
250 PADDQ X5, X4
251 PADDQ X7, X4
 // stripe 7: data+448, key+56
252 MOVOU 448(CX), X5
253 MOVOU 56(DX), X6
254 PXOR X5, X6
255 PSHUFD $0x31, X6, X7
256 PMULULQ X6, X7
257 PSHUFD $0x4e, X5, X5
258 PADDQ X5, X1
259 PADDQ X7, X1
260 MOVOU 464(CX), X5
261 MOVOU 72(DX), X6
262 PXOR X5, X6
263 PSHUFD $0x31, X6, X7
264 PMULULQ X6, X7
265 PSHUFD $0x4e, X5, X5
266 PADDQ X5, X2
267 PADDQ X7, X2
268 MOVOU 480(CX), X5
269 MOVOU 88(DX), X6
270 PXOR X5, X6
271 PSHUFD $0x31, X6, X7
272 PMULULQ X6, X7
273 PSHUFD $0x4e, X5, X5
274 PADDQ X5, X3
275 PADDQ X7, X3
276 MOVOU 496(CX), X5
277 MOVOU 104(DX), X6
278 PXOR X5, X6
279 PSHUFD $0x31, X6, X7
280 PMULULQ X6, X7
281 PSHUFD $0x4e, X5, X5
282 PADDQ X5, X4
283 PADDQ X7, X4
 // stripe 8: data+512, key+64
284 MOVOU 512(CX), X5
285 MOVOU 64(DX), X6
286 PXOR X5, X6
287 PSHUFD $0x31, X6, X7
288 PMULULQ X6, X7
289 PSHUFD $0x4e, X5, X5
290 PADDQ X5, X1
291 PADDQ X7, X1
292 MOVOU 528(CX), X5
293 MOVOU 80(DX), X6
294 PXOR X5, X6
295 PSHUFD $0x31, X6, X7
296 PMULULQ X6, X7
297 PSHUFD $0x4e, X5, X5
298 PADDQ X5, X2
299 PADDQ X7, X2
300 MOVOU 544(CX), X5
301 MOVOU 96(DX), X6
302 PXOR X5, X6
303 PSHUFD $0x31, X6, X7
304 PMULULQ X6, X7
305 PSHUFD $0x4e, X5, X5
306 PADDQ X5, X3
307 PADDQ X7, X3
308 MOVOU 560(CX), X5
309 MOVOU 112(DX), X6
310 PXOR X5, X6
311 PSHUFD $0x31, X6, X7
312 PMULULQ X6, X7
313 PSHUFD $0x4e, X5, X5
314 PADDQ X5, X4
315 PADDQ X7, X4
 // stripe 9: data+576, key+72
316 MOVOU 576(CX), X5
317 MOVOU 72(DX), X6
318 PXOR X5, X6
319 PSHUFD $0x31, X6, X7
320 PMULULQ X6, X7
321 PSHUFD $0x4e, X5, X5
322 PADDQ X5, X1
323 PADDQ X7, X1
324 MOVOU 592(CX), X5
325 MOVOU 88(DX), X6
326 PXOR X5, X6
327 PSHUFD $0x31, X6, X7
328 PMULULQ X6, X7
329 PSHUFD $0x4e, X5, X5
330 PADDQ X5, X2
331 PADDQ X7, X2
332 MOVOU 608(CX), X5
333 MOVOU 104(DX), X6
334 PXOR X5, X6
335 PSHUFD $0x31, X6, X7
336 PMULULQ X6, X7
337 PSHUFD $0x4e, X5, X5
338 PADDQ X5, X3
339 PADDQ X7, X3
340 MOVOU 624(CX), X5
341 MOVOU 120(DX), X6
342 PXOR X5, X6
343 PSHUFD $0x31, X6, X7
344 PMULULQ X6, X7
345 PSHUFD $0x4e, X5, X5
346 PADDQ X5, X4
347 PADDQ X7, X4
 // stripe 10: data+640, key+80
348 MOVOU 640(CX), X5
349 MOVOU 80(DX), X6
350 PXOR X5, X6
351 PSHUFD $0x31, X6, X7
352 PMULULQ X6, X7
353 PSHUFD $0x4e, X5, X5
354 PADDQ X5, X1
355 PADDQ X7, X1
356 MOVOU 656(CX), X5
357 MOVOU 96(DX), X6
358 PXOR X5, X6
359 PSHUFD $0x31, X6, X7
360 PMULULQ X6, X7
361 PSHUFD $0x4e, X5, X5
362 PADDQ X5, X2
363 PADDQ X7, X2
364 MOVOU 672(CX), X5
365 MOVOU 112(DX), X6
366 PXOR X5, X6
367 PSHUFD $0x31, X6, X7
368 PMULULQ X6, X7
369 PSHUFD $0x4e, X5, X5
370 PADDQ X5, X3
371 PADDQ X7, X3
372 MOVOU 688(CX), X5
373 MOVOU 128(DX), X6
374 PXOR X5, X6
375 PSHUFD $0x31, X6, X7
376 PMULULQ X6, X7
377 PSHUFD $0x4e, X5, X5
378 PADDQ X5, X4
379 PADDQ X7, X4
 // stripe 11: data+704, key+88
380 MOVOU 704(CX), X5
381 MOVOU 88(DX), X6
382 PXOR X5, X6
383 PSHUFD $0x31, X6, X7
384 PMULULQ X6, X7
385 PSHUFD $0x4e, X5, X5
386 PADDQ X5, X1
387 PADDQ X7, X1
388 MOVOU 720(CX), X5
389 MOVOU 104(DX), X6
390 PXOR X5, X6
391 PSHUFD $0x31, X6, X7
392 PMULULQ X6, X7
393 PSHUFD $0x4e, X5, X5
394 PADDQ X5, X2
395 PADDQ X7, X2
396 MOVOU 736(CX), X5
397 MOVOU 120(DX), X6
398 PXOR X5, X6
399 PSHUFD $0x31, X6, X7
400 PMULULQ X6, X7
401 PSHUFD $0x4e, X5, X5
402 PADDQ X5, X3
403 PADDQ X7, X3
404 MOVOU 752(CX), X5
405 MOVOU 136(DX), X6
406 PXOR X5, X6
407 PSHUFD $0x31, X6, X7
408 PMULULQ X6, X7
409 PSHUFD $0x4e, X5, X5
410 PADDQ X5, X4
411 PADDQ X7, X4
 // stripe 12: data+768, key+96
412 MOVOU 768(CX), X5
413 MOVOU 96(DX), X6
414 PXOR X5, X6
415 PSHUFD $0x31, X6, X7
416 PMULULQ X6, X7
417 PSHUFD $0x4e, X5, X5
418 PADDQ X5, X1
419 PADDQ X7, X1
420 MOVOU 784(CX), X5
421 MOVOU 112(DX), X6
422 PXOR X5, X6
423 PSHUFD $0x31, X6, X7
424 PMULULQ X6, X7
425 PSHUFD $0x4e, X5, X5
426 PADDQ X5, X2
427 PADDQ X7, X2
428 MOVOU 800(CX), X5
429 MOVOU 128(DX), X6
430 PXOR X5, X6
431 PSHUFD $0x31, X6, X7
432 PMULULQ X6, X7
433 PSHUFD $0x4e, X5, X5
434 PADDQ X5, X3
435 PADDQ X7, X3
436 MOVOU 816(CX), X5
437 MOVOU 144(DX), X6
438 PXOR X5, X6
439 PSHUFD $0x31, X6, X7
440 PMULULQ X6, X7
441 PSHUFD $0x4e, X5, X5
442 PADDQ X5, X4
443 PADDQ X7, X4
 // stripe 13: data+832, key+104
444 MOVOU 832(CX), X5
445 MOVOU 104(DX), X6
446 PXOR X5, X6
447 PSHUFD $0x31, X6, X7
448 PMULULQ X6, X7
449 PSHUFD $0x4e, X5, X5
450 PADDQ X5, X1
451 PADDQ X7, X1
452 MOVOU 848(CX), X5
453 MOVOU 120(DX), X6
454 PXOR X5, X6
455 PSHUFD $0x31, X6, X7
456 PMULULQ X6, X7
457 PSHUFD $0x4e, X5, X5
458 PADDQ X5, X2
459 PADDQ X7, X2
460 MOVOU 864(CX), X5
461 MOVOU 136(DX), X6
462 PXOR X5, X6
463 PSHUFD $0x31, X6, X7
464 PMULULQ X6, X7
465 PSHUFD $0x4e, X5, X5
466 PADDQ X5, X3
467 PADDQ X7, X3
468 MOVOU 880(CX), X5
469 MOVOU 152(DX), X6
470 PXOR X5, X6
471 PSHUFD $0x31, X6, X7
472 PMULULQ X6, X7
473 PSHUFD $0x4e, X5, X5
474 PADDQ X5, X4
475 PADDQ X7, X4
 // stripe 14: data+896, key+112
476 MOVOU 896(CX), X5
477 MOVOU 112(DX), X6
478 PXOR X5, X6
479 PSHUFD $0x31, X6, X7
480 PMULULQ X6, X7
481 PSHUFD $0x4e, X5, X5
482 PADDQ X5, X1
483 PADDQ X7, X1
484 MOVOU 912(CX), X5
485 MOVOU 128(DX), X6
486 PXOR X5, X6
487 PSHUFD $0x31, X6, X7
488 PMULULQ X6, X7
489 PSHUFD $0x4e, X5, X5
490 PADDQ X5, X2
491 PADDQ X7, X2
492 MOVOU 928(CX), X5
493 MOVOU 144(DX), X6
494 PXOR X5, X6
495 PSHUFD $0x31, X6, X7
496 PMULULQ X6, X7
497 PSHUFD $0x4e, X5, X5
498 PADDQ X5, X3
499 PADDQ X7, X3
500 MOVOU 944(CX), X5
501 MOVOU 160(DX), X6
502 PXOR X5, X6
503 PSHUFD $0x31, X6, X7
504 PMULULQ X6, X7
505 PSHUFD $0x4e, X5, X5
506 PADDQ X5, X4
507 PADDQ X7, X4
 // stripe 15: data+960, key+120
508 MOVOU 960(CX), X5
509 MOVOU 120(DX), X6
510 PXOR X5, X6
511 PSHUFD $0x31, X6, X7
512 PMULULQ X6, X7
513 PSHUFD $0x4e, X5, X5
514 PADDQ X5, X1
515 PADDQ X7, X1
516 MOVOU 976(CX), X5
517 MOVOU 136(DX), X6
518 PXOR X5, X6
519 PSHUFD $0x31, X6, X7
520 PMULULQ X6, X7
521 PSHUFD $0x4e, X5, X5
522 PADDQ X5, X2
523 PADDQ X7, X2
524 MOVOU 992(CX), X5
525 MOVOU 152(DX), X6
526 PXOR X5, X6
527 PSHUFD $0x31, X6, X7
528 PMULULQ X6, X7
529 PSHUFD $0x4e, X5, X5
530 PADDQ X5, X3
531 PADDQ X7, X3
532 MOVOU 1008(CX), X5
533 MOVOU 168(DX), X6
534 PXOR X5, X6
535 PSHUFD $0x31, X6, X7
536 PMULULQ X6, X7
537 PSHUFD $0x4e, X5, X5
538 PADDQ X5, X4
539 PADDQ X7, X4
 // Block consumed: advance the data cursor and shrink the remaining count.
540 ADDQ $0x00000400, CX
541 SUBQ $0x00000400, SI
 // Scramble each accumulator register a:
 //   a ^= a >> 47; a ^= key[128+16*j ...]; a *= 0x9e3779b1 (mod 2^64 per lane)
 // The 64x32-bit multiply is built from two 32x32->64 PMULULQ products.
542 MOVOU X1, X5
543 PSRLQ $0x2f, X5 // X5 = a >> 47
544 PXOR X5, X1 // a ^= a >> 47
545 MOVOU 128(DX), X5
546 PXOR X5, X1 // a ^= key bytes 128..143
547 PSHUFD $0xf5, X1, X5 // low dword of each lane of X5 = hi32(a)
548 PMULULQ X0, X1 // X1 = lo32(a) * prime
549 PMULULQ X0, X5 // X5 = hi32(a) * prime
550 PSLLQ $0x20, X5 // move the high partial product up by 32 bits
551 PADDQ X5, X1 // X1 = a * prime (mod 2^64)
552 MOVOU X2, X5
553 PSRLQ $0x2f, X5
554 PXOR X5, X2
555 MOVOU 144(DX), X5
556 PXOR X5, X2
557 PSHUFD $0xf5, X2, X5
558 PMULULQ X0, X2
559 PMULULQ X0, X5
560 PSLLQ $0x20, X5
561 PADDQ X5, X2
562 MOVOU X3, X5
563 PSRLQ $0x2f, X5
564 PXOR X5, X3
565 MOVOU 160(DX), X5
566 PXOR X5, X3
567 PSHUFD $0xf5, X3, X5
568 PMULULQ X0, X3
569 PMULULQ X0, X5
570 PSLLQ $0x20, X5
571 PADDQ X5, X3
572 MOVOU X4, X5
573 PSRLQ $0x2f, X5
574 PXOR X5, X4
575 MOVOU 176(DX), X5
576 PXOR X5, X4
577 PSHUFD $0xf5, X4, X5
578 PMULULQ X0, X4
579 PMULULQ X0, X5
580 PSLLQ $0x20, X5
581 PADDQ X5, X4
582 JMP accum_large
583

// 64-byte stripe loop: taken only while more than 64 bytes remain. X0 no
// longer holds prime_sse from here on; it is reused as the data register,
// with X5 and X6 as scratch. Nothing below jumps back to accum_large.
584accum:
585 CMPQ SI, $0x40
586 JLE finalize
587 MOVOU (CX), X0 // X0 = d
588 MOVOU (BX), X5 // X5 = k, read at the moving key cursor
589 PXOR X0, X5 // X5 = t = d ^ k
590 PSHUFD $0x31, X5, X6
591 PMULULQ X5, X6 // X6 = lo32(t) * hi32(t) per lane
592 PSHUFD $0x4e, X0, X0 // swap the two 64-bit halves of d
593 PADDQ X0, X1
594 PADDQ X6, X1
595 MOVOU 16(CX), X0
596 MOVOU 16(BX), X5
597 PXOR X0, X5
598 PSHUFD $0x31, X5, X6
599 PMULULQ X5, X6
600 PSHUFD $0x4e, X0, X0
601 PADDQ X0, X2
602 PADDQ X6, X2
603 MOVOU 32(CX), X0
604 MOVOU 32(BX), X5
605 PXOR X0, X5
606 PSHUFD $0x31, X5, X6
607 PMULULQ X5, X6
608 PSHUFD $0x4e, X0, X0
609 PADDQ X0, X3
610 PADDQ X6, X3
611 MOVOU 48(CX), X0
612 MOVOU 48(BX), X5
613 PXOR X0, X5
614 PSHUFD $0x31, X5, X6
615 PMULULQ X5, X6
616 PSHUFD $0x4e, X0, X0
617 PADDQ X0, X4
618 PADDQ X6, X4
619 ADDQ $0x00000040, CX // data cursor += 64
620 SUBQ $0x00000040, SI // remaining -= 64
621 ADDQ $0x00000008, BX // key cursor += 8 for the next stripe
622 JMP accum
623

// Tail: 1..64 bytes remain, or 0 when len was 0 on entry (the loops above
// only subtract while SI is strictly greater than the amount subtracted).
624finalize:
625 CMPQ SI, $0x00
626 JE return
627 SUBQ $0x40, CX
628 ADDQ SI, CX // CX = cursor + remaining - 64: the 64 bytes ending at the end of the input
629 MOVOU (CX), X0
630 MOVOU 121(DX), X5 // tail stripe uses key offsets 121, 137, 153, 169 from the key base
631 PXOR X0, X5
632 PSHUFD $0x31, X5, X6
633 PMULULQ X5, X6
634 PSHUFD $0x4e, X0, X0
635 PADDQ X0, X1
636 PADDQ X6, X1
637 MOVOU 16(CX), X0
638 MOVOU 137(DX), X5
639 PXOR X0, X5
640 PSHUFD $0x31, X5, X6
641 PMULULQ X5, X6
642 PSHUFD $0x4e, X0, X0
643 PADDQ X0, X2
644 PADDQ X6, X2
645 MOVOU 32(CX), X0
646 MOVOU 153(DX), X5
647 PXOR X0, X5
648 PSHUFD $0x31, X5, X6
649 PMULULQ X5, X6
650 PSHUFD $0x4e, X0, X0
651 PADDQ X0, X3
652 PADDQ X6, X3
653 MOVOU 48(CX), X0
654 MOVOU 169(DX), X5
655 PXOR X0, X5
656 PSHUFD $0x31, X5, X6
657 PMULULQ X5, X6
658 PSHUFD $0x4e, X0, X0
659 PADDQ X0, X4
660 PADDQ X6, X4
661

// Write the four accumulator registers back to acc.
662return:
663 MOVOU X1, (AX)
664 MOVOU X2, 16(AX)
665 MOVOU X3, 32(AX)
666 MOVOU X4, 48(AX)
667 RET
668
669// func accumBlockSSE(acc *[8]uint64, data *byte, key *byte)
670// Requires: SSE2
//
// accumBlockSSE performs one unconditional 1024-byte block step on the eight
// 64-bit accumulators stored at acc: it loads them into X1..X4, consumes 1024
// bytes of data as 16 stripes of 64 bytes (stripe k reads key at offset 8*k),
// scrambles the accumulators with key bytes 128..191 and prime_sse, and
// stores them back. It is straight-line code with no loop and no length
// argument.
//
// Register use:
//   AX = acc, CX = data, DX = key, X0 = prime_sse,
//   X1..X4 = acc[0:2], acc[2:4], acc[4:6], acc[6:8], X5..X7 = scratch.
//
// One 16-byte step, with d = data, k = key, a = accumulator register:
//   t = d ^ k
//   a += swap64(d) + lo32(t)*hi32(t)      (per 64-bit lane, mod 2^64)
// where swap64 exchanges the two 64-bit halves of the 128-bit register.
//
// NOTE(review): assumes data has at least 1024 readable bytes and key at
// least 192 - confirm against callers.
671TEXT ·accumBlockSSE(SB), NOSPLIT, $0-24
672 MOVQ acc+0(FP), AX
673 MOVQ data+8(FP), CX
674 MOVQ key+16(FP), DX
675 MOVOU (AX), X1
676 MOVOU 16(AX), X2
677 MOVOU 32(AX), X3
678 MOVOU 48(AX), X4
679 MOVOU prime_sse<>+0(SB), X0
 // stripe 0: data+0, key+0
680 MOVOU (CX), X5 // X5 = d
681 MOVOU (DX), X6 // X6 = k
682 PXOR X5, X6 // X6 = t = d ^ k
683 PSHUFD $0x31, X6, X7 // low dword of each 64-bit lane of X7 = hi32(t)
684 PMULULQ X6, X7 // X7 = lo32(t) * hi32(t), 64-bit product per lane
685 PSHUFD $0x4e, X5, X5 // swap the two 64-bit halves of d
686 PADDQ X5, X1 // a += swap64(d)
687 PADDQ X7, X1 // a += product
688 MOVOU 16(CX), X5
689 MOVOU 16(DX), X6
690 PXOR X5, X6
691 PSHUFD $0x31, X6, X7
692 PMULULQ X6, X7
693 PSHUFD $0x4e, X5, X5
694 PADDQ X5, X2
695 PADDQ X7, X2
696 MOVOU 32(CX), X5
697 MOVOU 32(DX), X6
698 PXOR X5, X6
699 PSHUFD $0x31, X6, X7
700 PMULULQ X6, X7
701 PSHUFD $0x4e, X5, X5
702 PADDQ X5, X3
703 PADDQ X7, X3
704 MOVOU 48(CX), X5
705 MOVOU 48(DX), X6
706 PXOR X5, X6
707 PSHUFD $0x31, X6, X7
708 PMULULQ X6, X7
709 PSHUFD $0x4e, X5, X5
710 PADDQ X5, X4
711 PADDQ X7, X4
 // stripe 1: data+64, key+8
712 MOVOU 64(CX), X5
713 MOVOU 8(DX), X6
714 PXOR X5, X6
715 PSHUFD $0x31, X6, X7
716 PMULULQ X6, X7
717 PSHUFD $0x4e, X5, X5
718 PADDQ X5, X1
719 PADDQ X7, X1
720 MOVOU 80(CX), X5
721 MOVOU 24(DX), X6
722 PXOR X5, X6
723 PSHUFD $0x31, X6, X7
724 PMULULQ X6, X7
725 PSHUFD $0x4e, X5, X5
726 PADDQ X5, X2
727 PADDQ X7, X2
728 MOVOU 96(CX), X5
729 MOVOU 40(DX), X6
730 PXOR X5, X6
731 PSHUFD $0x31, X6, X7
732 PMULULQ X6, X7
733 PSHUFD $0x4e, X5, X5
734 PADDQ X5, X3
735 PADDQ X7, X3
736 MOVOU 112(CX), X5
737 MOVOU 56(DX), X6
738 PXOR X5, X6
739 PSHUFD $0x31, X6, X7
740 PMULULQ X6, X7
741 PSHUFD $0x4e, X5, X5
742 PADDQ X5, X4
743 PADDQ X7, X4
 // stripe 2: data+128, key+16
744 MOVOU 128(CX), X5
745 MOVOU 16(DX), X6
746 PXOR X5, X6
747 PSHUFD $0x31, X6, X7
748 PMULULQ X6, X7
749 PSHUFD $0x4e, X5, X5
750 PADDQ X5, X1
751 PADDQ X7, X1
752 MOVOU 144(CX), X5
753 MOVOU 32(DX), X6
754 PXOR X5, X6
755 PSHUFD $0x31, X6, X7
756 PMULULQ X6, X7
757 PSHUFD $0x4e, X5, X5
758 PADDQ X5, X2
759 PADDQ X7, X2
760 MOVOU 160(CX), X5
761 MOVOU 48(DX), X6
762 PXOR X5, X6
763 PSHUFD $0x31, X6, X7
764 PMULULQ X6, X7
765 PSHUFD $0x4e, X5, X5
766 PADDQ X5, X3
767 PADDQ X7, X3
768 MOVOU 176(CX), X5
769 MOVOU 64(DX), X6
770 PXOR X5, X6
771 PSHUFD $0x31, X6, X7
772 PMULULQ X6, X7
773 PSHUFD $0x4e, X5, X5
774 PADDQ X5, X4
775 PADDQ X7, X4
 // stripe 3: data+192, key+24
776 MOVOU 192(CX), X5
777 MOVOU 24(DX), X6
778 PXOR X5, X6
779 PSHUFD $0x31, X6, X7
780 PMULULQ X6, X7
781 PSHUFD $0x4e, X5, X5
782 PADDQ X5, X1
783 PADDQ X7, X1
784 MOVOU 208(CX), X5
785 MOVOU 40(DX), X6
786 PXOR X5, X6
787 PSHUFD $0x31, X6, X7
788 PMULULQ X6, X7
789 PSHUFD $0x4e, X5, X5
790 PADDQ X5, X2
791 PADDQ X7, X2
792 MOVOU 224(CX), X5
793 MOVOU 56(DX), X6
794 PXOR X5, X6
795 PSHUFD $0x31, X6, X7
796 PMULULQ X6, X7
797 PSHUFD $0x4e, X5, X5
798 PADDQ X5, X3
799 PADDQ X7, X3
800 MOVOU 240(CX), X5
801 MOVOU 72(DX), X6
802 PXOR X5, X6
803 PSHUFD $0x31, X6, X7
804 PMULULQ X6, X7
805 PSHUFD $0x4e, X5, X5
806 PADDQ X5, X4
807 PADDQ X7, X4
 // stripe 4: data+256, key+32
808 MOVOU 256(CX), X5
809 MOVOU 32(DX), X6
810 PXOR X5, X6
811 PSHUFD $0x31, X6, X7
812 PMULULQ X6, X7
813 PSHUFD $0x4e, X5, X5
814 PADDQ X5, X1
815 PADDQ X7, X1
816 MOVOU 272(CX), X5
817 MOVOU 48(DX), X6
818 PXOR X5, X6
819 PSHUFD $0x31, X6, X7
820 PMULULQ X6, X7
821 PSHUFD $0x4e, X5, X5
822 PADDQ X5, X2
823 PADDQ X7, X2
824 MOVOU 288(CX), X5
825 MOVOU 64(DX), X6
826 PXOR X5, X6
827 PSHUFD $0x31, X6, X7
828 PMULULQ X6, X7
829 PSHUFD $0x4e, X5, X5
830 PADDQ X5, X3
831 PADDQ X7, X3
832 MOVOU 304(CX), X5
833 MOVOU 80(DX), X6
834 PXOR X5, X6
835 PSHUFD $0x31, X6, X7
836 PMULULQ X6, X7
837 PSHUFD $0x4e, X5, X5
838 PADDQ X5, X4
839 PADDQ X7, X4
 // stripe 5: data+320, key+40
840 MOVOU 320(CX), X5
841 MOVOU 40(DX), X6
842 PXOR X5, X6
843 PSHUFD $0x31, X6, X7
844 PMULULQ X6, X7
845 PSHUFD $0x4e, X5, X5
846 PADDQ X5, X1
847 PADDQ X7, X1
848 MOVOU 336(CX), X5
849 MOVOU 56(DX), X6
850 PXOR X5, X6
851 PSHUFD $0x31, X6, X7
852 PMULULQ X6, X7
853 PSHUFD $0x4e, X5, X5
854 PADDQ X5, X2
855 PADDQ X7, X2
856 MOVOU 352(CX), X5
857 MOVOU 72(DX), X6
858 PXOR X5, X6
859 PSHUFD $0x31, X6, X7
860 PMULULQ X6, X7
861 PSHUFD $0x4e, X5, X5
862 PADDQ X5, X3
863 PADDQ X7, X3
864 MOVOU 368(CX), X5
865 MOVOU 88(DX), X6
866 PXOR X5, X6
867 PSHUFD $0x31, X6, X7
868 PMULULQ X6, X7
869 PSHUFD $0x4e, X5, X5
870 PADDQ X5, X4
871 PADDQ X7, X4
 // stripe 6: data+384, key+48
872 MOVOU 384(CX), X5
873 MOVOU 48(DX), X6
874 PXOR X5, X6
875 PSHUFD $0x31, X6, X7
876 PMULULQ X6, X7
877 PSHUFD $0x4e, X5, X5
878 PADDQ X5, X1
879 PADDQ X7, X1
880 MOVOU 400(CX), X5
881 MOVOU 64(DX), X6
882 PXOR X5, X6
883 PSHUFD $0x31, X6, X7
884 PMULULQ X6, X7
885 PSHUFD $0x4e, X5, X5
886 PADDQ X5, X2
887 PADDQ X7, X2
888 MOVOU 416(CX), X5
889 MOVOU 80(DX), X6
890 PXOR X5, X6
891 PSHUFD $0x31, X6, X7
892 PMULULQ X6, X7
893 PSHUFD $0x4e, X5, X5
894 PADDQ X5, X3
895 PADDQ X7, X3
896 MOVOU 432(CX), X5
897 MOVOU 96(DX), X6
898 PXOR X5, X6
899 PSHUFD $0x31, X6, X7
900 PMULULQ X6, X7
901 PSHUFD $0x4e, X5, X5
902 PADDQ X5, X4
903 PADDQ X7, X4
 // stripe 7: data+448, key+56
904 MOVOU 448(CX), X5
905 MOVOU 56(DX), X6
906 PXOR X5, X6
907 PSHUFD $0x31, X6, X7
908 PMULULQ X6, X7
909 PSHUFD $0x4e, X5, X5
910 PADDQ X5, X1
911 PADDQ X7, X1
912 MOVOU 464(CX), X5
913 MOVOU 72(DX), X6
914 PXOR X5, X6
915 PSHUFD $0x31, X6, X7
916 PMULULQ X6, X7
917 PSHUFD $0x4e, X5, X5
918 PADDQ X5, X2
919 PADDQ X7, X2
920 MOVOU 480(CX), X5
921 MOVOU 88(DX), X6
922 PXOR X5, X6
923 PSHUFD $0x31, X6, X7
924 PMULULQ X6, X7
925 PSHUFD $0x4e, X5, X5
926 PADDQ X5, X3
927 PADDQ X7, X3
928 MOVOU 496(CX), X5
929 MOVOU 104(DX), X6
930 PXOR X5, X6
931 PSHUFD $0x31, X6, X7
932 PMULULQ X6, X7
933 PSHUFD $0x4e, X5, X5
934 PADDQ X5, X4
935 PADDQ X7, X4
 // stripe 8: data+512, key+64
936 MOVOU 512(CX), X5
937 MOVOU 64(DX), X6
938 PXOR X5, X6
939 PSHUFD $0x31, X6, X7
940 PMULULQ X6, X7
941 PSHUFD $0x4e, X5, X5
942 PADDQ X5, X1
943 PADDQ X7, X1
944 MOVOU 528(CX), X5
945 MOVOU 80(DX), X6
946 PXOR X5, X6
947 PSHUFD $0x31, X6, X7
948 PMULULQ X6, X7
949 PSHUFD $0x4e, X5, X5
950 PADDQ X5, X2
951 PADDQ X7, X2
952 MOVOU 544(CX), X5
953 MOVOU 96(DX), X6
954 PXOR X5, X6
955 PSHUFD $0x31, X6, X7
956 PMULULQ X6, X7
957 PSHUFD $0x4e, X5, X5
958 PADDQ X5, X3
959 PADDQ X7, X3
960 MOVOU 560(CX), X5
961 MOVOU 112(DX), X6
962 PXOR X5, X6
963 PSHUFD $0x31, X6, X7
964 PMULULQ X6, X7
965 PSHUFD $0x4e, X5, X5
966 PADDQ X5, X4
967 PADDQ X7, X4
 // stripe 9: data+576, key+72
968 MOVOU 576(CX), X5
969 MOVOU 72(DX), X6
970 PXOR X5, X6
971 PSHUFD $0x31, X6, X7
972 PMULULQ X6, X7
973 PSHUFD $0x4e, X5, X5
974 PADDQ X5, X1
975 PADDQ X7, X1
976 MOVOU 592(CX), X5
977 MOVOU 88(DX), X6
978 PXOR X5, X6
979 PSHUFD $0x31, X6, X7
980 PMULULQ X6, X7
981 PSHUFD $0x4e, X5, X5
982 PADDQ X5, X2
983 PADDQ X7, X2
984 MOVOU 608(CX), X5
985 MOVOU 104(DX), X6
986 PXOR X5, X6
987 PSHUFD $0x31, X6, X7
988 PMULULQ X6, X7
989 PSHUFD $0x4e, X5, X5
990 PADDQ X5, X3
991 PADDQ X7, X3
992 MOVOU 624(CX), X5
993 MOVOU 120(DX), X6
994 PXOR X5, X6
995 PSHUFD $0x31, X6, X7
996 PMULULQ X6, X7
997 PSHUFD $0x4e, X5, X5
998 PADDQ X5, X4
999 PADDQ X7, X4
 // stripe 10: data+640, key+80
1000 MOVOU 640(CX), X5
1001 MOVOU 80(DX), X6
1002 PXOR X5, X6
1003 PSHUFD $0x31, X6, X7
1004 PMULULQ X6, X7
1005 PSHUFD $0x4e, X5, X5
1006 PADDQ X5, X1
1007 PADDQ X7, X1
1008 MOVOU 656(CX), X5
1009 MOVOU 96(DX), X6
1010 PXOR X5, X6
1011 PSHUFD $0x31, X6, X7
1012 PMULULQ X6, X7
1013 PSHUFD $0x4e, X5, X5
1014 PADDQ X5, X2
1015 PADDQ X7, X2
1016 MOVOU 672(CX), X5
1017 MOVOU 112(DX), X6
1018 PXOR X5, X6
1019 PSHUFD $0x31, X6, X7
1020 PMULULQ X6, X7
1021 PSHUFD $0x4e, X5, X5
1022 PADDQ X5, X3
1023 PADDQ X7, X3
1024 MOVOU 688(CX), X5
1025 MOVOU 128(DX), X6
1026 PXOR X5, X6
1027 PSHUFD $0x31, X6, X7
1028 PMULULQ X6, X7
1029 PSHUFD $0x4e, X5, X5
1030 PADDQ X5, X4
1031 PADDQ X7, X4
 // stripe 11: data+704, key+88
1032 MOVOU 704(CX), X5
1033 MOVOU 88(DX), X6
1034 PXOR X5, X6
1035 PSHUFD $0x31, X6, X7
1036 PMULULQ X6, X7
1037 PSHUFD $0x4e, X5, X5
1038 PADDQ X5, X1
1039 PADDQ X7, X1
1040 MOVOU 720(CX), X5
1041 MOVOU 104(DX), X6
1042 PXOR X5, X6
1043 PSHUFD $0x31, X6, X7
1044 PMULULQ X6, X7
1045 PSHUFD $0x4e, X5, X5
1046 PADDQ X5, X2
1047 PADDQ X7, X2
1048 MOVOU 736(CX), X5
1049 MOVOU 120(DX), X6
1050 PXOR X5, X6
1051 PSHUFD $0x31, X6, X7
1052 PMULULQ X6, X7
1053 PSHUFD $0x4e, X5, X5
1054 PADDQ X5, X3
1055 PADDQ X7, X3
1056 MOVOU 752(CX), X5
1057 MOVOU 136(DX), X6
1058 PXOR X5, X6
1059 PSHUFD $0x31, X6, X7
1060 PMULULQ X6, X7
1061 PSHUFD $0x4e, X5, X5
1062 PADDQ X5, X4
1063 PADDQ X7, X4
 // stripe 12: data+768, key+96
1064 MOVOU 768(CX), X5
1065 MOVOU 96(DX), X6
1066 PXOR X5, X6
1067 PSHUFD $0x31, X6, X7
1068 PMULULQ X6, X7
1069 PSHUFD $0x4e, X5, X5
1070 PADDQ X5, X1
1071 PADDQ X7, X1
1072 MOVOU 784(CX), X5
1073 MOVOU 112(DX), X6
1074 PXOR X5, X6
1075 PSHUFD $0x31, X6, X7
1076 PMULULQ X6, X7
1077 PSHUFD $0x4e, X5, X5
1078 PADDQ X5, X2
1079 PADDQ X7, X2
1080 MOVOU 800(CX), X5
1081 MOVOU 128(DX), X6
1082 PXOR X5, X6
1083 PSHUFD $0x31, X6, X7
1084 PMULULQ X6, X7
1085 PSHUFD $0x4e, X5, X5
1086 PADDQ X5, X3
1087 PADDQ X7, X3
1088 MOVOU 816(CX), X5
1089 MOVOU 144(DX), X6
1090 PXOR X5, X6
1091 PSHUFD $0x31, X6, X7
1092 PMULULQ X6, X7
1093 PSHUFD $0x4e, X5, X5
1094 PADDQ X5, X4
1095 PADDQ X7, X4
 // stripe 13: data+832, key+104
1096 MOVOU 832(CX), X5
1097 MOVOU 104(DX), X6
1098 PXOR X5, X6
1099 PSHUFD $0x31, X6, X7
1100 PMULULQ X6, X7
1101 PSHUFD $0x4e, X5, X5
1102 PADDQ X5, X1
1103 PADDQ X7, X1
1104 MOVOU 848(CX), X5
1105 MOVOU 120(DX), X6
1106 PXOR X5, X6
1107 PSHUFD $0x31, X6, X7
1108 PMULULQ X6, X7
1109 PSHUFD $0x4e, X5, X5
1110 PADDQ X5, X2
1111 PADDQ X7, X2
1112 MOVOU 864(CX), X5
1113 MOVOU 136(DX), X6
1114 PXOR X5, X6
1115 PSHUFD $0x31, X6, X7
1116 PMULULQ X6, X7
1117 PSHUFD $0x4e, X5, X5
1118 PADDQ X5, X3
1119 PADDQ X7, X3
1120 MOVOU 880(CX), X5
1121 MOVOU 152(DX), X6
1122 PXOR X5, X6
1123 PSHUFD $0x31, X6, X7
1124 PMULULQ X6, X7
1125 PSHUFD $0x4e, X5, X5
1126 PADDQ X5, X4
1127 PADDQ X7, X4
 // stripe 14: data+896, key+112
1128 MOVOU 896(CX), X5
1129 MOVOU 112(DX), X6
1130 PXOR X5, X6
1131 PSHUFD $0x31, X6, X7
1132 PMULULQ X6, X7
1133 PSHUFD $0x4e, X5, X5
1134 PADDQ X5, X1
1135 PADDQ X7, X1
1136 MOVOU 912(CX), X5
1137 MOVOU 128(DX), X6
1138 PXOR X5, X6
1139 PSHUFD $0x31, X6, X7
1140 PMULULQ X6, X7
1141 PSHUFD $0x4e, X5, X5
1142 PADDQ X5, X2
1143 PADDQ X7, X2
1144 MOVOU 928(CX), X5
1145 MOVOU 144(DX), X6
1146 PXOR X5, X6
1147 PSHUFD $0x31, X6, X7
1148 PMULULQ X6, X7
1149 PSHUFD $0x4e, X5, X5
1150 PADDQ X5, X3
1151 PADDQ X7, X3
1152 MOVOU 944(CX), X5
1153 MOVOU 160(DX), X6
1154 PXOR X5, X6
1155 PSHUFD $0x31, X6, X7
1156 PMULULQ X6, X7
1157 PSHUFD $0x4e, X5, X5
1158 PADDQ X5, X4
1159 PADDQ X7, X4
 // stripe 15: data+960, key+120
1160 MOVOU 960(CX), X5
1161 MOVOU 120(DX), X6
1162 PXOR X5, X6
1163 PSHUFD $0x31, X6, X7
1164 PMULULQ X6, X7
1165 PSHUFD $0x4e, X5, X5
1166 PADDQ X5, X1
1167 PADDQ X7, X1
1168 MOVOU 976(CX), X5
1169 MOVOU 136(DX), X6
1170 PXOR X5, X6
1171 PSHUFD $0x31, X6, X7
1172 PMULULQ X6, X7
1173 PSHUFD $0x4e, X5, X5
1174 PADDQ X5, X2
1175 PADDQ X7, X2
1176 MOVOU 992(CX), X5
1177 MOVOU 152(DX), X6
1178 PXOR X5, X6
1179 PSHUFD $0x31, X6, X7
1180 PMULULQ X6, X7
1181 PSHUFD $0x4e, X5, X5
1182 PADDQ X5, X3
1183 PADDQ X7, X3
1184 MOVOU 1008(CX), X5
1185 MOVOU 168(DX), X6
1186 PXOR X5, X6
1187 PSHUFD $0x31, X6, X7
1188 PMULULQ X6, X7
1189 PSHUFD $0x4e, X5, X5
1190 PADDQ X5, X4
1191 PADDQ X7, X4
 // Scramble each accumulator register a:
 //   a ^= a >> 47; a ^= key[128+16*j ...]; a *= 0x9e3779b1 (mod 2^64 per lane)
 // The 64x32-bit multiply is built from two 32x32->64 PMULULQ products.
1192 MOVOU X1, X5
1193 PSRLQ $0x2f, X5 // X5 = a >> 47
1194 PXOR X5, X1 // a ^= a >> 47
1195 MOVOU 128(DX), X5
1196 PXOR X5, X1 // a ^= key bytes 128..143
1197 PSHUFD $0xf5, X1, X5 // low dword of each lane of X5 = hi32(a)
1198 PMULULQ X0, X1 // X1 = lo32(a) * prime
1199 PMULULQ X0, X5 // X5 = hi32(a) * prime
1200 PSLLQ $0x20, X5 // move the high partial product up by 32 bits
1201 PADDQ X5, X1 // X1 = a * prime (mod 2^64)
1202 MOVOU X2, X5
1203 PSRLQ $0x2f, X5
1204 PXOR X5, X2
1205 MOVOU 144(DX), X5
1206 PXOR X5, X2
1207 PSHUFD $0xf5, X2, X5
1208 PMULULQ X0, X2
1209 PMULULQ X0, X5
1210 PSLLQ $0x20, X5
1211 PADDQ X5, X2
1212 MOVOU X3, X5
1213 PSRLQ $0x2f, X5
1214 PXOR X5, X3
1215 MOVOU 160(DX), X5
1216 PXOR X5, X3
1217 PSHUFD $0xf5, X3, X5
1218 PMULULQ X0, X3
1219 PMULULQ X0, X5
1220 PSLLQ $0x20, X5
1221 PADDQ X5, X3
1222 MOVOU X4, X5
1223 PSRLQ $0x2f, X5
1224 PXOR X5, X4
1225 MOVOU 176(DX), X5
1226 PXOR X5, X4
1227 PSHUFD $0xf5, X4, X5
1228 PMULULQ X0, X4
1229 PMULULQ X0, X5
1230 PSLLQ $0x20, X5
1231 PADDQ X5, X4
 // Write the four accumulator registers back to acc.
1232 MOVOU X1, (AX)
1233 MOVOU X2, 16(AX)
1234 MOVOU X3, 32(AX)
1235 MOVOU X4, 48(AX)
1236 RET
View as plain text