// +build amd64
2
3#include "textflag.h"
4#include "fp_amd64.h"
5
// func cmovAmd64(x, y *Elt, n uint)
//
// Assembly entry point for the Go declaration above. It has no local frame
// and 24 bytes of arguments ($0-24). The three arguments are loaded into
// DI, SI and BX and handed to the cselect macro, which is defined outside
// this file (presumably in fp_amd64.h).
// NOTE(review): cselect presumably copies *y into *x when n is non-zero
// without branching on n; confirm against the macro definition.
// Registers touched here: DI, SI, BX, plus whatever cselect uses.
TEXT ·cmovAmd64(SB),NOSPLIT,$0-24
    MOVQ x+0(FP), DI   // DI = x, passed to the macro as 0(DI)
    MOVQ y+8(FP), SI   // SI = y, passed to the macro as 0(SI)
    MOVQ n+16(FP), BX  // BX = n, passed to the macro by value
    cselect(0(DI),0(SI),BX)
    RET
13
// func cswapAmd64(x, y *Elt, n uint)
//
// Assembly entry point for the Go declaration above. It has no local frame
// and 24 bytes of arguments ($0-24). The three arguments are loaded into
// DI, SI and BX and handed to the cswap macro, which is defined outside
// this file (presumably in fp_amd64.h).
// NOTE(review): cswap presumably exchanges *x and *y when n is non-zero
// without branching on n; confirm against the macro definition.
// Registers touched here: DI, SI, BX, plus whatever cswap uses.
TEXT ·cswapAmd64(SB),NOSPLIT,$0-24
    MOVQ x+0(FP), DI   // DI = x, passed to the macro as 0(DI)
    MOVQ y+8(FP), SI   // SI = y, passed to the macro as 0(SI)
    MOVQ n+16(FP), BX  // BX = n, passed to the macro by value
    cswap(0(DI),0(SI),BX)
    RET
21
// func subAmd64(z, x, y *Elt)
//
// Assembly entry point for the Go declaration above. It has no local frame
// and 24 bytes of arguments ($0-24). The three pointers are loaded into
// DI, SI and BX and handed to the subtraction macro, which is defined
// outside this file (presumably in fp_amd64.h).
// NOTE(review): the macro presumably stores x - y (mod p) into z; confirm
// the operand order and the reduction level against the macro definition.
// Registers touched here: DI, SI, BX, plus whatever subtraction uses.
TEXT ·subAmd64(SB),NOSPLIT,$0-24
    MOVQ z+0(FP), DI   // DI = z, first macro operand
    MOVQ x+8(FP), SI   // SI = x, second macro operand
    MOVQ y+16(FP), BX  // BX = y, third macro operand
    subtraction(0(DI),0(SI),0(BX))
    RET
29
// func addsubAmd64(x, y *Elt)
//
// Assembly entry point for the Go declaration above. It has no local frame
// and 16 bytes of arguments ($0-16). Both pointers are loaded into DI and
// SI and handed to the addSub macro, which is defined outside this file
// (presumably in fp_amd64.h).
// NOTE(review): addSub presumably replaces (x, y) with (x+y, x-y) in
// place; confirm against the macro definition.
// Registers touched here: DI, SI, plus whatever addSub uses.
TEXT ·addsubAmd64(SB),NOSPLIT,$0-16
    MOVQ x+0(FP), DI  // DI = x, first macro operand
    MOVQ y+8(FP), SI  // SI = y, second macro operand
    addSub(0(DI),0(SI))
    RET
36
// addLegacy and addBmi2Adx are the two alternative bodies passed to
// CHECK_BMI2ADX by addAmd64. Both expect DI, SI and BX to already hold the
// z, x and y pointers, and differ only in which addition macro they expand
// (additionLeg vs. additionAdx, both defined outside this file).
#define addLegacy \
    additionLeg(0(DI),0(SI),0(BX))
#define addBmi2Adx \
    additionAdx(0(DI),0(SI),0(BX))
41
// mulLegacy and mulBmi2Adx are the two alternative bodies passed to
// CHECK_BMI2ADX by mulAmd64. Each runs in two steps: an integer-multiply
// macro whose first operand is 0(SP), then a reduce-from-double macro that
// takes 0(SP) as its second operand and 0(DI) as its first. They expect
// DI, SI and BX to hold z, x and y, and the enclosing function to provide
// local stack space at 0(SP) (mulAmd64 declares a 64-byte frame).
// NOTE(review): 0(SP) presumably holds the unreduced double-width product;
// confirm against the macro definitions in fp_amd64.h.
#define mulLegacy \
    integerMulLeg(0(SP),0(SI),0(BX)) \
    reduceFromDoubleLeg(0(DI),0(SP))
#define mulBmi2Adx \
    integerMulAdx(0(SP),0(SI),0(BX)) \
    reduceFromDoubleAdx(0(DI),0(SP))
48
// sqrLegacy and sqrBmi2Adx are the two alternative bodies passed to
// CHECK_BMI2ADX by sqrAmd64. Each runs in two steps: an integer-square
// macro whose first operand is 0(SP), then a reduce-from-double macro that
// takes 0(SP) as its second operand and 0(DI) as its first. They expect
// DI and SI to hold z and x, and the enclosing function to provide local
// stack space at 0(SP) (sqrAmd64 declares a 64-byte frame).
// NOTE(review): 0(SP) presumably holds the unreduced double-width square;
// confirm against the macro definitions in fp_amd64.h.
#define sqrLegacy \
    integerSqrLeg(0(SP),0(SI)) \
    reduceFromDoubleLeg(0(DI),0(SP))
#define sqrBmi2Adx \
    integerSqrAdx(0(SP),0(SI)) \
    reduceFromDoubleAdx(0(DI),0(SP))
55
// func addAmd64(z, x, y *Elt)
//
// Assembly entry point for the Go declaration above. It has no local frame
// and 24 bytes of arguments ($0-24). The three pointers are loaded into
// DI, SI and BX, which is where addLegacy and addBmi2Adx expect them, and
// the rest is left to CHECK_BMI2ADX.
// NOTE(review): CHECK_BMI2ADX is defined outside this file. There is no
// RET here, so the macro is expected to pick one of the two bodies at run
// time (using LADD as its internal label) and emit the return on both
// paths; confirm against fp_amd64.h.
TEXT ·addAmd64(SB),NOSPLIT,$0-24
    MOVQ z+0(FP), DI   // DI = z
    MOVQ x+8(FP), SI   // SI = x
    MOVQ y+16(FP), BX  // BX = y
    CHECK_BMI2ADX(LADD, addLegacy, addBmi2Adx)
62
// func mulAmd64(z, x, y *Elt)
//
// Assembly entry point for the Go declaration above. It declares a 64-byte
// local frame and 24 bytes of arguments ($64-24). The frame is the scratch
// area that mulLegacy and mulBmi2Adx address as 0(SP). The three pointers
// are loaded into DI, SI and BX, which is where those bodies expect them.
// NOTE(review): CHECK_BMI2ADX is defined outside this file. There is no
// RET here, so the macro is expected to pick one of the two bodies at run
// time (using LMUL as its internal label) and emit the return on both
// paths; confirm against fp_amd64.h.
TEXT ·mulAmd64(SB),NOSPLIT,$64-24
    MOVQ z+0(FP), DI   // DI = z
    MOVQ x+8(FP), SI   // SI = x
    MOVQ y+16(FP), BX  // BX = y
    CHECK_BMI2ADX(LMUL, mulLegacy, mulBmi2Adx)
69
// func sqrAmd64(z, x *Elt)
//
// Assembly entry point for the Go declaration above. It declares a 64-byte
// local frame and 16 bytes of arguments ($64-16). The frame is the scratch
// area that sqrLegacy and sqrBmi2Adx address as 0(SP). The two pointers
// are loaded into DI and SI, which is where those bodies expect them.
// NOTE(review): CHECK_BMI2ADX is defined outside this file. There is no
// RET here, so the macro is expected to pick one of the two bodies at run
// time (using LSQR as its internal label) and emit the return on both
// paths; confirm against fp_amd64.h.
TEXT ·sqrAmd64(SB),NOSPLIT,$64-16
    MOVQ z+0(FP), DI  // DI = z
    MOVQ x+8(FP), SI  // SI = x
    CHECK_BMI2ADX(LSQR, sqrLegacy, sqrBmi2Adx)
75
// func modpAmd64(z *Elt)
//
// Rewrites the four 64-bit words at z in place using a branch-free
// add-then-conditionally-subtract sequence on the constant 19, with bit 255
// (bit 63 of the top word) cleared in the stored result. The existing
// comments name the modulus as 2^255-19.
// NOTE(review): this presumably yields the canonical representative of z
// modulo 2^255-19 for any 256-bit input; confirm against the Go reference
// implementation.
//
// Outline:
//   1. Move bit 255 of z into the carry flag and clear it in the register.
//   2. Add 19, or 38 if that bit was set, and propagate the carry.
//   3. If bit 255 of the sum is clear, subtract 19 again; otherwise keep
//      the sum. Bit 255 is then cleared.
//
// Registers: DI = z, R8..R11 = the four words (least significant first),
// AX = 19, CX = selected addend / subtrahend. No stack frame ($0-8).
TEXT ·modpAmd64(SB),NOSPLIT,$0-8
    MOVQ z+0(FP), DI

    MOVQ   (DI),  R8
    MOVQ  8(DI),  R9
    MOVQ 16(DI), R10
    MOVQ 24(DI), R11

    MOVL $19, AX
    MOVL $38, CX

    BTRQ $63, R11   // CF = bit 255 of z; the bit is cleared in R11
    CMOVLCC AX, CX  // carry clear -> CX = 19, carry set -> CX stays 38

    // Add the selected constant (19 or 38) across all four words.
    ADDQ CX,  R8
    ADCQ $0,  R9
    ADCQ $0, R10
    ADCQ $0, R11

    // The sign flag left by the last ADCQ is bit 63 of R11, i.e. bit 255 of
    // the sum. The constant 0 is loaded with MOVL because the CMOVLPL below
    // still needs those flags and a MOV leaves them untouched.
    MOVL     $0,  CX
    CMOVLPL  AX,  CX  // sign clear (bit 255 == 0) -> CX = 19, else CX = 0
    BTRQ    $63, R11  // clear bit 255 of the sum

    // Subtract CX (19 or 0) with borrow propagation. The stores are
    // interleaved with the SBBQ chain; MOVQ does not modify the flags.
    SUBQ CX,  R8
    MOVQ R8,  (DI)
    SBBQ $0,  R9
    MOVQ R9,  8(DI)
    SBBQ $0, R10
    MOVQ R10, 16(DI)
    SBBQ $0, R11
    MOVQ R11, 24(DI)
    RET