1 package compiler
2
3 import (
4 "errors"
5
6 "github.com/tetratelabs/wazero/internal/asm"
7 "github.com/tetratelabs/wazero/internal/asm/amd64"
8 "github.com/tetratelabs/wazero/internal/wazeroir"
9 )
10
11
// compileV128Const implements compiler.compileV128Const for amd64 architecture.
//
// It materializes the 128-bit constant (lo = o.U1, hi = o.U2) into a vector
// register by building each 64-bit half in a general-purpose scratch register
// and moving/inserting it into the result lane by lane.
func (c *amd64Compiler) compileV128Const(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	lo, hi := o.U1, o.U2

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// The constant cannot be loaded directly into a vector register, so stage
	// each 64-bit half in a general-purpose register first.
	tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// Materialize the lower 64 bits in tmpReg.
	if lo == 0 {
		// XOR against itself is the cheap way to zero a register.
		c.assembler.CompileRegisterToRegister(amd64.XORQ, tmpReg, tmpReg)
	} else {
		c.assembler.CompileConstToRegister(amd64.MOVQ, int64(lo), tmpReg)
	}
	c.assembler.CompileRegisterToRegister(amd64.MOVQ, tmpReg, result)

	// Re-materialize tmpReg for the upper 64 bits only when its current
	// contents (the lo value) differ from hi.
	if lo != 0 && hi == 0 {
		c.assembler.CompileRegisterToRegister(amd64.XORQ, tmpReg, tmpReg)
	} else if hi != 0 {
		c.assembler.CompileConstToRegister(amd64.MOVQ, int64(hi), tmpReg)
	}

	// Insert the upper 64 bits into lane 1 of the result vector.
	c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, tmpReg, result, 1)

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}
50
51
52 func (c *amd64Compiler) compileV128Add(o *wazeroir.UnionOperation) error {
53 x2 := c.locationStack.popV128()
54 if err := c.compileEnsureOnRegister(x2); err != nil {
55 return err
56 }
57
58 x1 := c.locationStack.popV128()
59 if err := c.compileEnsureOnRegister(x1); err != nil {
60 return err
61 }
62 var inst asm.Instruction
63 shape := o.B1
64 switch shape {
65 case wazeroir.ShapeI8x16:
66 inst = amd64.PADDB
67 case wazeroir.ShapeI16x8:
68 inst = amd64.PADDW
69 case wazeroir.ShapeI32x4:
70 inst = amd64.PADDD
71 case wazeroir.ShapeI64x2:
72 inst = amd64.PADDQ
73 case wazeroir.ShapeF32x4:
74 inst = amd64.ADDPS
75 case wazeroir.ShapeF64x2:
76 inst = amd64.ADDPD
77 }
78 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
79
80 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
81 c.locationStack.markRegisterUnused(x2.register)
82 return nil
83 }
84
85
86 func (c *amd64Compiler) compileV128Sub(o *wazeroir.UnionOperation) error {
87 x2 := c.locationStack.popV128()
88 if err := c.compileEnsureOnRegister(x2); err != nil {
89 return err
90 }
91
92 x1 := c.locationStack.popV128()
93 if err := c.compileEnsureOnRegister(x1); err != nil {
94 return err
95 }
96 var inst asm.Instruction
97 shape := o.B1
98 switch shape {
99 case wazeroir.ShapeI8x16:
100 inst = amd64.PSUBB
101 case wazeroir.ShapeI16x8:
102 inst = amd64.PSUBW
103 case wazeroir.ShapeI32x4:
104 inst = amd64.PSUBD
105 case wazeroir.ShapeI64x2:
106 inst = amd64.PSUBQ
107 case wazeroir.ShapeF32x4:
108 inst = amd64.SUBPS
109 case wazeroir.ShapeF64x2:
110 inst = amd64.SUBPD
111 }
112 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
113
114 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
115 c.locationStack.markRegisterUnused(x2.register)
116 return nil
117 }
118
119
// compileV128Load implements compiler.compileV128Load for amd64 architecture.
//
// Plain, extending, and zero-padding loads are delegated to
// compileV128LoadImpl with the matching instruction; the "splat" variants
// load the scalar into a general-purpose register first and then broadcast
// it across every lane of the result vector.
func (c *amd64Compiler) compileV128Load(o *wazeroir.UnionOperation) error {
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	offset := uint32(o.U2)
	loadType := wazeroir.V128LoadType(o.B1)

	switch loadType {
	case wazeroir.V128LoadType128:
		err = c.compileV128LoadImpl(amd64.MOVDQU, offset, 16, result)
	case wazeroir.V128LoadType8x8s:
		// Load 8 bytes and sign-extend each to a 16-bit lane.
		err = c.compileV128LoadImpl(amd64.PMOVSXBW, offset, 8, result)
	case wazeroir.V128LoadType8x8u:
		// Load 8 bytes and zero-extend each to a 16-bit lane.
		err = c.compileV128LoadImpl(amd64.PMOVZXBW, offset, 8, result)
	case wazeroir.V128LoadType16x4s:
		err = c.compileV128LoadImpl(amd64.PMOVSXWD, offset, 8, result)
	case wazeroir.V128LoadType16x4u:
		err = c.compileV128LoadImpl(amd64.PMOVZXWD, offset, 8, result)
	case wazeroir.V128LoadType32x2s:
		err = c.compileV128LoadImpl(amd64.PMOVSXDQ, offset, 8, result)
	case wazeroir.V128LoadType32x2u:
		err = c.compileV128LoadImpl(amd64.PMOVZXDQ, offset, 8, result)
	case wazeroir.V128LoadType8Splat:
		reg, err := c.compileMemoryAccessCeilSetup(offset, 1)
		if err != nil {
			return err
		}
		// Load the byte, zero-extended, into reg.
		c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVBQZX, amd64ReservedRegisterForMemory, -1,
			reg, 1, reg)

		// Reserve result so the temporary vector allocation below cannot hand
		// back the same register.
		c.locationStack.markRegisterUsed(result)
		tmpVReg, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Insert the byte into lane 0; PSHUFB with an all-zero index mask then
		// broadcasts byte 0 into all 16 lanes.
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRB, reg, result, 0)
		c.assembler.CompileRegisterToRegister(amd64.PXOR, tmpVReg, tmpVReg)
		c.assembler.CompileRegisterToRegister(amd64.PSHUFB, tmpVReg, result)
	case wazeroir.V128LoadType16Splat:
		reg, err := c.compileMemoryAccessCeilSetup(offset, 2)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVWQZX, amd64ReservedRegisterForMemory, -2,
			reg, 1, reg)

		// Two copies of the word fill the low 32 bits; PSHUFD with an
		// all-zero selector then replicates that dword into every 32-bit lane.
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRW, reg, result, 0)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRW, reg, result, 1)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, result, result, 0)
	case wazeroir.V128LoadType32Splat:
		reg, err := c.compileMemoryAccessCeilSetup(offset, 4)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVLQZX, amd64ReservedRegisterForMemory, -4,
			reg, 1, reg)

		// Insert into lane 0 and broadcast with PSHUFD(0).
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRD, reg, result, 0)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, result, result, 0)
	case wazeroir.V128LoadType64Splat:
		reg, err := c.compileMemoryAccessCeilSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVQ, amd64ReservedRegisterForMemory, -8,
			reg, 1, reg)

		// Write the same 64-bit value into both lanes.
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, reg, result, 0)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, reg, result, 1)
	case wazeroir.V128LoadType32zero:
		// MOVL into a vector register zeroes the upper 96 bits.
		err = c.compileV128LoadImpl(amd64.MOVL, offset, 4, result)
	case wazeroir.V128LoadType64zero:
		// MOVQ into a vector register zeroes the upper 64 bits.
		err = c.compileV128LoadImpl(amd64.MOVQ, offset, 8, result)
	}

	if err != nil {
		return err
	}

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}
210
211 func (c *amd64Compiler) compileV128LoadImpl(inst asm.Instruction, offset uint32, targetSizeInBytes int64, dst asm.Register) error {
212 offsetReg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes)
213 if err != nil {
214 return err
215 }
216 c.assembler.CompileMemoryWithIndexToRegister(inst, amd64ReservedRegisterForMemory, -targetSizeInBytes,
217 offsetReg, 1, dst)
218 return nil
219 }
220
221
222 func (c *amd64Compiler) compileV128LoadLane(o *wazeroir.UnionOperation) error {
223 targetVector := c.locationStack.popV128()
224 if err := c.compileEnsureOnRegister(targetVector); err != nil {
225 return err
226 }
227
228 laneSize, laneIndex := o.B1, o.B2
229 offset := uint32(o.U2)
230
231 var insertInst asm.Instruction
232 switch laneSize {
233 case 8:
234 insertInst = amd64.PINSRB
235 case 16:
236 insertInst = amd64.PINSRW
237 case 32:
238 insertInst = amd64.PINSRD
239 case 64:
240 insertInst = amd64.PINSRQ
241 }
242
243 targetSizeInBytes := int64(laneSize / 8)
244 offsetReg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes)
245 if err != nil {
246 return err
247 }
248 c.assembler.CompileMemoryWithIndexAndArgToRegister(insertInst, amd64ReservedRegisterForMemory, -targetSizeInBytes,
249 offsetReg, 1, targetVector.register, laneIndex)
250
251 c.pushVectorRuntimeValueLocationOnRegister(targetVector.register)
252 return nil
253 }
254
255
256 func (c *amd64Compiler) compileV128Store(o *wazeroir.UnionOperation) error {
257 val := c.locationStack.popV128()
258 if err := c.compileEnsureOnRegister(val); err != nil {
259 return err
260 }
261
262 const targetSizeInBytes = 16
263 offset := uint32(o.U2)
264 offsetReg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes)
265 if err != nil {
266 return err
267 }
268
269 c.assembler.CompileRegisterToMemoryWithIndex(amd64.MOVDQU, val.register,
270 amd64ReservedRegisterForMemory, -targetSizeInBytes, offsetReg, 1)
271
272 c.locationStack.markRegisterUnused(val.register, offsetReg)
273 return nil
274 }
275
276
277 func (c *amd64Compiler) compileV128StoreLane(o *wazeroir.UnionOperation) error {
278 var storeInst asm.Instruction
279 laneSize := o.B1
280 laneIndex := o.B2
281 offset := uint32(o.U2)
282 switch laneSize {
283 case 8:
284 storeInst = amd64.PEXTRB
285 case 16:
286 storeInst = amd64.PEXTRW
287 case 32:
288 storeInst = amd64.PEXTRD
289 case 64:
290 storeInst = amd64.PEXTRQ
291 }
292
293 val := c.locationStack.popV128()
294 if err := c.compileEnsureOnRegister(val); err != nil {
295 return err
296 }
297
298 targetSizeInBytes := int64(laneSize / 8)
299 offsetReg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes)
300 if err != nil {
301 return err
302 }
303
304 c.assembler.CompileRegisterToMemoryWithIndexAndArg(storeInst, val.register,
305 amd64ReservedRegisterForMemory, -targetSizeInBytes, offsetReg, 1, laneIndex)
306
307 c.locationStack.markRegisterUnused(val.register, offsetReg)
308 return nil
309 }
310
311
312 func (c *amd64Compiler) compileV128ExtractLane(o *wazeroir.UnionOperation) error {
313 v := c.locationStack.popV128()
314 if err := c.compileEnsureOnRegister(v); err != nil {
315 return err
316 }
317 vreg := v.register
318 shape := o.B1
319 laneIndex := o.B2
320 signed := o.B3
321 switch shape {
322 case wazeroir.ShapeI8x16:
323 result, err := c.allocateRegister(registerTypeGeneralPurpose)
324 if err != nil {
325 return err
326 }
327 c.assembler.CompileRegisterToRegisterWithArg(amd64.PEXTRB, vreg, result, laneIndex)
328 if signed {
329 c.assembler.CompileRegisterToRegister(amd64.MOVBLSX, result, result)
330 } else {
331 c.assembler.CompileRegisterToRegister(amd64.MOVBLZX, result, result)
332 }
333 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
334 c.locationStack.markRegisterUnused(vreg)
335 case wazeroir.ShapeI16x8:
336 result, err := c.allocateRegister(registerTypeGeneralPurpose)
337 if err != nil {
338 return err
339 }
340 c.assembler.CompileRegisterToRegisterWithArg(amd64.PEXTRW, vreg, result, laneIndex)
341 if signed {
342 c.assembler.CompileRegisterToRegister(amd64.MOVWLSX, result, result)
343 } else {
344 c.assembler.CompileRegisterToRegister(amd64.MOVWLZX, result, result)
345 }
346 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
347 c.locationStack.markRegisterUnused(vreg)
348 case wazeroir.ShapeI32x4:
349 result, err := c.allocateRegister(registerTypeGeneralPurpose)
350 if err != nil {
351 return err
352 }
353 c.assembler.CompileRegisterToRegisterWithArg(amd64.PEXTRD, vreg, result, laneIndex)
354 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
355 c.locationStack.markRegisterUnused(vreg)
356 case wazeroir.ShapeI64x2:
357 result, err := c.allocateRegister(registerTypeGeneralPurpose)
358 if err != nil {
359 return err
360 }
361 c.assembler.CompileRegisterToRegisterWithArg(amd64.PEXTRQ, vreg, result, laneIndex)
362 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64)
363 c.locationStack.markRegisterUnused(vreg)
364 case wazeroir.ShapeF32x4:
365 if laneIndex != 0 {
366 c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, vreg, vreg, laneIndex)
367 }
368 c.pushRuntimeValueLocationOnRegister(vreg, runtimeValueTypeF32)
369 case wazeroir.ShapeF64x2:
370 if laneIndex != 0 {
371
372
373
374
375
376
377
378 arg := byte(0b00_00_11_10)
379 c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, vreg, vreg, arg)
380 }
381 c.pushRuntimeValueLocationOnRegister(vreg, runtimeValueTypeF64)
382 }
383
384 return nil
385 }
386
387
388 func (c *amd64Compiler) compileV128ReplaceLane(o *wazeroir.UnionOperation) error {
389 origin := c.locationStack.pop()
390 if err := c.compileEnsureOnRegister(origin); err != nil {
391 return err
392 }
393
394 vector := c.locationStack.popV128()
395 if err := c.compileEnsureOnRegister(vector); err != nil {
396 return err
397 }
398
399 shape := o.B1
400 laneIndex := o.B2
401 switch shape {
402 case wazeroir.ShapeI8x16:
403 c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRB, origin.register, vector.register, laneIndex)
404 case wazeroir.ShapeI16x8:
405 c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRW, origin.register, vector.register, laneIndex)
406 case wazeroir.ShapeI32x4:
407 c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRD, origin.register, vector.register, laneIndex)
408 case wazeroir.ShapeI64x2:
409 c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, origin.register, vector.register, laneIndex)
410 case wazeroir.ShapeF32x4:
411 c.assembler.CompileRegisterToRegisterWithArg(amd64.INSERTPS, origin.register, vector.register,
412
413
414 laneIndex<<4,
415 )
416 case wazeroir.ShapeF64x2:
417 if laneIndex == 0 {
418 c.assembler.CompileRegisterToRegister(amd64.MOVSD, origin.register, vector.register)
419 } else {
420 c.assembler.CompileRegisterToRegister(amd64.MOVLHPS, origin.register, vector.register)
421 }
422 }
423
424 c.pushVectorRuntimeValueLocationOnRegister(vector.register)
425 c.locationStack.markRegisterUnused(origin.register)
426 return nil
427 }
428
429
// compileV128Splat implements compiler.compileV128Splat for amd64 architecture.
//
// It duplicates the popped scalar into every lane of a v128 result. Integer
// shapes write into a freshly allocated vector register; float shapes
// broadcast in place on the origin's own register.
func (c *amd64Compiler) compileV128Splat(o *wazeroir.UnionOperation) (err error) {
	origin := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(origin); err != nil {
		return
	}

	var result asm.Register
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Reserve result so the temporary allocation below returns a
		// different register.
		c.locationStack.markRegisterUsed(result)

		tmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Insert the byte into lane 0; PSHUFB with an all-zero index mask
		// then broadcasts byte 0 into all 16 lanes.
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRB, origin.register, result, 0)
		c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp)
		c.assembler.CompileRegisterToRegister(amd64.PSHUFB, tmp, result)
	case wazeroir.ShapeI16x8:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.locationStack.markRegisterUsed(result)
		// Two copies of the word fill the low 32 bits; PSHUFD(0) then
		// replicates that dword across all four 32-bit lanes.
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRW, origin.register, result, 0)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRW, origin.register, result, 1)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, result, result, 0)
	case wazeroir.ShapeI32x4:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.locationStack.markRegisterUsed(result)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRD, origin.register, result, 0)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, result, result, 0)
	case wazeroir.ShapeI64x2:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.locationStack.markRegisterUsed(result)
		// Write the same 64-bit value into both lanes.
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, origin.register, result, 0)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, origin.register, result, 1)
	case wazeroir.ShapeF32x4:
		// The value is already in a vector register; broadcast in place.
		result = origin.register
		c.assembler.CompileRegisterToRegisterWithArg(amd64.INSERTPS, origin.register, result, 0)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, result, result, 0)
	case wazeroir.ShapeF64x2:
		result = origin.register
		// MOVLHPS copies the low 64 bits of the source into the high 64 bits
		// of the destination.
		c.assembler.CompileRegisterToRegister(amd64.MOVQ, origin.register, result)
		c.assembler.CompileRegisterToRegister(amd64.MOVLHPS, origin.register, result)
	}

	c.locationStack.markRegisterUnused(origin.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}
492
493
494 func (c *amd64Compiler) compileV128Shuffle(o *wazeroir.UnionOperation) error {
495 w := c.locationStack.popV128()
496 if err := c.compileEnsureOnRegister(w); err != nil {
497 return err
498 }
499
500 v := c.locationStack.popV128()
501 if err := c.compileEnsureOnRegister(v); err != nil {
502 return err
503 }
504
505 wr, vr := w.register, v.register
506
507 tmp, err := c.allocateRegister(registerTypeVector)
508 if err != nil {
509 return err
510 }
511
512 consts := [32]byte{}
513 lanes := o.Us
514 for i, unsignedLane := range lanes {
515 lane := byte(unsignedLane)
516 if lane < 16 {
517 consts[i+16] = 0x80
518 consts[i] = lane
519 } else {
520 consts[i+16] = lane - 16
521 consts[i] = 0x80
522 }
523 }
524
525 err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, asm.NewStaticConst(consts[:16]), tmp)
526 if err != nil {
527 return err
528 }
529 c.assembler.CompileRegisterToRegister(amd64.PSHUFB, tmp, vr)
530 err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, asm.NewStaticConst(consts[16:]), tmp)
531 if err != nil {
532 return err
533 }
534 c.assembler.CompileRegisterToRegister(amd64.PSHUFB, tmp, wr)
535 c.assembler.CompileRegisterToRegister(amd64.ORPS, vr, wr)
536
537 c.pushVectorRuntimeValueLocationOnRegister(wr)
538 c.locationStack.markRegisterUnused(vr)
539 return nil
540 }
541
// swizzleConst is the saturating-add mask used by compileV128Swizzle: after
// PADDUSB, in-range indices (0-15) become 0x70-0x7f and keep their low nibble,
// while any index >= 16 saturates to >= 0x80, which PSHUFB treats as
// "write zero" — matching i8x16.swizzle's out-of-range semantics.
var swizzleConst = [16]byte{
	0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
	0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
}
546
547
548 func (c *amd64Compiler) compileV128Swizzle(*wazeroir.UnionOperation) error {
549 index := c.locationStack.popV128()
550 if err := c.compileEnsureOnRegister(index); err != nil {
551 return err
552 }
553
554 base := c.locationStack.popV128()
555 if err := c.compileEnsureOnRegister(base); err != nil {
556 return err
557 }
558
559 idxReg, baseReg := index.register, base.register
560
561 tmp, err := c.allocateRegister(registerTypeVector)
562 if err != nil {
563 return err
564 }
565
566 err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, asm.NewStaticConst(swizzleConst[:]), tmp)
567 if err != nil {
568 return err
569 }
570
571 c.assembler.CompileRegisterToRegister(amd64.PADDUSB, tmp, idxReg)
572 c.assembler.CompileRegisterToRegister(amd64.PSHUFB, idxReg, baseReg)
573
574 c.pushVectorRuntimeValueLocationOnRegister(baseReg)
575 c.locationStack.markRegisterUnused(idxReg)
576 return nil
577 }
578
579
580 func (c *amd64Compiler) compileV128AnyTrue(*wazeroir.UnionOperation) error {
581 v := c.locationStack.popV128()
582 if err := c.compileEnsureOnRegister(v); err != nil {
583 return err
584 }
585 vreg := v.register
586
587 c.assembler.CompileRegisterToRegister(amd64.PTEST, vreg, vreg)
588
589 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(amd64.ConditionalRegisterStateNE)
590 c.locationStack.markRegisterUnused(vreg)
591 return nil
592 }
593
594
595 func (c *amd64Compiler) compileV128AllTrue(o *wazeroir.UnionOperation) error {
596 v := c.locationStack.popV128()
597 if err := c.compileEnsureOnRegister(v); err != nil {
598 return err
599 }
600
601 tmp, err := c.allocateRegister(registerTypeVector)
602 if err != nil {
603 return err
604 }
605
606 var cmpInst asm.Instruction
607 shape := o.B1
608 switch shape {
609 case wazeroir.ShapeI8x16:
610 cmpInst = amd64.PCMPEQB
611 case wazeroir.ShapeI16x8:
612 cmpInst = amd64.PCMPEQW
613 case wazeroir.ShapeI32x4:
614 cmpInst = amd64.PCMPEQD
615 case wazeroir.ShapeI64x2:
616 cmpInst = amd64.PCMPEQQ
617 }
618
619 c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp)
620 c.assembler.CompileRegisterToRegister(cmpInst, v.register, tmp)
621 c.assembler.CompileRegisterToRegister(amd64.PTEST, tmp, tmp)
622 c.locationStack.markRegisterUnused(v.register, tmp)
623 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(amd64.ConditionalRegisterStateE)
624 return nil
625 }
626
627
// compileV128BitMask implements compiler.compileV128BitMask for amd64.
//
// It gathers the most significant bit of every lane into the low bits of a
// general-purpose register (i8x16 -> 16 bits, i16x8 -> 8 bits, and so on).
func (c *amd64Compiler) compileV128BitMask(o *wazeroir.UnionOperation) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	result, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		// PMOVMSKB gathers the sign bit of each of the 16 bytes directly.
		c.assembler.CompileRegisterToRegister(amd64.PMOVMSKB, v.register, result)
	case wazeroir.ShapeI16x8:
		// There is no 16-bit variant of PMOVMSKB, so first narrow the eight
		// i16 lanes to i8 with signed saturation (PACKSSWB), which preserves
		// each lane's sign bit. Packing the register with itself duplicates
		// the 8 narrowed lanes into both halves, so PMOVMSKB yields the 8-bit
		// mask twice; shifting right by 8 keeps a single copy.
		c.assembler.CompileRegisterToRegister(amd64.PACKSSWB, v.register, v.register)
		c.assembler.CompileRegisterToRegister(amd64.PMOVMSKB, v.register, result)

		c.assembler.CompileConstToRegister(amd64.SHRQ, 8, result)
	case wazeroir.ShapeI32x4:
		// MOVMSKPS gathers the sign bit of each 32-bit lane.
		c.assembler.CompileRegisterToRegister(amd64.MOVMSKPS, v.register, result)
	case wazeroir.ShapeI64x2:
		// MOVMSKPD gathers the sign bit of each 64-bit lane.
		c.assembler.CompileRegisterToRegister(amd64.MOVMSKPD, v.register, result)
	}

	c.locationStack.markRegisterUnused(v.register)
	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	return nil
}
678
679
680 func (c *amd64Compiler) compileV128And(*wazeroir.UnionOperation) error {
681 x2 := c.locationStack.popV128()
682 if err := c.compileEnsureOnRegister(x2); err != nil {
683 return err
684 }
685
686 x1 := c.locationStack.popV128()
687 if err := c.compileEnsureOnRegister(x1); err != nil {
688 return err
689 }
690
691 c.assembler.CompileRegisterToRegister(amd64.PAND, x2.register, x1.register)
692
693 c.locationStack.markRegisterUnused(x2.register)
694 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
695 return nil
696 }
697
698
699 func (c *amd64Compiler) compileV128Not(*wazeroir.UnionOperation) error {
700 v := c.locationStack.popV128()
701 if err := c.compileEnsureOnRegister(v); err != nil {
702 return err
703 }
704
705 tmp, err := c.allocateRegister(registerTypeVector)
706 if err != nil {
707 return err
708 }
709
710
711 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, tmp, tmp)
712
713 c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, v.register)
714 c.pushVectorRuntimeValueLocationOnRegister(v.register)
715 return nil
716 }
717
718
719 func (c *amd64Compiler) compileV128Or(*wazeroir.UnionOperation) error {
720 x2 := c.locationStack.popV128()
721 if err := c.compileEnsureOnRegister(x2); err != nil {
722 return err
723 }
724
725 x1 := c.locationStack.popV128()
726 if err := c.compileEnsureOnRegister(x1); err != nil {
727 return err
728 }
729
730 c.assembler.CompileRegisterToRegister(amd64.POR, x2.register, x1.register)
731
732 c.locationStack.markRegisterUnused(x2.register)
733 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
734 return nil
735 }
736
737
738 func (c *amd64Compiler) compileV128Xor(*wazeroir.UnionOperation) error {
739 x2 := c.locationStack.popV128()
740 if err := c.compileEnsureOnRegister(x2); err != nil {
741 return err
742 }
743
744 x1 := c.locationStack.popV128()
745 if err := c.compileEnsureOnRegister(x1); err != nil {
746 return err
747 }
748
749 c.assembler.CompileRegisterToRegister(amd64.PXOR, x2.register, x1.register)
750
751 c.locationStack.markRegisterUnused(x2.register)
752 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
753 return nil
754 }
755
756
// compileV128Bitselect implements compiler.compileV128Bitselect for amd64.
//
// result = (x1 AND selector) OR (x2 AND NOT selector), computed in place:
// x1's register keeps the bits chosen from x1, the selector register is
// reused to hold the bits chosen from x2, and the two are OR-ed into x1.
func (c *amd64Compiler) compileV128Bitselect(*wazeroir.UnionOperation) error {
	selector := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(selector); err != nil {
		return err
	}

	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// x1 = x1 AND selector.
	c.assembler.CompileRegisterToRegister(amd64.PAND, selector.register, x1.register)
	// selector = (NOT selector) AND x2; PANDN inverts its destination operand.
	c.assembler.CompileRegisterToRegister(amd64.PANDN, x2.register, selector.register)
	// x1 = x1 OR selector, the final bitselect result.
	c.assembler.CompileRegisterToRegister(amd64.POR, selector.register, x1.register)

	c.locationStack.markRegisterUnused(x2.register, selector.register)
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	return nil
}
783
784
// compileV128AndNot implements compiler.compileV128AndNot for amd64.
//
// Wasm's v128.andnot(x1, x2) is x1 AND (NOT x2). PANDN computes
// (NOT destination) AND source, so emitting it with x1 as the source and x2
// as the destination yields (NOT x2) AND x1 directly into x2's register,
// which is then pushed as the result.
func (c *amd64Compiler) compileV128AndNot(*wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileRegisterToRegister(amd64.PANDN, x1.register, x2.register)

	c.locationStack.markRegisterUnused(x1.register)
	c.pushVectorRuntimeValueLocationOnRegister(x2.register)
	return nil
}
802
803
804 func (c *amd64Compiler) compileV128Shr(o *wazeroir.UnionOperation) error {
805
806 shape := o.B1
807 signed := o.B3
808 if shape == wazeroir.ShapeI8x16 {
809 return c.compileV128ShrI8x16Impl(signed)
810 } else if shape == wazeroir.ShapeI64x2 && signed {
811 return c.compileV128ShrI64x2SignedImpl()
812 } else {
813 return c.compileV128ShrImpl(o)
814 }
815 }
816
817
818 func (c *amd64Compiler) compileV128ShrImpl(o *wazeroir.UnionOperation) error {
819 s := c.locationStack.pop()
820 if err := c.compileEnsureOnRegister(s); err != nil {
821 return err
822 }
823
824 x1 := c.locationStack.popV128()
825 if err := c.compileEnsureOnRegister(x1); err != nil {
826 return err
827 }
828
829 vecTmp, err := c.allocateRegister(registerTypeVector)
830 if err != nil {
831 return err
832 }
833
834 var moduleConst int64
835 var shift asm.Instruction
836 shape := o.B1
837 signed := o.B3
838 switch shape {
839 case wazeroir.ShapeI16x8:
840 moduleConst = 0xf
841 if signed {
842 shift = amd64.PSRAW
843 } else {
844 shift = amd64.PSRLW
845 }
846 case wazeroir.ShapeI32x4:
847 moduleConst = 0x1f
848 if signed {
849 shift = amd64.PSRAD
850 } else {
851 shift = amd64.PSRLD
852 }
853 case wazeroir.ShapeI64x2:
854 moduleConst = 0x3f
855 shift = amd64.PSRLQ
856 }
857
858 gpShiftAmount := s.register
859 c.assembler.CompileConstToRegister(amd64.ANDQ, moduleConst, gpShiftAmount)
860 c.assembler.CompileRegisterToRegister(amd64.MOVL, gpShiftAmount, vecTmp)
861 c.assembler.CompileRegisterToRegister(shift, vecTmp, x1.register)
862
863 c.locationStack.markRegisterUnused(gpShiftAmount)
864 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
865 return nil
866 }
867
868
869
// compileV128ShrI64x2SignedImpl implements signed shift-right for the i64x2
// shape.
//
// SSE has no arithmetic 64-bit vector shift, so each 64-bit lane is extracted
// into a general-purpose register, shifted with SARQ, and re-inserted.
func (c *amd64Compiler) compileV128ShrI64x2SignedImpl() error {
	// Variable-count SARQ takes its count in CL, so the shift amount must be
	// placed in RCX.
	const shiftCountRegister = amd64.RegCX

	s := c.locationStack.pop()
	if s.register != shiftCountRegister {
		// Evict whatever value currently occupies RCX to the stack first.
		c.onValueReleaseRegisterToStack(shiftCountRegister)
		if s.onStack() {
			s.setRegister(shiftCountRegister)
			c.compileLoadValueOnStackToRegister(s)
		} else if s.onConditionalRegister() {
			c.compileMoveConditionalToGeneralPurposeRegister(s, shiftCountRegister)
		} else {
			// Already on a (different) general-purpose register: move it over.
			old := s.register
			c.assembler.CompileRegisterToRegister(amd64.MOVL, old, shiftCountRegister)
			s.setRegister(shiftCountRegister)
			c.locationStack.markRegisterUnused(old)
		}
	}

	// Reserve RCX so the temporary allocation below cannot take it.
	c.locationStack.markRegisterUsed(shiftCountRegister)
	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// Shift each lane individually: extract -> SARQ -> re-insert. SARQ masks
	// its count to 6 bits in hardware, matching Wasm's modulo-64 semantics.
	c.assembler.CompileRegisterToRegisterWithArg(amd64.PEXTRQ, x1.register, tmp, 0)
	c.assembler.CompileRegisterToRegister(amd64.SARQ, shiftCountRegister, tmp)
	c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, tmp, x1.register, 0)
	c.assembler.CompileRegisterToRegisterWithArg(amd64.PEXTRQ, x1.register, tmp, 1)
	c.assembler.CompileRegisterToRegister(amd64.SARQ, shiftCountRegister, tmp)
	c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, tmp, x1.register, 1)

	c.locationStack.markRegisterUnused(shiftCountRegister)
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	return nil
}
913
914
915
// i8x16LogicalSHRMaskTable provides, for each shift amount 0-7, a 16-byte
// mask used when an i8x16 logical shift-right is emulated with a 16-bit
// PSRLW: row s holds 0xff >> s repeated across all 16 lanes, clearing the
// bits shifted in from the higher neighboring byte.
var i8x16LogicalSHRMaskTable = [8 * 16]byte{
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
	0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // for 1 shift
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, // for 2 shift
	0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, // for 3 shift
	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // for 4 shift
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // for 5 shift
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // for 6 shift
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // for 7 shift
}
926
927
928
929
// compileV128ShrI8x16Impl implements shift-right for the i8x16 shape.
//
// SSE has no 8-bit-granular shift instruction, so the shift is emulated on
// 16-bit lanes:
//   - signed: widen each byte to a word by interleaving it with itself, do an
//     arithmetic 16-bit shift by (amount + 8), then re-narrow with signed
//     saturation.
//   - unsigned: do a 16-bit logical shift, then mask off the bits that leaked
//     in from the neighboring byte using i8x16LogicalSHRMaskTable.
func (c *amd64Compiler) compileV128ShrI8x16Impl(signed bool) error {
	s := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(s); err != nil {
		return err
	}

	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	vecTmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	gpShiftAmount := s.register
	// Wasm semantics: the shift amount is taken modulo the lane width (8).
	c.assembler.CompileConstToRegister(amd64.ANDQ, 0x7, gpShiftAmount)

	if signed {
		// Reserve vecTmp while allocating a second temporary.
		c.locationStack.markRegisterUsed(vecTmp)
		vecTmp2, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		vreg := v.register

		// Copy the input so the low and high halves can be widened separately.
		c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vreg, vecTmp)

		// Interleave each byte with itself: vreg now holds the low 8 bytes
		// widened to words (each word is the byte duplicated into both
		// halves), vecTmp likewise for the high 8 bytes.
		c.assembler.CompileRegisterToRegister(amd64.PUNPCKLBW, vreg, vreg)
		c.assembler.CompileRegisterToRegister(amd64.PUNPCKHBW, vecTmp, vecTmp)

		// Shift by (amount + 8): the extra 8 drops the duplicated low byte,
		// while the arithmetic shift replicates each byte's sign bit.
		c.assembler.CompileConstToRegister(amd64.ADDQ, 0x8, gpShiftAmount)
		c.assembler.CompileRegisterToRegister(amd64.MOVL, gpShiftAmount, vecTmp2)

		c.assembler.CompileRegisterToRegister(amd64.PSRAW, vecTmp2, vreg)
		c.assembler.CompileRegisterToRegister(amd64.PSRAW, vecTmp2, vecTmp)

		// Narrow the two word-vectors back into a single byte-vector; the
		// shifted values fit in int8, so the saturation does not clip them.
		c.assembler.CompileRegisterToRegister(amd64.PACKSSWB, vecTmp, vreg)

		c.locationStack.markRegisterUnused(gpShiftAmount, vecTmp)
		c.pushVectorRuntimeValueLocationOnRegister(vreg)
	} else {
		c.assembler.CompileRegisterToRegister(amd64.MOVL, gpShiftAmount, vecTmp)

		// 16-bit logical shift: each byte now contains stray bits shifted in
		// from its higher neighbor.
		c.assembler.CompileRegisterToRegister(amd64.PSRLW, vecTmp, v.register)

		gpTmp, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}

		// Load the base address of the per-shift-amount mask table.
		err = c.assembler.CompileStaticConstToRegister(amd64.LEAQ, asm.NewStaticConst(i8x16LogicalSHRMaskTable[:]), gpTmp)
		if err != nil {
			return err
		}

		// Each table row is 16 bytes, so the row offset is amount*16.
		c.assembler.CompileConstToRegister(amd64.SHLQ, 4, gpShiftAmount)

		// Load the mask row for this shift amount.
		c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVDQU,
			gpTmp, 0, gpShiftAmount, 1,
			vecTmp,
		)

		// Clear the stray bits.
		c.assembler.CompileRegisterToRegister(amd64.PAND, vecTmp, v.register)

		c.locationStack.markRegisterUnused(gpShiftAmount)
		c.pushVectorRuntimeValueLocationOnRegister(v.register)
	}
	return nil
}
1022
1023
1024
// i8x16SHLMaskTable provides, for each shift amount 0-7, a 16-byte mask used
// when an i8x16 left shift is emulated with a 16-bit PSLLW: row s holds
// (0xff << s) & 0xff repeated across all 16 lanes, clearing the bits shifted
// in from the lower neighboring byte.
var i8x16SHLMaskTable = [8 * 16]byte{
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
	0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, // for 1 shift
	0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, // for 2 shift
	0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, // for 3 shift
	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // for 4 shift
	0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, // for 5 shift
	0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, // for 6 shift
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // for 7 shift
}
1035
1036
// compileV128Shl implements compiler.compileV128Shl for amd64 architecture.
//
// i16x8/i32x4/i64x2 map directly to PSLLW/PSLLD/PSLLQ. i8x16 has no 8-bit
// shift instruction, so it is emulated with a 16-bit PSLLW followed by a
// table-driven mask (i8x16SHLMaskTable) that clears the bits shifted in from
// the lower neighboring byte.
func (c *amd64Compiler) compileV128Shl(o *wazeroir.UnionOperation) error {
	s := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(s); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	vecTmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Wasm takes the shift amount modulo the lane width; modulo is the
	// corresponding bit mask.
	var modulo int64
	var shift asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		modulo = 0x7
		// No 8-bit shift exists; shift on 16-bit lanes and fix up below.
		shift = amd64.PSLLW
	case wazeroir.ShapeI16x8:
		modulo = 0xf
		shift = amd64.PSLLW
	case wazeroir.ShapeI32x4:
		modulo = 0x1f
		shift = amd64.PSLLD
	case wazeroir.ShapeI64x2:
		modulo = 0x3f
		shift = amd64.PSLLQ
	}

	gpShiftAmount := s.register
	c.assembler.CompileConstToRegister(amd64.ANDQ, modulo, gpShiftAmount)
	// SSE shifts take a variable count from a vector register.
	c.assembler.CompileRegisterToRegister(amd64.MOVL, gpShiftAmount, vecTmp)
	c.assembler.CompileRegisterToRegister(shift, vecTmp, x1.register)

	if shape == wazeroir.ShapeI8x16 {
		gpTmp, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}

		// Load the base address of the per-shift-amount mask table.
		err = c.assembler.CompileStaticConstToRegister(amd64.LEAQ, asm.NewStaticConst(i8x16SHLMaskTable[:]), gpTmp)
		if err != nil {
			return err
		}

		// Each table row is 16 bytes, so the row offset is amount*16.
		c.assembler.CompileConstToRegister(amd64.SHLQ, 4, gpShiftAmount)

		// Load the mask row for this shift amount.
		c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVDQU,
			gpTmp, 0, gpShiftAmount, 1,
			vecTmp,
		)

		// Clear the bits that crossed over from the neighboring byte.
		c.assembler.CompileRegisterToRegister(amd64.PAND, vecTmp, x1.register)
	}

	c.locationStack.markRegisterUnused(gpShiftAmount)
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	return nil
}
1108
1109
1110 func (c *amd64Compiler) compileV128Cmp(o *wazeroir.UnionOperation) error {
1111 x2 := c.locationStack.popV128()
1112 if err := c.compileEnsureOnRegister(x2); err != nil {
1113 return err
1114 }
1115
1116 x1 := c.locationStack.popV128()
1117 if err := c.compileEnsureOnRegister(x1); err != nil {
1118 return err
1119 }
1120
1121 const (
1122
1123 floatEqualArg = 0
1124 floatLessThanArg = 1
1125 floatLessThanOrEqualArg = 2
1126 floatNotEqualARg = 4
1127 )
1128
1129 x1Reg, x2Reg, result := x1.register, x2.register, asm.NilRegister
1130 v128CmpType := o.B1
1131 switch v128CmpType {
1132 case wazeroir.V128CmpTypeF32x4Eq:
1133 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPS, x2Reg, x1Reg, floatEqualArg)
1134 result = x1Reg
1135 case wazeroir.V128CmpTypeF32x4Ne:
1136 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPS, x2Reg, x1Reg, floatNotEqualARg)
1137 result = x1Reg
1138 case wazeroir.V128CmpTypeF32x4Lt:
1139 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPS, x2Reg, x1Reg, floatLessThanArg)
1140 result = x1Reg
1141 case wazeroir.V128CmpTypeF32x4Gt:
1142
1143 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPS, x1Reg, x2Reg, floatLessThanArg)
1144 result = x2Reg
1145 case wazeroir.V128CmpTypeF32x4Le:
1146 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPS, x2Reg, x1Reg, floatLessThanOrEqualArg)
1147 result = x1Reg
1148 case wazeroir.V128CmpTypeF32x4Ge:
1149
1150 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPS, x1Reg, x2Reg, floatLessThanOrEqualArg)
1151 result = x2Reg
1152 case wazeroir.V128CmpTypeF64x2Eq:
1153 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPD, x2Reg, x1Reg, floatEqualArg)
1154 result = x1Reg
1155 case wazeroir.V128CmpTypeF64x2Ne:
1156 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPD, x2Reg, x1Reg, floatNotEqualARg)
1157 result = x1Reg
1158 case wazeroir.V128CmpTypeF64x2Lt:
1159 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPD, x2Reg, x1Reg, floatLessThanArg)
1160 result = x1Reg
1161 case wazeroir.V128CmpTypeF64x2Gt:
1162
1163 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPD, x1Reg, x2Reg, floatLessThanArg)
1164 result = x2Reg
1165 case wazeroir.V128CmpTypeF64x2Le:
1166 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPD, x2Reg, x1Reg, floatLessThanOrEqualArg)
1167 result = x1Reg
1168 case wazeroir.V128CmpTypeF64x2Ge:
1169
1170 c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPD, x1Reg, x2Reg, floatLessThanOrEqualArg)
1171 result = x2Reg
1172 case wazeroir.V128CmpTypeI8x16Eq:
1173 c.assembler.CompileRegisterToRegister(amd64.PCMPEQB, x2Reg, x1Reg)
1174 result = x1Reg
1175 case wazeroir.V128CmpTypeI8x16Ne:
1176 c.assembler.CompileRegisterToRegister(amd64.PCMPEQB, x2Reg, x1Reg)
1177
1178 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x2Reg)
1179
1180 c.assembler.CompileRegisterToRegister(amd64.PXOR, x2Reg, x1Reg)
1181 result = x1Reg
1182 case wazeroir.V128CmpTypeI8x16LtS:
1183 c.assembler.CompileRegisterToRegister(amd64.PCMPGTB, x1Reg, x2Reg)
1184 result = x2Reg
1185 case wazeroir.V128CmpTypeI8x16LtU, wazeroir.V128CmpTypeI8x16GtU:
1186
1187 if v128CmpType == wazeroir.V128CmpTypeI8x16LtU {
1188 c.assembler.CompileRegisterToRegister(amd64.PMINUB, x2Reg, x1Reg)
1189 } else {
1190 c.assembler.CompileRegisterToRegister(amd64.PMAXUB, x2Reg, x1Reg)
1191 }
1192 c.assembler.CompileRegisterToRegister(amd64.PCMPEQB, x2Reg, x1Reg)
1193
1194 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x2Reg)
1195
1196 c.assembler.CompileRegisterToRegister(amd64.PXOR, x2Reg, x1Reg)
1197 result = x1Reg
1198 case wazeroir.V128CmpTypeI8x16GtS:
1199 c.assembler.CompileRegisterToRegister(amd64.PCMPGTB, x2Reg, x1Reg)
1200 result = x1Reg
1201 case wazeroir.V128CmpTypeI8x16LeS, wazeroir.V128CmpTypeI8x16LeU:
1202 tmp, err := c.allocateRegister(registerTypeVector)
1203 if err != nil {
1204 return err
1205 }
1206
1207 c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1Reg, tmp)
1208 if v128CmpType == wazeroir.V128CmpTypeI8x16LeS {
1209 c.assembler.CompileRegisterToRegister(amd64.PMINSB, x2Reg, tmp)
1210 } else {
1211 c.assembler.CompileRegisterToRegister(amd64.PMINUB, x2Reg, tmp)
1212 }
1213 c.assembler.CompileRegisterToRegister(amd64.PCMPEQB, tmp, x1Reg)
1214 result = x1Reg
1215 case wazeroir.V128CmpTypeI8x16GeS, wazeroir.V128CmpTypeI8x16GeU:
1216 tmp, err := c.allocateRegister(registerTypeVector)
1217 if err != nil {
1218 return err
1219 }
1220 c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1Reg, tmp)
1221 if v128CmpType == wazeroir.V128CmpTypeI8x16GeS {
1222 c.assembler.CompileRegisterToRegister(amd64.PMAXSB, x2Reg, tmp)
1223 } else {
1224 c.assembler.CompileRegisterToRegister(amd64.PMAXUB, x2Reg, tmp)
1225 }
1226 c.assembler.CompileRegisterToRegister(amd64.PCMPEQB, tmp, x1Reg)
1227 result = x1Reg
1228 case wazeroir.V128CmpTypeI16x8Eq:
1229 c.assembler.CompileRegisterToRegister(amd64.PCMPEQW, x2Reg, x1Reg)
1230 result = x1Reg
1231 case wazeroir.V128CmpTypeI16x8Ne:
1232 c.assembler.CompileRegisterToRegister(amd64.PCMPEQW, x2Reg, x1Reg)
1233
1234 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x2Reg)
1235
1236 c.assembler.CompileRegisterToRegister(amd64.PXOR, x2Reg, x1Reg)
1237 result = x1Reg
1238 case wazeroir.V128CmpTypeI16x8LtS:
1239 c.assembler.CompileRegisterToRegister(amd64.PCMPGTW, x1Reg, x2Reg)
1240 result = x2Reg
1241 case wazeroir.V128CmpTypeI16x8LtU, wazeroir.V128CmpTypeI16x8GtU:
1242
1243 if v128CmpType == wazeroir.V128CmpTypeI16x8LtU {
1244 c.assembler.CompileRegisterToRegister(amd64.PMINUW, x2Reg, x1Reg)
1245 } else {
1246 c.assembler.CompileRegisterToRegister(amd64.PMAXUW, x2Reg, x1Reg)
1247 }
1248 c.assembler.CompileRegisterToRegister(amd64.PCMPEQW, x2Reg, x1Reg)
1249
1250 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x2Reg)
1251
1252 c.assembler.CompileRegisterToRegister(amd64.PXOR, x2Reg, x1Reg)
1253 result = x1Reg
1254 case wazeroir.V128CmpTypeI16x8GtS:
1255 c.assembler.CompileRegisterToRegister(amd64.PCMPGTW, x2Reg, x1Reg)
1256 result = x1Reg
1257 case wazeroir.V128CmpTypeI16x8LeS, wazeroir.V128CmpTypeI16x8LeU:
1258 tmp, err := c.allocateRegister(registerTypeVector)
1259 if err != nil {
1260 return err
1261 }
1262
1263 c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1Reg, tmp)
1264 if v128CmpType == wazeroir.V128CmpTypeI16x8LeS {
1265 c.assembler.CompileRegisterToRegister(amd64.PMINSW, x2Reg, tmp)
1266 } else {
1267 c.assembler.CompileRegisterToRegister(amd64.PMINUW, x2Reg, tmp)
1268 }
1269 c.assembler.CompileRegisterToRegister(amd64.PCMPEQW, tmp, x1Reg)
1270 result = x1Reg
1271 case wazeroir.V128CmpTypeI16x8GeS, wazeroir.V128CmpTypeI16x8GeU:
1272 tmp, err := c.allocateRegister(registerTypeVector)
1273 if err != nil {
1274 return err
1275 }
1276 c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1Reg, tmp)
1277 if v128CmpType == wazeroir.V128CmpTypeI16x8GeS {
1278 c.assembler.CompileRegisterToRegister(amd64.PMAXSW, x2Reg, tmp)
1279 } else {
1280 c.assembler.CompileRegisterToRegister(amd64.PMAXUW, x2Reg, tmp)
1281 }
1282 c.assembler.CompileRegisterToRegister(amd64.PCMPEQW, tmp, x1Reg)
1283 result = x1Reg
1284 case wazeroir.V128CmpTypeI32x4Eq:
1285 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x1Reg)
1286 result = x1Reg
1287 case wazeroir.V128CmpTypeI32x4Ne:
1288 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x1Reg)
1289
1290 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x2Reg)
1291
1292 c.assembler.CompileRegisterToRegister(amd64.PXOR, x2Reg, x1Reg)
1293 result = x1Reg
1294 case wazeroir.V128CmpTypeI32x4LtS:
1295 c.assembler.CompileRegisterToRegister(amd64.PCMPGTD, x1Reg, x2Reg)
1296 result = x2Reg
1297 case wazeroir.V128CmpTypeI32x4LtU, wazeroir.V128CmpTypeI32x4GtU:
1298
1299 if v128CmpType == wazeroir.V128CmpTypeI32x4LtU {
1300 c.assembler.CompileRegisterToRegister(amd64.PMINUD, x2Reg, x1Reg)
1301 } else {
1302 c.assembler.CompileRegisterToRegister(amd64.PMAXUD, x2Reg, x1Reg)
1303 }
1304 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x1Reg)
1305
1306 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x2Reg)
1307
1308 c.assembler.CompileRegisterToRegister(amd64.PXOR, x2Reg, x1Reg)
1309 result = x1Reg
1310 case wazeroir.V128CmpTypeI32x4GtS:
1311 c.assembler.CompileRegisterToRegister(amd64.PCMPGTD, x2Reg, x1Reg)
1312 result = x1Reg
1313 case wazeroir.V128CmpTypeI32x4LeS, wazeroir.V128CmpTypeI32x4LeU:
1314 tmp, err := c.allocateRegister(registerTypeVector)
1315 if err != nil {
1316 return err
1317 }
1318
1319 c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1Reg, tmp)
1320 if v128CmpType == wazeroir.V128CmpTypeI32x4LeS {
1321 c.assembler.CompileRegisterToRegister(amd64.PMINSD, x2Reg, tmp)
1322 } else {
1323 c.assembler.CompileRegisterToRegister(amd64.PMINUD, x2Reg, tmp)
1324 }
1325 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, tmp, x1Reg)
1326 result = x1Reg
1327 case wazeroir.V128CmpTypeI32x4GeS, wazeroir.V128CmpTypeI32x4GeU:
1328 tmp, err := c.allocateRegister(registerTypeVector)
1329 if err != nil {
1330 return err
1331 }
1332 c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1Reg, tmp)
1333 if v128CmpType == wazeroir.V128CmpTypeI32x4GeS {
1334 c.assembler.CompileRegisterToRegister(amd64.PMAXSD, x2Reg, tmp)
1335 } else {
1336 c.assembler.CompileRegisterToRegister(amd64.PMAXUD, x2Reg, tmp)
1337 }
1338 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, tmp, x1Reg)
1339 result = x1Reg
1340 case wazeroir.V128CmpTypeI64x2Eq:
1341 c.assembler.CompileRegisterToRegister(amd64.PCMPEQQ, x2Reg, x1Reg)
1342 result = x1Reg
1343 case wazeroir.V128CmpTypeI64x2Ne:
1344 c.assembler.CompileRegisterToRegister(amd64.PCMPEQQ, x2Reg, x1Reg)
1345
1346 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x2Reg)
1347
1348 c.assembler.CompileRegisterToRegister(amd64.PXOR, x2Reg, x1Reg)
1349 result = x1Reg
1350 case wazeroir.V128CmpTypeI64x2LtS:
1351 c.assembler.CompileRegisterToRegister(amd64.PCMPGTQ, x1Reg, x2Reg)
1352 result = x2Reg
1353 case wazeroir.V128CmpTypeI64x2GtS:
1354 c.assembler.CompileRegisterToRegister(amd64.PCMPGTQ, x2Reg, x1Reg)
1355 result = x1Reg
1356 case wazeroir.V128CmpTypeI64x2LeS:
1357 c.assembler.CompileRegisterToRegister(amd64.PCMPGTQ, x2Reg, x1Reg)
1358
1359 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x2Reg, x2Reg)
1360
1361 c.assembler.CompileRegisterToRegister(amd64.PXOR, x2Reg, x1Reg)
1362 result = x1Reg
1363 case wazeroir.V128CmpTypeI64x2GeS:
1364 c.assembler.CompileRegisterToRegister(amd64.PCMPGTQ, x1Reg, x2Reg)
1365
1366 c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, x1Reg, x1Reg)
1367
1368 c.assembler.CompileRegisterToRegister(amd64.PXOR, x1Reg, x2Reg)
1369 result = x2Reg
1370 }
1371
1372 c.locationStack.markRegisterUnused(x1Reg, x2Reg)
1373 c.pushVectorRuntimeValueLocationOnRegister(result)
1374 return nil
1375 }
1376
1377
1378 func (c *amd64Compiler) compileV128AddSat(o *wazeroir.UnionOperation) error {
1379 var inst asm.Instruction
1380 shape := o.B1
1381 signed := o.B3
1382 switch shape {
1383 case wazeroir.ShapeI8x16:
1384 if signed {
1385 inst = amd64.PADDSB
1386 } else {
1387 inst = amd64.PADDUSB
1388 }
1389 case wazeroir.ShapeI16x8:
1390 if signed {
1391 inst = amd64.PADDSW
1392 } else {
1393 inst = amd64.PADDUSW
1394 }
1395 }
1396
1397 x2 := c.locationStack.popV128()
1398 if err := c.compileEnsureOnRegister(x2); err != nil {
1399 return err
1400 }
1401
1402 x1 := c.locationStack.popV128()
1403 if err := c.compileEnsureOnRegister(x1); err != nil {
1404 return err
1405 }
1406
1407 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1408
1409 c.locationStack.markRegisterUnused(x2.register)
1410 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
1411 return nil
1412 }
1413
1414
1415 func (c *amd64Compiler) compileV128SubSat(o *wazeroir.UnionOperation) error {
1416 var inst asm.Instruction
1417 shape := o.B1
1418 signed := o.B3
1419 switch shape {
1420 case wazeroir.ShapeI8x16:
1421 if signed {
1422 inst = amd64.PSUBSB
1423 } else {
1424 inst = amd64.PSUBUSB
1425 }
1426 case wazeroir.ShapeI16x8:
1427 if signed {
1428 inst = amd64.PSUBSW
1429 } else {
1430 inst = amd64.PSUBUSW
1431 }
1432 }
1433
1434 x2 := c.locationStack.popV128()
1435 if err := c.compileEnsureOnRegister(x2); err != nil {
1436 return err
1437 }
1438
1439 x1 := c.locationStack.popV128()
1440 if err := c.compileEnsureOnRegister(x1); err != nil {
1441 return err
1442 }
1443
1444 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1445
1446 c.locationStack.markRegisterUnused(x2.register)
1447 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
1448 return nil
1449 }
1450
1451
1452 func (c *amd64Compiler) compileV128Mul(o *wazeroir.UnionOperation) error {
1453 var inst asm.Instruction
1454 shape := o.B1
1455 switch shape {
1456 case wazeroir.ShapeI16x8:
1457 inst = amd64.PMULLW
1458 case wazeroir.ShapeI32x4:
1459 inst = amd64.PMULLD
1460 case wazeroir.ShapeI64x2:
1461 return c.compileV128MulI64x2()
1462 case wazeroir.ShapeF32x4:
1463 inst = amd64.MULPS
1464 case wazeroir.ShapeF64x2:
1465 inst = amd64.MULPD
1466 }
1467
1468 x2 := c.locationStack.popV128()
1469 if err := c.compileEnsureOnRegister(x2); err != nil {
1470 return err
1471 }
1472
1473 x1 := c.locationStack.popV128()
1474 if err := c.compileEnsureOnRegister(x1); err != nil {
1475 return err
1476 }
1477
1478 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1479
1480 c.locationStack.markRegisterUnused(x2.register)
1481 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
1482 return nil
1483 }
1484
1485
// compileV128MulI64x2 implements i64x2.mul. There is no 64x64->64 lane
// multiply in SSE, so the product is assembled from 32-bit partial products
// using PMULUDQ (which multiplies the low 32 bits of each 64-bit lane):
//
//	x1*x2 = x1Lo*x2Lo + ((x1Hi*x2Lo + x2Hi*x1Lo) << 32)   (mod 2^64)
func (c *amd64Compiler) compileV128MulI64x2() error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	tmp1, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Reserve tmp1 so allocating tmp2 below cannot hand back the same register.
	c.locationStack.markRegisterUsed(tmp1)

	tmp2, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// tmp1 = x1Hi (high 32 bits of each lane, shifted into the low half).
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1r, tmp1)

	c.assembler.CompileConstToRegister(amd64.PSRLQ, 32, tmp1)

	// tmp1 = x1Hi * x2Lo (PMULUDQ multiplies the low 32 bits of each lane).
	c.assembler.CompileRegisterToRegister(amd64.PMULUDQ, x2r, tmp1)

	// tmp2 = x2Hi.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x2r, tmp2)

	c.assembler.CompileConstToRegister(amd64.PSRLQ, 32, tmp2)

	// tmp2 = x2Hi * x1Lo.
	c.assembler.CompileRegisterToRegister(amd64.PMULUDQ, x1r, tmp2)

	// tmp1 = (x1Hi*x2Lo + x2Hi*x1Lo) << 32, i.e. the cross terms of the product.
	c.assembler.CompileRegisterToRegister(amd64.PADDQ, tmp2, tmp1)
	c.assembler.CompileConstToRegister(amd64.PSLLQ, 32, tmp1)

	// x1r = x1Lo * x2Lo.
	c.assembler.CompileRegisterToRegister(amd64.PMULUDQ, x2r, x1r)

	// x1r += cross terms, completing the 64-bit product (mod 2^64).
	c.assembler.CompileRegisterToRegister(amd64.PADDQ, tmp1, x1r)

	c.locationStack.markRegisterUnused(x2r, tmp1)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}
1548
1549
1550 func (c *amd64Compiler) compileV128Div(o *wazeroir.UnionOperation) error {
1551 x2 := c.locationStack.popV128()
1552 if err := c.compileEnsureOnRegister(x2); err != nil {
1553 return err
1554 }
1555
1556 x1 := c.locationStack.popV128()
1557 if err := c.compileEnsureOnRegister(x1); err != nil {
1558 return err
1559 }
1560
1561 var inst asm.Instruction
1562 shape := o.B1
1563 switch shape {
1564 case wazeroir.ShapeF32x4:
1565 inst = amd64.DIVPS
1566 case wazeroir.ShapeF64x2:
1567 inst = amd64.DIVPD
1568 }
1569
1570 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1571
1572 c.locationStack.markRegisterUnused(x2.register)
1573 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
1574 return nil
1575 }
1576
1577
1578 func (c *amd64Compiler) compileV128Neg(o *wazeroir.UnionOperation) error {
1579 shape := o.B1
1580 if shape <= wazeroir.ShapeI64x2 {
1581 return c.compileV128NegInt(shape)
1582 } else {
1583 return c.compileV128NegFloat(shape)
1584 }
1585 }
1586
1587
1588 func (c *amd64Compiler) compileV128NegInt(s wazeroir.Shape) error {
1589 v := c.locationStack.popV128()
1590 if err := c.compileEnsureOnRegister(v); err != nil {
1591 return err
1592 }
1593
1594 result, err := c.allocateRegister(registerTypeVector)
1595 if err != nil {
1596 return err
1597 }
1598
1599 var subInst asm.Instruction
1600 switch s {
1601 case wazeroir.ShapeI8x16:
1602 subInst = amd64.PSUBB
1603 case wazeroir.ShapeI16x8:
1604 subInst = amd64.PSUBW
1605 case wazeroir.ShapeI32x4:
1606 subInst = amd64.PSUBD
1607 case wazeroir.ShapeI64x2:
1608 subInst = amd64.PSUBQ
1609 }
1610
1611 c.assembler.CompileRegisterToRegister(amd64.PXOR, result, result)
1612 c.assembler.CompileRegisterToRegister(subInst, v.register, result)
1613
1614 c.locationStack.markRegisterUnused(v.register)
1615 c.pushVectorRuntimeValueLocationOnRegister(result)
1616 return nil
1617 }
1618
1619
// compileV128NegFloat emits f32x4.neg / f64x2.neg by flipping each lane's
// sign bit: v ^= (1 << 31) per f32 lane, or v ^= (1 << 63) per f64 lane.
func (c *amd64Compiler) compileV128NegFloat(s wazeroir.Shape) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	var leftShiftInst, xorInst asm.Instruction
	var leftShiftAmount asm.ConstantValue
	if s == wazeroir.ShapeF32x4 {
		leftShiftInst, leftShiftAmount, xorInst = amd64.PSLLD, 31, amd64.XORPS
	} else {
		leftShiftInst, leftShiftAmount, xorInst = amd64.PSLLQ, 63, amd64.XORPD
	}

	// Zero tmp ...
	c.assembler.CompileRegisterToRegister(amd64.XORPS, tmp, tmp)

	// ... then set all of its bits: comparing the zeroed tmp against itself
	// with this CMPPD predicate yields all-ones lanes.
	// NOTE(review): immediate 0x8 is outside the 3-bit legacy SSE predicate
	// range (0-7) — presumably the assembler encodes this appropriately;
	// confirm against the amd64 assembler's CMPPD handling.
	c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPD, tmp, tmp, 0x8)

	// Shift the all-ones lanes left so that only each lane's sign bit remains set.
	c.assembler.CompileConstToRegister(leftShiftInst, leftShiftAmount, tmp)

	// Flip the sign bit of every lane of v.
	c.assembler.CompileRegisterToRegister(xorInst, tmp, v.register)

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}
1655
1656
1657 func (c *amd64Compiler) compileV128Sqrt(o *wazeroir.UnionOperation) error {
1658 v := c.locationStack.popV128()
1659 if err := c.compileEnsureOnRegister(v); err != nil {
1660 return err
1661 }
1662
1663 var inst asm.Instruction
1664 shape := o.B1
1665 switch shape {
1666 case wazeroir.ShapeF64x2:
1667 inst = amd64.SQRTPD
1668 case wazeroir.ShapeF32x4:
1669 inst = amd64.SQRTPS
1670 }
1671
1672 c.assembler.CompileRegisterToRegister(inst, v.register, v.register)
1673 c.pushVectorRuntimeValueLocationOnRegister(v.register)
1674 return nil
1675 }
1676
1677
// compileV128Abs implements compiler.compileV128Abs for amd64.
//
// Integer shapes use the PABS* instructions in place, except i64x2 which
// has no SSE PABS variant and is handled separately. Float shapes clear the
// per-lane sign bit by ANDing with an all-bits-but-MSB mask.
func (c *amd64Compiler) compileV128Abs(o *wazeroir.UnionOperation) error {
	shape := o.B1
	if shape == wazeroir.ShapeI64x2 {
		return c.compileV128AbsI64x2()
	}

	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	result := v.register
	switch shape {
	case wazeroir.ShapeI8x16:
		c.assembler.CompileRegisterToRegister(amd64.PABSB, result, result)
	case wazeroir.ShapeI16x8:
		c.assembler.CompileRegisterToRegister(amd64.PABSW, result, result)
	case wazeroir.ShapeI32x4:
		c.assembler.CompileRegisterToRegister(amd64.PABSD, result, result)
	case wazeroir.ShapeF32x4:
		tmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// tmp = all ones (PCMPEQD of a register with itself).
		c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, tmp, tmp)
		// tmp = 0x7fffffff in each 32-bit lane.
		c.assembler.CompileConstToRegister(amd64.PSRLD, 1, tmp)
		// Clear the sign bit of each f32 lane.
		c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmp, result)
	case wazeroir.ShapeF64x2:
		tmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// tmp = all ones.
		c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, tmp, tmp)
		// tmp = 0x7fffffff_ffffffff in each 64-bit lane.
		c.assembler.CompileConstToRegister(amd64.PSRLQ, 1, tmp)
		// Clear the sign bit of each f64 lane.
		c.assembler.CompileRegisterToRegister(amd64.ANDPD, tmp, result)
	}

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}
1724
1725
// compileV128AbsI64x2 implements i64x2.abs. SSE has no 64-bit-lane PABS, so
// |v| is computed as a per-lane blend between v and -v using BLENDVPD,
// which reads its selection mask implicitly from XMM0 — hence X0 must be
// reserved for the duration of this sequence.
func (c *amd64Compiler) compileV128AbsI64x2() error {
	// Evict whatever currently lives in X0 and reserve it for the blend mask.
	const blendMaskReg = amd64.RegX0
	c.onValueReleaseRegisterToStack(blendMaskReg)
	c.locationStack.markRegisterUsed(blendMaskReg)

	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}
	vr := v.register

	if vr == blendMaskReg {
		return errors.New("BUG: X0 must not be used")
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}
	c.locationStack.markRegisterUsed(tmp)

	// tmp = v (keep the original value for the blend).
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp)

	// blendMaskReg = 0 - v = -v; its per-lane sign bit is set exactly where v > 0.
	c.assembler.CompileRegisterToRegister(amd64.PXOR, blendMaskReg, blendMaskReg)

	c.assembler.CompileRegisterToRegister(amd64.PSUBQ, vr, blendMaskReg)

	// vr = -v.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, blendMaskReg, vr)

	// vr = sign(X0 lane) ? v : -v, i.e. |v| in every lane.
	c.assembler.CompileRegisterToRegister(amd64.BLENDVPD, tmp, vr)

	c.locationStack.markRegisterUnused(blendMaskReg, tmp)
	c.pushVectorRuntimeValueLocationOnRegister(vr)
	return nil
}
1764
var (
	// popcntMask selects the low nibble of every byte. compileV128Popcnt
	// uses it to split each byte into two 4-bit halves before the PSHUFB
	// table lookups.
	popcntMask = [16]byte{
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	}

	// popcntTable maps a nibble value (0..15) to its bit count, laid out as
	// a 16-entry lookup table consumable by PSHUFB.
	popcntTable = [16]byte{
		0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03,
		0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04,
	}
)
1776
1777
// compileV128Popcnt implements i8x16.popcnt using the classic SSSE3 nibble
// technique: split each byte into low and high nibbles, look each nibble up
// in a 16-entry PSHUFB popcount table, then add the two partial counts.
func (c *amd64Compiler) compileV128Popcnt(operation *wazeroir.UnionOperation) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}
	vr := v.register

	tmp1, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Reserve tmp1 so the allocations below cannot return the same register.
	c.locationStack.markRegisterUsed(tmp1)

	tmp2, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Reserve tmp2 likewise before allocating tmp3.
	c.locationStack.markRegisterUsed(tmp2)

	tmp3, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// tmp1 = 0x0f repeated in every byte (the nibble mask).
	if err := c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, asm.NewStaticConst(popcntMask[:]), tmp1); err != nil {
		return err
	}

	// tmp2 = v.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp2)

	// tmp2 = low nibble of every byte of v.
	c.assembler.CompileRegisterToRegister(amd64.PAND, tmp1, tmp2)

	// vr = high nibble of every byte, moved into the low nibble position.
	// PSRLW shifts 16-bit lanes, but the following PAND discards the bits
	// shifted in across byte boundaries.
	c.assembler.CompileConstToRegister(amd64.PSRLW, 4, vr)
	c.assembler.CompileRegisterToRegister(amd64.PAND, tmp1, vr)

	// tmp1 = the nibble-popcount lookup table.
	if err := c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, asm.NewStaticConst(popcntTable[:]), tmp1); err != nil {
		return err
	}

	// tmp3 = a second copy of the table for the low-nibble lookup.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQU, tmp1, tmp3)

	// tmp3 = popcount of each low nibble (table shuffled by tmp2).
	c.assembler.CompileRegisterToRegister(amd64.PSHUFB, tmp2, tmp3)

	// tmp1 = popcount of each high nibble (table shuffled by vr).
	c.assembler.CompileRegisterToRegister(amd64.PSHUFB, vr, tmp1)

	// vr = high-nibble counts ...
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, tmp1, vr)

	// ... plus low-nibble counts = popcount of each byte.
	c.assembler.CompileRegisterToRegister(amd64.PADDB, tmp3, vr)

	c.locationStack.markRegisterUnused(tmp1, tmp2)
	c.pushVectorRuntimeValueLocationOnRegister(vr)
	return nil
}
1851
1852
1853 func (c *amd64Compiler) compileV128Min(o *wazeroir.UnionOperation) error {
1854 x2 := c.locationStack.popV128()
1855 if err := c.compileEnsureOnRegister(x2); err != nil {
1856 return err
1857 }
1858
1859 x1 := c.locationStack.popV128()
1860 if err := c.compileEnsureOnRegister(x1); err != nil {
1861 return err
1862 }
1863
1864 shape := o.B1
1865 if shape >= wazeroir.ShapeF32x4 {
1866 return c.compileV128FloatMinImpl(shape == wazeroir.ShapeF32x4, x1.register, x2.register)
1867 }
1868
1869 signed := o.B3
1870 var inst asm.Instruction
1871 switch shape {
1872 case wazeroir.ShapeI8x16:
1873 if signed {
1874 inst = amd64.PMINSB
1875 } else {
1876 inst = amd64.PMINUB
1877 }
1878 case wazeroir.ShapeI16x8:
1879 if signed {
1880 inst = amd64.PMINSW
1881 } else {
1882 inst = amd64.PMINUW
1883 }
1884 case wazeroir.ShapeI32x4:
1885 if signed {
1886 inst = amd64.PMINSD
1887 } else {
1888 inst = amd64.PMINUD
1889 }
1890 }
1891
1892 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1893
1894 c.locationStack.markRegisterUnused(x2.register)
1895 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
1896 return nil
1897 }
1898
1899
// compileV128FloatMinImpl implements f32x4.min / f64x2.min.
//
// MINPS/MINPD alone do not match Wasm semantics: on NaN or (+0,-0) inputs
// they return the second operand. The min is therefore taken in both
// operand orders and the results merged, with lanes that compared unordered
// (NaN) forced to a canonical NaN bit pattern.
func (c *amd64Compiler) compileV128FloatMinImpl(is32bit bool, x1r, x2r asm.Register) error {
	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	var min, cmp, andn, or, srl asm.Instruction
	var shiftNumToInverseNaN asm.ConstantValue
	if is32bit {
		min, cmp, andn, or, srl, shiftNumToInverseNaN = amd64.MINPS, amd64.CMPPS, amd64.ANDNPS, amd64.ORPS, amd64.PSRLD, 0xa
	} else {
		min, cmp, andn, or, srl, shiftNumToInverseNaN = amd64.MINPD, amd64.CMPPD, amd64.ANDNPD, amd64.ORPD, amd64.PSRLQ, 0xd
	}

	// tmp = min(x1, x2) with x1 as the destination operand.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1r, tmp)

	c.assembler.CompileRegisterToRegister(min, x2r, tmp)

	// x2r = min(x2, x1), the opposite operand order.
	c.assembler.CompileRegisterToRegister(min, x1r, x2r)

	// x1r = min(x2, x1) as well, for the unordered check below.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x2r, x1r)

	// x2r = min(x1,x2) | min(x2,x1): combining both orders propagates NaN
	// payload bits and prefers -0 over +0 regardless of operand order.
	c.assembler.CompileRegisterToRegister(or, tmp, x2r)

	// x1r = all-ones in lanes where tmp vs x1r compare unordered
	// (predicate 3 = CMPUNORD), i.e. the NaN lanes.
	c.assembler.CompileRegisterToRegisterWithArg(cmp, tmp, x1r, 3)

	// x2r |= NaN mask, so NaN lanes become all-ones.
	c.assembler.CompileRegisterToRegister(or, x1r, x2r)

	// Shift the NaN mask right (by 10 for f32, 13 for f64) so that the ANDN
	// below clears exactly the bits needed to leave a NaN bit pattern.
	c.assembler.CompileConstToRegister(srl, shiftNumToInverseNaN, x1r)

	// x1r = ^x1r & x2r: the combined min for ordered lanes, and a canonical
	// NaN for unordered lanes.
	c.assembler.CompileRegisterToRegister(andn, x2r, x1r)

	c.locationStack.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}
1949
1950
1951 func (c *amd64Compiler) compileV128Max(o *wazeroir.UnionOperation) error {
1952 x2 := c.locationStack.popV128()
1953 if err := c.compileEnsureOnRegister(x2); err != nil {
1954 return err
1955 }
1956
1957 x1 := c.locationStack.popV128()
1958 if err := c.compileEnsureOnRegister(x1); err != nil {
1959 return err
1960 }
1961
1962 shape := o.B1
1963 if shape >= wazeroir.ShapeF32x4 {
1964 return c.compileV128FloatMaxImpl(shape == wazeroir.ShapeF32x4, x1.register, x2.register)
1965 }
1966
1967 signed := o.B3
1968 var inst asm.Instruction
1969 switch shape {
1970 case wazeroir.ShapeI8x16:
1971 if signed {
1972 inst = amd64.PMAXSB
1973 } else {
1974 inst = amd64.PMAXUB
1975 }
1976 case wazeroir.ShapeI16x8:
1977 if signed {
1978 inst = amd64.PMAXSW
1979 } else {
1980 inst = amd64.PMAXUW
1981 }
1982 case wazeroir.ShapeI32x4:
1983 if signed {
1984 inst = amd64.PMAXSD
1985 } else {
1986 inst = amd64.PMAXUD
1987 }
1988 }
1989
1990 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1991
1992 c.locationStack.markRegisterUnused(x2.register)
1993 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
1994 return nil
1995 }
1996
1997
// compileV128FloatMaxImpl implements f32x4.max / f64x2.max.
//
// As with min, MAXPS/MAXPD alone do not match Wasm semantics (they return
// the second operand for NaN and (+0,-0) inputs), so the max is taken in
// both operand orders and the results merged, forcing NaN lanes to a
// canonical NaN and resolving the ±0 ambiguity.
func (c *amd64Compiler) compileV128FloatMaxImpl(is32bit bool, x1r, x2r asm.Register) error {
	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	var max, cmp, andn, or, xor, sub, srl asm.Instruction
	var shiftNumToInverseNaN asm.ConstantValue
	if is32bit {
		max, cmp, andn, or, xor, sub, srl, shiftNumToInverseNaN = amd64.MAXPS, amd64.CMPPS, amd64.ANDNPS, amd64.ORPS, amd64.XORPS, amd64.SUBPS, amd64.PSRLD, 0xa
	} else {
		max, cmp, andn, or, xor, sub, srl, shiftNumToInverseNaN = amd64.MAXPD, amd64.CMPPD, amd64.ANDNPD, amd64.ORPD, amd64.XORPD, amd64.SUBPD, amd64.PSRLQ, 0xd
	}

	// tmp = max(x1, x2) with x2 as the destination operand.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x2r, tmp)

	c.assembler.CompileRegisterToRegister(max, x1r, tmp)

	// x1r = max(x2, x1), the opposite operand order.
	c.assembler.CompileRegisterToRegister(max, x2r, x1r)

	// x2r = max(x2, x1) as well.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1r, x2r)

	// x2r = max(x2,x1) ^ max(x1,x2): nonzero only in lanes where the two
	// operand orders disagree, i.e. NaN lanes and (+0,-0) lanes.
	c.assembler.CompileRegisterToRegister(xor, tmp, x2r)

	// x1r |= the disagreement bits: NaN lanes stay NaN, and a +0/-0
	// disagreement picks up the sign bit.
	c.assembler.CompileRegisterToRegister(or, x2r, x1r)

	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1r, tmp)

	// tmp -= the disagreement bits, removing the stray sign bit so that a
	// (+0,-0) pair yields +0 as Wasm max requires.
	// NOTE(review): this follows the standard SSE max lowering; confirm the
	// ±0 reasoning against the upstream algorithm description.
	c.assembler.CompileRegisterToRegister(sub, x2r, tmp)

	// x1r = all-ones in lanes where x1r is NaN (predicate 3 = CMPUNORD).
	c.assembler.CompileRegisterToRegisterWithArg(cmp, x1r, x1r, 3)

	// Shift the NaN mask right so the ANDN below leaves a canonical NaN bit
	// pattern in the NaN lanes while passing ordered lanes through.
	c.assembler.CompileConstToRegister(srl, shiftNumToInverseNaN, x1r)
	c.assembler.CompileRegisterToRegister(andn, tmp, x1r)

	c.locationStack.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}
2055
2056
2057 func (c *amd64Compiler) compileV128AvgrU(o *wazeroir.UnionOperation) error {
2058 x2 := c.locationStack.popV128()
2059 if err := c.compileEnsureOnRegister(x2); err != nil {
2060 return err
2061 }
2062
2063 x1 := c.locationStack.popV128()
2064 if err := c.compileEnsureOnRegister(x1); err != nil {
2065 return err
2066 }
2067
2068 var inst asm.Instruction
2069 shape := o.B1
2070 switch shape {
2071 case wazeroir.ShapeI8x16:
2072 inst = amd64.PAVGB
2073 case wazeroir.ShapeI16x8:
2074 inst = amd64.PAVGW
2075 }
2076
2077 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
2078
2079 c.locationStack.markRegisterUnused(x2.register)
2080 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
2081 return nil
2082 }
2083
2084
2085 func (c *amd64Compiler) compileV128Pmin(o *wazeroir.UnionOperation) error {
2086 x2 := c.locationStack.popV128()
2087 if err := c.compileEnsureOnRegister(x2); err != nil {
2088 return err
2089 }
2090
2091 x1 := c.locationStack.popV128()
2092 if err := c.compileEnsureOnRegister(x1); err != nil {
2093 return err
2094 }
2095
2096 var min asm.Instruction
2097 if o.B1 == wazeroir.ShapeF32x4 {
2098 min = amd64.MINPS
2099 } else {
2100 min = amd64.MINPD
2101 }
2102
2103 x1r, v2r := x1.register, x2.register
2104
2105 c.assembler.CompileRegisterToRegister(min, x1r, v2r)
2106
2107 c.locationStack.markRegisterUnused(x1r)
2108 c.pushVectorRuntimeValueLocationOnRegister(v2r)
2109 return nil
2110 }
2111
2112
2113 func (c *amd64Compiler) compileV128Pmax(o *wazeroir.UnionOperation) error {
2114 x2 := c.locationStack.popV128()
2115 if err := c.compileEnsureOnRegister(x2); err != nil {
2116 return err
2117 }
2118
2119 x1 := c.locationStack.popV128()
2120 if err := c.compileEnsureOnRegister(x1); err != nil {
2121 return err
2122 }
2123
2124 var min asm.Instruction
2125 if o.B1 == wazeroir.ShapeF32x4 {
2126 min = amd64.MAXPS
2127 } else {
2128 min = amd64.MAXPD
2129 }
2130
2131 x1r, v2r := x1.register, x2.register
2132
2133 c.assembler.CompileRegisterToRegister(min, x1r, v2r)
2134
2135 c.locationStack.markRegisterUnused(x1r)
2136 c.pushVectorRuntimeValueLocationOnRegister(v2r)
2137 return nil
2138 }
2139
2140
2141 func (c *amd64Compiler) compileV128Ceil(o *wazeroir.UnionOperation) error {
2142
2143 const roundModeCeil = 0x2
2144 return c.compileV128RoundImpl(o.B1 == wazeroir.ShapeF32x4, roundModeCeil)
2145 }
2146
2147
2148 func (c *amd64Compiler) compileV128Floor(o *wazeroir.UnionOperation) error {
2149
2150 const roundModeFloor = 0x1
2151 return c.compileV128RoundImpl(o.B1 == wazeroir.ShapeF32x4, roundModeFloor)
2152 }
2153
2154
2155 func (c *amd64Compiler) compileV128Trunc(o *wazeroir.UnionOperation) error {
2156
2157 const roundModeTrunc = 0x3
2158 return c.compileV128RoundImpl(o.B1 == wazeroir.ShapeF32x4, roundModeTrunc)
2159 }
2160
2161
2162 func (c *amd64Compiler) compileV128Nearest(o *wazeroir.UnionOperation) error {
2163
2164 const roundModeNearest = 0x0
2165 return c.compileV128RoundImpl(o.B1 == wazeroir.ShapeF32x4, roundModeNearest)
2166 }
2167
2168
2169
2170 func (c *amd64Compiler) compileV128RoundImpl(is32bit bool, mode byte) error {
2171 v := c.locationStack.popV128()
2172 if err := c.compileEnsureOnRegister(v); err != nil {
2173 return err
2174 }
2175 vr := v.register
2176
2177 var round asm.Instruction
2178 if is32bit {
2179 round = amd64.ROUNDPS
2180 } else {
2181 round = amd64.ROUNDPD
2182 }
2183
2184 c.assembler.CompileRegisterToRegisterWithArg(round, vr, vr, mode)
2185 c.pushVectorRuntimeValueLocationOnRegister(vr)
2186 return nil
2187 }
2188
2189
2190 func (c *amd64Compiler) compileV128Extend(o *wazeroir.UnionOperation) error {
2191 v := c.locationStack.popV128()
2192 if err := c.compileEnsureOnRegister(v); err != nil {
2193 return err
2194 }
2195 vr := v.register
2196
2197 originShape := o.B1
2198 signed := o.B2 == 1
2199 useLow := o.B3
2200 if !useLow {
2201
2202
2203
2204 c.assembler.CompileRegisterToRegisterWithArg(amd64.PALIGNR, v.register, v.register, 0x8)
2205 }
2206
2207 var extend asm.Instruction
2208 switch originShape {
2209 case wazeroir.ShapeI8x16:
2210 if signed {
2211 extend = amd64.PMOVSXBW
2212 } else {
2213 extend = amd64.PMOVZXBW
2214 }
2215 case wazeroir.ShapeI16x8:
2216 if signed {
2217 extend = amd64.PMOVSXWD
2218 } else {
2219 extend = amd64.PMOVZXWD
2220 }
2221 case wazeroir.ShapeI32x4:
2222 if signed {
2223 extend = amd64.PMOVSXDQ
2224 } else {
2225 extend = amd64.PMOVZXDQ
2226 }
2227 }
2228
2229 c.assembler.CompileRegisterToRegister(extend, vr, vr)
2230 c.pushVectorRuntimeValueLocationOnRegister(vr)
2231 return nil
2232 }
2233
2234
// compileV128ExtMul implements compiler.compileV128ExtMul for amd64:
// multiplies the low or high halves of two vectors with the lanes widened to
// the next lane size (i8x16->i16x8, i16x8->i32x4, i32x4->i64x2).
func (c *amd64Compiler) compileV128ExtMul(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	originShape := o.B1 // lane shape before widening.
	signed := o.B2 == 1 // sign- vs. zero-extension of the lanes.
	useLow := o.B3      // true = multiply the low halves, false = the high halves.
	switch originShape {
	case wazeroir.ShapeI8x16:
		if !useLow {
			// PMOVS/ZXBW below reads the low 8 bytes, so for the high-half
			// variants rotate the upper 64 bits of each operand down first
			// (PALIGNR on the same register with imm=8).
			c.assembler.CompileRegisterToRegisterWithArg(amd64.PALIGNR, x1r, x1r, 0x8)
			c.assembler.CompileRegisterToRegisterWithArg(amd64.PALIGNR, x2r, x2r, 0x8)
		}

		var ext asm.Instruction
		if signed {
			ext = amd64.PMOVSXBW // sign-extend i8 lanes to i16.
		} else {
			ext = amd64.PMOVZXBW // zero-extend i8 lanes to i16.
		}

		// Widen both operands to i16x8, then multiply lane-wise; each product
		// of two extended bytes fits in 16 bits, so PMULLW is exact here.
		c.assembler.CompileRegisterToRegister(ext, x1r, x1r)
		c.assembler.CompileRegisterToRegister(ext, x2r, x2r)

		c.assembler.CompileRegisterToRegister(amd64.PMULLW, x2r, x1r)
	case wazeroir.ShapeI16x8:
		tmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Keep a copy of x1 so both the low and high 16 bits of each
		// 16x16 -> 32-bit product can be computed.
		c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1r, tmp)

		// x1r = low 16 bits of each product.
		c.assembler.CompileRegisterToRegister(amd64.PMULLW, x2r, x1r)
		if signed {
			// tmp = high 16 bits of each signed product.
			c.assembler.CompileRegisterToRegister(amd64.PMULHW, x2r, tmp)
		} else {
			// tmp = high 16 bits of each unsigned product.
			c.assembler.CompileRegisterToRegister(amd64.PMULHUW, x2r, tmp)
		}

		// Interleave the low/high product halves into full 32-bit lanes,
		// taking the requested half of the vector.
		if useLow {
			c.assembler.CompileRegisterToRegister(amd64.PUNPCKLWD, tmp, x1r)
		} else {
			c.assembler.CompileRegisterToRegister(amd64.PUNPCKHWD, tmp, x1r)
		}
	case wazeroir.ShapeI32x4:
		var shuffleOrder byte
		// PMULDQ/PMULUDQ multiply the 32-bit values in lanes 0 and 2, so
		// first shuffle the requested half into those even lane positions.
		if useLow {
			// [0,0,1,1]: lane 0 -> lane 0, lane 1 -> lane 2.
			shuffleOrder = 0b01010000
		} else {
			// [2,2,3,3]: lane 2 -> lane 0, lane 3 -> lane 2.
			shuffleOrder = 0b11111010
		}

		c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, x1r, x1r, shuffleOrder)
		c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, x2r, x2r, shuffleOrder)

		var mul asm.Instruction
		if signed {
			mul = amd64.PMULDQ // signed 32x32 -> 64-bit multiply.
		} else {
			mul = amd64.PMULUDQ // unsigned 32x32 -> 64-bit multiply.
		}
		c.assembler.CompileRegisterToRegister(mul, x2r, x1r)
	}

	c.locationStack.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}
2325
// q15mulrSatSMask is used by compileV128Q15mulrSatS: every 16-bit lane holds
// 0x8000 (little-endian byte pair 0x00, 0x80).
var q15mulrSatSMask = [16]byte{
	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
}
2330
2331
// compileV128Q15mulrSatS implements compiler.compileV128Q15mulrSatS for amd64.
func (c *amd64Compiler) compileV128Q15mulrSatS(*wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// tmp = 0x8000 in every 16-bit lane.
	if err := c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, asm.NewStaticConst(q15mulrSatSMask[:]), tmp); err != nil {
		return err
	}

	// x1r = rounding Q15 multiply of x1 and x2. PMULHRSW matches the wasm
	// q15mulr_sat_s semantics for every input except 0x8000 * 0x8000, which
	// it yields as 0x8000 while wasm requires saturation to 0x7fff.
	c.assembler.CompileRegisterToRegister(amd64.PMULHRSW, x2r, x1r)
	// tmp = 0xffff in the lanes where x1r == 0x8000 (the overflow case), 0 elsewhere.
	c.assembler.CompileRegisterToRegister(amd64.PCMPEQW, x1r, tmp)
	// x1r ^= tmp: flips the overflowed 0x8000 lanes to 0x7fff; all other lanes unchanged.
	c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, x1r)

	c.locationStack.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}
2363
var (
	// allOnesI8x16 holds the value 1 in every 8-bit lane; used as the unit
	// multiplier for PMADDUBSW in compileV128ExtAddPairwise.
	allOnesI8x16 = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}
	// allOnesI16x8 holds the value 1 in every 16-bit lane (little-endian);
	// used as the unit multiplier for PMADDWD in compileV128ExtAddPairwise.
	allOnesI16x8 = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0}

	// extAddPairwiseI16x8uMask holds two 16-byte constants for the unsigned
	// i16x8 pairwise add: the first half has 0x8000 in every 16-bit lane
	// (bias that maps unsigned lanes into signed range), the second half has
	// 0x00010000 in every 32-bit lane (the 2*0x8000 bias added back after
	// the signed multiply-add).
	extAddPairwiseI16x8uMask = [16 * 2]byte{
		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
		0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
	}
)
2373
2374
// compileV128ExtAddPairwise implements compiler.compileV128ExtAddPairwise for
// amd64: adds adjacent lane pairs, widening i8x16 -> i16x8 or i16x8 -> i32x4.
func (c *amd64Compiler) compileV128ExtAddPairwise(o *wazeroir.UnionOperation) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}
	vr := v.register

	originShape := o.B1
	signed := o.B3
	switch originShape {
	case wazeroir.ShapeI8x16:
		allOnesReg, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// allOnesReg = 1 in every 8-bit lane.
		if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU,
			asm.NewStaticConst(allOnesI8x16[:]), allOnesReg); err != nil {
			return err
		}

		var result asm.Register
		// PMADDUBSW multiplies the destination's unsigned bytes with the
		// source's signed bytes and adds adjacent products into i16 lanes.
		// With a multiplier of 1 that is a pairwise add; the operand order
		// decides which input's signedness is honored.
		if signed {
			// dst=allOnesReg (unsigned 1s) * src=vr (signed values): result
			// lands in allOnesReg.
			c.assembler.CompileRegisterToRegister(amd64.PMADDUBSW, vr, allOnesReg)
			result = allOnesReg
		} else {
			// dst=vr (unsigned values) * src=allOnesReg (signed 1s): result
			// stays in vr.
			c.assembler.CompileRegisterToRegister(amd64.PMADDUBSW, allOnesReg, vr)
			result = vr
		}

		if result != vr {
			c.locationStack.markRegisterUnused(vr)
		}
		c.pushVectorRuntimeValueLocationOnRegister(result)
	case wazeroir.ShapeI16x8:
		tmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		if signed {
			// tmp = 1 in every 16-bit lane; PMADDWD then sums adjacent
			// signed i16 products into i32 lanes, i.e. a signed pairwise add.
			if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU,
				asm.NewStaticConst(allOnesI16x8[:]), tmp); err != nil {
				return err
			}

			c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr)
			c.pushVectorRuntimeValueLocationOnRegister(vr)
		} else {
			// PMADDWD is signed-only, so bias the unsigned inputs into
			// signed range first: tmp = 0x8000 in every 16-bit lane.
			if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU,
				asm.NewStaticConst(extAddPairwiseI16x8uMask[:16]), tmp); err != nil {
				return err
			}

			// vr ^= 0x8000 per lane: maps each unsigned lane u to the
			// signed value u - 0x8000.
			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, vr)

			// tmp = 1 in every 16-bit lane.
			if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU,
				asm.NewStaticConst(allOnesI16x8[:]), tmp); err != nil {
				return err
			}

			// Signed pairwise add of the biased lanes:
			// each i32 lane = (u1-0x8000) + (u2-0x8000) = u1+u2 - 0x10000.
			c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr)

			// tmp = 0x00010000 in every 32-bit lane.
			if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU,
				asm.NewStaticConst(extAddPairwiseI16x8uMask[16:]), tmp); err != nil {
				return err
			}

			// Add the bias back: each lane += 0x10000, giving u1+u2.
			c.assembler.CompileRegisterToRegister(amd64.PADDD, tmp, vr)
			c.pushVectorRuntimeValueLocationOnRegister(vr)
		}
	}
	return nil
}
2463
2464
2465 func (c *amd64Compiler) compileV128FloatPromote(*wazeroir.UnionOperation) error {
2466 v := c.locationStack.popV128()
2467 if err := c.compileEnsureOnRegister(v); err != nil {
2468 return err
2469 }
2470 vr := v.register
2471
2472 c.assembler.CompileRegisterToRegister(amd64.CVTPS2PD, vr, vr)
2473 c.pushVectorRuntimeValueLocationOnRegister(vr)
2474 return nil
2475 }
2476
2477
2478 func (c *amd64Compiler) compileV128FloatDemote(*wazeroir.UnionOperation) error {
2479 v := c.locationStack.popV128()
2480 if err := c.compileEnsureOnRegister(v); err != nil {
2481 return err
2482 }
2483 vr := v.register
2484
2485 c.assembler.CompileRegisterToRegister(amd64.CVTPD2PS, vr, vr)
2486 c.pushVectorRuntimeValueLocationOnRegister(vr)
2487 return nil
2488 }
2489
2490
2491 func (c *amd64Compiler) compileV128Dot(*wazeroir.UnionOperation) error {
2492 x2 := c.locationStack.popV128()
2493 if err := c.compileEnsureOnRegister(x2); err != nil {
2494 return err
2495 }
2496
2497 x1 := c.locationStack.popV128()
2498 if err := c.compileEnsureOnRegister(x1); err != nil {
2499 return err
2500 }
2501
2502 c.assembler.CompileRegisterToRegister(amd64.PMADDWD, x2.register, x1.register)
2503
2504 c.locationStack.markRegisterUnused(x2.register)
2505 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
2506 return nil
2507 }
2508
// fConvertFromIMask is used by compileV128FConvertFromI for the unsigned
// i32x4 -> f64x2 conversion: the low two 32-bit lanes hold 0x43300000, the
// high word of the double 2^52, so interleaving a u32 value x beneath it
// yields the double with bit pattern 0x43300000_xxxxxxxx == 2^52 + x.
var fConvertFromIMask = [16]byte{
	0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}
2512
2513
// compileV128FConvertFromI implements compiler.compileV128FConvertFromI for
// amd64: lane-wise integer -> float conversion (i32x4 -> f32x4, or the low
// two i32 lanes -> f64x2).
func (c *amd64Compiler) compileV128FConvertFromI(o *wazeroir.UnionOperation) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}
	vr := v.register

	destinationShape := o.B1
	signed := o.B3

	switch destinationShape {
	case wazeroir.ShapeF32x4:
		if signed {
			// Direct signed i32 -> f32 conversion.
			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PS, vr, vr)
		} else {
			// SSE has no unsigned CVT, so split each u32 into a low 22-bit
			// part (exactly representable in f32) and the remaining high
			// part, convert both with the signed instruction, and sum.
			tmp, err := c.allocateRegister(registerTypeVector)
			if err != nil {
				return err
			}

			// tmp = copy of the input.
			c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp)

			// tmp = low 22 bits of each lane: shift left then right by 10
			// clears the top 10 bits.
			c.assembler.CompileConstToRegister(amd64.PSLLD, 0xa, tmp)
			c.assembler.CompileConstToRegister(amd64.PSRLD, 0xa, tmp)

			// vr = the high part (input minus its low 22 bits).
			c.assembler.CompileRegisterToRegister(amd64.PSUBD, tmp, vr)

			// tmp = float32(low part), exact since it fits in 22 bits.
			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PS, tmp, tmp)

			// Halve the high part (exact: its low bit is zero) so it fits
			// the signed range, then convert.
			c.assembler.CompileConstToRegister(amd64.PSRLD, 1, vr)
			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PS, vr, vr)

			// vr += vr: undo the halving.
			c.assembler.CompileRegisterToRegister(amd64.ADDPS, vr, vr)

			// vr += tmp: high part + low part = the full unsigned value.
			c.assembler.CompileRegisterToRegister(amd64.ADDPS, tmp, vr)
		}
	case wazeroir.ShapeF64x2:
		if signed {
			// Direct signed low-i32x2 -> f64x2 conversion.
			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PD, vr, vr)
		} else {
			tmp, err := c.allocateRegister(registerTypeVector)
			if err != nil {
				return err
			}

			// tmp = [0x43300000, 0x43300000, 0, 0]: the high word of 2^52.
			if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, asm.NewStaticConst(fConvertFromIMask[:16]), tmp); err != nil {
				return err
			}

			// Interleave the low two u32 lanes of vr with 0x43300000: each
			// 64-bit lane becomes the double with bit pattern
			// 0x43300000_xxxxxxxx, i.e. the value 2^52 + x.
			c.assembler.CompileRegisterToRegister(amd64.UNPCKLPS, tmp, vr)

			// tmp = 2^52 as float64 in both lanes.
			if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU,
				asm.NewStaticConst(twop52[:]), tmp); err != nil {
				return err
			}

			// vr -= 2^52: leaves exactly float64(x) in each lane.
			c.assembler.CompileRegisterToRegister(amd64.SUBPD, tmp, vr)
		}
	}

	c.pushVectorRuntimeValueLocationOnRegister(vr)
	return nil
}
2594
2595
2596 func (c *amd64Compiler) compileV128Narrow(o *wazeroir.UnionOperation) error {
2597 x2 := c.locationStack.popV128()
2598 if err := c.compileEnsureOnRegister(x2); err != nil {
2599 return err
2600 }
2601
2602 x1 := c.locationStack.popV128()
2603 if err := c.compileEnsureOnRegister(x1); err != nil {
2604 return err
2605 }
2606
2607 var narrow asm.Instruction
2608 originShape := o.B1
2609 signed := o.B3
2610 switch originShape {
2611 case wazeroir.ShapeI16x8:
2612 if signed {
2613 narrow = amd64.PACKSSWB
2614 } else {
2615 narrow = amd64.PACKUSWB
2616 }
2617 case wazeroir.ShapeI32x4:
2618 if signed {
2619 narrow = amd64.PACKSSDW
2620 } else {
2621 narrow = amd64.PACKUSDW
2622 }
2623 }
2624 c.assembler.CompileRegisterToRegister(narrow, x2.register, x1.register)
2625
2626 c.locationStack.markRegisterUnused(x2.register)
2627 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
2628 return nil
2629 }
2630
var (
	// i32sMaxOnF64x2 holds math.MaxInt32 (2147483647.0) as a float64 in both
	// 64-bit lanes (bit pattern 0x41dfffffffc00000, little-endian).
	i32sMaxOnF64x2 = [16]byte{
		0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41,
		0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41,
	}

	// i32uMaxOnF64x2 holds math.MaxUint32 (4294967295.0) as a float64 in
	// both 64-bit lanes (bit pattern 0x41efffffffe00000, little-endian).
	i32uMaxOnF64x2 = [16]byte{
		0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41,
		0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41,
	}

	// twop52 holds 2.0^52 as a float64 in both 64-bit lanes (bit pattern
	// 0x4330000000000000). Adding 2^52 to a double in [0, 2^32) leaves the
	// integer part of the value in the low 32 bits of the result's bit
	// pattern, which is how the unsigned f64x2 -> i32x4 truncation and the
	// unsigned i32x4 -> f64x2 conversion extract/build their values.
	twop52 = [16]byte{
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43,
	}
)
2657
2658
// compileV128ITruncSatFromF implements compiler.compileV128ITruncSatFromF for
// amd64: lane-wise saturating float -> integer truncation (NaN lanes become
// zero; out-of-range lanes clamp to the integer min/max).
func (c *amd64Compiler) compileV128ITruncSatFromF(o *wazeroir.UnionOperation) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}
	vr := v.register

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Pin tmp so the tmp2 allocations below cannot evict it.
	c.locationStack.markRegisterUsed(tmp)

	originShape := o.B1
	signed := o.B3
	switch originShape {
	case wazeroir.ShapeF32x4:
		if signed {
			// tmp = copy of the input.
			c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp)

			// tmp = per-lane non-NaN mask: a lane compares equal to itself
			// exactly when it is not NaN (all-ones if not NaN, else zero).
			c.assembler.CompileRegisterToRegister(amd64.CMPEQPS, tmp, tmp)

			// vr &= mask: NaN lanes become zero, as trunc_sat requires.
			c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmp, vr)

			// tmp = mask ^ vr: ^v for non-NaN lanes, 0 for NaN lanes.
			c.assembler.CompileRegisterToRegister(amd64.PXOR, vr, tmp)

			// vr = int32 truncation; out-of-range lanes become 0x80000000
			// (CVTTPS2DQ's "integer indefinite" result).
			c.assembler.CompileRegisterToRegister(amd64.CVTTPS2DQ, vr, vr)

			// tmp &= vr: the sign bit of tmp is now set only for lanes that
			// overflowed positively (0x80000000 & ^v with v positive).
			c.assembler.CompileRegisterToRegister(amd64.PAND, vr, tmp)

			// Broadcast tmp's sign bit across each lane: all-ones exactly
			// for the positive-overflow lanes.
			c.assembler.CompileConstToRegister(amd64.PSRAD, 0x1f, tmp)

			// vr ^= tmp: flips 0x80000000 to 0x7fffffff (INT32_MAX) in the
			// positive-overflow lanes; every other lane is unchanged.
			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, vr)
		} else {
			tmp2, err := c.allocateRegister(registerTypeVector)
			if err != nil {
				return err
			}

			// Unsigned saturating truncation built from the signed
			// CVTTPS2DQ: convert the value and (value - 2^31) separately and
			// combine. First clamp the low end:
			// tmp = 0; vr = max(vr, 0) maps negatives (and NaN) to zero.
			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp)
			c.assembler.CompileRegisterToRegister(amd64.MAXPS, tmp, vr)
			// tmp = 0x7fffffff per lane (all-ones shifted right by one),
			// then converted to float (~2^31).
			c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, tmp, tmp)
			c.assembler.CompileConstToRegister(amd64.PSRLD, 0x1, tmp)
			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PS, tmp, tmp)
			// tmp2 = copy of the clamped input; vr = its int32 truncation
			// (lanes >= 2^31 become 0x80000000).
			c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp2)
			c.assembler.CompileRegisterToRegister(amd64.CVTTPS2DQ, vr, vr)
			// tmp2 -= ~2^31; tmp = mask of lanes where tmp <= tmp2
			// (CMPPS predicate 0x2 = LE), i.e. the lanes beyond the signed
			// range.
			c.assembler.CompileRegisterToRegister(amd64.SUBPS, tmp, tmp2)
			c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPS, tmp2, tmp, 0x2)
			// tmp2 = int32(value - 2^31), XORed with the mask, clamped to
			// >= 0 via signed max against zero, then added to vr to yield
			// the final unsigned (saturated) result.
			c.assembler.CompileRegisterToRegister(amd64.CVTTPS2DQ, tmp2, tmp2)
			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp2)
			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp)
			c.assembler.CompileRegisterToRegister(amd64.PMAXSD, tmp, tmp2)
			c.assembler.CompileRegisterToRegister(amd64.PADDD, tmp2, vr)
		}
	case wazeroir.ShapeF64x2:
		tmp2, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		if signed {
			// tmp = copy of the input.
			c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp)

			// tmp = per-lane non-NaN mask (a lane equals itself iff not NaN).
			c.assembler.CompileRegisterToRegister(amd64.CMPEQPD, tmp, tmp)

			// tmp2 = 2147483647.0 (INT32_MAX as float64) in both lanes.
			if err = c.assembler.CompileStaticConstToRegister(amd64.MOVUPD, asm.NewStaticConst(i32sMaxOnF64x2[:]), tmp2); err != nil {
				return err
			}

			// tmp = INT32_MAX for non-NaN lanes, 0 for NaN lanes.
			c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmp2, tmp)

			// vr = min(vr, tmp): clamps the upper bound to INT32_MAX, and
			// maps NaN lanes to 0 (MINPD yields the source operand on NaN).
			c.assembler.CompileRegisterToRegister(amd64.MINPD, tmp, vr)

			// Truncate; lanes below INT32_MIN saturate to 0x80000000.
			c.assembler.CompileRegisterToRegister(amd64.CVTTPD2DQ, vr, vr)
		} else {
			// tmp = 0; vr = max(vr, 0) clamps negatives and NaN to zero.
			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp)
			c.assembler.CompileRegisterToRegister(amd64.MAXPD, tmp, vr)

			// tmp2 = 4294967295.0 (UINT32_MAX as float64) in both lanes.
			if err = c.assembler.CompileStaticConstToRegister(amd64.MOVUPD, asm.NewStaticConst(i32uMaxOnF64x2[:]), tmp2); err != nil {
				return err
			}

			// Clamp the upper bound to UINT32_MAX.
			c.assembler.CompileRegisterToRegister(amd64.MINPD, tmp2, vr)

			// Truncate toward zero (ROUNDPD mode 0x3).
			c.assembler.CompileRegisterToRegisterWithArg(amd64.ROUNDPD, vr, vr, 0x3)

			// tmp2 = 2^52 as float64 in both lanes.
			if err = c.assembler.CompileStaticConstToRegister(amd64.MOVUPD, asm.NewStaticConst(twop52[:]), tmp2); err != nil {
				return err
			}

			// vr += 2^52: for a lane value in [0, 2^32) this leaves the
			// integer value in the low 32 bits of the double's bit pattern.
			c.assembler.CompileRegisterToRegister(amd64.ADDPD, tmp2, vr)

			// Gather the low 32 bits of both lanes into lanes 0 and 1 and
			// zero the upper two lanes (tmp is still all-zero):
			// vr = [vr[0], vr[2], tmp[0], tmp[0]].
			c.assembler.CompileRegisterToRegisterWithArg(amd64.SHUFPS, tmp, vr, 0b00_00_10_00)
		}
	}

	c.locationStack.markRegisterUnused(tmp)
	c.pushVectorRuntimeValueLocationOnRegister(vr)
	return nil
}
2813