1 package compiler
2
3 import (
4 "github.com/tetratelabs/wazero/internal/asm"
5 "github.com/tetratelabs/wazero/internal/asm/arm64"
6 "github.com/tetratelabs/wazero/internal/wazeroir"
7 )
8
9
10 func (c *arm64Compiler) compileV128Const(o *wazeroir.UnionOperation) error {
11 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
12 return err
13 }
14
15 lo, hi := o.U1, o.U2
16
17 result, err := c.allocateRegister(registerTypeVector)
18 if err != nil {
19 return err
20 }
21
22
23 intReg := arm64ReservedRegisterForTemporary
24 if lo == 0 {
25 intReg = arm64.RegRZR
26 } else {
27 c.assembler.CompileConstToRegister(arm64.MOVD, int64(lo), arm64ReservedRegisterForTemporary)
28 }
29 c.assembler.CompileRegisterToRegister(arm64.FMOVD, intReg, result)
30
31
32 intReg = arm64ReservedRegisterForTemporary
33 if hi == 0 {
34 intReg = arm64.RegRZR
35 } else {
36 c.assembler.CompileConstToRegister(arm64.MOVD, int64(hi), arm64ReservedRegisterForTemporary)
37 }
38
39 c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, intReg, result, arm64.VectorArrangementD, 1)
40
41 c.pushVectorRuntimeValueLocationOnRegister(result)
42 return nil
43 }
44
45
46 func (c *arm64Compiler) compileV128Add(o *wazeroir.UnionOperation) error {
47 x2 := c.locationStack.popV128()
48 if err := c.compileEnsureOnRegister(x2); err != nil {
49 return err
50 }
51
52 x1 := c.locationStack.popV128()
53 if err := c.compileEnsureOnRegister(x1); err != nil {
54 return err
55 }
56
57 x1r, x2r := x1.register, x2.register
58
59 var arr arm64.VectorArrangement
60 var inst asm.Instruction
61 shape := o.B1
62 switch shape {
63 case wazeroir.ShapeI8x16:
64 inst = arm64.VADD
65 arr = arm64.VectorArrangement16B
66 case wazeroir.ShapeI16x8:
67 inst = arm64.VADD
68 arr = arm64.VectorArrangement8H
69 case wazeroir.ShapeI32x4:
70 inst = arm64.VADD
71 arr = arm64.VectorArrangement4S
72 case wazeroir.ShapeI64x2:
73 inst = arm64.VADD
74 arr = arm64.VectorArrangement2D
75 case wazeroir.ShapeF32x4:
76 inst = arm64.VFADDS
77 arr = arm64.VectorArrangement4S
78 case wazeroir.ShapeF64x2:
79 inst = arm64.VFADDD
80 arr = arm64.VectorArrangement2D
81 }
82
83 c.assembler.CompileVectorRegisterToVectorRegister(inst, x1r, x2r, arr,
84 arm64.VectorIndexNone, arm64.VectorIndexNone)
85
86 c.pushVectorRuntimeValueLocationOnRegister(x2r)
87 c.markRegisterUnused(x1r)
88 return nil
89 }
90
91
92 func (c *arm64Compiler) compileV128Sub(o *wazeroir.UnionOperation) (err error) {
93 x2 := c.locationStack.popV128()
94 if err := c.compileEnsureOnRegister(x2); err != nil {
95 return err
96 }
97
98 x1 := c.locationStack.popV128()
99 if err := c.compileEnsureOnRegister(x1); err != nil {
100 return err
101 }
102
103 x1r, x2r := x1.register, x2.register
104
105 var arr arm64.VectorArrangement
106 var inst asm.Instruction
107 shape := o.B1
108 switch shape {
109 case wazeroir.ShapeI8x16:
110 inst = arm64.VSUB
111 arr = arm64.VectorArrangement16B
112 case wazeroir.ShapeI16x8:
113 inst = arm64.VSUB
114 arr = arm64.VectorArrangement8H
115 case wazeroir.ShapeI32x4:
116 inst = arm64.VSUB
117 arr = arm64.VectorArrangement4S
118 case wazeroir.ShapeI64x2:
119 inst = arm64.VSUB
120 arr = arm64.VectorArrangement2D
121 case wazeroir.ShapeF32x4:
122 inst = arm64.VFSUBS
123 arr = arm64.VectorArrangement4S
124 case wazeroir.ShapeF64x2:
125 inst = arm64.VFSUBD
126 arr = arm64.VectorArrangement2D
127 }
128
129 c.assembler.CompileVectorRegisterToVectorRegister(inst, x2r, x1r, arr,
130 arm64.VectorIndexNone, arm64.VectorIndexNone)
131
132 c.pushVectorRuntimeValueLocationOnRegister(x1r)
133 c.markRegisterUnused(x2r)
134 return
135 }
136
137
// compileV128Load implements compiler.compileV128Load for arm64.
//
// o.B1 selects the load variant (plain 128-bit, widening 8x8/16x4/32x2 with
// sign or zero extension, lane splat, or 32/64-bit "zero" load) and o.U2
// carries the static memory offset. compileMemoryAccessOffsetSetup performs
// the bounds check for the given access size and returns a register holding
// the effective offset from the memory base register; each case below shadows
// `offset` with that register.
func (c *arm64Compiler) compileV128Load(o *wazeroir.UnionOperation) (err error) {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	offset := uint32(o.U2)
	loadType := wazeroir.V128LoadType(o.B1)

	switch loadType {
	case wazeroir.V128LoadType128:
		// Plain 16-byte vector load.
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 16)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementQ,
		)
	case wazeroir.V128LoadType8x8s:
		// Load 8 bytes, then sign-extend each 8-bit lane to 16 bits
		// (SSHLL widens 8B -> 8H).
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8x8u:
		// Same as above but zero-extended (USHLL).
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4s:
		// Load 8 bytes, sign-extend each 16-bit lane to 32 bits (4H -> 4S).
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4u:
		// Zero-extending variant of the above.
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2s:
		// Load 8 bytes, sign-extend each 32-bit lane to 64 bits (2S -> 2D).
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2u:
		// Zero-extending variant of the above.
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 1)
		if err != nil {
			return err
		}
		// LD1R takes a plain base address (no register offset form), so fold
		// base+offset into the offset register first, then load one byte and
		// replicate it to all 16 lanes.
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement16B)
	case wazeroir.V128LoadType16Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 2)
		if err != nil {
			return err
		}
		// Load one 16-bit element and replicate across the 8H lanes.
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement8H)
	case wazeroir.V128LoadType32Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 4)
		if err != nil {
			return err
		}
		// Load one 32-bit element and replicate across the 4S lanes.
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement4S)
	case wazeroir.V128LoadType64Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		// Load one 64-bit element and replicate across the 2D lanes.
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement2D)
	case wazeroir.V128LoadType32zero:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 4)
		if err != nil {
			return err
		}
		// S-sized scalar load into the vector register; the upper 96 bits are
		// presumably cleared by the scalar load form (v128.load32_zero
		// semantics) — confirm against the assembler's VMOV/S encoding.
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementS,
		)
	case wazeroir.V128LoadType64zero:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		// D-sized scalar load; upper 64 bits presumably zeroed, matching
		// v128.load64_zero.
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}
268
269
270 func (c *arm64Compiler) compileV128LoadLane(o *wazeroir.UnionOperation) (err error) {
271 targetVector := c.locationStack.popV128()
272 if err = c.compileEnsureOnRegister(targetVector); err != nil {
273 return
274 }
275
276 laneSize, laneIndex := o.B1, o.B2
277 offset := uint32(o.U2)
278
279 targetSizeInBytes := int64(laneSize / 8)
280 source, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
281 if err != nil {
282 return err
283 }
284
285 var loadInst asm.Instruction
286 var arr arm64.VectorArrangement
287 switch laneSize {
288 case 8:
289 arr = arm64.VectorArrangementB
290 loadInst = arm64.LDRB
291 case 16:
292 arr = arm64.VectorArrangementH
293 loadInst = arm64.LDRH
294 case 32:
295 loadInst = arm64.LDRW
296 arr = arm64.VectorArrangementS
297 case 64:
298 loadInst = arm64.LDRD
299 arr = arm64.VectorArrangementD
300 }
301
302 c.assembler.CompileMemoryWithRegisterOffsetToRegister(loadInst, arm64ReservedRegisterForMemory, source, source)
303 c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, source, targetVector.register, arr, arm64.VectorIndex(laneIndex))
304
305 c.pushVectorRuntimeValueLocationOnRegister(targetVector.register)
306 c.locationStack.markRegisterUnused(source)
307 return
308 }
309
310
311 func (c *arm64Compiler) compileV128Store(o *wazeroir.UnionOperation) (err error) {
312 v := c.locationStack.popV128()
313 if err = c.compileEnsureOnRegister(v); err != nil {
314 return
315 }
316
317 const targetSizeInBytes = 16
318 offset := uint32(o.U2)
319 offsetReg, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
320 if err != nil {
321 return err
322 }
323
324 c.assembler.CompileVectorRegisterToMemoryWithRegisterOffset(arm64.VMOV,
325 v.register, arm64ReservedRegisterForMemory, offsetReg, arm64.VectorArrangementQ)
326
327 c.markRegisterUnused(v.register)
328 return
329 }
330
331
332 func (c *arm64Compiler) compileV128StoreLane(o *wazeroir.UnionOperation) (err error) {
333 var arr arm64.VectorArrangement
334 var storeInst asm.Instruction
335 laneSize := o.B1
336 laneIndex := o.B2
337 offset := uint32(o.U2)
338 switch laneSize {
339 case 8:
340 storeInst = arm64.STRB
341 arr = arm64.VectorArrangementB
342 case 16:
343 storeInst = arm64.STRH
344 arr = arm64.VectorArrangementH
345 case 32:
346 storeInst = arm64.STRW
347 arr = arm64.VectorArrangementS
348 case 64:
349 storeInst = arm64.STRD
350 arr = arm64.VectorArrangementD
351 }
352
353 v := c.locationStack.popV128()
354 if err = c.compileEnsureOnRegister(v); err != nil {
355 return
356 }
357
358 targetSizeInBytes := int64(laneSize / 8)
359 offsetReg, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
360 if err != nil {
361 return err
362 }
363
364 c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, arm64ReservedRegisterForTemporary, arr,
365 arm64.VectorIndex(laneIndex))
366
367 c.assembler.CompileRegisterToMemoryWithRegisterOffset(storeInst,
368 arm64ReservedRegisterForTemporary, arm64ReservedRegisterForMemory, offsetReg)
369
370 c.locationStack.markRegisterUnused(v.register)
371 return
372 }
373
374
375 func (c *arm64Compiler) compileV128ExtractLane(o *wazeroir.UnionOperation) (err error) {
376 v := c.locationStack.popV128()
377 if err = c.compileEnsureOnRegister(v); err != nil {
378 return
379 }
380
381 shape := o.B1
382 laneIndex := o.B2
383 signed := o.B3
384 switch shape {
385 case wazeroir.ShapeI8x16:
386 result, err := c.allocateRegister(registerTypeGeneralPurpose)
387 if err != nil {
388 return err
389 }
390 var inst asm.Instruction
391 if signed {
392 inst = arm64.SMOV32
393 } else {
394 inst = arm64.UMOV
395 }
396 c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
397 arm64.VectorArrangementB, arm64.VectorIndex(laneIndex))
398
399 c.locationStack.markRegisterUnused(v.register)
400 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
401 case wazeroir.ShapeI16x8:
402 result, err := c.allocateRegister(registerTypeGeneralPurpose)
403 if err != nil {
404 return err
405 }
406 var inst asm.Instruction
407 if signed {
408 inst = arm64.SMOV32
409 } else {
410 inst = arm64.UMOV
411 }
412 c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
413 arm64.VectorArrangementH, arm64.VectorIndex(laneIndex))
414
415 c.locationStack.markRegisterUnused(v.register)
416 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
417 case wazeroir.ShapeI32x4:
418 result, err := c.allocateRegister(registerTypeGeneralPurpose)
419 if err != nil {
420 return err
421 }
422 c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
423 arm64.VectorArrangementS, arm64.VectorIndex(laneIndex))
424
425 c.locationStack.markRegisterUnused(v.register)
426 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
427 case wazeroir.ShapeI64x2:
428 result, err := c.allocateRegister(registerTypeGeneralPurpose)
429 if err != nil {
430 return err
431 }
432 c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
433 arm64.VectorArrangementD, arm64.VectorIndex(laneIndex))
434
435 c.locationStack.markRegisterUnused(v.register)
436 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64)
437 case wazeroir.ShapeF32x4:
438 c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
439 arm64.VectorArrangementS, arm64.VectorIndex(laneIndex), 0)
440 c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF32)
441 case wazeroir.ShapeF64x2:
442 c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
443 arm64.VectorArrangementD, arm64.VectorIndex(laneIndex), 0)
444 c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF64)
445 }
446 return
447 }
448
449
450 func (c *arm64Compiler) compileV128ReplaceLane(o *wazeroir.UnionOperation) (err error) {
451 origin := c.locationStack.pop()
452 if err = c.compileEnsureOnRegister(origin); err != nil {
453 return
454 }
455
456 vector := c.locationStack.popV128()
457 if err = c.compileEnsureOnRegister(vector); err != nil {
458 return
459 }
460
461 shape := o.B1
462 laneIndex := o.B2
463 switch shape {
464 case wazeroir.ShapeI8x16:
465 c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
466 arm64.VectorArrangementB, arm64.VectorIndex(laneIndex))
467 case wazeroir.ShapeI16x8:
468 c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
469 arm64.VectorArrangementH, arm64.VectorIndex(laneIndex))
470 case wazeroir.ShapeI32x4:
471 c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
472 arm64.VectorArrangementS, arm64.VectorIndex(laneIndex))
473 case wazeroir.ShapeI64x2:
474 c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
475 arm64.VectorArrangementD, arm64.VectorIndex(laneIndex))
476 case wazeroir.ShapeF32x4:
477 c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
478 arm64.VectorArrangementS, 0, arm64.VectorIndex(laneIndex))
479 case wazeroir.ShapeF64x2:
480 c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
481 arm64.VectorArrangementD, 0, arm64.VectorIndex(laneIndex))
482 }
483
484 c.locationStack.markRegisterUnused(origin.register)
485 c.pushVectorRuntimeValueLocationOnRegister(vector.register)
486 return
487 }
488
489
490 func (c *arm64Compiler) compileV128Splat(o *wazeroir.UnionOperation) (err error) {
491 origin := c.locationStack.pop()
492 if err = c.compileEnsureOnRegister(origin); err != nil {
493 return
494 }
495
496 var result asm.Register
497 shape := o.B1
498 switch shape {
499 case wazeroir.ShapeI8x16:
500 result, err = c.allocateRegister(registerTypeVector)
501 if err != nil {
502 return
503 }
504 c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
505 arm64.VectorArrangement16B, arm64.VectorIndexNone)
506 case wazeroir.ShapeI16x8:
507 result, err = c.allocateRegister(registerTypeVector)
508 if err != nil {
509 return
510 }
511 c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
512 arm64.VectorArrangement8H, arm64.VectorIndexNone)
513 case wazeroir.ShapeI32x4:
514 result, err = c.allocateRegister(registerTypeVector)
515 if err != nil {
516 return
517 }
518 c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
519 arm64.VectorArrangement4S, arm64.VectorIndexNone)
520 case wazeroir.ShapeI64x2:
521 result, err = c.allocateRegister(registerTypeVector)
522 if err != nil {
523 return
524 }
525 c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
526 arm64.VectorArrangement2D, arm64.VectorIndexNone)
527 case wazeroir.ShapeF32x4:
528 result = origin.register
529 c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
530 arm64.VectorArrangementS, 0, arm64.VectorIndexNone)
531 case wazeroir.ShapeF64x2:
532 result = origin.register
533 c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
534 arm64.VectorArrangementD, 0, arm64.VectorIndexNone)
535 }
536
537 c.locationStack.markRegisterUnused(origin.register)
538 c.pushVectorRuntimeValueLocationOnRegister(result)
539 return
540 }
541
542 func (c *arm64Compiler) onValueReleaseRegisterToStack(reg asm.Register) {
543 for i := uint64(0); i < c.locationStack.sp; i++ {
544 prevValue := &c.locationStack.stack[i]
545 if prevValue.register == reg {
546 c.compileReleaseRegisterToStack(prevValue)
547 break
548 }
549 }
550 }
551
552
// compileV128Shuffle implements compiler.compileV128Shuffle for arm64.
//
// The lowering uses TBL2, whose table is the fixed register pair V29/V30
// (presumably because TBL2 requires adjacent source registers — confirm
// against the assembler's TBL2 encoding). Both operands are therefore forced
// into vReg/wReg before emitting the lookup, and the 16 lane selectors from
// o.Us are materialized as a 128-bit constant used as the index vector.
func (c *arm64Compiler) compileV128Shuffle(o *wazeroir.UnionOperation) (err error) {
	const vReg, wReg = arm64.RegV29, arm64.RegV30

	// Force the second operand into wReg.
	w := c.locationStack.popV128()
	if w.register != wReg {
		// Spill whatever value currently occupies wReg.
		c.onValueReleaseRegisterToStack(wReg)

		if w.onRegister() {
			// Copy w into wReg (VORR with identical sources acts as a move),
			// then release the old register.
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				w.register, w.register, wReg, arm64.VectorArrangement16B)

			c.markRegisterUnused(w.register)
		} else {
			// w is not in a register: load it directly into wReg.
			w.setRegister(wReg)
			c.compileLoadValueOnStackToRegister(w)
		}
	}

	// Same dance for the first operand into vReg.
	v := c.locationStack.popV128()
	if v.register != vReg {

		c.onValueReleaseRegisterToStack(vReg)

		if v.onRegister() {
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				v.register, v.register, vReg, arm64.VectorArrangement16B)

			c.markRegisterUnused(v.register)
		} else {
			v.setRegister(vReg)
			c.compileLoadValueOnStackToRegister(v)
		}
	}

	// Pin the pair while allocating the result register so the allocator
	// cannot hand either of them out.
	c.locationStack.markRegisterUsed(vReg, wReg)
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Materialize the byte selectors as a static 128-bit constant in result,
	// then TBL2 gathers one byte per selector from the {vReg, wReg} table.
	lanes := make([]byte, len(o.Us))
	for i, lane := range o.Us {
		lanes[i] = byte(lane)
	}
	c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(lanes), result, arm64.VectorArrangementQ)
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL2, vReg, result, arm64.VectorArrangement16B,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.locationStack.markRegisterUnused(vReg, wReg)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}
610
611
612 func (c *arm64Compiler) compileV128Swizzle(*wazeroir.UnionOperation) (err error) {
613 indexVec := c.locationStack.popV128()
614 if err = c.compileEnsureOnRegister(indexVec); err != nil {
615 return
616 }
617 baseVec := c.locationStack.popV128()
618 if err = c.compileEnsureOnRegister(baseVec); err != nil {
619 return
620 }
621
622 c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL1, baseVec.register, indexVec.register,
623 arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
624
625 c.markRegisterUnused(baseVec.register)
626 c.pushVectorRuntimeValueLocationOnRegister(indexVec.register)
627 return
628 }
629
630
631 func (c *arm64Compiler) compileV128AnyTrue(*wazeroir.UnionOperation) (err error) {
632 vector := c.locationStack.popV128()
633 if err = c.compileEnsureOnRegister(vector); err != nil {
634 return
635 }
636
637 v := vector.register
638 c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMAXP, v, v,
639 arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
640 c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
641 arm64.VectorArrangementD, 0)
642 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
643 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)
644
645 c.locationStack.markRegisterUnused(v)
646 return
647 }
648
649
650 func (c *arm64Compiler) compileV128AllTrue(o *wazeroir.UnionOperation) (err error) {
651 vector := c.locationStack.popV128()
652 if err = c.compileEnsureOnRegister(vector); err != nil {
653 return
654 }
655
656 v := vector.register
657 shape := o.B1
658 if shape == wazeroir.ShapeI64x2 {
659 c.assembler.CompileVectorRegisterToVectorRegister(arm64.CMEQZERO, arm64.RegRZR, v,
660 arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
661 c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDP, v, v,
662 arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
663 c.assembler.CompileTwoRegistersToNone(arm64.FCMPD, v, v)
664 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ)
665 } else {
666 var arr arm64.VectorArrangement
667 switch shape {
668 case wazeroir.ShapeI8x16:
669 arr = arm64.VectorArrangement16B
670 case wazeroir.ShapeI16x8:
671 arr = arm64.VectorArrangement8H
672 case wazeroir.ShapeI32x4:
673 arr = arm64.VectorArrangement4S
674 }
675
676 c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMINV, v, v,
677 arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
678 c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
679 arm64.VectorArrangementD, 0)
680 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
681 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)
682 }
683 c.markRegisterUnused(v)
684 return
685 }
686
var (
	// i8x16BitmaskConst holds the per-lane bit weight for each byte lane,
	// with the 0x01..0x80 pattern repeated for each 8-byte half; used by
	// compileV128BitMask (ShapeI8x16) to map lane sign bits to result bits.
	i8x16BitmaskConst = [16]byte{
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	}
	// i16x8BitmaskConst holds little-endian 16-bit lane weights
	// 0x0001..0x0080; used by compileV128BitMask (ShapeI16x8).
	i16x8BitmaskConst = [16]byte{
		0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00,
		0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80, 0x00,
	}
	// i32x4BitmaskConst holds little-endian 32-bit lane weights 0x1,0x2,0x4,0x8;
	// used by compileV128BitMask (ShapeI32x4).
	i32x4BitmaskConst = [16]byte{
		0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
		0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
	}
)
701
702
// compileV128BitMask implements compiler.compileV128BitMask for arm64.
//
// It collects the top (sign) bit of every lane of the popped vector into an
// i32 result, with lane i mapped to bit i of the result.
func (c *arm64Compiler) compileV128BitMask(o *wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register

	result, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Arithmetic shift right by 7 turns each byte into 0xff (sign bit
		// set) or 0x00.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement16B, 7)

		// Load the per-lane bit weights (0x01..0x80 per 8-byte half).
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i8x16BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Keep each lane's weight only where its sign bit was set.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		// EXT #8 rotates v by 8 bytes into vecTmp, so vecTmp's low half holds
		// v's upper 8 bytes.
		c.assembler.CompileTwoVectorRegistersToVectorRegisterWithConst(arm64.EXT, v, v, vecTmp, arm64.VectorArrangement16B, 0x8)

		// Interleave the halves byte-wise so each 16-bit lane pairs a
		// low-half weight with its high-half counterpart.
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.ZIP1, vecTmp, v, v, arm64.VectorArrangement16B)

		// Sum all 16-bit lanes: the low byte of the sum accumulates lanes
		// 0-7 and the high byte lanes 8-15.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement8H, arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Move the assembled 16-bit mask into the scalar result.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI16x8:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// 0xffff per negative lane, 0x0000 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement8H, 15)

		// Per-lane weights 0x0001..0x0080.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i16x8BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Mask the weights by the sign flags.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Horizontal add folds the 8 disjoint weights into one 16-bit mask.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement8H, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI32x4:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Arithmetic shift by the full lane width (32) replicates the sign
		// bit across each lane: 0xffffffff or 0x00000000.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement4S, 32)

		// Per-lane weights 0x1, 0x2, 0x4, 0x8.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV,
			asm.NewStaticConst(i32x4BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Mask the weights by the sign flags.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Fold the 4 disjoint weights into the final 4-bit mask.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementS, 0)
	case wazeroir.ShapeI64x2:
		// Move each 64-bit lane into a scalar register.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result,
			arm64.VectorArrangementD, 0)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
			arm64.VectorArrangementD, 1)

		// Isolate the sign bits: each register becomes 0 or 1.
		c.assembler.CompileConstToRegister(arm64.LSR, 63, result)
		c.assembler.CompileConstToRegister(arm64.LSR, 63, arm64ReservedRegisterForTemporary)

		// result += lane1Bit << 1; the bits are disjoint, so the add acts as
		// an OR assembling the 2-bit mask.
		c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
			arm64ReservedRegisterForTemporary, 1, result, result)
	}

	c.markRegisterUnused(v)
	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	return
}
819
820
// compileV128And implements compiler.compileV128And for arm64 (v128.and).
// Bitwise ops are lane-agnostic, so the 16B arrangement covers all 128 bits.
func (c *arm64Compiler) compileV128And(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.VAND, arm64.VectorArrangement16B)
}
824
825
// compileV128Not implements compiler.compileV128Not for arm64 (v128.not),
// inverting all 128 bits of the operand in place via compileV128UniOp.
func (c *arm64Compiler) compileV128Not(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.NOT, arm64.VectorArrangement16B)
}
829
830
// compileV128Or implements compiler.compileV128Or for arm64 (v128.or).
func (c *arm64Compiler) compileV128Or(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.VORR, arm64.VectorArrangement16B)
}
834
835
// compileV128Xor implements compiler.compileV128Xor for arm64 (v128.xor).
func (c *arm64Compiler) compileV128Xor(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.EOR, arm64.VectorArrangement16B)
}
839
840
841 func (c *arm64Compiler) compileV128Bitselect(*wazeroir.UnionOperation) error {
842 selector := c.locationStack.popV128()
843 if err := c.compileEnsureOnRegister(selector); err != nil {
844 return err
845 }
846
847 x2 := c.locationStack.popV128()
848 if err := c.compileEnsureOnRegister(x2); err != nil {
849 return err
850 }
851
852 x1 := c.locationStack.popV128()
853 if err := c.compileEnsureOnRegister(x1); err != nil {
854 return err
855 }
856
857 c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL,
858 x2.register, x1.register, selector.register, arm64.VectorArrangement16B)
859
860 c.markRegisterUnused(x1.register, x2.register)
861 c.pushVectorRuntimeValueLocationOnRegister(selector.register)
862 return nil
863 }
864
865
// compileV128AndNot implements compiler.compileV128AndNot for arm64
// (v128.andnot). BIC (bit clear) clears in x1 the bits set in x2, matching
// x1 &^ x2 given compileV128x2BinOp's operand order (x2 source, x1 destination).
func (c *arm64Compiler) compileV128AndNot(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.BIC, arm64.VectorArrangement16B)
}
869
870 func (c *arm64Compiler) compileV128UniOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
871 v := c.locationStack.popV128()
872 if err := c.compileEnsureOnRegister(v); err != nil {
873 return err
874 }
875
876 c.assembler.CompileVectorRegisterToVectorRegister(inst, v.register, v.register, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
877
878 c.pushVectorRuntimeValueLocationOnRegister(v.register)
879 return nil
880 }
881
882 func (c *arm64Compiler) compileV128x2BinOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
883 x2 := c.locationStack.popV128()
884 if err := c.compileEnsureOnRegister(x2); err != nil {
885 return err
886 }
887
888 x1 := c.locationStack.popV128()
889 if err := c.compileEnsureOnRegister(x1); err != nil {
890 return err
891 }
892
893 c.assembler.CompileVectorRegisterToVectorRegister(inst, x2.register, x1.register, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
894
895 c.markRegisterUnused(x2.register)
896 c.pushVectorRuntimeValueLocationOnRegister(x1.register)
897 return nil
898 }
899
900
901 func (c *arm64Compiler) compileV128Shr(o *wazeroir.UnionOperation) error {
902 var inst asm.Instruction
903 shape := o.B1
904 signed := o.B3
905 if signed {
906 inst = arm64.SSHL
907 } else {
908 inst = arm64.USHL
909 }
910 return c.compileV128ShiftImpl(shape, inst, true)
911 }
912
913
914 func (c *arm64Compiler) compileV128Shl(o *wazeroir.UnionOperation) error {
915 return c.compileV128ShiftImpl(o.B1 , arm64.SSHL, false)
916 }
917
// compileV128ShiftImpl emits code for a lane-wise v128 shift.
//
// shape selects the lane width, ins is the by-register shift instruction
// (SSHL for arithmetic, USHL for logical), and rightShift requests negating
// the amount so the left-shift instruction performs a right shift.
//
// Stack effect: pops the scalar shift amount and the v128 operand, then
// pushes the shifted v128 result.
func (c *arm64Compiler) compileV128ShiftImpl(shape wazeroir.Shape, ins asm.Instruction, rightShift bool) error {
	s := c.locationStack.pop()
	if s.register == arm64.RegRZR {
		// The zero register as the amount means shift-by-zero: the vector
		// still on the stack is already the result, so emit nothing.
		return nil
	}

	// The shift amount is taken modulo the lane width, hence a per-shape
	// mask of (lane bits - 1).
	var modulo asm.ConstantValue
	var arr arm64.VectorArrangement
	switch shape {
	case wazeroir.ShapeI8x16:
		modulo = 0x7
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		modulo = 0xf
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		modulo = 0x1f
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		modulo = 0x3f
		arr = arm64.VectorArrangement2D
	}

	if err := c.compileEnsureOnRegister(s); err != nil {
		return err
	}

	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Mask the amount to the lane width: s.register &= modulo.
	c.assembler.CompileConstToRegister(arm64.ANDIMM32, modulo, s.register)

	if rightShift {
		// SSHL/USHL shift left for positive amounts and right for negative
		// ones, so negate the amount to obtain a right shift.
		c.assembler.CompileRegisterToRegister(arm64.NEG, s.register, s.register)
	}

	// Broadcast the scalar amount into every lane of tmp...
	c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, s.register, tmp,
		arr, arm64.VectorIndexNone)

	// ...then shift each lane of v by the corresponding lane of tmp.
	c.assembler.CompileVectorRegisterToVectorRegister(ins, tmp, v.register, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(s.register)
	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}
974
975
// compileV128Cmp implements compiler.compileV128Cmp for arm64, emitting the
// lane-wise comparison selected by o.B1 (a wazeroir V128CmpType value).
// Result lanes are all-ones where the comparison holds and all-zeros
// otherwise; the result reuses x1's register and replaces both operands on
// the stack.
func (c *arm64Compiler) compileV128Cmp(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// Each shape's comparison kinds occupy a contiguous range in the
	// V128CmpType enumeration, so the arrangement can be derived from the
	// range the value falls into.
	var arr arm64.VectorArrangement
	v128CmpType := o.B1
	if v128CmpType <= wazeroir.V128CmpTypeI8x16GeU {
		arr = arm64.VectorArrangement16B
	} else if v128CmpType <= wazeroir.V128CmpTypeI16x8GeU {
		arr = arm64.VectorArrangement8H
	} else if v128CmpType <= wazeroir.V128CmpTypeI32x4GeU {
		arr = arm64.VectorArrangement4S
	} else if v128CmpType <= wazeroir.V128CmpTypeI64x2GeS {
		arr = arm64.VectorArrangement2D
	} else if v128CmpType <= wazeroir.V128CmpTypeF32x4Ge {
		arr = arm64.VectorArrangement4S
	} else {
		arr = arm64.VectorArrangement2D
	}

	// x1's register doubles as the destination.
	result := x1.register
	switch v128CmpType {
	case wazeroir.V128CmpTypeI8x16Eq, wazeroir.V128CmpTypeI16x8Eq, wazeroir.V128CmpTypeI32x4Eq, wazeroir.V128CmpTypeI64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16Ne, wazeroir.V128CmpTypeI16x8Ne, wazeroir.V128CmpTypeI32x4Ne, wazeroir.V128CmpTypeI64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
		// ne = not(eq): invert every bit of the CMEQ result.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	// lt/gt (and le/ge below) reuse the same instruction with the operand
	// order swapped between the two directions.
	case wazeroir.V128CmpTypeI8x16LtS, wazeroir.V128CmpTypeI16x8LtS, wazeroir.V128CmpTypeI32x4LtS, wazeroir.V128CmpTypeI64x2LtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LtU, wazeroir.V128CmpTypeI16x8LtU, wazeroir.V128CmpTypeI32x4LtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtS, wazeroir.V128CmpTypeI16x8GtS, wazeroir.V128CmpTypeI32x4GtS, wazeroir.V128CmpTypeI64x2GtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtU, wazeroir.V128CmpTypeI16x8GtU, wazeroir.V128CmpTypeI32x4GtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeS, wazeroir.V128CmpTypeI16x8LeS, wazeroir.V128CmpTypeI32x4LeS, wazeroir.V128CmpTypeI64x2LeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeU, wazeroir.V128CmpTypeI16x8LeU, wazeroir.V128CmpTypeI32x4LeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeS, wazeroir.V128CmpTypeI16x8GeS, wazeroir.V128CmpTypeI32x4GeS, wazeroir.V128CmpTypeI64x2GeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeU, wazeroir.V128CmpTypeI16x8GeU, wazeroir.V128CmpTypeI32x4GeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Eq, wazeroir.V128CmpTypeF64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ne, wazeroir.V128CmpTypeF64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
		// Float ne is likewise not(eq).
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128CmpTypeF32x4Lt, wazeroir.V128CmpTypeF64x2Lt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Le, wazeroir.V128CmpTypeF64x2Le:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Gt, wazeroir.V128CmpTypeF64x2Gt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ge, wazeroir.V128CmpTypeF64x2Ge:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x2.register, x1.register, result, arr)
	}

	c.markRegisterUnused(x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}
1049
1050
1051 func (c *arm64Compiler) compileV128AddSat(o *wazeroir.UnionOperation) error {
1052 var inst asm.Instruction
1053 shape := o.B1
1054 signed := o.B3
1055 if signed {
1056 inst = arm64.VSQADD
1057 } else {
1058 inst = arm64.VUQADD
1059 }
1060 return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
1061 }
1062
1063
1064 func (c *arm64Compiler) compileV128SubSat(o *wazeroir.UnionOperation) error {
1065 var inst asm.Instruction
1066 shape := o.B1
1067 signed := o.B3
1068 if signed {
1069 inst = arm64.VSQSUB
1070 } else {
1071 inst = arm64.VUQSUB
1072 }
1073 return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
1074 }
1075
1076
// compileV128Mul implements compiler.compileV128Mul for arm64.
// Integer shapes up to i32x4 use MUL and float shapes use FMUL directly.
// i64x2 has no single-instruction multiply, so it is decomposed into 32-bit
// partial products (the REV64/MUL/XTN/ADDP/SHLL/UMLAL sequence below,
// apparently following the approach used by other engines such as V8).
func (c *arm64Compiler) compileV128Mul(o *wazeroir.UnionOperation) (err error) {
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16, wazeroir.ShapeI16x8, wazeroir.ShapeI32x4:
		err = c.compileV128x2BinOp(arm64.VMUL, defaultArrangementForShape(shape))
	case wazeroir.ShapeF32x4, wazeroir.ShapeF64x2:
		err = c.compileV128x2BinOp(arm64.VFMUL, defaultArrangementForShape(shape))
	case wazeroir.ShapeI64x2:
		x2 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x2); err != nil {
			return
		}

		x1 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x1); err != nil {
			return
		}

		src1, src2 := x1.register, x2.register

		// Three scratch vector registers hold the partial products.
		tmp1, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.markRegisterUsed(tmp1)

		tmp2, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		c.markRegisterUsed(tmp2)

		tmp3, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// tmp2 = src2 with the 32-bit halves of each 64-bit lane swapped,
		// then multiplied by src1: the cross products.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.REV64, src2, tmp2,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VMUL, src1, tmp2, tmp2, arm64.VectorArrangement4S)

		// tmp1 = the low 32 bits of each src1 lane.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src1, tmp1,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Pairwise-add the cross products belonging to the same 64-bit lane.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VADDP, tmp2, tmp2, arm64.VectorArrangement4S,
			arm64.VectorIndexNone, arm64.VectorIndexNone,
		)

		// tmp3 = the low 32 bits of each src2 lane.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src2, tmp3,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)

		// src1 = cross-product sums shifted up into the high 32 bits...
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SHLL, tmp2, src1,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)

		// ...accumulated with the widening product of the low halves.
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VUMLAL, tmp3, tmp1, src1, arm64.VectorArrangement2S)

		c.markRegisterUnused(src2, tmp1, tmp2)
		c.pushVectorRuntimeValueLocationOnRegister(src1)
	}
	return
}
1140
1141
1142 func (c *arm64Compiler) compileV128Div(o *wazeroir.UnionOperation) error {
1143 var arr arm64.VectorArrangement
1144 var inst asm.Instruction
1145 shape := o.B1
1146 switch shape {
1147 case wazeroir.ShapeF32x4:
1148 arr = arm64.VectorArrangement4S
1149 inst = arm64.VFDIV
1150 case wazeroir.ShapeF64x2:
1151 arr = arm64.VectorArrangement2D
1152 inst = arm64.VFDIV
1153 }
1154 return c.compileV128x2BinOp(inst, arr)
1155 }
1156
1157
1158 func (c *arm64Compiler) compileV128Neg(o *wazeroir.UnionOperation) error {
1159 var inst asm.Instruction
1160 shape := o.B1
1161 if shape <= wazeroir.ShapeI64x2 {
1162 inst = arm64.VNEG
1163 } else {
1164 inst = arm64.VFNEG
1165 }
1166 return c.compileV128UniOp(inst, defaultArrangementForShape(shape))
1167 }
1168
1169
1170 func (c *arm64Compiler) compileV128Sqrt(o *wazeroir.UnionOperation) error {
1171 var arr arm64.VectorArrangement
1172 shape := o.B1
1173 switch shape {
1174 case wazeroir.ShapeF32x4:
1175 arr = arm64.VectorArrangement4S
1176 case wazeroir.ShapeF64x2:
1177 arr = arm64.VectorArrangement2D
1178 }
1179 return c.compileV128UniOp(arm64.VFSQRT, arr)
1180 }
1181
1182
1183 func (c *arm64Compiler) compileV128Abs(o *wazeroir.UnionOperation) error {
1184 var inst asm.Instruction
1185 shape := o.B1
1186 if shape <= wazeroir.ShapeI64x2 {
1187 inst = arm64.VABS
1188 } else {
1189 inst = arm64.VFABS
1190 }
1191 return c.compileV128UniOp(inst, defaultArrangementForShape(shape))
1192 }
1193
1194
1195 func (c *arm64Compiler) compileV128Popcnt(o *wazeroir.UnionOperation) error {
1196 return c.compileV128UniOp(arm64.VCNT, defaultArrangementForShape(o.B1))
1197 }
1198
1199
1200 func (c *arm64Compiler) compileV128Min(o *wazeroir.UnionOperation) error {
1201 var inst asm.Instruction
1202 shape := o.B1
1203 signed := o.B3
1204 if shape <= wazeroir.ShapeI64x2 {
1205 if signed {
1206 inst = arm64.SMIN
1207 } else {
1208 inst = arm64.UMIN
1209 }
1210 } else {
1211 inst = arm64.VFMIN
1212 }
1213 return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
1214 }
1215
1216 func defaultArrangementForShape(s wazeroir.Shape) (arr arm64.VectorArrangement) {
1217 switch s {
1218 case wazeroir.ShapeI8x16:
1219 arr = arm64.VectorArrangement16B
1220 case wazeroir.ShapeI16x8:
1221 arr = arm64.VectorArrangement8H
1222 case wazeroir.ShapeI32x4:
1223 arr = arm64.VectorArrangement4S
1224 case wazeroir.ShapeI64x2:
1225 arr = arm64.VectorArrangement2D
1226 case wazeroir.ShapeF32x4:
1227 arr = arm64.VectorArrangement4S
1228 case wazeroir.ShapeF64x2:
1229 arr = arm64.VectorArrangement2D
1230 }
1231 return
1232 }
1233
1234
1235 func (c *arm64Compiler) compileV128Max(o *wazeroir.UnionOperation) error {
1236 var inst asm.Instruction
1237 shape := o.B1
1238 signed := o.B3
1239 if shape <= wazeroir.ShapeI64x2 {
1240 if signed {
1241 inst = arm64.SMAX
1242 } else {
1243 inst = arm64.UMAX
1244 }
1245 } else {
1246 inst = arm64.VFMAX
1247 }
1248 return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
1249 }
1250
1251
1252 func (c *arm64Compiler) compileV128AvgrU(o *wazeroir.UnionOperation) error {
1253 return c.compileV128x2BinOp(arm64.URHADD, defaultArrangementForShape(o.B1))
1254 }
1255
1256
1257 func (c *arm64Compiler) compileV128Pmin(o *wazeroir.UnionOperation) error {
1258 return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.B1), false)
1259 }
1260
1261
1262 func (c *arm64Compiler) compileV128Pmax(o *wazeroir.UnionOperation) error {
1263 return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.B1), true)
1264 }
1265
1266
// compileV128PseudoMinOrMax emits wasm's f32x4/f64x2 pmin (max=false) and
// pmax (max=true). These are defined as plain comparisons —
// pmin(x1, x2) = x2 < x1 ? x2 : x1; pmax(x1, x2) = x1 < x2 ? x2 : x1 —
// without the NaN canonicalization of min/max proper, which is why FCMGT
// plus a bitwise select suffices.
func (c *arm64Compiler) compileV128PseudoMinOrMax(arr arm64.VectorArrangement, max bool) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Build the selection mask in result: lanes where the pmin/pmax
	// condition holds become all-ones. Note the swapped operand order
	// between the two directions.
	if max {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1r, x2r, result, arr)
	} else {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2r, x1r, result, arr)
	}

	// BSL selects bits from one source where the mask (already in result)
	// is set and from the other where it is clear, leaving the chosen
	// lanes in result.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL, x1r, x2r, result, arm64.VectorArrangement16B)

	c.markRegisterUnused(x1r, x2r)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}
1298
1299
1300 func (c *arm64Compiler) compileV128Ceil(o *wazeroir.UnionOperation) error {
1301 var arr arm64.VectorArrangement
1302 shape := o.B1
1303 switch shape {
1304 case wazeroir.ShapeF32x4:
1305 arr = arm64.VectorArrangement4S
1306 case wazeroir.ShapeF64x2:
1307 arr = arm64.VectorArrangement2D
1308 }
1309 return c.compileV128UniOp(arm64.VFRINTP, arr)
1310 }
1311
1312
1313 func (c *arm64Compiler) compileV128Floor(o *wazeroir.UnionOperation) error {
1314 var arr arm64.VectorArrangement
1315 shape := o.B1
1316 switch shape {
1317 case wazeroir.ShapeF32x4:
1318 arr = arm64.VectorArrangement4S
1319 case wazeroir.ShapeF64x2:
1320 arr = arm64.VectorArrangement2D
1321 }
1322 return c.compileV128UniOp(arm64.VFRINTM, arr)
1323 }
1324
1325
1326 func (c *arm64Compiler) compileV128Trunc(o *wazeroir.UnionOperation) error {
1327 var arr arm64.VectorArrangement
1328 shape := o.B1
1329 switch shape {
1330 case wazeroir.ShapeF32x4:
1331 arr = arm64.VectorArrangement4S
1332 case wazeroir.ShapeF64x2:
1333 arr = arm64.VectorArrangement2D
1334 }
1335 return c.compileV128UniOp(arm64.VFRINTZ, arr)
1336 }
1337
1338
1339 func (c *arm64Compiler) compileV128Nearest(o *wazeroir.UnionOperation) error {
1340 var arr arm64.VectorArrangement
1341 shape := o.B1
1342 switch shape {
1343 case wazeroir.ShapeF32x4:
1344 arr = arm64.VectorArrangement4S
1345 case wazeroir.ShapeF64x2:
1346 arr = arm64.VectorArrangement2D
1347 }
1348 return c.compileV128UniOp(arm64.VFRINTN, arr)
1349 }
1350
1351
1352 func (c *arm64Compiler) compileV128Extend(o *wazeroir.UnionOperation) error {
1353 var inst asm.Instruction
1354 var arr arm64.VectorArrangement
1355 originShape := o.B1
1356 signed := o.B2 == 1
1357 useLow := o.B3
1358 if useLow {
1359 if signed {
1360 inst = arm64.SSHLL
1361 } else {
1362 inst = arm64.USHLL
1363 }
1364
1365 switch originShape {
1366 case wazeroir.ShapeI8x16:
1367 arr = arm64.VectorArrangement8B
1368 case wazeroir.ShapeI16x8:
1369 arr = arm64.VectorArrangement4H
1370 case wazeroir.ShapeI32x4:
1371 arr = arm64.VectorArrangement2S
1372 }
1373 } else {
1374 if signed {
1375 inst = arm64.SSHLL2
1376 } else {
1377 inst = arm64.USHLL2
1378 }
1379 arr = defaultArrangementForShape(originShape)
1380 }
1381
1382 return c.compileV128UniOp(inst, arr)
1383 }
1384
1385
1386 func (c *arm64Compiler) compileV128ExtMul(o *wazeroir.UnionOperation) error {
1387 var inst asm.Instruction
1388 var arr arm64.VectorArrangement
1389 originShape := o.B1
1390 signed := o.B2 == 1
1391 useLow := o.B3
1392 if useLow {
1393 if signed {
1394 inst = arm64.SMULL
1395 } else {
1396 inst = arm64.UMULL
1397 }
1398
1399 switch originShape {
1400 case wazeroir.ShapeI8x16:
1401 arr = arm64.VectorArrangement8B
1402 case wazeroir.ShapeI16x8:
1403 arr = arm64.VectorArrangement4H
1404 case wazeroir.ShapeI32x4:
1405 arr = arm64.VectorArrangement2S
1406 }
1407 } else {
1408 if signed {
1409 inst = arm64.SMULL2
1410 } else {
1411 inst = arm64.UMULL2
1412 }
1413 arr = defaultArrangementForShape(originShape)
1414 }
1415
1416 return c.compileV128x2BinOp(inst, arr)
1417 }
1418
1419
1420 func (c *arm64Compiler) compileV128Q15mulrSatS(*wazeroir.UnionOperation) error {
1421 return c.compileV128x2BinOp(arm64.SQRDMULH, arm64.VectorArrangement8H)
1422 }
1423
1424
1425 func (c *arm64Compiler) compileV128ExtAddPairwise(o *wazeroir.UnionOperation) error {
1426 var inst asm.Instruction
1427 originShape := o.B1
1428 signed := o.B3
1429 if signed {
1430 inst = arm64.SADDLP
1431 } else {
1432 inst = arm64.UADDLP
1433 }
1434 return c.compileV128UniOp(inst, defaultArrangementForShape(originShape))
1435 }
1436
1437
1438 func (c *arm64Compiler) compileV128FloatPromote(*wazeroir.UnionOperation) error {
1439 return c.compileV128UniOp(arm64.FCVTL, arm64.VectorArrangement2S)
1440 }
1441
1442
1443 func (c *arm64Compiler) compileV128FloatDemote(*wazeroir.UnionOperation) error {
1444 return c.compileV128UniOp(arm64.FCVTN, arm64.VectorArrangement2S)
1445 }
1446
1447
1448 func (c *arm64Compiler) compileV128FConvertFromI(o *wazeroir.UnionOperation) (err error) {
1449 destinationShape := o.B1
1450 signed := o.B3
1451
1452 if destinationShape == wazeroir.ShapeF32x4 {
1453 if signed {
1454 err = c.compileV128UniOp(arm64.VSCVTF, defaultArrangementForShape(destinationShape))
1455 } else {
1456 err = c.compileV128UniOp(arm64.VUCVTF, defaultArrangementForShape(destinationShape))
1457 }
1458 return
1459 } else {
1460 v := c.locationStack.popV128()
1461 if err = c.compileEnsureOnRegister(v); err != nil {
1462 return
1463 }
1464 vr := v.register
1465
1466 var expand, convert asm.Instruction
1467 if signed {
1468 expand, convert = arm64.SSHLL, arm64.VSCVTF
1469 } else {
1470 expand, convert = arm64.USHLL, arm64.VUCVTF
1471 }
1472
1473
1474 c.assembler.CompileVectorRegisterToVectorRegisterWithConst(expand, vr, vr, arm64.VectorArrangement2S, 0)
1475
1476 c.assembler.CompileVectorRegisterToVectorRegister(convert, vr, vr, arm64.VectorArrangement2D,
1477 arm64.VectorIndexNone, arm64.VectorIndexNone)
1478 c.pushVectorRuntimeValueLocationOnRegister(vr)
1479 }
1480 return
1481 }
1482
1483
// compileV128Dot implements compiler.compileV128Dot for arm64
// (i32x4.dot_i16x8_s): each pair of adjacent signed 16-bit lane products is
// summed into one 32-bit result lane.
func (c *arm64Compiler) compileV128Dot(*wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Widening-multiply the low four i16 lanes into four i32 products (tmp)...
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL, x1r, x2r, tmp, arm64.VectorArrangement4H)
	// ...and the high four lanes (into x1r, which becomes scratch here).
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL2, x1r, x2r, x1r, arm64.VectorArrangement8H)
	// Pairwise-add adjacent products to form the four dot-product lanes.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VADDP, x1r, tmp, x1r, arm64.VectorArrangement4S)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)

	return nil
}
1514
1515
// compileV128Narrow implements compiler.compileV128Narrow for arm64
// (i8x16.narrow_i16x8_s/u and i16x8.narrow_i32x4_s/u). o.B1 is the origin
// shape and o.B3 the destination signedness. Each source lane is narrowed
// with saturation: SQXTN/SQXTN2 saturate to the signed range, while
// SQXTUN/SQXTUN2 take signed input and saturate to the unsigned range.
// x1 supplies the low half of the result and x2 the high half.
func (c *arm64Compiler) compileV128Narrow(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// arr names the narrowed low half; arr2 the full-width destination used
	// by the "2" (upper-half-writing) variant.
	var arr, arr2 arm64.VectorArrangement
	originShape := o.B1
	signed := o.B3
	switch originShape {
	case wazeroir.ShapeI16x8:
		arr = arm64.VectorArrangement8B
		arr2 = arm64.VectorArrangement16B
	case wazeroir.ShapeI32x4:
		arr = arm64.VectorArrangement4H
		arr2 = arm64.VectorArrangement8H
	}

	var lo, hi asm.Instruction
	if signed {
		lo, hi = arm64.SQXTN, arm64.SQXTN2
	} else {
		lo, hi = arm64.SQXTUN, arm64.SQXTUN2
	}

	// Narrow x1 into the low half of x1r...
	c.assembler.CompileVectorRegisterToVectorRegister(lo, x1r, x1r, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
	// ...then narrow x2 into the high half of x1r.
	c.assembler.CompileVectorRegisterToVectorRegister(hi, x2r, x1r, arr2, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}
1557
1558
// compileV128ITruncSatFromF implements compiler.compileV128ITruncSatFromF
// for arm64 (i32x4.trunc_sat_f32x4_s/u and i32x4.trunc_sat_f64x2_s/u_zero).
// o.B1 is the source float shape and o.B3 the destination signedness.
// FCVTZS/FCVTZU truncate toward zero with saturation; for an f64x2 source,
// the two 64-bit results are additionally narrowed (again with saturation)
// into the low two 32-bit lanes, which zeroes the upper lanes as the
// "_zero" variants require.
func (c *arm64Compiler) compileV128ITruncSatFromF(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	originShape := o.B1
	signed := o.B3
	var cvt asm.Instruction
	if signed {
		cvt = arm64.VFCVTZS
	} else {
		cvt = arm64.VFCVTZU
	}

	// Saturating truncation toward zero, lane-wise, in place.
	c.assembler.CompileVectorRegisterToVectorRegister(cvt, v.register, v.register,
		defaultArrangementForShape(originShape), arm64.VectorIndexNone, arm64.VectorIndexNone,
	)

	if originShape == wazeroir.ShapeF64x2 {
		// Narrow the two 64-bit integers into the low 2S lanes with
		// signed (SQXTN) or unsigned (UQXTN) saturation.
		var narrow asm.Instruction
		if signed {
			narrow = arm64.SQXTN
		} else {
			narrow = arm64.UQXTN
		}
		c.assembler.CompileVectorRegisterToVectorRegister(narrow, v.register, v.register,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return
}
1593