1 package arm64
2
3 import (
4 "encoding/binary"
5 "errors"
6 "fmt"
7
8 "github.com/tetratelabs/wazero/internal/asm"
9 )
10
// nodeImpl implements asm.Node for arm64.
type nodeImpl struct {
// jumpTarget holds the node this node jumps to when the instruction is a branch.
jumpTarget *nodeImpl

// next points to the next node in the singly linked list built by the assembler.
next *nodeImpl
staticConst *asm.StaticConst

instruction asm.Instruction
types operandTypes
srcReg, srcReg2, dstReg, dstReg2 asm.Register
srcConst, dstConst asm.ConstantValue

// offsetInBinary is the offset of this instruction in the assembled binary.
offsetInBinary asm.NodeOffsetInBinary

// readInstructionAddressBeforeTargetInstruction holds the instruction that comes right before
// the target whose address a CompileReadInstructionAddress (ADR) node should load.
readInstructionAddressBeforeTargetInstruction asm.Instruction

vectorArrangement VectorArrangement
srcVectorIndex, dstVectorIndex VectorIndex
}
32
33
34 func (n *nodeImpl) AssignJumpTarget(target asm.Node) {
35 n.jumpTarget = target.(*nodeImpl)
36 }
37
38
39 func (n *nodeImpl) AssignDestinationConstant(value asm.ConstantValue) {
40 n.dstConst = value
41 }
42
43
44 func (n *nodeImpl) AssignSourceConstant(value asm.ConstantValue) {
45 n.srcConst = value
46 }
47
48
49 func (n *nodeImpl) OffsetInBinary() asm.NodeOffsetInBinary {
50 return n.offsetInBinary
51 }
52
// String implements fmt.Stringer.
//
// The output is for debugging only and loosely follows an AT&T-like syntax of
// "INSTRUCTION src, dst", with memory operands written as "[base + offset]" and grouped
// operands wrapped in parentheses.
func (n *nodeImpl) String() (ret string) {
60 instName := InstructionName(n.instruction)
61 switch n.types {
62 case operandTypesNoneToNone:
63 ret = instName
64 case operandTypesNoneToRegister:
65 ret = fmt.Sprintf("%s %s", instName, RegisterName(n.dstReg))
66 case operandTypesNoneToBranch:
67 ret = fmt.Sprintf("%s {%v}", instName, n.jumpTarget)
68 case operandTypesRegisterToRegister:
69 ret = fmt.Sprintf("%s %s, %s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg))
70 case operandTypesLeftShiftedRegisterToRegister:
71 ret = fmt.Sprintf("%s (%s, %s << %d), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), n.srcConst, RegisterName(n.dstReg))
72 case operandTypesTwoRegistersToRegister:
73 ret = fmt.Sprintf("%s (%s, %s), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg))
74 case operandTypesThreeRegistersToRegister:
ret = fmt.Sprintf("%s (%s, %s, %s), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg), RegisterName(n.dstReg2))
76 case operandTypesTwoRegistersToNone:
77 ret = fmt.Sprintf("%s (%s, %s)", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2))
78 case operandTypesRegisterAndConstToNone:
79 ret = fmt.Sprintf("%s (%s, 0x%x)", instName, RegisterName(n.srcReg), n.srcConst)
80 case operandTypesRegisterAndConstToRegister:
81 ret = fmt.Sprintf("%s (%s, 0x%x), %s", instName, RegisterName(n.srcReg), n.srcConst, RegisterName(n.dstReg))
82 case operandTypesRegisterToMemory:
83 if n.dstReg2 != asm.NilRegister {
84 ret = fmt.Sprintf("%s %s, [%s + %s]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), RegisterName(n.dstReg2))
85 } else {
86 ret = fmt.Sprintf("%s %s, [%s + 0x%x]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.dstConst)
87 }
88 case operandTypesMemoryToRegister:
89 if n.srcReg2 != asm.NilRegister {
90 ret = fmt.Sprintf("%s [%s + %s], %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg))
91 } else {
92 ret = fmt.Sprintf("%s [%s + 0x%x], %s", instName, RegisterName(n.srcReg), n.srcConst, RegisterName(n.dstReg))
93 }
94 case operandTypesConstToRegister:
95 ret = fmt.Sprintf("%s 0x%x, %s", instName, n.srcConst, RegisterName(n.dstReg))
96 case operandTypesRegisterToVectorRegister:
97 ret = fmt.Sprintf("%s %s, %s.%s[%d]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement, n.dstVectorIndex)
98 case operandTypesVectorRegisterToRegister:
99 ret = fmt.Sprintf("%s %s.%s[%d], %s", instName, RegisterName(n.srcReg), n.vectorArrangement, n.srcVectorIndex, RegisterName(n.dstReg))
100 case operandTypesVectorRegisterToMemory:
101 if n.dstReg2 != asm.NilRegister {
102 ret = fmt.Sprintf("%s %s.%s, [%s + %s]", instName, RegisterName(n.srcReg), n.vectorArrangement, RegisterName(n.dstReg), RegisterName(n.dstReg2))
103 } else {
104 ret = fmt.Sprintf("%s %s.%s, [%s + 0x%x]", instName, RegisterName(n.srcReg), n.vectorArrangement, RegisterName(n.dstReg), n.dstConst)
105 }
106 case operandTypesMemoryToVectorRegister:
107 ret = fmt.Sprintf("%s [%s], %s.%s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement)
108 case operandTypesVectorRegisterToVectorRegister:
109 ret = fmt.Sprintf("%s %[2]s.%[4]s, %[3]s.%[4]s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement)
110 case operandTypesStaticConstToVectorRegister:
ret = fmt.Sprintf("%s $%#x, %s.%s", instName, n.staticConst.Raw, RegisterName(n.dstReg), n.vectorArrangement)
112 case operandTypesTwoVectorRegistersToVectorRegister:
113 ret = fmt.Sprintf("%s (%s.%[5]s, %[3]s.%[5]s), %[4]s.%[5]s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg), n.vectorArrangement)
114 }
115 return
116 }
117
// operandTypes represents the combination of operand kinds (none, register, memory, constant,
// vector register, branch target) that a node carries; it selects which encode function runs.
type operandTypes byte
120
121 const (
122 operandTypesNoneToNone operandTypes = iota
123 operandTypesNoneToRegister
124 operandTypesNoneToBranch
125 operandTypesRegisterToRegister
126 operandTypesLeftShiftedRegisterToRegister
127 operandTypesTwoRegistersToRegister
128 operandTypesThreeRegistersToRegister
129 operandTypesTwoRegistersToNone
130 operandTypesRegisterAndConstToNone
131 operandTypesRegisterAndConstToRegister
132 operandTypesRegisterToMemory
133 operandTypesMemoryToRegister
134 operandTypesConstToRegister
135 operandTypesRegisterToVectorRegister
136 operandTypesVectorRegisterToRegister
137 operandTypesMemoryToVectorRegister
138 operandTypesVectorRegisterToMemory
139 operandTypesVectorRegisterToVectorRegister
140 operandTypesTwoVectorRegistersToVectorRegister
141 operandTypesStaticConstToVectorRegister
142 )
143
144
145 func (o operandTypes) String() (ret string) {
146 switch o {
147 case operandTypesNoneToNone:
148 ret = "NoneToNone"
149 case operandTypesNoneToRegister:
150 ret = "NoneToRegister"
151 case operandTypesNoneToBranch:
152 ret = "NoneToBranch"
153 case operandTypesRegisterToRegister:
154 ret = "RegisterToRegister"
155 case operandTypesLeftShiftedRegisterToRegister:
156 ret = "LeftShiftedRegisterToRegister"
157 case operandTypesTwoRegistersToRegister:
158 ret = "TwoRegistersToRegister"
159 case operandTypesThreeRegistersToRegister:
160 ret = "ThreeRegistersToRegister"
161 case operandTypesTwoRegistersToNone:
162 ret = "TwoRegistersToNone"
163 case operandTypesRegisterAndConstToNone:
164 ret = "RegisterAndConstToNone"
165 case operandTypesRegisterAndConstToRegister:
166 ret = "RegisterAndConstToRegister"
167 case operandTypesRegisterToMemory:
168 ret = "RegisterToMemory"
169 case operandTypesMemoryToRegister:
170 ret = "MemoryToRegister"
171 case operandTypesConstToRegister:
172 ret = "ConstToRegister"
173 case operandTypesRegisterToVectorRegister:
174 ret = "RegisterToVectorRegister"
175 case operandTypesVectorRegisterToRegister:
176 ret = "VectorRegisterToRegister"
177 case operandTypesMemoryToVectorRegister:
178 ret = "MemoryToVectorRegister"
179 case operandTypesVectorRegisterToMemory:
180 ret = "VectorRegisterToMemory"
181 case operandTypesVectorRegisterToVectorRegister:
182 ret = "VectorRegisterToVectorRegister"
183 case operandTypesTwoVectorRegistersToVectorRegister:
184 ret = "TwoVectorRegistersToVectorRegister"
185 case operandTypesStaticConstToVectorRegister:
186 ret = "StaticConstToVectorRegister"
187 }
188 return
189 }
190
// Branch immediates are counted in 4-byte instructions rather than bytes: B carries a signed
// 26-bit immediate and B.cond a signed 19-bit immediate, so these constants bound offset/4.
const (
maxSignedInt26 int64 = 1<<25 - 1
minSignedInt26 int64 = -(1 << 25)

maxSignedInt19 int64 = 1<<18 - 1
minSignedInt19 int64 = -(1 << 18)
)
198

// AssemblerImpl implements Assembler for arm64.
type AssemblerImpl struct {
root *nodeImpl
current *nodeImpl
asm.BaseAssemblerImpl
relativeJumpNodes []*nodeImpl
adrInstructionNodes []*nodeImpl
nodePool nodePool
pool asm.StaticConstPool
nodeCount int

// MaxDisplacementForConstantPool is the maximum distance, in bytes, allowed between an
// instruction that refers to the constant pool and the pool itself. It is kept as a field
// (rather than using the package constant directly) so it can be overridden.
MaxDisplacementForConstantPool int

// temporaryRegister is a register reserved by the caller that the assembler may clobber to
// materialize constants and offsets that do not fit in an immediate.
temporaryRegister asm.Register
}
216
// nodePageSize is the number of nodeImpl instances in a single nodePool page.
const nodePageSize = 128

type nodePage = [nodePageSize]nodeImpl

// nodePool batches allocations of nodeImpl into pages of nodePageSize nodes so that building
// the instruction list does not allocate (and later garbage-collect) each node individually.
type nodePool struct {
pages []*nodePage
index int
}
227
// allocNode returns a pointer to a zero-valued nodeImpl taken from the pool, allocating a new
// page when the current page is exhausted.
func (n *nodePool) allocNode() *nodeImpl {
if n.index == nodePageSize {
// The current page is full: grow into a previously allocated page if the slice's capacity
// still holds one, otherwise allocate a fresh page.
if len(n.pages) == cap(n.pages) {
n.pages = append(n.pages, new(nodePage))
} else {
i := len(n.pages)
n.pages = n.pages[:i+1]
if n.pages[i] == nil {
n.pages[i] = new(nodePage)
}
}
n.index = 0
}
ret := &n.pages[len(n.pages)-1][n.index]
n.index++
return ret
}
247
// reset zeroes every node handed out so far and rewinds the pool, so the already-allocated
// pages can be reused by the next compilation.
func (n *nodePool) reset() {
for _, ns := range n.pages {
pages := ns[:]
for i := range pages {
pages[i] = nodeImpl{}
}
}
n.pages = n.pages[:0]
n.index = nodePageSize
}
258
// NewAssembler returns a new AssemblerImpl. The given temporary register is reserved as a
// scratch register that the assembler may clobber while encoding, for example to materialize
// constants that do not fit in an immediate.
func NewAssembler(temporaryRegister asm.Register) *AssemblerImpl {
260 return &AssemblerImpl{
261 nodePool: nodePool{index: nodePageSize},
262 temporaryRegister: temporaryRegister,
263 pool: asm.NewStaticConstPool(),
264 MaxDisplacementForConstantPool: defaultMaxDisplacementForConstPool,
265 }
266 }
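
// Illustrative usage only: the exact register constants (RegR27, RegR0, RegR1) and the way the
// asm.Buffer is obtained are assumptions about the surrounding packages, not guarantees made by
// this file. The outline below sketches the intended compile-then-assemble flow.
//
//	a := NewAssembler(RegR27)                      // a general-purpose register reserved as scratch
//	a.CompileConstToRegister(MOVD, 1234, RegR0)    // R0 = 1234
//	a.CompileRegisterToRegister(ADD, RegR0, RegR1) // R1 = R1 + R0 (destructive two-operand form)
//	err := a.Assemble(buf)                         // buf is an asm.Buffer supplied by the caller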
267
268
269 func (a *AssemblerImpl) AllocateNOP() asm.Node {
270 n := a.nodePool.allocNode()
271 n.instruction = NOP
272 n.types = operandTypesNoneToNone
273 return n
274 }
275
276
277 func (a *AssemblerImpl) Add(n asm.Node) {
278 a.addNode(n.(*nodeImpl))
279 }
280
281
282 func (a *AssemblerImpl) Reset() {
283 pool := a.pool
284 pool.Reset()
285 *a = AssemblerImpl{
286 nodePool: a.nodePool,
287 pool: pool,
288 temporaryRegister: a.temporaryRegister,
289 adrInstructionNodes: a.adrInstructionNodes[:0],
290 relativeJumpNodes: a.relativeJumpNodes[:0],
291 BaseAssemblerImpl: asm.BaseAssemblerImpl{
292 SetBranchTargetOnNextNodes: a.SetBranchTargetOnNextNodes[:0],
293 JumpTableEntries: a.JumpTableEntries[:0],
294 },
295 }
296 a.nodePool.reset()
297 }
298
299
300 func (a *AssemblerImpl) newNode(instruction asm.Instruction, types operandTypes) *nodeImpl {
301 n := a.nodePool.allocNode()
302 n.instruction = instruction
303 n.types = types
304
305 a.addNode(n)
306 return n
307 }
308
309
310 func (a *AssemblerImpl) addNode(node *nodeImpl) {
311 a.nodeCount++
312
313 if a.root == nil {
314 a.root = node
315 a.current = node
316 } else {
317 parent := a.current
318 parent.next = node
319 a.current = node
320 }
321
322 for _, o := range a.SetBranchTargetOnNextNodes {
323 origin := o.(*nodeImpl)
324 origin.jumpTarget = node
325 }
326
327 a.SetBranchTargetOnNextNodes = a.SetBranchTargetOnNextNodes[:0]
328 }
329
// Assemble encodes every node added so far into buf, flushing the constant pool as needed, and
// then back-patches relative branches, jump table entries, and ADR instructions whose targets
// are only known once all offsets are fixed.
func (a *AssemblerImpl) Assemble(buf asm.Buffer) error {
// Every arm64 instruction is 4 bytes, but some nodes expand to more than one instruction, so 8
// bytes per node is used as an initial capacity estimate for the buffer.
buf.Grow(a.nodeCount * 8)
336
337 for n := a.root; n != nil; n = n.next {
338 n.offsetInBinary = uint64(buf.Len())
339 if err := a.encodeNode(buf, n); err != nil {
340 return err
341 }
342 a.maybeFlushConstPool(buf, n.next == nil)
343 }
344
345 code := buf.Bytes()
346
347 if err := a.FinalizeJumpTableEntry(code); err != nil {
348 return err
349 }
350
351 for _, rel := range a.relativeJumpNodes {
352 if err := a.relativeBranchFinalize(code, rel); err != nil {
353 return err
354 }
355 }
356
357 for _, adr := range a.adrInstructionNodes {
358 if err := a.finalizeADRInstructionNode(code, adr); err != nil {
359 return err
360 }
361 }
362 return nil
363 }
364
// defaultMaxDisplacementForConstPool is the default maximum displacement, in bytes, between an
// instruction that refers to the constant pool and the pool contents. LDR (literal) encodes a
// signed 19-bit offset in 4-byte units (roughly +/-1MiB), so the pool is kept within
// (1<<20)-1 bytes, minus 4 bytes of headroom for the branch emitted to skip over the pool.
const defaultMaxDisplacementForConstPool = (1 << 20) - 1 - 4

// maybeFlushConstPool writes the pending constant pool into buf if endOfBinary is true, or if
// the pool would otherwise drift out of range of the instructions that reference it.
func (a *AssemblerImpl) maybeFlushConstPool(buf asm.Buffer, endOfBinary bool) {
369 if a.pool.Empty() {
370 return
371 }

// Flush the pool when the end of the binary is reached, or when the distance from the first
// instruction that uses the pool is about to exceed the maximum allowed displacement.
if endOfBinary ||
(buf.Len()+a.pool.PoolSizeInBytes-int(a.pool.FirstUseOffsetInBinary)) >= a.MaxDisplacementForConstantPool {

// Execution must not fall through into the pool, so emit an unconditional branch that skips
// over it. skipOffset is the branch distance counted in 4-byte instructions, including the
// branch itself, rounded up when the pool size is not a multiple of 4.
skipOffset := a.pool.PoolSizeInBytes/4 + 1
if a.pool.PoolSizeInBytes%4 != 0 {
skipOffset++
}
if endOfBinary {
// Nothing executes after the pool at the end of the binary, so the branch offset can be zero.
skipOffset = 0
}

// Unconditional branch (B): opcode 0b000101 in the top six bits, imm26 in the low 26 bits.
buf.Append4Bytes(
byte(skipOffset),
byte(skipOffset>>8),
byte(skipOffset>>16),
0x14,
)

// Write the constants themselves, recording where each one ends up so that the instructions
// referring to it can be back-patched.
for _, c := range a.pool.Consts {
c.SetOffsetInBinary(uint64(buf.Len()))
buf.AppendBytes(c.Raw)
}

// Pad to a 4-byte boundary so that the following instructions stay aligned.
if pad := buf.Len() % 4; pad != 0 {
buf.AppendBytes(make([]byte, 4-pad))
}

// Reset the pool for the next batch of constants.
a.pool.Reset()
}
414 }
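
// For illustration, a flushed constant pool in the middle of the instruction stream looks
// roughly like this (layout sketch, not literal disassembly):
//
//	...           ; preceding instructions
//	B skip        ; unconditional branch over the pool (offset 0 when endOfBinary)
//	<constants>   ; raw bytes of each pool entry, offsets recorded via SetOffsetInBinary
//	<padding>     ; zero bytes up to the next 4-byte boundary
//	skip:
//	...           ; following instructions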
415
416
417 func (a *AssemblerImpl) encodeNode(buf asm.Buffer, n *nodeImpl) (err error) {
418 switch n.types {
419 case operandTypesNoneToNone:
420 err = a.encodeNoneToNone(buf, n)
421 case operandTypesNoneToRegister:
422 err = a.encodeJumpToRegister(buf, n)
423 case operandTypesNoneToBranch:
424 err = a.encodeRelativeBranch(buf, n)
425 case operandTypesRegisterToRegister:
426 err = a.encodeRegisterToRegister(buf, n)
427 case operandTypesLeftShiftedRegisterToRegister:
428 err = a.encodeLeftShiftedRegisterToRegister(buf, n)
429 case operandTypesTwoRegistersToRegister:
430 err = a.encodeTwoRegistersToRegister(buf, n)
431 case operandTypesThreeRegistersToRegister:
432 err = a.encodeThreeRegistersToRegister(buf, n)
433 case operandTypesTwoRegistersToNone:
434 err = a.encodeTwoRegistersToNone(buf, n)
435 case operandTypesRegisterAndConstToNone:
436 err = a.encodeRegisterAndConstToNone(buf, n)
437 case operandTypesRegisterToMemory:
438 err = a.encodeRegisterToMemory(buf, n)
439 case operandTypesMemoryToRegister:
440 err = a.encodeMemoryToRegister(buf, n)
441 case operandTypesRegisterAndConstToRegister, operandTypesConstToRegister:
442 err = a.encodeConstToRegister(buf, n)
443 case operandTypesRegisterToVectorRegister:
444 err = a.encodeRegisterToVectorRegister(buf, n)
445 case operandTypesVectorRegisterToRegister:
446 err = a.encodeVectorRegisterToRegister(buf, n)
447 case operandTypesMemoryToVectorRegister:
448 err = a.encodeMemoryToVectorRegister(buf, n)
449 case operandTypesVectorRegisterToMemory:
450 err = a.encodeVectorRegisterToMemory(buf, n)
451 case operandTypesVectorRegisterToVectorRegister:
452 err = a.encodeVectorRegisterToVectorRegister(buf, n)
453 case operandTypesStaticConstToVectorRegister:
454 err = a.encodeStaticConstToVectorRegister(buf, n)
455 case operandTypesTwoVectorRegistersToVectorRegister:
456 err = a.encodeTwoVectorRegistersToVectorRegister(buf, n)
457 default:
458 err = fmt.Errorf("encoder undefined for [%s] operand type", n.types)
459 }
460 if err != nil {
461 err = fmt.Errorf("%w: %s", err, n)
462 }
463 return
464 }
465
466
467 func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node {
468 return a.newNode(instruction, operandTypesNoneToNone)
469 }
470
471
472 func (a *AssemblerImpl) CompileConstToRegister(
473 instruction asm.Instruction,
474 value asm.ConstantValue,
475 destinationReg asm.Register,
476 ) (inst asm.Node) {
477 n := a.newNode(instruction, operandTypesConstToRegister)
478 n.srcConst = value
479 n.dstReg = destinationReg
480 return n
481 }
482
483
484 func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) {
485 n := a.newNode(instruction, operandTypesRegisterToRegister)
486 n.srcReg = from
487 n.dstReg = to
488 }
489
490
491 func (a *AssemblerImpl) CompileMemoryToRegister(
492 instruction asm.Instruction,
493 sourceBaseReg asm.Register,
494 sourceOffsetConst asm.ConstantValue,
495 destinationReg asm.Register,
496 ) {
497 n := a.newNode(instruction, operandTypesMemoryToRegister)
498 n.srcReg = sourceBaseReg
499 n.srcConst = sourceOffsetConst
500 n.dstReg = destinationReg
501 }
502
503
504 func (a *AssemblerImpl) CompileRegisterToMemory(
505 instruction asm.Instruction,
506 sourceRegister, destinationBaseRegister asm.Register,
507 destinationOffsetConst asm.ConstantValue,
508 ) {
509 n := a.newNode(instruction, operandTypesRegisterToMemory)
510 n.srcReg = sourceRegister
511 n.dstReg = destinationBaseRegister
512 n.dstConst = destinationOffsetConst
513 }
514
515
516 func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node {
517 return a.newNode(jmpInstruction, operandTypesNoneToBranch)
518 }
519
520
521 func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) {
522 n := a.newNode(jmpInstruction, operandTypesNoneToRegister)
523 n.dstReg = reg
524 }
525
526
527 func (a *AssemblerImpl) CompileReadInstructionAddress(
528 destinationRegister asm.Register,
529 beforeAcquisitionTargetInstruction asm.Instruction,
530 ) {
531 n := a.newNode(ADR, operandTypesMemoryToRegister)
532 n.dstReg = destinationRegister
533 n.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction
534 }
535
536
537 func (a *AssemblerImpl) CompileMemoryWithRegisterOffsetToRegister(
538 instruction asm.Instruction,
539 srcBaseReg, srcOffsetReg, dstReg asm.Register,
540 ) {
541 n := a.newNode(instruction, operandTypesMemoryToRegister)
542 n.dstReg = dstReg
543 n.srcReg = srcBaseReg
544 n.srcReg2 = srcOffsetReg
545 }
546
547
548 func (a *AssemblerImpl) CompileRegisterToMemoryWithRegisterOffset(
549 instruction asm.Instruction,
550 srcReg, dstBaseReg, dstOffsetReg asm.Register,
551 ) {
552 n := a.newNode(instruction, operandTypesRegisterToMemory)
553 n.srcReg = srcReg
554 n.dstReg = dstBaseReg
555 n.dstReg2 = dstOffsetReg
556 }
557
558
559 func (a *AssemblerImpl) CompileTwoRegistersToRegister(instruction asm.Instruction, src1, src2, dst asm.Register) {
560 n := a.newNode(instruction, operandTypesTwoRegistersToRegister)
561 n.srcReg = src1
562 n.srcReg2 = src2
563 n.dstReg = dst
564 }
565
566
567 func (a *AssemblerImpl) CompileThreeRegistersToRegister(
568 instruction asm.Instruction,
569 src1, src2, src3, dst asm.Register,
570 ) {
571 n := a.newNode(instruction, operandTypesThreeRegistersToRegister)
572 n.srcReg = src1
573 n.srcReg2 = src2
574 n.dstReg = src3
575 n.dstReg2 = dst
576 }
577
578
579 func (a *AssemblerImpl) CompileTwoRegistersToNone(instruction asm.Instruction, src1, src2 asm.Register) {
580 n := a.newNode(instruction, operandTypesTwoRegistersToNone)
581 n.srcReg = src1
582 n.srcReg2 = src2
583 }
584
585
586 func (a *AssemblerImpl) CompileRegisterAndConstToNone(
587 instruction asm.Instruction,
588 src asm.Register,
589 srcConst asm.ConstantValue,
590 ) {
591 n := a.newNode(instruction, operandTypesRegisterAndConstToNone)
592 n.srcReg = src
593 n.srcConst = srcConst
594 }
595
596
597 func (a *AssemblerImpl) CompileRegisterAndConstToRegister(
598 instruction asm.Instruction,
599 src asm.Register,
600 srcConst asm.ConstantValue,
601 dst asm.Register,
602 ) {
603 n := a.newNode(instruction, operandTypesRegisterAndConstToRegister)
604 n.srcReg = src
605 n.srcConst = srcConst
606 n.dstReg = dst
607 }
608
609
610 func (a *AssemblerImpl) CompileLeftShiftedRegisterToRegister(
611 instruction asm.Instruction,
612 shiftedSourceReg asm.Register,
613 shiftNum asm.ConstantValue,
614 srcReg, dstReg asm.Register,
615 ) {
616 n := a.newNode(instruction, operandTypesLeftShiftedRegisterToRegister)
617 n.srcReg = srcReg
618 n.srcReg2 = shiftedSourceReg
619 n.srcConst = shiftNum
620 n.dstReg = dstReg
621 }
622
623
624 func (a *AssemblerImpl) CompileConditionalRegisterSet(cond asm.ConditionalRegisterState, dstReg asm.Register) {
625 n := a.newNode(CSET, operandTypesRegisterToRegister)
626 n.srcReg = conditionalRegisterStateToRegister(cond)
627 n.dstReg = dstReg
628 }
629
630
631 func (a *AssemblerImpl) CompileMemoryToVectorRegister(
632 instruction asm.Instruction, srcBaseReg asm.Register, dstOffset asm.ConstantValue, dstReg asm.Register, arrangement VectorArrangement,
633 ) {
634 n := a.newNode(instruction, operandTypesMemoryToVectorRegister)
635 n.srcReg = srcBaseReg
636 n.srcConst = dstOffset
637 n.dstReg = dstReg
638 n.vectorArrangement = arrangement
639 }
640
641
642 func (a *AssemblerImpl) CompileMemoryWithRegisterOffsetToVectorRegister(instruction asm.Instruction,
643 srcBaseReg, srcOffsetRegister asm.Register, dstReg asm.Register, arrangement VectorArrangement,
644 ) {
645 n := a.newNode(instruction, operandTypesMemoryToVectorRegister)
646 n.srcReg = srcBaseReg
647 n.srcReg2 = srcOffsetRegister
648 n.dstReg = dstReg
649 n.vectorArrangement = arrangement
650 }
651
652
653 func (a *AssemblerImpl) CompileVectorRegisterToMemory(
654 instruction asm.Instruction, srcReg, dstBaseReg asm.Register, dstOffset asm.ConstantValue, arrangement VectorArrangement,
655 ) {
656 n := a.newNode(instruction, operandTypesVectorRegisterToMemory)
657 n.srcReg = srcReg
658 n.dstReg = dstBaseReg
659 n.dstConst = dstOffset
660 n.vectorArrangement = arrangement
661 }
662
663
664 func (a *AssemblerImpl) CompileVectorRegisterToMemoryWithRegisterOffset(instruction asm.Instruction,
665 srcReg, dstBaseReg, dstOffsetRegister asm.Register, arrangement VectorArrangement,
666 ) {
667 n := a.newNode(instruction, operandTypesVectorRegisterToMemory)
668 n.srcReg = srcReg
669 n.dstReg = dstBaseReg
670 n.dstReg2 = dstOffsetRegister
671 n.vectorArrangement = arrangement
672 }
673
674
675 func (a *AssemblerImpl) CompileRegisterToVectorRegister(
676 instruction asm.Instruction, srcReg, dstReg asm.Register, arrangement VectorArrangement, index VectorIndex,
677 ) {
678 n := a.newNode(instruction, operandTypesRegisterToVectorRegister)
679 n.srcReg = srcReg
680 n.dstReg = dstReg
681 n.vectorArrangement = arrangement
682 n.dstVectorIndex = index
683 }
684
685
686 func (a *AssemblerImpl) CompileVectorRegisterToRegister(instruction asm.Instruction, srcReg, dstReg asm.Register,
687 arrangement VectorArrangement, index VectorIndex,
688 ) {
689 n := a.newNode(instruction, operandTypesVectorRegisterToRegister)
690 n.srcReg = srcReg
691 n.dstReg = dstReg
692 n.vectorArrangement = arrangement
693 n.srcVectorIndex = index
694 }
695
696
697 func (a *AssemblerImpl) CompileVectorRegisterToVectorRegister(
698 instruction asm.Instruction, srcReg, dstReg asm.Register, arrangement VectorArrangement, srcIndex, dstIndex VectorIndex,
699 ) {
700 n := a.newNode(instruction, operandTypesVectorRegisterToVectorRegister)
701 n.srcReg = srcReg
702 n.dstReg = dstReg
703 n.vectorArrangement = arrangement
704 n.srcVectorIndex = srcIndex
705 n.dstVectorIndex = dstIndex
706 }
707
708
709 func (a *AssemblerImpl) CompileVectorRegisterToVectorRegisterWithConst(instruction asm.Instruction,
710 srcReg, dstReg asm.Register, arrangement VectorArrangement, c asm.ConstantValue,
711 ) {
712 n := a.newNode(instruction, operandTypesVectorRegisterToVectorRegister)
713 n.srcReg = srcReg
714 n.srcConst = c
715 n.dstReg = dstReg
716 n.vectorArrangement = arrangement
717 }
718
719
720 func (a *AssemblerImpl) CompileStaticConstToRegister(instruction asm.Instruction, c *asm.StaticConst, dstReg asm.Register) {
721 n := a.newNode(instruction, operandTypesMemoryToRegister)
722 n.staticConst = c
723 n.dstReg = dstReg
724 }
725
726
727 func (a *AssemblerImpl) CompileStaticConstToVectorRegister(instruction asm.Instruction,
728 c *asm.StaticConst, dstReg asm.Register, arrangement VectorArrangement,
729 ) {
730 n := a.newNode(instruction, operandTypesStaticConstToVectorRegister)
731 n.staticConst = c
732 n.dstReg = dstReg
733 n.vectorArrangement = arrangement
734 }
735
736
737 func (a *AssemblerImpl) CompileTwoVectorRegistersToVectorRegister(instruction asm.Instruction, srcReg, srcReg2, dstReg asm.Register,
738 arrangement VectorArrangement,
739 ) {
740 n := a.newNode(instruction, operandTypesTwoVectorRegistersToVectorRegister)
741 n.srcReg = srcReg
742 n.srcReg2 = srcReg2
743 n.dstReg = dstReg
744 n.vectorArrangement = arrangement
745 }
746
747
748 func (a *AssemblerImpl) CompileTwoVectorRegistersToVectorRegisterWithConst(instruction asm.Instruction,
749 srcReg, srcReg2, dstReg asm.Register, arrangement VectorArrangement, c asm.ConstantValue,
750 ) {
751 n := a.newNode(instruction, operandTypesTwoVectorRegistersToVectorRegister)
752 n.srcReg = srcReg
753 n.srcReg2 = srcReg2
754 n.srcConst = c
755 n.dstReg = dstReg
756 n.vectorArrangement = arrangement
757 }
758
759 func errorEncodingUnsupported(n *nodeImpl) error {
760 return fmt.Errorf("%s is unsupported for %s type", InstructionName(n.instruction), n.types)
761 }
762
763 func (a *AssemblerImpl) encodeNoneToNone(buf asm.Buffer, n *nodeImpl) error {
764 switch n.instruction {
765 case UDF:
766 buf.Append4Bytes(0, 0, 0, 0)
767 return nil
768 case NOP:
769 return nil
770 default:
771 return errorEncodingUnsupported(n)
772 }
773 }
774
775 func (a *AssemblerImpl) encodeJumpToRegister(buf asm.Buffer, n *nodeImpl) error {
776
777 var opc byte
778 switch n.instruction {
779 case RET:
780 opc = 0b0010
781 case B:
782 opc = 0b0000
783 default:
784 return errorEncodingUnsupported(n)
785 }
786
787 regBits, err := intRegisterBits(n.dstReg)
788 if err != nil {
789 return fmt.Errorf("invalid destination register: %w", err)
790 }
791
792 buf.Append4Bytes(
793 0x00|(regBits<<5),
794 0x00|(regBits>>3),
795 0b000_11111|(opc<<5),
796 0b1101011_0|(opc>>3),
797 )
798 return err
799 }
800
801 func (a *AssemblerImpl) relativeBranchFinalize(code []byte, n *nodeImpl) error {
802 var condBits byte
803 const condBitsUnconditional = 0xff
804
805
806 switch n.instruction {
807 case B:
808 condBits = condBitsUnconditional
809 case BCONDEQ:
810 condBits = 0b0000
811 case BCONDGE:
812 condBits = 0b1010
813 case BCONDGT:
814 condBits = 0b1100
815 case BCONDHI:
816 condBits = 0b1000
817 case BCONDHS:
818 condBits = 0b0010
819 case BCONDLE:
820 condBits = 0b1101
821 case BCONDLO:
822 condBits = 0b0011
823 case BCONDLS:
824 condBits = 0b1001
825 case BCONDLT:
826 condBits = 0b1011
827 case BCONDMI:
828 condBits = 0b0100
829 case BCONDPL:
830 condBits = 0b0101
831 case BCONDNE:
832 condBits = 0b0001
833 case BCONDVS:
834 condBits = 0b0110
835 case BCONDVC:
836 condBits = 0b0111
837 }
838
839 branchInstOffset := int64(n.OffsetInBinary())
840 offset := int64(n.jumpTarget.OffsetInBinary()) - branchInstOffset
841 if offset%4 != 0 {
return errors.New("BUG: relative jump offset must be 4-byte aligned")
843 }
844
845 branchInst := code[branchInstOffset : branchInstOffset+4]
846 if condBits == condBitsUnconditional {
847 imm26 := offset >> 2
848 if imm26 < minSignedInt26 || imm26 > maxSignedInt26 {
849
850
851
852 return fmt.Errorf("relative jump offset %d/4 must be within %d and %d", offset, minSignedInt26, maxSignedInt26)
853 }
854
855 branchInst[0] = byte(imm26)
856 branchInst[1] = byte(imm26 >> 8)
857 branchInst[2] = byte(imm26 >> 16)
858 branchInst[3] = (byte(imm26 >> 24 & 0b000000_11)) | 0b000101_00
859 } else {
860 imm19 := offset >> 2
861 if imm19 < minSignedInt19 || imm19 > maxSignedInt19 {
862
863
864 return fmt.Errorf("BUG: relative jump offset %d/4(=%d) must be within %d and %d", offset, imm19, minSignedInt19, maxSignedInt19)
865 }
866
867 branchInst[0] = (byte(imm19<<5) & 0b111_0_0000) | condBits
868 branchInst[1] = byte(imm19 >> 3)
869 branchInst[2] = byte(imm19 >> 11)
870 branchInst[3] = 0b01010100
871 }
872 return nil
873 }
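
// As a concrete example of the fixup above: an unconditional B whose target lies 8 bytes after
// the branch instruction has offset 8, so imm26 = 8/4 = 2 and the patched word is 0x14000002,
// i.e. 0b000101 in the top six bits followed by the 26-bit immediate.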
874
875 func (a *AssemblerImpl) encodeRelativeBranch(buf asm.Buffer, n *nodeImpl) error {
876 switch n.instruction {
877 case B, BCONDEQ, BCONDGE, BCONDGT, BCONDHI, BCONDHS, BCONDLE, BCONDLO, BCONDLS, BCONDLT, BCONDMI, BCONDNE, BCONDVS, BCONDVC, BCONDPL:
878 default:
879 return errorEncodingUnsupported(n)
880 }
881
882 if n.jumpTarget == nil {
883 return fmt.Errorf("branch target must be set for %s", InstructionName(n.instruction))
884 }
885
886
887 buf.Append4Bytes(0, 0, 0, 0)
888 a.relativeJumpNodes = append(a.relativeJumpNodes, n)
889 return nil
890 }
891
892 func checkRegisterToRegisterType(src, dst asm.Register, requireSrcInt, requireDstInt bool) (err error) {
893 isSrcInt, isDstInt := isIntRegister(src), isIntRegister(dst)
894 if isSrcInt && !requireSrcInt {
895 err = fmt.Errorf("src requires float register but got %s", RegisterName(src))
896 } else if !isSrcInt && requireSrcInt {
897 err = fmt.Errorf("src requires int register but got %s", RegisterName(src))
898 } else if isDstInt && !requireDstInt {
899 err = fmt.Errorf("dst requires float register but got %s", RegisterName(dst))
900 } else if !isDstInt && requireDstInt {
901 err = fmt.Errorf("dst requires int register but got %s", RegisterName(dst))
902 }
903 return
904 }
905
906 func (a *AssemblerImpl) encodeRegisterToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
907 switch inst := n.instruction; inst {
908 case ADD, ADDW, SUB:
909 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
910 return
911 }
912
913
914 var sfops byte
915 switch inst {
916 case ADD:
917 sfops = 0b100
918 case ADDW:
919 case SUB:
920 sfops = 0b110
921 }
922
923 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
924 buf.Append4Bytes(
925 (dstRegBits<<5)|dstRegBits,
926 dstRegBits>>3,
927 srcRegBits,
928 (sfops<<5)|0b01011,
929 )
930 case CLZ, CLZW, RBIT, RBITW:
931 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
932 return
933 }
934
935 var sf, opcode byte
936 switch inst {
937 case CLZ:
938
939 sf, opcode = 0b1, 0b000_100
940 case CLZW:
941
942 sf, opcode = 0b0, 0b000_100
943 case RBIT:
944
945 sf, opcode = 0b1, 0b000_000
946 case RBITW:
947
948 sf, opcode = 0b0, 0b000_000
949 }
953
954 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
955 buf.Append4Bytes(
956 (srcRegBits<<5)|dstRegBits,
957 opcode<<2|(srcRegBits>>3),
958 0b110_00000,
959 (sf<<7)|0b0_1011010,
960 )
961 case CSET:
962 if !isConditionalRegister(n.srcReg) {
963 return fmt.Errorf("CSET requires conditional register but got %s", RegisterName(n.srcReg))
964 }
965
966 dstRegBits, err := intRegisterBits(n.dstReg)
967 if err != nil {
968 return err
969 }
970
971
972
973
974
975 var conditionalBits byte
976 switch n.srcReg {
977 case RegCondEQ:
978 conditionalBits = 0b0001
979 case RegCondNE:
980 conditionalBits = 0b0000
981 case RegCondHS:
982 conditionalBits = 0b0011
983 case RegCondLO:
984 conditionalBits = 0b0010
985 case RegCondMI:
986 conditionalBits = 0b0101
987 case RegCondPL:
988 conditionalBits = 0b0100
989 case RegCondVS:
990 conditionalBits = 0b0111
991 case RegCondVC:
992 conditionalBits = 0b0110
993 case RegCondHI:
994 conditionalBits = 0b1001
995 case RegCondLS:
996 conditionalBits = 0b1000
997 case RegCondGE:
998 conditionalBits = 0b1011
999 case RegCondLT:
1000 conditionalBits = 0b1010
1001 case RegCondGT:
1002 conditionalBits = 0b1101
1003 case RegCondLE:
1004 conditionalBits = 0b1100
1005 case RegCondAL:
1006 conditionalBits = 0b1111
1007 case RegCondNV:
1008 conditionalBits = 0b1110
1009 }
1010
1011
1012 buf.Append4Bytes(
1013 0b111_00000|dstRegBits,
1014 (conditionalBits<<4)|0b0000_0111,
1015 0b100_11111,
1016 0b10011010,
1017 )
1018
1019 case FABSD, FABSS, FNEGD, FNEGS, FSQRTD, FSQRTS, FCVTSD, FCVTDS, FRINTMD, FRINTMS,
1020 FRINTND, FRINTNS, FRINTPD, FRINTPS, FRINTZD, FRINTZS:
1021 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, false); err != nil {
1022 return
1023 }
1024
1025 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1026
1027
1028 var tp, opcode byte
1029 switch inst {
1030 case FABSD:
1031 opcode, tp = 0b000001, 0b01
1032 case FABSS:
1033 opcode, tp = 0b000001, 0b00
1034 case FNEGD:
1035 opcode, tp = 0b000010, 0b01
1036 case FNEGS:
1037 opcode, tp = 0b000010, 0b00
1038 case FSQRTD:
1039 opcode, tp = 0b000011, 0b01
1040 case FSQRTS:
1041 opcode, tp = 0b000011, 0b00
1042 case FCVTSD:
1043 opcode, tp = 0b000101, 0b00
1044 case FCVTDS:
1045 opcode, tp = 0b000100, 0b01
1046 case FRINTMD:
1047 opcode, tp = 0b001010, 0b01
1048 case FRINTMS:
1049 opcode, tp = 0b001010, 0b00
1050 case FRINTND:
1051 opcode, tp = 0b001000, 0b01
1052 case FRINTNS:
1053 opcode, tp = 0b001000, 0b00
1054 case FRINTPD:
1055 opcode, tp = 0b001001, 0b01
1056 case FRINTPS:
1057 opcode, tp = 0b001001, 0b00
1058 case FRINTZD:
1059 opcode, tp = 0b001011, 0b01
1060 case FRINTZS:
1061 opcode, tp = 0b001011, 0b00
1062 }
1063 buf.Append4Bytes(
1064 (srcRegBits<<5)|dstRegBits,
1065 (opcode<<7)|0b0_10000_00|(srcRegBits>>3),
1066 tp<<6|0b00_1_00000|opcode>>1,
1067 0b0_00_11110,
1068 )
1069
1070 case FADDD, FADDS, FDIVS, FDIVD, FMAXD, FMAXS, FMIND, FMINS, FMULS, FMULD:
1071 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, false); err != nil {
1072 return
1073 }
1074
1075 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1076
1077
1078
1079 var tp, opcode byte
1080 switch inst {
1081 case FADDD:
1082 opcode, tp = 0b0010, 0b01
1083 case FADDS:
1084 opcode, tp = 0b0010, 0b00
1085 case FDIVD:
1086 opcode, tp = 0b0001, 0b01
1087 case FDIVS:
1088 opcode, tp = 0b0001, 0b00
1089 case FMAXD:
1090 opcode, tp = 0b0100, 0b01
1091 case FMAXS:
1092 opcode, tp = 0b0100, 0b00
1093 case FMIND:
1094 opcode, tp = 0b0101, 0b01
1095 case FMINS:
1096 opcode, tp = 0b0101, 0b00
1097 case FMULS:
1098 opcode, tp = 0b0000, 0b00
1099 case FMULD:
1100 opcode, tp = 0b0000, 0b01
1101 }
1102
1103 buf.Append4Bytes(
1104 (dstRegBits<<5)|dstRegBits,
1105 opcode<<4|0b0000_10_00|(dstRegBits>>3),
1106 tp<<6|0b00_1_00000|srcRegBits,
1107 0b0001_1110,
1108 )
1109
1110 case FCVTZSD, FCVTZSDW, FCVTZSS, FCVTZSSW, FCVTZUD, FCVTZUDW, FCVTZUS, FCVTZUSW:
1111 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, true); err != nil {
1112 return
1113 }
1114
1115 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1116
1117
1118
1119 var sf, tp, opcode byte
1120 switch inst {
1121 case FCVTZSD:
1122 sf, tp, opcode = 0b1, 0b01, 0b000
1123 case FCVTZSDW:
1124 sf, tp, opcode = 0b0, 0b01, 0b000
1125 case FCVTZSS:
1126 sf, tp, opcode = 0b1, 0b00, 0b000
1127 case FCVTZSSW:
1128 sf, tp, opcode = 0b0, 0b00, 0b000
1129 case FCVTZUD:
1130 sf, tp, opcode = 0b1, 0b01, 0b001
1131 case FCVTZUDW:
1132 sf, tp, opcode = 0b0, 0b01, 0b001
1133 case FCVTZUS:
1134 sf, tp, opcode = 0b1, 0b00, 0b001
1135 case FCVTZUSW:
1136 sf, tp, opcode = 0b0, 0b00, 0b001
1137 }
1138
1139 buf.Append4Bytes(
1140 (srcRegBits<<5)|dstRegBits,
1141 0|(srcRegBits>>3),
1142 tp<<6|0b00_1_11_000|opcode,
1143 sf<<7|0b0_0_0_11110,
1144 )
1145
1146 case FMOVD, FMOVS:
1147 isSrcInt, isDstInt := isIntRegister(n.srcReg), isIntRegister(n.dstReg)
1148 if isSrcInt && isDstInt {
return errors.New("FMOV requires at least one operand to be a float register")
1150 }
1151
1152 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1153
1154 if !isSrcInt && !isDstInt {
1155 var tp byte
1156 if inst == FMOVD {
1157 tp = 0b01
1158 }
1159 buf.Append4Bytes(
1160 (srcRegBits<<5)|dstRegBits,
1161 0b0_10000_00|(srcRegBits>>3),
1162 tp<<6|0b00_1_00000,
1163 0b000_11110,
1164 )
1165 } else if isSrcInt && !isDstInt {
1166 var tp, sf byte
1167 if inst == FMOVD {
1168 tp, sf = 0b01, 0b1
1169 }
1170 buf.Append4Bytes(
1171 (srcRegBits<<5)|dstRegBits,
1172 srcRegBits>>3,
1173 tp<<6|0b00_1_00_111,
1174 sf<<7|0b0_00_11110,
1175 )
1176 } else {
1177 var tp, sf byte
1178 if inst == FMOVD {
1179 tp, sf = 0b01, 0b1
1180 }
1181 buf.Append4Bytes(
1182 (srcRegBits<<5)|dstRegBits,
1183 srcRegBits>>3,
1184 tp<<6|0b00_1_00_110,
1185 sf<<7|0b0_00_11110,
1186 )
1187 }
1188
1189 case MOVD, MOVW:
1190 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
1191 return
1192 }
1193 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1194
1195 if n.srcReg == RegSP || n.dstReg == RegSP {
1196
1197
1198 buf.Append4Bytes(
1199 (srcRegBits<<5)|dstRegBits,
1200 srcRegBits>>3,
1201 0x0,
1202 0b1001_0001,
1203 )
1204 return
1205 }
1206
1207 if n.srcReg == RegRZR && inst == MOVD {
1208
1209
1210
1211 buf.Append4Bytes(
1212 dstRegBits,
1213 0x0,
1214 0b1000_0000,
1215 0b1_10_10010,
1216 )
1217 } else {
1218
1219
1220 var sf byte
1221 if inst == MOVD {
1222 sf = 0b1
1223 }
1224 buf.Append4Bytes(
1225 (zeroRegisterBits<<5)|dstRegBits,
1226 zeroRegisterBits>>3,
1227 0b000_00000|srcRegBits,
1228 sf<<7|0b0_01_01010,
1229 )
1230 }
1231
1232 case MRS:
1233 if n.srcReg != RegFPSR {
return fmt.Errorf("MRS only supports FPSR as the source register, but got %s", RegisterName(n.srcReg))
1235 }
1236
1237
1238
1239 dstRegBits := registerBits(n.dstReg)
1240 buf.Append4Bytes(
1241 0b001<<5|dstRegBits,
1242 0b0100<<4|0b0100,
1243 0b0011_0000|0b11<<3|0b011,
1244 0b1101_0101,
1245 )
1246
1247 case MSR:
1248 if n.dstReg != RegFPSR {
return fmt.Errorf("MSR only supports FPSR as the destination register, but got %s", RegisterName(n.dstReg))
1250 }
1251
1252
1253
1254 srcRegBits := registerBits(n.srcReg)
1255 buf.Append4Bytes(
1256 0b001<<5|srcRegBits,
1257 0b0100<<4|0b0100,
1258 0b0001_0000|0b11<<3|0b011,
1259 0b1101_0101,
1260 )
1261
1262 case MUL, MULW:
1263
1264
1265
1266 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
1267 return
1268 }
1269
1270 var sf byte
1271 if inst == MUL {
1272 sf = 0b1
1273 }
1274
1275 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1276
1277 buf.Append4Bytes(
1278 dstRegBits<<5|dstRegBits,
1279 zeroRegisterBits<<2|dstRegBits>>3,
1280 srcRegBits,
1281 sf<<7|0b11011,
1282 )
1283
1284 case NEG, NEGW:
1285 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1286
1287 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
1288 return
1289 }
1290
1291
1292
1293 var sf byte
1294 if inst == NEG {
1295 sf = 0b1
1296 }
1297
1298 buf.Append4Bytes(
1299 (zeroRegisterBits<<5)|dstRegBits,
1300 zeroRegisterBits>>3,
1301 srcRegBits,
1302 sf<<7|0b0_10_00000|0b0_00_01011,
1303 )
1304
1305 case SDIV, SDIVW, UDIV, UDIVW:
1306 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1307
1308 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
1309 return
1310 }
1311
1312
1313
1314 var sf, opcode byte
1315 switch inst {
1316 case SDIV:
1317 sf, opcode = 0b1, 0b000011
1318 case SDIVW:
1319 sf, opcode = 0b0, 0b000011
1320 case UDIV:
1321 sf, opcode = 0b1, 0b000010
1322 case UDIVW:
1323 sf, opcode = 0b0, 0b000010
1324 }
1325
1326 buf.Append4Bytes(
1327 (dstRegBits<<5)|dstRegBits,
1328 opcode<<2|(dstRegBits>>3),
1329 0b110_00000|srcRegBits,
1330 sf<<7|0b0_00_11010,
1331 )
1332
1333 case SCVTFD, SCVTFWD, SCVTFS, SCVTFWS, UCVTFD, UCVTFS, UCVTFWD, UCVTFWS:
1334 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1335
1336 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, false); err != nil {
1337 return
1338 }
1339
1340
1341
1342 var sf, tp, opcode byte
1343 switch inst {
1344 case SCVTFD:
1345 sf, tp, opcode = 0b1, 0b01, 0b010
1346 case SCVTFWD:
1347 sf, tp, opcode = 0b0, 0b01, 0b010
1348 case SCVTFS:
1349 sf, tp, opcode = 0b1, 0b00, 0b010
1350 case SCVTFWS:
1351 sf, tp, opcode = 0b0, 0b00, 0b010
1352 case UCVTFD:
1353 sf, tp, opcode = 0b1, 0b01, 0b011
1354 case UCVTFWD:
1355 sf, tp, opcode = 0b0, 0b01, 0b011
1356 case UCVTFS:
1357 sf, tp, opcode = 0b1, 0b00, 0b011
1358 case UCVTFWS:
1359 sf, tp, opcode = 0b0, 0b00, 0b011
1360 }
1361
1362 buf.Append4Bytes(
1363 (srcRegBits<<5)|dstRegBits,
1364 srcRegBits>>3,
1365 tp<<6|0b00_1_00_000|opcode,
1366 sf<<7|0b0_0_0_11110,
1367 )
1368
1369 case SXTB, SXTBW, SXTH, SXTHW, SXTW:
1370 if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
1371 return
1372 }
1373
1374 srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
1375 if n.srcReg == RegRZR {
1376
1377 var sf byte
1378 if inst == MOVD {
1379 sf = 0b1
1380 }
1381 buf.Append4Bytes(
1382 (zeroRegisterBits<<5)|dstRegBits,
1383 zeroRegisterBits>>3,
1384 0b000_00000|srcRegBits,
1385 sf<<7|0b0_01_01010,
1386 )
1387 return
1388 }
1389

// SXTB, SXTH and SXTW are aliases of SBFM (signed bitfield move):
// SXTB is SBFM <Rd>, <Rn>, #0, #7; SXTH is SBFM <Rd>, <Rn>, #0, #15;
// and SXTW is SBFM <Xd>, <Xn>, #0, #31. nBit is the SBFM "N" field,
// which selects the 64-bit variant together with sf.
var nBit, sf, imms, opc byte
switch inst {
case SXTB:
nBit, sf, imms = 0b1, 0b1, 0x7
case SXTBW:
nBit, sf, imms = 0b0, 0b0, 0x7
case SXTH:
nBit, sf, imms = 0b1, 0b1, 0xf
case SXTHW:
nBit, sf, imms = 0b0, 0b0, 0xf
case SXTW:
nBit, sf, imms = 0b1, 0b1, 0x1f
}

buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
imms<<2|(srcRegBits>>3),
nBit<<6,
sf<<7|opc<<5|0b10011,
)
1417 default:
1418 return errorEncodingUnsupported(n)
1419 }
1420 return
1421 }
1422
1423 func (a *AssemblerImpl) encodeLeftShiftedRegisterToRegister(buf asm.Buffer, n *nodeImpl) error {
1424 baseRegBits, err := intRegisterBits(n.srcReg)
1425 if err != nil {
1426 return err
1427 }
1428 shiftTargetRegBits, err := intRegisterBits(n.srcReg2)
1429 if err != nil {
1430 return err
1431 }
1432 dstRegBits, err := intRegisterBits(n.dstReg)
1433 if err != nil {
1434 return err
1435 }
1436
1437 switch n.instruction {
1438 case ADD:
1439
1440 const logicalLeftShiftBits = 0b00
if n.srcConst < 0 || n.srcConst > 63 {
return fmt.Errorf("shift amount must fit in unsigned 6-bit integer (0-63) but got %d", n.srcConst)
}
1444 shiftByte := byte(n.srcConst)
1445 buf.Append4Bytes(
1446 (baseRegBits<<5)|dstRegBits,
1447 (shiftByte<<2)|(baseRegBits>>3),
1448 (logicalLeftShiftBits<<6)|shiftTargetRegBits,
1449 0b1000_1011,
1450 )
1451 return err
1452 default:
1453 return errorEncodingUnsupported(n)
1454 }
1455 }
1456
1457 func (a *AssemblerImpl) encodeTwoRegistersToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
1458 switch inst := n.instruction; inst {
1459 case AND, ANDW, ORR, ORRW, EOR, EORW:
1460
1461
1462 srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
1463 var sf, opc byte
1464 switch inst {
1465 case AND:
1466 sf, opc = 0b1, 0b00
1467 case ANDW:
1468 sf, opc = 0b0, 0b00
1469 case ORR:
1470 sf, opc = 0b1, 0b01
1471 case ORRW:
1472 sf, opc = 0b0, 0b01
1473 case EOR:
1474 sf, opc = 0b1, 0b10
1475 case EORW:
1476 sf, opc = 0b0, 0b10
1477 }
1478 buf.Append4Bytes(
1479 (srcReg2Bits<<5)|dstRegBits,
1480 srcReg2Bits>>3,
1481 srcRegBits,
1482 sf<<7|opc<<5|0b01010,
1483 )
1484 case ASR, ASRW, LSL, LSLW, LSR, LSRW, ROR, RORW:
1485
1486
1487 srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
1488
1489 var sf, opcode byte
1490 switch inst {
1491 case ASR:
1492 sf, opcode = 0b1, 0b001010
1493 case ASRW:
1494 sf, opcode = 0b0, 0b001010
1495 case LSL:
1496 sf, opcode = 0b1, 0b001000
1497 case LSLW:
1498 sf, opcode = 0b0, 0b001000
1499 case LSR:
1500 sf, opcode = 0b1, 0b001001
1501 case LSRW:
1502 sf, opcode = 0b0, 0b001001
1503 case ROR:
1504 sf, opcode = 0b1, 0b001011
1505 case RORW:
1506 sf, opcode = 0b0, 0b001011
1507 }
1508 buf.Append4Bytes(
1509 (srcReg2Bits<<5)|dstRegBits,
1510 opcode<<2|(srcReg2Bits>>3),
1511 0b110_00000|srcRegBits,
1512 sf<<7|0b0_00_11010,
1513 )
1514 case SDIV, SDIVW, UDIV, UDIVW:
1515 srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
1516
1517
1518
1519 var sf, opcode byte
1520 switch inst {
1521 case SDIV:
1522 sf, opcode = 0b1, 0b000011
1523 case SDIVW:
1524 sf, opcode = 0b0, 0b000011
1525 case UDIV:
1526 sf, opcode = 0b1, 0b000010
1527 case UDIVW:
1528 sf, opcode = 0b0, 0b000010
1529 }
1530
1531 buf.Append4Bytes(
1532 (srcReg2Bits<<5)|dstRegBits,
1533 opcode<<2|(srcReg2Bits>>3),
1534 0b110_00000|srcRegBits,
1535 sf<<7|0b0_00_11010,
1536 )
1537 case SUB, SUBW:
1538 srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
1539
1540
1541
1542 var sf byte
1543 if inst == SUB {
1544 sf = 0b1
1545 }
1546
1547 buf.Append4Bytes(
1548 (srcReg2Bits<<5)|dstRegBits,
1549 srcReg2Bits>>3,
1550 srcRegBits,
1551 sf<<7|0b0_10_01011,
1552 )
1553 case FSUBD, FSUBS:
1554 srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
1555
1556
1557
1558 var tp byte
1559 if inst == FSUBD {
1560 tp = 0b01
1561 }
1562 buf.Append4Bytes(
1563 (srcReg2Bits<<5)|dstRegBits,
1564 0b0011_10_00|(srcReg2Bits>>3),
1565 tp<<6|0b00_1_00000|srcRegBits,
1566 0b0_00_11110,
1567 )
1568 default:
1569 return errorEncodingUnsupported(n)
1570 }
1571 return
1572 }
1573
1574 func (a *AssemblerImpl) encodeThreeRegistersToRegister(buf asm.Buffer, n *nodeImpl) error {
1575 switch n.instruction {
1576 case MSUB, MSUBW:
1577
1578
1579
1580 src1RegBits, err := intRegisterBits(n.srcReg)
1581 if err != nil {
1582 return err
1583 }
1584 src2RegBits, err := intRegisterBits(n.srcReg2)
1585 if err != nil {
1586 return err
1587 }
1588 src3RegBits, err := intRegisterBits(n.dstReg)
1589 if err != nil {
1590 return err
1591 }
1592 dstRegBits, err := intRegisterBits(n.dstReg2)
1593 if err != nil {
1594 return err
1595 }
1596
1597 var sf byte
1598 if n.instruction == MSUB {
1599 sf = 0b1
1600 }
1601
1602 buf.Append4Bytes(
1603 (src3RegBits<<5)|dstRegBits,
1604 0b1_0000000|(src2RegBits<<2)|(src3RegBits>>3),
1605 src1RegBits,
1606 sf<<7|0b00_11011,
1607 )
1608 return nil
1609 default:
1610 return errorEncodingUnsupported(n)
1611 }
1612 }
1613
1614 func (a *AssemblerImpl) encodeTwoRegistersToNone(buf asm.Buffer, n *nodeImpl) error {
1615 switch n.instruction {
1616 case CMPW, CMP:
1617
1618
1619
1620 src1RegBits, err := intRegisterBits(n.srcReg)
1621 if err != nil {
1622 return err
1623 }
1624 src2RegBits, err := intRegisterBits(n.srcReg2)
1625 if err != nil {
1626 return err
1627 }
1628
1629 var op byte
1630 if n.instruction == CMP {
1631 op = 0b111
1632 } else {
1633 op = 0b011
1634 }
1635
1636 buf.Append4Bytes(
1637 (src2RegBits<<5)|zeroRegisterBits,
1638 src2RegBits>>3,
1639 src1RegBits,
1640 0b01011|(op<<5),
1641 )
1642 return nil
1643 case FCMPS, FCMPD:
1644
1645
1646 src1RegBits, err := vectorRegisterBits(n.srcReg)
1647 if err != nil {
1648 return err
1649 }
1650 src2RegBits, err := vectorRegisterBits(n.srcReg2)
1651 if err != nil {
1652 return err
1653 }
1654
1655 var ftype byte
1656 if n.instruction == FCMPD {
1657 ftype = 0b01
1658 }
1659 buf.Append4Bytes(
1660 src2RegBits<<5,
1661 0b001000_00|(src2RegBits>>3),
1662 ftype<<6|0b1_00000|src1RegBits,
1663 0b000_11110,
1664 )
1665 return nil
1666 default:
1667 return errorEncodingUnsupported(n)
1668 }
1669 }
1670
1671 func (a *AssemblerImpl) encodeRegisterAndConstToNone(buf asm.Buffer, n *nodeImpl) error {
1672 if n.instruction != CMP {
1673 return errorEncodingUnsupported(n)
1674 }
1675
1676
1677 if n.srcConst < 0 || n.srcConst > 4095 {
1678 return fmt.Errorf("immediate for CMP must fit in 0 to 4095 but got %d", n.srcConst)
1679 } else if n.srcReg == RegRZR {
1680 return errors.New("zero register is not supported for CMP (immediate)")
1681 }
1682
1683 srcRegBits, err := intRegisterBits(n.srcReg)
1684 if err != nil {
1685 return err
1686 }
1687
1688 buf.Append4Bytes(
1689 (srcRegBits<<5)|zeroRegisterBits,
1690 (byte(n.srcConst)<<2)|(srcRegBits>>3),
1691 byte(n.srcConst>>6),
1692 0b111_10001,
1693 )
1694 return nil
1695 }
1696
1697 func fitInSigned9Bits(v int64) bool {
1698 return v >= -256 && v <= 255
1699 }
1700
1701 func (a *AssemblerImpl) encodeLoadOrStoreWithRegisterOffset(
1702 buf asm.Buffer, baseRegBits, offsetRegBits, targetRegBits byte, opcode, size, v byte,
1703 ) {
1704
1705
1706 buf.Append4Bytes(
1707 (baseRegBits<<5)|targetRegBits,
1708 0b011_010_00|(baseRegBits>>3),
1709 opcode<<6|0b00_1_00000|offsetRegBits,
1710 size<<6|v<<2|0b00_111_0_00,
1711 )
1712 }
1713
1714
1715
1716
1717 func validateMemoryOffset(offset int64) error {
1718 if offset > 255 && offset%4 != 0 {
1719
1720
1721 return fmt.Errorf("large memory offset (>255) must be a multiple of 4 but got %d", offset)
1722 } else if offset < -256 {
return fmt.Errorf("negative memory offset must be larger than or equal to -256 but got %d", offset)
1724 } else if offset > 1<<31-1 {
1725 return fmt.Errorf("large memory offset must be less than %d but got %d", 1<<31-1, offset)
1726 } else {
1727 return nil
1728 }
1729 }
1730
1731
1732
1733
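// encodeLoadOrStoreWithConstOffset encodes a load or store whose address is a base register plus
// a constant offset. Reading the body below, roughly three strategies are used:
//
//  1. If the offset fits in a signed 9-bit immediate and is negative or not a multiple of the
//     data size, use the unscaled-immediate form.
//  2. If the offset is a non-negative multiple of the data size that fits in the scaled unsigned
//     12-bit immediate, use the unsigned-offset form.
//  3. Otherwise, materialize the offset via the temporary register: either add the upper 12-bit
//     chunk with an ADD (immediate, shifted by 12) and fold the remainder into the scaled
//     immediate, or load a 32-bit literal from the constant pool, and then use the
//     register-offset form.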
1734 func (a *AssemblerImpl) encodeLoadOrStoreWithConstOffset(
1735 buf asm.Buffer,
1736 baseRegBits, targetRegBits byte,
1737 offset int64,
1738 opcode, size, v byte,
1739 datasize, datasizeLog2 int64,
1740 ) (err error) {
1741 if err = validateMemoryOffset(offset); err != nil {
1742 return
1743 }
1744
1745 if fitInSigned9Bits(offset) {
1746
1747
1748 if offset < 0 || offset%datasize != 0 {
1749
1750 buf.Append4Bytes(
1751 (baseRegBits<<5)|targetRegBits,
1752 byte(offset<<4)|(baseRegBits>>3),
1753 opcode<<6|(0b00_00_11111&byte(offset>>4)),
1754 size<<6|v<<2|0b00_1_11_0_00,
1755 )
1756 return
1757 }
1758 }
1759
1760
1761
1762 if offset%datasize == 0 &&
1763 offset < (1<<12)<<datasizeLog2 {
1764 m := offset / datasize
1765 buf.Append4Bytes(
1766 (baseRegBits<<5)|targetRegBits,
1767 (byte(m<<2))|(baseRegBits>>3),
1768 opcode<<6|0b00_111111&byte(m>>6),
1769 size<<6|v<<2|0b00_1_11_0_01,
1770 )
1771 return
1772 }
1773
1774
1775 tmpRegBits := registerBits(a.temporaryRegister)
1776 offset32 := int32(offset)
1777
1778
1779
1780
1781 c := asm.NewStaticConst(make([]byte, 4))
1782 binary.LittleEndian.PutUint32(c.Raw, uint32(offset))
1783 a.pool.AddConst(c, uint64(buf.Len()))
1784
1785
1786
1787 hi := offset32 - (offset32 & (0xfff << uint(datasizeLog2)))
1788 if hi&^0xfff000 == 0 {
1789 var sfops byte = 0b100
1790 m := ((offset32 - hi) >> datasizeLog2) & 0xfff
1791 hi >>= 12
1792
1793
1794 buf.Append4Bytes(
1795 (baseRegBits<<5)|tmpRegBits,
1796 (byte(hi)<<2)|(baseRegBits>>3),
0b01<<6|byte(hi>>6),
1798 sfops<<5|0b10001,
1799 )
1800
1801 buf.Append4Bytes(
1802 (tmpRegBits<<5)|targetRegBits,
1803 (byte(m<<2))|(tmpRegBits>>3),
1804 opcode<<6|0b00_111111&byte(m>>6),
1805 size<<6|v<<2|0b00_1_11_0_01,
1806 )
1807 } else {
1808
1809
1810 loadLiteralOffsetInBinary := uint64(buf.Len())
1811
1812
1813
1814 buf.Append4Bytes(tmpRegBits, 0x0, 0x0, 0b00_011_0_00)
1815
1816
1817
1818 c.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
1819
1820 offset := (int(offsetOfConst) - int(loadLiteralOffsetInBinary)) / 4
1821 bin := buf.Bytes()
1822 bin[loadLiteralOffsetInBinary] |= byte(offset << 5)
1823 bin[loadLiteralOffsetInBinary+1] |= byte(offset >> 3)
1824 bin[loadLiteralOffsetInBinary+2] |= byte(offset >> 11)
1825 })
1826
1827
1828
1829 buf.Append4Bytes(
1830 (baseRegBits<<5)|targetRegBits,
1831 0b011_010_00|(baseRegBits>>3),
1832 opcode<<6|0b00_1_00000|tmpRegBits,
1833 size<<6|v<<2|0b00_111_0_00,
1834 )
1835 }
1836 return
1837 }
1838
1839 func (a *AssemblerImpl) encodeRegisterToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
1840
1841 var (
1842 size, v byte
1843 datasize, datasizeLog2 int64
1844 isTargetFloat bool
1845 )
1846 switch n.instruction {
1847 case STRD:
1848 size, v, datasize, datasizeLog2 = 0b11, 0x0, 8, 3
1849 case STRW:
1850 size, v, datasize, datasizeLog2 = 0b10, 0x0, 4, 2
1851 case STRH:
1852 size, v, datasize, datasizeLog2 = 0b01, 0x0, 2, 1
1853 case STRB:
1854 size, v, datasize, datasizeLog2 = 0b00, 0x0, 1, 0
1855 case FSTRD:
1856 size, v, datasize, datasizeLog2, isTargetFloat = 0b11, 0x1, 8, 3, true
1857 case FSTRS:
1858 size, v, datasize, datasizeLog2, isTargetFloat = 0b10, 0x1, 4, 2, true
1859 default:
1860 return errorEncodingUnsupported(n)
1861 }
1862
1863 var srcRegBits byte
1864 if isTargetFloat {
1865 srcRegBits, err = vectorRegisterBits(n.srcReg)
1866 } else {
1867 srcRegBits, err = intRegisterBits(n.srcReg)
1868 }
1869 if err != nil {
1870 return
1871 }
1872
1873 baseRegBits, err := intRegisterBits(n.dstReg)
1874 if err != nil {
1875 return err
1876 }
1877
1878 const opcode = 0x00
1879 if n.dstReg2 != asm.NilRegister {
1880 offsetRegBits, err := intRegisterBits(n.dstReg2)
1881 if err != nil {
1882 return err
1883 }
1884 a.encodeLoadOrStoreWithRegisterOffset(buf, baseRegBits, offsetRegBits, srcRegBits, opcode, size, v)
1885 } else {
1886 err = a.encodeLoadOrStoreWithConstOffset(buf, baseRegBits, srcRegBits, n.dstConst, opcode, size, v, datasize, datasizeLog2)
1887 }
1888 return
1889 }
1890
1891 func (a *AssemblerImpl) encodeADR(buf asm.Buffer, n *nodeImpl) (err error) {
1892 dstRegBits, err := intRegisterBits(n.dstReg)
1893 if err != nil {
1894 return err
1895 }
1896
1897 adrInstructionOffsetInBinary := uint64(buf.Len())
1898
1899
1900
1901
1902 buf.Append4Bytes(dstRegBits, 0x0, 0x0, 0b10000)
1903
1904
1905 if sc := n.staticConst; sc != nil {
1906 a.pool.AddConst(sc, adrInstructionOffsetInBinary)
1907 sc.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
1908 adrInstructionBytes := buf.Bytes()[adrInstructionOffsetInBinary : adrInstructionOffsetInBinary+4]
1909 offset := int(offsetOfConst) - int(adrInstructionOffsetInBinary)
1910
1911
1912 adrInstructionBytes[3] |= byte(offset & 0b00000011 << 5)
1913 offset >>= 2
1914 adrInstructionBytes[0] |= byte(offset << 5)
1915 offset >>= 3
1916 adrInstructionBytes[1] |= byte(offset)
1917 offset >>= 8
1918 adrInstructionBytes[2] |= byte(offset)
1919 })
1920 return
}

a.adrInstructionNodes = append(a.adrInstructionNodes, n)
return
1925 }
1926
1927 func (a *AssemblerImpl) finalizeADRInstructionNode(code []byte, n *nodeImpl) (err error) {
1928
1929 targetNode := n
1930 for ; targetNode != nil; targetNode = targetNode.next {
1931 if targetNode.instruction == n.readInstructionAddressBeforeTargetInstruction {
1932 targetNode = targetNode.next
1933 break
1934 }
1935 }
1936
1937 if targetNode == nil {
1938 return fmt.Errorf("BUG: target instruction %s not found for ADR", InstructionName(n.readInstructionAddressBeforeTargetInstruction))
1939 }
1940
1941 offset := targetNode.OffsetInBinary() - n.OffsetInBinary()
1942 if i64 := int64(offset); i64 >= 1<<20 || i64 < -1<<20 {
1943
1944
1945
1946 return fmt.Errorf("BUG: too large offset for ADR: %#x", offset)
1947 }
1948
1949 adrInstructionBytes := code[n.OffsetInBinary() : n.OffsetInBinary()+4]
1950
1951
1952 adrInstructionBytes[3] |= byte(offset & 0b00000011 << 5)
1953 offset >>= 2
1954 adrInstructionBytes[0] |= byte(offset << 5)
1955 offset >>= 3
1956 adrInstructionBytes[1] |= byte(offset)
1957 offset >>= 8
1958 adrInstructionBytes[2] |= byte(offset)
1959 return nil
1960 }
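
// In the fixup above, the low two bits of the byte offset land in the ADR immlo field (bits
// 30:29 of the instruction word) and the remaining bits in the 19-bit immhi field (bits 23:5),
// which is why ADR targets must stay within roughly +/-1MiB of the instruction.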
1961
1962 func (a *AssemblerImpl) encodeMemoryToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
1963
1964 var (
1965 size, v, opcode byte
1966 datasize, datasizeLog2 int64
1967 isTargetFloat bool
1968 )
1969 switch n.instruction {
1970 case ADR:
1971 return a.encodeADR(buf, n)
1972 case FLDRD:
1973 size, v, datasize, datasizeLog2, opcode, isTargetFloat = 0b11, 0x1, 8, 3, 0b01, true
1974 case FLDRS:
1975 size, v, datasize, datasizeLog2, opcode, isTargetFloat = 0b10, 0x1, 4, 2, 0b01, true
1976 case LDRD:
1977 size, v, datasize, datasizeLog2, opcode = 0b11, 0x0, 8, 3, 0b01
1978 case LDRW:
1979 size, v, datasize, datasizeLog2, opcode = 0b10, 0x0, 4, 2, 0b01
1980 case LDRSHD:
1981 size, v, datasize, datasizeLog2, opcode = 0b01, 0x0, 2, 1, 0b10
1982 case LDRSHW:
1983 size, v, datasize, datasizeLog2, opcode = 0b01, 0x0, 2, 1, 0b11
1984 case LDRH:
1985 size, v, datasize, datasizeLog2, opcode = 0b01, 0x0, 2, 1, 0b01
1986 case LDRSBD:
1987 size, v, datasize, datasizeLog2, opcode = 0b00, 0x0, 1, 0, 0b10
1988 case LDRSBW:
1989 size, v, datasize, datasizeLog2, opcode = 0b00, 0x0, 1, 0, 0b11
1990 case LDRB:
1991 size, v, datasize, datasizeLog2, opcode = 0b00, 0x0, 1, 0, 0b01
1992 case LDRSW:
1993 size, v, datasize, datasizeLog2, opcode = 0b10, 0x0, 4, 2, 0b10
1994 default:
1995 return errorEncodingUnsupported(n)
1996 }
1997
1998 var dstRegBits byte
1999 if isTargetFloat {
2000 dstRegBits, err = vectorRegisterBits(n.dstReg)
2001 } else {
2002 dstRegBits, err = intRegisterBits(n.dstReg)
2003 }
2004 if err != nil {
2005 return
2006 }
2007 baseRegBits, err := intRegisterBits(n.srcReg)
2008 if err != nil {
2009 return err
2010 }
2011
2012 if n.srcReg2 != asm.NilRegister {
2013 offsetRegBits, err := intRegisterBits(n.srcReg2)
2014 if err != nil {
2015 return err
2016 }
2017 a.encodeLoadOrStoreWithRegisterOffset(buf, baseRegBits, offsetRegBits, dstRegBits, opcode,
2018 size, v)
2019 } else {
2020 err = a.encodeLoadOrStoreWithConstOffset(buf, baseRegBits, dstRegBits, n.srcConst, opcode,
2021 size, v, datasize, datasizeLog2)
2022 }
2023 return
2024 }
2025
2026
2027
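// const16bitAligned returns the index (0 to 3) of the 16-bit chunk in which every set bit of v
// resides, or -1 if the set bits span more than one chunk. For example, 0x0000_0000_ffff_0000
// yields 1 while 0x0000_0001_0000_ffff yields -1; a non-negative result means the value can be
// loaded with a single MOVZ using a left shift of 16*result.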
2028 func const16bitAligned(v int64) (ret int) {
2029 ret = -1
2030 for s := 0; s < 64; s += 16 {
2031 if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
2032 ret = s / 16
2033 break
2034 }
2035 }
2036 return
2037 }
2038
2039
2040
2041
2042
2043
2044
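// isBitMaskImmediate reports whether x is encodable as a "bitmask immediate" of the arm64
// logical (immediate) instructions: the value must consist of identical 2, 4, 8, 16, 32 or
// 64-bit elements, each element being a single contiguous run of ones (possibly wrapping
// around). For example, 0x0000_ffff_0000_ffff (two identical 32-bit elements of sixteen ones)
// is encodable, whereas 0x1234 is not.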
2045 func isBitMaskImmediate(x uint64) bool {
2046
2047 if x == 0 || x == 0xffff_ffff_ffff_ffff {
2048 return false
2049 }
2050
2051 switch {
2052 case x != x>>32|x<<32:
2053
2054 case x != x>>16|x<<48:
2055
2056
2057 x = uint64(int32(x))
2058 case x != x>>8|x<<56:
2059
2060
2061 x = uint64(int16(x))
2062 case x != x>>4|x<<60:
2063
2064
2065 x = uint64(int8(x))
2066 default:
2067
2068 return true
2069 }
2070 return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
2071 }
2072
2073
2074
2075 func sequenceOfSetbits(x uint64) bool {
2076 y := getLowestBit(x)
2077
2078
2079 y += x
2080 return (y-1)&y == 0
2081 }
2082
2083 func getLowestBit(x uint64) uint64 {
2084
2085 return x & (^x + 1)
2086 }
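
// For example, sequenceOfSetbits(0b0011_1000) is true: the lowest set bit is 0b1000, adding it
// gives 0b0100_0000, which has a single bit set, so (y-1)&y == 0. By contrast,
// sequenceOfSetbits(0b0101) is false: adding the lowest bit gives 0b0110, which still has two
// bits set.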
2087
2088 func (a *AssemblerImpl) addOrSub64BitRegisters(buf asm.Buffer, sfops byte, sp bool, dstRegBits, src1RegBits, src2RegBits byte) {
2089
2090 if sp {
2091
2092 buf.Append4Bytes(
2093 (src1RegBits<<5)|dstRegBits,
2094 0b011<<5|src1RegBits>>3,
2095 1<<5|src2RegBits,
2096 sfops<<5|0b01011,
2097 )
2098 } else {
2099 buf.Append4Bytes(
2100 (src1RegBits<<5)|dstRegBits,
2101 src1RegBits>>3,
2102 src2RegBits,
2103 sfops<<5|0b01011,
2104 )
2105 }
2106 }
2107
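// bitmaskImmediate computes the (immr, imms, N) fields of an arm64 logical (immediate)
// instruction for a value c that is assumed to already satisfy isBitMaskImmediate. N together
// with imms encodes the repeating element size and the length of the run of ones, while immr
// encodes the rotation applied to that run; the cascade below first finds the smallest
// repeating element, then measures the run of ones and its position.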
2108 func bitmaskImmediate(c uint64, is64bit bool) (immr, imms, N byte) {
2109 var size uint32
2110 switch {
2111 case c != c>>32|c<<32:
2112 size = 64
2113 case c != c>>16|c<<48:
2114 size = 32
2115 c = uint64(int32(c))
2116 case c != c>>8|c<<56:
2117 size = 16
2118 c = uint64(int16(c))
2119 case c != c>>4|c<<60:
2120 size = 8
2121 c = uint64(int8(c))
2122 case c != c>>2|c<<62:
2123 size = 4
2124 c = uint64(int64(c<<60) >> 60)
2125 default:
2126 size = 2
2127 c = uint64(int64(c<<62) >> 62)
2128 }
2129
2130 neg := false
2131 if int64(c) < 0 {
2132 c = ^c
2133 neg = true
2134 }
2135
2136 onesSize, nonZeroPos := getOnesSequenceSize(c)
2137 if neg {
2138 nonZeroPos = onesSize + nonZeroPos
2139 onesSize = size - onesSize
2140 }
2141
2142 var mode byte = 32
2143 if is64bit {
2144 N, mode = 0b1, 64
2145 }
2146
2147 immr = byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
2148 imms = byte((onesSize - 1) | 63&^(size<<1-1))
2149 return
2150 }
2151
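// encodeConstToRegister encodes instructions whose source operand is a constant and whose
// destination is a register: logical immediates (ANDIMM32/ANDIMM64), ADD/ADDS/SUB/SUBS
// with an immediate (synthesized via the temporary register when the constant does not
// fit), constant loads (MOVW/MOVD) and immediate shifts (LSR/LSL).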
2152 func (a *AssemblerImpl) encodeConstToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
2153
2154 c := n.srcConst
2155
2156 dstRegBits, err := intRegisterBits(n.dstReg)
2157 if err != nil {
2158 return err
2159 }
2160
2161
2162
2163 switch n.instruction {
2164 case ANDIMM32:
2165 var sf, opc byte = 0b0, 0b00
2166 if !isBitMaskImmediate(uint64(c)) {
err = fmt.Errorf("const %d must be valid bitmask immediate for %s", c, InstructionName(ANDIMM32))
2168 return
2169 }
2170 immr, imms, N := bitmaskImmediate(uint64(c), false)
2171 buf.Append4Bytes(
2172 (dstRegBits<<5)|dstRegBits,
2173 imms<<2|dstRegBits>>3,
2174 N<<6|immr,
2175 sf<<7|opc<<5|0b10010,
2176 )
2177 return
2178 case ANDIMM64:
2179 var sf, opc byte = 0b1, 0b00
2180 if !isBitMaskImmediate(uint64(c)) {
2181 err = fmt.Errorf("const %d must be valid bitmask immediate for %s", c, InstructionName(ANDIMM64))
2182 return
2183 }
2184 immr, imms, N := bitmaskImmediate(uint64(c), true)
2185 buf.Append4Bytes(
2186 (dstRegBits<<5)|dstRegBits,
2187 imms<<2|dstRegBits>>3,
2188 N<<6|immr,
2189 sf<<7|opc<<5|0b10010,
2190 )
2191 return
2192 }
2193
2194 switch inst := n.instruction; inst {
2195 case ADD, ADDS, SUB, SUBS:
2196 srcRegBits := dstRegBits
2197 if n.srcReg != asm.NilRegister {
2198 srcRegBits, err = intRegisterBits(n.srcReg)
2199 if err != nil {
2200 return err
2201 }
2202 }
2203
2204 var sfops byte
2205 if inst == ADD {
2206 sfops = 0b100
2207 } else if inst == ADDS {
2208 sfops = 0b101
2209 } else if inst == SUB {
2210 sfops = 0b110
2211 } else if inst == SUBS {
2212 sfops = 0b111
2213 }
2214
2215 isSP := n.srcReg == RegSP || n.dstReg == RegSP
2216 if c == 0 {
2217
2218 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, zeroRegisterBits)
2219 return
2220 }
2221
2222 if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
2223
2224
2225
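// The constant fits in a 12-bit immediate, optionally shifted left by 12, so a single
// ADD/SUB (immediate) suffices.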
2226 if c <= 0xfff {
2227 buf.Append4Bytes(
2228 (srcRegBits<<5)|dstRegBits,
2229 (byte(c)<<2)|(srcRegBits>>3),
2230 byte(c>>6),
2231 sfops<<5|0b10001,
2232 )
2233 } else {
2234 c >>= 12
2235 buf.Append4Bytes(
2236 (srcRegBits<<5)|dstRegBits,
2237 (byte(c)<<2)|(srcRegBits>>3),
0b01<<6|byte(c>>6),
2239 sfops<<5|0b10001,
2240 )
2241 }
2242 return
2243 }
2244
2245 if t := const16bitAligned(c); t >= 0 {
2246
2247
2248
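// The constant (or its bitwise complement) occupies a single aligned 16-bit field:
// materialize it into the temporary register with one MOVZ/MOVN, then use the register form.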
2249 tmpRegBits := registerBits(a.temporaryRegister)
2250
2251
2252 a.load16bitAlignedConst(buf, c>>(16*t), byte(t), tmpRegBits, false, true)
2253
2254
2255 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
2256 return
2257 } else if t := const16bitAligned(^c); t >= 0 {
2258
2259
2260 tmpRegBits := registerBits(a.temporaryRegister)
2261
2262
2263 a.load16bitAlignedConst(buf, ^c>>(16*t), byte(t), tmpRegBits, true, true)
2264
2265
2266 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
2267 return
2268 }
2269
2270 if uc := uint64(c); isBitMaskImmediate(uc) {
2271
2272
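// The constant is a valid bitmask immediate: load it into the temporary register with
// ORR (immediate), then use the register form.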
2273 tmpRegBits := registerBits(a.temporaryRegister)
2274
2275 a.loadConstViaBitMaskImmediate(buf, uc, tmpRegBits, true)
2276
2277
2278 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
2279 return
2280 }
2281
2282
2283 if 0 <= c && c <= 0xffffff && inst != SUBS && inst != ADDS {
2284
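// The constant fits in 24 bits: split it into two 12-bit immediates and emit two
// instructions. ADDS/SUBS are excluded because the flags from the second instruction
// would not reflect the full operation.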
2285 buf.Append4Bytes(
2286 (dstRegBits<<5)|dstRegBits,
2287 (byte(c)<<2)|(dstRegBits>>3),
2288 byte(c&0xfff>>6),
2289 sfops<<5|0b10001,
2290 )
2291 c = c >> 12
2292 buf.Append4Bytes(
2293 (dstRegBits<<5)|dstRegBits,
2294 (byte(c)<<2)|(dstRegBits>>3),
0b01_000000|byte(c>>6),
2296 sfops<<5|0b10001,
2297 )
2298 return
2299 }
2300
2301
2302
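// Fall back: load the full 64-bit constant into the temporary register and use the
// register form.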
2303 tmpRegBits := registerBits(a.temporaryRegister)
2304 a.load64bitConst(buf, c, tmpRegBits)
2305 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
2306 case MOVW:
2307 if c == 0 {
2308 buf.Append4Bytes(
2309 (zeroRegisterBits<<5)|dstRegBits,
2310 zeroRegisterBits>>3,
2311 0b000_00000|zeroRegisterBits,
2312 0b0_01_01010,
2313 )
2314 return
2315 }
2316
2317
2318
2319 c32 := uint32(c)
2320 ic := int64(c32)
2321 if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
2322 if isBitMaskImmediate(uint64(c)) {
2323 a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, false)
2324 return
2325 }
2326 }
2327
2328 if t := const16bitAligned(int64(c32)); t >= 0 {
2329
2330
2331 a.load16bitAlignedConst(buf, int64(c32)>>(16*t), byte(t), dstRegBits, false, false)
2332 } else if t := const16bitAligned(int64(^c32)); t >= 0 {
2333
2334 a.load16bitAlignedConst(buf, int64(^c32)>>(16*t), byte(t), dstRegBits, true, false)
2335 } else if isBitMaskImmediate(uint64(c)) {
2336 a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, false)
2337 } else {
2338
2339
2340 c16 := uint16(c32)
2341
2342 buf.Append4Bytes(
2343 (byte(c16)<<5)|dstRegBits,
2344 byte(c16>>3),
2345 1<<7|byte(c16>>11),
2346 0b0_10_10010,
2347 )
2348
2349 c16 = uint16(c32 >> 16)
2350 if c16 != 0 {
2351 buf.Append4Bytes(
2352 (byte(c16)<<5)|dstRegBits,
2353 byte(c16>>3),
1<<7|0b0_01_00000|byte(c16>>11),
2355 0b0_11_10010,
2356 )
2357 }
2358 }
2359 case MOVD:
2360
2361
2362 if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
2363 if isBitMaskImmediate(uint64(c)) {
2364 a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, true)
2365 return
2366 }
2367 }
2368
2369 if t := const16bitAligned(c); t >= 0 {
2370
2371
2372 a.load16bitAlignedConst(buf, c>>(16*t), byte(t), dstRegBits, false, true)
2373 } else if t := const16bitAligned(^c); t >= 0 {
2374
2375 a.load16bitAlignedConst(buf, (^c)>>(16*t), byte(t), dstRegBits, true, true)
2376 } else if isBitMaskImmediate(uint64(c)) {
2377 a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, true)
2378 } else {
2379 a.load64bitConst(buf, c, dstRegBits)
2380 }
2381 case LSR:
2382 if c == 0 {
2383 err = errors.New("LSR with zero constant should be optimized out")
2384 return
2385 } else if c < 0 || c > 63 {
2386 err = fmt.Errorf("LSR requires immediate to be within 0 to 63, but got %d", c)
2387 return
2388 }
2389
2390
2391
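// LSR by an immediate is an alias of UBFM Rd, Rn, #c, #63.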
2392 buf.Append4Bytes(
2393 (dstRegBits<<5)|dstRegBits,
2394 0b111111_00|dstRegBits>>3,
2395 0b01_000000|byte(c),
2396 0b110_10011,
2397 )
2398 case LSL:
2399 if c == 0 {
2400 err = errors.New("LSL with zero constant should be optimized out")
2401 return
2402 } else if c < 0 || c > 63 {
2403 err = fmt.Errorf("LSL requires immediate to be within 0 to 63, but got %d", c)
2404 return
2405 }
2406
2407
2408
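// LSL by an immediate is an alias of UBFM Rd, Rn, #(64-c), #(63-c).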
2409 cb := byte(c)
2410 buf.Append4Bytes(
2411 (dstRegBits<<5)|dstRegBits,
2412 (0b111111-cb)<<2|dstRegBits>>3,
2413 0b01_000000|(64-cb),
2414 0b110_10011,
2415 )
2416
2417 default:
2418 return errorEncodingUnsupported(n)
2419 }
2420 return
2421 }
2422
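// movk encodes a 64-bit MOVK: move the 16-bit value v into bits
// [shiftNum*16, shiftNum*16+16) of the destination register, keeping the other bits.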
func (a *AssemblerImpl) movk(buf asm.Buffer, v uint64, shiftNum int, dstRegBits byte) {

buf.Append4Bytes(
(byte(v)<<5)|dstRegBits,
byte(v>>3),
1<<7|byte(shiftNum)<<5|(0b000_11111&byte(v>>11)),
2429 0b1_11_10010,
2430 )
2431 }
2432
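// movz encodes a 64-bit MOVZ: zero the destination register and move the 16-bit value v
// into bits [shiftNum*16, shiftNum*16+16).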
func (a *AssemblerImpl) movz(buf asm.Buffer, v uint64, shiftNum int, dstRegBits byte) {

buf.Append4Bytes(
(byte(v)<<5)|dstRegBits,
byte(v>>3),
1<<7|byte(shiftNum)<<5|(0b000_11111&byte(v>>11)),
2439 0b1_10_10010,
2440 )
2441 }
2442
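// movn encodes a 64-bit MOVN: move the bitwise negation of (v << (shiftNum*16)) into the
// destination register.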
func (a *AssemblerImpl) movn(buf asm.Buffer, v uint64, shiftNum int, dstRegBits byte) {

buf.Append4Bytes(
(byte(v)<<5)|dstRegBits,
byte(v>>3),
1<<7|byte(shiftNum)<<5|(0b000_11111&byte(v>>11)),
2449 0b1_00_10010,
2450 )
2451 }
2452
2453
2454
2455
2456
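// load64bitConst loads an arbitrary 64-bit constant into the register using one MOVZ or
// MOVN followed by up to three MOVKs, choosing MOVZ or MOVN depending on whether more of
// the four 16-bit chunks are all-zero or all-one. For example, 0x0000_0001_0000_0002 is
// emitted as MOVZ #2 followed by MOVK #1, LSL #32.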
2457 func (a *AssemblerImpl) load64bitConst(buf asm.Buffer, c int64, dstRegBits byte) {
2458 var bits [4]uint64
2459 var zeros, negs int
2460 for i := 0; i < 4; i++ {
2461 bits[i] = uint64((c >> uint(i*16)) & 0xffff)
2462 if v := bits[i]; v == 0 {
2463 zeros++
2464 } else if v == 0xffff {
2465 negs++
2466 }
2467 }
2468
2469 if zeros == 3 {
2470
2471 for i, v := range bits {
2472 if v != 0 {
2473 a.movz(buf, v, i, dstRegBits)
2474 }
2475 }
2476 } else if negs == 3 {
2477
2478 for i, v := range bits {
2479 if v != 0xffff {
2480 v = ^v
2481 a.movn(buf, v, i, dstRegBits)
2482 }
2483 }
2484 } else if zeros == 2 {
2485
2486 var movz bool
2487 for i, v := range bits {
2488 if !movz && v != 0 {
2489
2490 a.movz(buf, v, i, dstRegBits)
2491 movz = true
2492 } else if v != 0 {
2493 a.movk(buf, v, i, dstRegBits)
2494 }
2495 }
2496
2497 } else if negs == 2 {
2498
2499 var movn bool
2500 for i, v := range bits {
2501 if !movn && v != 0xffff {
2502 v = ^v
2503
2504 a.movn(buf, v, i, dstRegBits)
2505 movn = true
2506 } else if v != 0xffff {
2507 a.movk(buf, v, i, dstRegBits)
2508 }
2509 }
2510
2511 } else if zeros == 1 {
2512
2513 var movz bool
2514 for i, v := range bits {
2515 if !movz && v != 0 {
2516
2517 a.movz(buf, v, i, dstRegBits)
2518 movz = true
2519 } else if v != 0 {
2520 a.movk(buf, v, i, dstRegBits)
2521 }
2522 }
2523
2524 } else if negs == 1 {
2525
2526 var movn bool
2527 for i, v := range bits {
2528 if !movn && v != 0xffff {
2529 v = ^v
2530
2531 a.movn(buf, v, i, dstRegBits)
2532 movn = true
2533 } else if v != 0xffff {
2534 a.movk(buf, v, i, dstRegBits)
2535 }
2536 }
2537
2538 } else {
2539
2540 var movz bool
2541 for i, v := range bits {
2542 if !movz && v != 0 {
2543
2544 a.movz(buf, v, i, dstRegBits)
2545 movz = true
2546 } else if v != 0 {
2547 a.movk(buf, v, i, dstRegBits)
2548 }
2549 }
2550
2551 }
2552 }
2553
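// load16bitAlignedConst emits a single MOVZ (or MOVN when reverse is true) that places
// the 16-bit value c at bit position shiftNum*16. dst64bit selects the 64-bit form.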
2554 func (a *AssemblerImpl) load16bitAlignedConst(buf asm.Buffer, c int64, shiftNum byte, regBits byte, reverse bool, dst64bit bool) {
2555 var lastByte byte
2556 if reverse {
2557
2558 lastByte = 0b0_00_10010
2559 } else {
2560
2561 lastByte = 0b0_10_10010
2562 }
2563 if dst64bit {
2564 lastByte |= 0b1 << 7
2565 }
2566 buf.Append4Bytes(
2567 (byte(c)<<5)|regBits,
2568 byte(c>>3),
2569 1<<7|(shiftNum<<5)|byte(c>>11),
2570 lastByte,
2571 )
2572 }
2573
2574
2575
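// loadConstViaBitMaskImmediate loads c with a single ORR (immediate) against the zero
// register. c must be encodable as a bitmask immediate (see isBitMaskImmediate).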
2576 func (a *AssemblerImpl) loadConstViaBitMaskImmediate(buf asm.Buffer, c uint64, regBits byte, dst64bit bool) {
2577 var size uint32
2578 switch {
2579 case c != c>>32|c<<32:
2580 size = 64
2581 case c != c>>16|c<<48:
2582 size = 32
2583 c = uint64(int32(c))
2584 case c != c>>8|c<<56:
2585 size = 16
2586 c = uint64(int16(c))
2587 case c != c>>4|c<<60:
2588 size = 8
2589 c = uint64(int8(c))
2590 case c != c>>2|c<<62:
2591 size = 4
2592 c = uint64(int64(c<<60) >> 60)
2593 default:
2594 size = 2
2595 c = uint64(int64(c<<62) >> 62)
2596 }
2597
2598 neg := false
2599 if int64(c) < 0 {
2600 c = ^c
2601 neg = true
2602 }
2603
2604 onesSize, nonZeroPos := getOnesSequenceSize(c)
2605 if neg {
2606 nonZeroPos = onesSize + nonZeroPos
2607 onesSize = size - onesSize
2608 }
2609
2610
2611
2612 var n byte
2613 mode := 32
2614 if dst64bit && size == 64 {
2615 n = 0b1
2616 mode = 64
2617 }
2618
2619 r := byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
2620 s := byte((onesSize - 1) | 63&^(size<<1-1))
2621
2622 var sf byte
2623 if dst64bit {
2624 sf = 0b1
2625 }
2626 buf.Append4Bytes(
2627 (zeroRegisterBits<<5)|regBits,
2628 s<<2|(zeroRegisterBits>>3),
2629 n<<6|r,
2630 sf<<7|0b0_01_10010,
2631 )
2632 }
2633
2634 func getOnesSequenceSize(x uint64) (size, nonZeroPos uint32) {
2635
2636 y := getLowestBit(x)
2637 nonZeroPos = setBitPos(y)
2638 size = setBitPos(x+y) - nonZeroPos
2639 return
2640 }
2641
2642 func setBitPos(x uint64) (ret uint32) {
2643 for ; ; ret++ {
2644 if x == 0b1 {
2645 break
2646 }
2647 x = x >> 1
2648 }
2649 return
2650 }
2651
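// checkArrangementIndexPair validates that the lane index is in range for the given
// vector arrangement (e.g. index < 4 for a 4S arrangement).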
2652 func checkArrangementIndexPair(arr VectorArrangement, index VectorIndex) (err error) {
2653 if arr == VectorArrangementNone {
2654 return nil
2655 }
2656 var valid bool
2657 switch arr {
2658 case VectorArrangement8B:
2659 valid = index < 8
2660 case VectorArrangement16B:
2661 valid = index < 16
2662 case VectorArrangement4H:
2663 valid = index < 4
2664 case VectorArrangement8H:
2665 valid = index < 8
2666 case VectorArrangement2S:
2667 valid = index < 2
2668 case VectorArrangement4S:
2669 valid = index < 4
2670 case VectorArrangement1D:
2671 valid = index < 1
2672 case VectorArrangement2D:
2673 valid = index < 2
2674 case VectorArrangementB:
2675 valid = index < 16
2676 case VectorArrangementH:
2677 valid = index < 8
2678 case VectorArrangementS:
2679 valid = index < 4
2680 case VectorArrangementD:
2681 valid = index < 2
2682 }
2683 if !valid {
2684 err = fmt.Errorf("invalid arrangement and index pair: %s[%d]", arr, index)
2685 }
2686 return
2687 }
2688
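// encodeMemoryToVectorRegister encodes loads from memory into a vector register: plain
// loads via LDR (VMOV) and load-and-replicate via LD1R.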
2689 func (a *AssemblerImpl) encodeMemoryToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
2690 srcBaseRegBits, err := intRegisterBits(n.srcReg)
2691 if err != nil {
2692 return err
2693 }
2694
2695 dstVectorRegBits, err := vectorRegisterBits(n.dstReg)
2696 if err != nil {
2697 return err
2698 }
2699
2700 switch n.instruction {
2701 case VMOV:
2702
2703 var size, opcode byte
2704 var dataSize, dataSizeLog2 int64
2705 switch n.vectorArrangement {
2706 case VectorArrangementB:
2707 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b01, 1, 0
2708 case VectorArrangementH:
2709 size, opcode, dataSize, dataSizeLog2 = 0b01, 0b01, 2, 1
2710 case VectorArrangementS:
2711 size, opcode, dataSize, dataSizeLog2 = 0b10, 0b01, 4, 2
2712 case VectorArrangementD:
2713 size, opcode, dataSize, dataSizeLog2 = 0b11, 0b01, 8, 3
2714 case VectorArrangementQ:
2715 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b11, 16, 4
2716 }
2717 const v = 1
2718 if n.srcReg2 != asm.NilRegister {
2719 offsetRegBits, err := intRegisterBits(n.srcReg2)
2720 if err != nil {
2721 return err
2722 }
2723 a.encodeLoadOrStoreWithRegisterOffset(buf, srcBaseRegBits, offsetRegBits, dstVectorRegBits, opcode, size, v)
2724 } else {
2725 err = a.encodeLoadOrStoreWithConstOffset(buf, srcBaseRegBits, dstVectorRegBits,
2726 n.srcConst, opcode, size, v, dataSize, dataSizeLog2)
2727 }
2728 case LD1R:
2729 if n.srcReg2 != asm.NilRegister || n.srcConst != 0 {
2730 return fmt.Errorf("offset for %s is not implemented", InstructionName(LD1R))
2731 }
2732
2733 var size, q byte
2734 switch n.vectorArrangement {
2735 case VectorArrangement8B:
2736 size, q = 0b00, 0b0
2737 case VectorArrangement16B:
2738 size, q = 0b00, 0b1
2739 case VectorArrangement4H:
2740 size, q = 0b01, 0b0
2741 case VectorArrangement8H:
2742 size, q = 0b01, 0b1
2743 case VectorArrangement2S:
2744 size, q = 0b10, 0b0
2745 case VectorArrangement4S:
2746 size, q = 0b10, 0b1
2747 case VectorArrangement1D:
2748 size, q = 0b11, 0b0
2749 case VectorArrangement2D:
2750 size, q = 0b11, 0b1
2751 }
2752
2753
2754
2755 buf.Append4Bytes(
2756 (srcBaseRegBits<<5)|dstVectorRegBits,
2757 0b11_000000|size<<2|srcBaseRegBits>>3,
2758 0b01_000000,
2759 q<<6|0b1101,
2760 )
2761 default:
2762 return errorEncodingUnsupported(n)
2763 }
2764 return
2765 }
2766
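// arrangementSizeQ returns the size and Q fields shared by many Advanced SIMD encodings
// for the given vector arrangement.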
2767 func arrangementSizeQ(arr VectorArrangement) (size, q byte) {
2768 switch arr {
2769 case VectorArrangement8B:
2770 size, q = 0b00, 0
2771 case VectorArrangement16B:
2772 size, q = 0b00, 1
2773 case VectorArrangement4H:
2774 size, q = 0b01, 0
2775 case VectorArrangement8H:
2776 size, q = 0b01, 1
2777 case VectorArrangement2S:
2778 size, q = 0b10, 0
2779 case VectorArrangement4S:
2780 size, q = 0b10, 1
2781 case VectorArrangement1D:
2782 size, q = 0b11, 0
2783 case VectorArrangement2D:
2784 size, q = 0b11, 1
2785 }
2786 return
2787 }
2788
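// encodeVectorRegisterToMemory encodes stores of a vector register to memory via STR
// (VMOV), with either a register offset or a constant offset.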
2789 func (a *AssemblerImpl) encodeVectorRegisterToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
2790 srcVectorRegBits, err := vectorRegisterBits(n.srcReg)
2791 if err != nil {
2792 return err
2793 }
2794
2795 dstBaseRegBits, err := intRegisterBits(n.dstReg)
2796 if err != nil {
2797 return err
2798 }
2799
2800 switch n.instruction {
2801 case VMOV:
2802
2803 var size, opcode byte
2804 var dataSize, dataSizeLog2 int64
2805 switch n.vectorArrangement {
2806 case VectorArrangementB:
2807 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b00, 1, 0
2808 case VectorArrangementH:
2809 size, opcode, dataSize, dataSizeLog2 = 0b01, 0b00, 2, 1
2810 case VectorArrangementS:
2811 size, opcode, dataSize, dataSizeLog2 = 0b10, 0b00, 4, 2
2812 case VectorArrangementD:
2813 size, opcode, dataSize, dataSizeLog2 = 0b11, 0b00, 8, 3
2814 case VectorArrangementQ:
2815 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b10, 16, 4
2816 }
2817 const v = 1
2818
2819 if n.dstReg2 != asm.NilRegister {
2820 offsetRegBits, err := intRegisterBits(n.dstReg2)
2821 if err != nil {
2822 return err
2823 }
2824 a.encodeLoadOrStoreWithRegisterOffset(buf, dstBaseRegBits, offsetRegBits, srcVectorRegBits, opcode, size, v)
2825 } else {
2826 err = a.encodeLoadOrStoreWithConstOffset(buf, dstBaseRegBits, srcVectorRegBits,
2827 n.dstConst, opcode, size, v, dataSize, dataSizeLog2)
2828 }
2829 default:
2830 return errorEncodingUnsupported(n)
2831 }
2832 return
2833 }
2834
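// encodeStaticConstToVectorRegister encodes an LDR (literal, SIMD&FP) that loads a
// constant from the constant pool into a vector register. The PC-relative offset field is
// patched once the constant's final position in the binary is known.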
2835 func (a *AssemblerImpl) encodeStaticConstToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
2836 if n.instruction != VMOV {
2837 return errorEncodingUnsupported(n)
2838 }
2839
2840 dstRegBits, err := vectorRegisterBits(n.dstReg)
2841 if err != nil {
2842 return err
2843 }
2844
2845
2846
2847 var opc byte
2848 var constLength int
2849 switch n.vectorArrangement {
2850 case VectorArrangementS:
2851 opc, constLength = 0b00, 4
2852 case VectorArrangementD:
2853 opc, constLength = 0b01, 8
2854 case VectorArrangementQ:
2855 opc, constLength = 0b10, 16
2856 }
2857
2858 loadLiteralOffsetInBinary := uint64(buf.Len())
2859 a.pool.AddConst(n.staticConst, loadLiteralOffsetInBinary)
2860
2861 if len(n.staticConst.Raw) != constLength {
2862 return fmt.Errorf("invalid const length for %s: want %d but was %d",
2863 n.vectorArrangement, constLength, len(n.staticConst.Raw))
2864 }
2865
2866 buf.Append4Bytes(dstRegBits, 0x0, 0x0, opc<<6|0b11100)
2867 n.staticConst.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
2868
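// The offset field holds the PC-relative distance to the constant in units of 4 bytes.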
2869 offset := (int(offsetOfConst) - int(loadLiteralOffsetInBinary)) / 4
2870 bin := buf.Bytes()
2871 bin[loadLiteralOffsetInBinary] |= byte(offset << 5)
2872 bin[loadLiteralOffsetInBinary+1] |= byte(offset >> 3)
2873 bin[loadLiteralOffsetInBinary+2] |= byte(offset >> 11)
2874 })
2875 return
2876 }
2877
2878
2879
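// advancedSIMDTwoRegisterMisc maps each instruction in the "Advanced SIMD two-register
// miscellaneous" encoding group to its U/opcode bits and per-arrangement Q/size fields.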
2880 var advancedSIMDTwoRegisterMisc = map[asm.Instruction]struct {
2881 qAndSize map[VectorArrangement]qAndSize
2882 u, opcode byte
2883 }{
2884
2885 NOT: {
2886 u: 0b1, opcode: 0b00101,
2887 qAndSize: map[VectorArrangement]qAndSize{
2888 VectorArrangement16B: {size: 0b00, q: 0b1},
2889 VectorArrangement8B: {size: 0b00, q: 0b0},
2890 },
2891 },
2892
2893 VFNEG: {
2894 u: 0b1, opcode: 0b01111,
2895 qAndSize: map[VectorArrangement]qAndSize{
2896 VectorArrangement4S: {size: 0b10, q: 0b1},
2897 VectorArrangement2S: {size: 0b10, q: 0b0},
2898 VectorArrangement2D: {size: 0b11, q: 0b1},
2899 },
2900 },
2901
2902 VFABS: {u: 0, opcode: 0b01111, qAndSize: map[VectorArrangement]qAndSize{
2903 VectorArrangement2D: {size: 0b11, q: 0b1},
2904 VectorArrangement4S: {size: 0b10, q: 0b1},
2905 VectorArrangement2S: {size: 0b10, q: 0b0},
2906 }},
2907
2908 VFSQRT: {u: 1, opcode: 0b11111, qAndSize: map[VectorArrangement]qAndSize{
2909 VectorArrangement2D: {size: 0b11, q: 0b1},
2910 VectorArrangement4S: {size: 0b10, q: 0b1},
2911 VectorArrangement2S: {size: 0b10, q: 0b0},
2912 }},
2913
2914 VFRINTM: {u: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{
2915 VectorArrangement2D: {size: 0b01, q: 0b1},
2916 VectorArrangement4S: {size: 0b00, q: 0b1},
2917 VectorArrangement2S: {size: 0b00, q: 0b0},
2918 }},
2919
2920 VFRINTN: {u: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{
2921 VectorArrangement2D: {size: 0b01, q: 0b1},
2922 VectorArrangement4S: {size: 0b00, q: 0b1},
2923 VectorArrangement2S: {size: 0b00, q: 0b0},
2924 }},
2925
2926 VFRINTP: {u: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{
2927 VectorArrangement2D: {size: 0b11, q: 0b1},
2928 VectorArrangement4S: {size: 0b10, q: 0b1},
2929 VectorArrangement2S: {size: 0b10, q: 0b0},
2930 }},
2931
2932 VFRINTZ: {u: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{
2933 VectorArrangement2D: {size: 0b11, q: 0b1},
2934 VectorArrangement4S: {size: 0b10, q: 0b1},
2935 VectorArrangement2S: {size: 0b10, q: 0b0},
2936 }},
2937
2938 VCNT: {u: 0b0, opcode: 0b00101, qAndSize: map[VectorArrangement]qAndSize{
2939 VectorArrangement8B: {size: 0b00, q: 0b0},
2940 VectorArrangement16B: {size: 0b00, q: 0b1},
2941 }},
2942
2943 VNEG: {u: 0b1, opcode: 0b01011, qAndSize: defaultQAndSize},
2944
2945 VABS: {u: 0b0, opcode: 0b01011, qAndSize: defaultQAndSize},
2946
2947 REV64: {u: 0b0, opcode: 0b00000, qAndSize: defaultQAndSize},
2948
2949 XTN: {u: 0b0, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{
2950 VectorArrangement2D: {q: 0, size: 0b10},
2951 VectorArrangement4S: {q: 0, size: 0b01},
2952 VectorArrangement8H: {q: 0, size: 0b00},
2953 }},
2954 SHLL: {u: 0b1, opcode: 0b10011, qAndSize: map[VectorArrangement]qAndSize{
2955 VectorArrangement8B: {q: 0b00, size: 0b00},
2956 VectorArrangement4H: {q: 0b00, size: 0b01},
2957 VectorArrangement2S: {q: 0b00, size: 0b10},
2958 }},
2959
2960 CMEQZERO: {u: 0b0, opcode: 0b01001, qAndSize: defaultQAndSize},
2961
2962 SADDLP: {u: 0b0, opcode: 0b00010, qAndSize: defaultQAndSize},
2963
2964 UADDLP: {u: 0b1, opcode: 0b00010, qAndSize: defaultQAndSize},
2965
2966 VFCVTZS: {u: 0b0, opcode: 0b11011, qAndSize: map[VectorArrangement]qAndSize{
2967 VectorArrangement4S: {size: 0b10, q: 0b1},
2968 VectorArrangement2S: {size: 0b10, q: 0b0},
2969 VectorArrangement2D: {size: 0b11, q: 0b1},
2970 }},
2971
2972 VFCVTZU: {u: 0b1, opcode: 0b11011, qAndSize: map[VectorArrangement]qAndSize{
2973 VectorArrangement4S: {size: 0b10, q: 0b1},
2974 VectorArrangement2S: {size: 0b10, q: 0b0},
2975 VectorArrangement2D: {size: 0b11, q: 0b1},
2976 }},
2977
2978 SQXTN: {u: 0b0, opcode: 0b10100, qAndSize: map[VectorArrangement]qAndSize{
2979 VectorArrangement8B: {q: 0b0, size: 0b00},
2980 VectorArrangement4H: {q: 0b0, size: 0b01},
2981 VectorArrangement2S: {q: 0b0, size: 0b10},
2982 }},
2983
2984
2985 SQXTN2: {u: 0b0, opcode: 0b10100, qAndSize: map[VectorArrangement]qAndSize{
2986 VectorArrangement16B: {q: 0b1, size: 0b00},
2987 VectorArrangement8H: {q: 0b1, size: 0b01},
2988 VectorArrangement4S: {q: 0b1, size: 0b10},
2989 }},
2990
2991 UQXTN: {u: 0b1, opcode: 0b10100, qAndSize: defaultQAndSize},
2992
2993 SQXTUN: {u: 0b1, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{
2994 VectorArrangement8B: {q: 0b0, size: 0b00},
2995 VectorArrangement4H: {q: 0b0, size: 0b01},
2996 VectorArrangement2S: {q: 0b0, size: 0b10},
2997 }},
2998
2999 SQXTUN2: {u: 0b1, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{
3000 VectorArrangement16B: {q: 0b1, size: 0b00},
3001 VectorArrangement8H: {q: 0b1, size: 0b01},
3002 VectorArrangement4S: {q: 0b1, size: 0b10},
3003 }},
3004
3005 VSCVTF: {u: 0b0, opcode: 0b11101, qAndSize: map[VectorArrangement]qAndSize{
3006 VectorArrangement2D: {q: 0b1, size: 0b01},
3007 VectorArrangement4S: {q: 0b1, size: 0b00},
3008 VectorArrangement2S: {q: 0b0, size: 0b00},
3009 }},
3010
3011 VUCVTF: {u: 0b1, opcode: 0b11101, qAndSize: map[VectorArrangement]qAndSize{
3012 VectorArrangement2D: {q: 0b1, size: 0b01},
3013 VectorArrangement4S: {q: 0b1, size: 0b00},
3014 VectorArrangement2S: {q: 0b0, size: 0b00},
3015 }},
3016
3017 FCVTL: {u: 0b0, opcode: 0b10111, qAndSize: map[VectorArrangement]qAndSize{
3018 VectorArrangement2S: {size: 0b01, q: 0b0},
3019 VectorArrangement4H: {size: 0b00, q: 0b0},
3020 }},
3021
3022 FCVTN: {u: 0b0, opcode: 0b10110, qAndSize: map[VectorArrangement]qAndSize{
3023 VectorArrangement2S: {size: 0b01, q: 0b0},
3024 VectorArrangement4H: {size: 0b00, q: 0b0},
3025 }},
3026 }
3027
3028
3029
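// advancedSIMDThreeDifferent maps each instruction in the "Advanced SIMD three different"
// encoding group to its U/opcode bits and per-arrangement Q/size fields.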
3030 var advancedSIMDThreeDifferent = map[asm.Instruction]struct {
3031 qAndSize map[VectorArrangement]qAndSize
3032 u, opcode byte
3033 }{
3034
3035 VUMLAL: {u: 0b1, opcode: 0b1000, qAndSize: map[VectorArrangement]qAndSize{
3036 VectorArrangement2S: {q: 0b0, size: 0b10},
3037 VectorArrangement4H: {q: 0b0, size: 0b01},
3038 VectorArrangement8B: {q: 0b0, size: 0b00},
3039 }},
3040
3041 SMULL: {u: 0b0, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{
3042 VectorArrangement8B: {q: 0b0, size: 0b00},
3043 VectorArrangement4H: {q: 0b0, size: 0b01},
3044 VectorArrangement2S: {q: 0b0, size: 0b10},
3045 }},
3046
3047 SMULL2: {u: 0b0, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{
3048 VectorArrangement16B: {q: 0b1, size: 0b00},
3049 VectorArrangement8H: {q: 0b1, size: 0b01},
3050 VectorArrangement4S: {q: 0b1, size: 0b10},
3051 }},
3052
3053 UMULL: {u: 0b1, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{
3054 VectorArrangement8B: {q: 0b0, size: 0b00},
3055 VectorArrangement4H: {q: 0b0, size: 0b01},
3056 VectorArrangement2S: {q: 0b0, size: 0b10},
3057 }},
3058
3059 UMULL2: {u: 0b1, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{
3060 VectorArrangement16B: {q: 0b1, size: 0b00},
3061 VectorArrangement8H: {q: 0b1, size: 0b01},
3062 VectorArrangement4S: {q: 0b1, size: 0b10},
3063 }},
3064 }
3065
3066
3067
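// advancedSIMDThreeSame maps each instruction in the "Advanced SIMD three same" encoding
// group to its U/opcode bits and per-arrangement Q/size fields.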
3068 var advancedSIMDThreeSame = map[asm.Instruction]struct {
3069 qAndSize map[VectorArrangement]qAndSize
3070 u, opcode byte
3071 }{
3072
3073 VAND: {
3074 u: 0b0, opcode: 0b00011,
3075 qAndSize: map[VectorArrangement]qAndSize{
3076 VectorArrangement16B: {size: 0b00, q: 0b1},
3077 VectorArrangement8B: {size: 0b00, q: 0b0},
3078 },
3079 },
3080
3081 BSL: {
3082 u: 0b1, opcode: 0b00011,
3083 qAndSize: map[VectorArrangement]qAndSize{
3084 VectorArrangement16B: {size: 0b01, q: 0b1},
3085 VectorArrangement8B: {size: 0b01, q: 0b0},
3086 },
3087 },
3088
3089 EOR: {
3090 u: 0b1, opcode: 0b00011,
3091 qAndSize: map[VectorArrangement]qAndSize{
3092 VectorArrangement16B: {size: 0b00, q: 0b1},
3093 VectorArrangement8B: {size: 0b00, q: 0b0},
3094 },
3095 },
3096
3097 VORR: {
3098 u: 0b0, opcode: 0b00011,
3099 qAndSize: map[VectorArrangement]qAndSize{
3100 VectorArrangement16B: {size: 0b10, q: 0b1},
3101 VectorArrangement8B: {size: 0b10, q: 0b0},
3102 },
3103 },
3104
3105 BIC: {
3106 u: 0b0, opcode: 0b00011,
3107 qAndSize: map[VectorArrangement]qAndSize{
3108 VectorArrangement16B: {size: 0b01, q: 0b1},
3109 VectorArrangement8B: {size: 0b01, q: 0b0},
3110 },
3111 },
3112
3113 VFADDS: {
3114 u: 0b0, opcode: 0b11010,
3115 qAndSize: map[VectorArrangement]qAndSize{
3116 VectorArrangement4S: {size: 0b00, q: 0b1},
3117 VectorArrangement2S: {size: 0b00, q: 0b0},
3118 },
3119 },
3120
3121 VFADDD: {
3122 u: 0b0, opcode: 0b11010,
3123 qAndSize: map[VectorArrangement]qAndSize{
3124 VectorArrangement2D: {size: 0b01, q: 0b1},
3125 },
3126 },
3127
3128 VFSUBS: {
3129 u: 0b0, opcode: 0b11010,
3130 qAndSize: map[VectorArrangement]qAndSize{
3131 VectorArrangement4S: {size: 0b10, q: 0b1},
3132 VectorArrangement2S: {size: 0b10, q: 0b0},
3133 },
3134 },
3135
3136 VFSUBD: {
3137 u: 0b0, opcode: 0b11010,
3138 qAndSize: map[VectorArrangement]qAndSize{
3139 VectorArrangement2D: {size: 0b11, q: 0b1},
3140 },
3141 },
3142
3143 UMAXP: {u: 0b1, opcode: 0b10100, qAndSize: defaultQAndSize},
3144
3145 CMEQ: {u: 0b1, opcode: 0b10001, qAndSize: defaultQAndSize},
3146
3147 VADDP: {u: 0b0, opcode: 0b10111, qAndSize: defaultQAndSize},
3148
3149 VADD: {u: 0, opcode: 0b10000, qAndSize: defaultQAndSize},
3150
3151 VSUB: {u: 1, opcode: 0b10000, qAndSize: defaultQAndSize},
3152
3153 SSHL: {u: 0, opcode: 0b01000, qAndSize: defaultQAndSize},
3154
3155 USHL: {u: 0b1, opcode: 0b01000, qAndSize: defaultQAndSize},
3156
3157 CMGT: {u: 0b0, opcode: 0b00110, qAndSize: defaultQAndSize},
3158
3159 CMHI: {u: 0b1, opcode: 0b00110, qAndSize: defaultQAndSize},
3160
3161 CMGE: {u: 0b0, opcode: 0b00111, qAndSize: defaultQAndSize},
3162
3163 CMHS: {u: 0b1, opcode: 0b00111, qAndSize: defaultQAndSize},
3164
3165 FCMEQ: {
3166 u: 0b0, opcode: 0b11100,
3167 qAndSize: map[VectorArrangement]qAndSize{
3168 VectorArrangement4S: {size: 0b00, q: 0b1},
3169 VectorArrangement2S: {size: 0b00, q: 0b0},
3170 VectorArrangement2D: {size: 0b01, q: 0b1},
3171 },
3172 },
3173
3174 FCMGT: {
3175 u: 0b1, opcode: 0b11100,
3176 qAndSize: map[VectorArrangement]qAndSize{
3177 VectorArrangement4S: {size: 0b10, q: 0b1},
3178 VectorArrangement2S: {size: 0b10, q: 0b0},
3179 VectorArrangement2D: {size: 0b11, q: 0b1},
3180 },
3181 },
3182
3183 FCMGE: {
3184 u: 0b1, opcode: 0b11100,
3185 qAndSize: map[VectorArrangement]qAndSize{
3186 VectorArrangement4S: {size: 0b00, q: 0b1},
3187 VectorArrangement2S: {size: 0b00, q: 0b0},
3188 VectorArrangement2D: {size: 0b01, q: 0b1},
3189 },
3190 },
3191
3192 VFMIN: {
3193 u: 0b0, opcode: 0b11110,
3194 qAndSize: map[VectorArrangement]qAndSize{
3195 VectorArrangement4S: {size: 0b10, q: 0b1},
3196 VectorArrangement2S: {size: 0b10, q: 0b0},
3197 VectorArrangement2D: {size: 0b11, q: 0b1},
3198 },
3199 },
3200
3201 VFMAX: {
3202 u: 0b0, opcode: 0b11110,
3203 qAndSize: map[VectorArrangement]qAndSize{
3204 VectorArrangement4S: {size: 0b00, q: 0b1},
3205 VectorArrangement2S: {size: 0b00, q: 0b0},
3206 VectorArrangement2D: {size: 0b01, q: 0b1},
3207 },
3208 },
3209
3210 VFMUL: {
3211 u: 0b1, opcode: 0b11011,
3212 qAndSize: map[VectorArrangement]qAndSize{
3213 VectorArrangement4S: {size: 0b00, q: 0b1},
3214 VectorArrangement2S: {size: 0b00, q: 0b0},
3215 VectorArrangement2D: {size: 0b01, q: 0b1},
3216 },
3217 },
3218
3219 VFDIV: {
3220 u: 0b1, opcode: 0b11111,
3221 qAndSize: map[VectorArrangement]qAndSize{
3222 VectorArrangement4S: {size: 0b00, q: 0b1},
3223 VectorArrangement2S: {size: 0b00, q: 0b0},
3224 VectorArrangement2D: {size: 0b01, q: 0b1},
3225 },
3226 },
3227
3228 VMUL: {u: 0b0, opcode: 0b10011, qAndSize: defaultQAndSize},
3229
3230 VSQADD: {u: 0b0, opcode: 0b00001, qAndSize: defaultQAndSize},
3231
3232 VUQADD: {u: 0b1, opcode: 0b00001, qAndSize: defaultQAndSize},
3233
3234 SMIN: {u: 0b0, opcode: 0b01101, qAndSize: defaultQAndSize},
3235
3236 SMAX: {u: 0b0, opcode: 0b01100, qAndSize: defaultQAndSize},
3237
3238 UMIN: {u: 0b1, opcode: 0b01101, qAndSize: defaultQAndSize},
3239
3240 UMAX: {u: 0b1, opcode: 0b01100, qAndSize: defaultQAndSize},
3241
3242 URHADD: {u: 0b1, opcode: 0b00010, qAndSize: defaultQAndSize},
3243
3244 VSQSUB: {u: 0b0, opcode: 0b00101, qAndSize: defaultQAndSize},
3245
3246 VUQSUB: {u: 0b1, opcode: 0b00101, qAndSize: defaultQAndSize},
3247
3248 VBIT: {u: 0b1, opcode: 0b00011, qAndSize: map[VectorArrangement]qAndSize{
3249 VectorArrangement8B: {q: 0b0, size: 0b10},
3250 VectorArrangement16B: {q: 0b1, size: 0b10},
3251 }},
3252 SQRDMULH: {u: 0b1, opcode: 0b10110, qAndSize: map[VectorArrangement]qAndSize{
3253 VectorArrangement4H: {q: 0b0, size: 0b01},
3254 VectorArrangement8H: {q: 0b1, size: 0b01},
3255 VectorArrangement2S: {q: 0b0, size: 0b10},
3256 VectorArrangement4S: {q: 0b1, size: 0b10},
3257 }},
3258 }
3259
3260
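// qAndSize holds the Q and size fields of an Advanced SIMD encoding.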
3261 type qAndSize struct{ q, size byte }
3262
3263
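// defaultQAndSize is the Q/size mapping shared by instructions that accept all the
// standard integer arrangements (8B through 2D).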
3264 var defaultQAndSize = map[VectorArrangement]qAndSize{
3265 VectorArrangement8B: {size: 0b00, q: 0b0},
3266 VectorArrangement16B: {size: 0b00, q: 0b1},
3267 VectorArrangement4H: {size: 0b01, q: 0b0},
3268 VectorArrangement8H: {size: 0b01, q: 0b1},
3269 VectorArrangement2S: {size: 0b10, q: 0b0},
3270 VectorArrangement4S: {size: 0b10, q: 0b1},
3271 VectorArrangement1D: {size: 0b11, q: 0b0},
3272 VectorArrangement2D: {size: 0b11, q: 0b1},
3273 }
3274
3275
3276
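// advancedSIMDAcrossLanes maps each instruction in the "Advanced SIMD across lanes"
// encoding group to its U/opcode bits and per-arrangement Q/size fields.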
3277 var advancedSIMDAcrossLanes = map[asm.Instruction]struct {
3278 qAndSize map[VectorArrangement]qAndSize
3279 u, opcode byte
3280 }{
3281
3282 ADDV: {
3283 u: 0b0, opcode: 0b11011,
3284 qAndSize: map[VectorArrangement]qAndSize{
3285 VectorArrangement16B: {size: 0b00, q: 0b1},
3286 VectorArrangement8B: {size: 0b00, q: 0b0},
3287 VectorArrangement8H: {size: 0b01, q: 0b1},
3288 VectorArrangement4H: {size: 0b01, q: 0b0},
3289 VectorArrangement4S: {size: 0b10, q: 0b1},
3290 },
3291 },
3292
3293 UMINV: {
3294 u: 0b1, opcode: 0b11010,
3295 qAndSize: map[VectorArrangement]qAndSize{
3296 VectorArrangement16B: {size: 0b00, q: 0b1},
3297 VectorArrangement8B: {size: 0b00, q: 0b0},
3298 VectorArrangement8H: {size: 0b01, q: 0b1},
3299 VectorArrangement4H: {size: 0b01, q: 0b0},
3300 VectorArrangement4S: {size: 0b10, q: 0b1},
3301 },
3302 },
3303 UADDLV: {u: 0b1, opcode: 0b00011, qAndSize: map[VectorArrangement]qAndSize{
3304 VectorArrangement16B: {size: 0b00, q: 0b1},
3305 VectorArrangement8B: {size: 0b00, q: 0b0},
3306 VectorArrangement8H: {size: 0b01, q: 0b1},
3307 VectorArrangement4H: {size: 0b01, q: 0b0},
3308 VectorArrangement4S: {size: 0b10, q: 0b1},
3309 }},
3310 }
3311
3312
3313
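// advancedSIMDScalarPairwise maps each instruction in the "Advanced SIMD scalar pairwise"
// encoding group to its U/opcode bits and per-arrangement size field.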
3314 var advancedSIMDScalarPairwise = map[asm.Instruction]struct {
3315 size map[VectorArrangement]byte
3316 u, opcode byte
3317 }{
3318
3319 ADDP: {u: 0b0, opcode: 0b11011, size: map[VectorArrangement]byte{VectorArrangement2D: 0b11}},
3320 }
3321
3322
3323
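// advancedSIMDCopy maps each instruction in the "Advanced SIMD copy" encoding group to
// its op bit and a resolver that computes the imm5/imm4/Q fields from the source and
// destination lane indexes and the arrangement.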
3324 var advancedSIMDCopy = map[asm.Instruction]struct {
3325
3326 resolver func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error)
3327 op byte
3328 }{
3329
3330 DUPELEM: {op: 0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
3331 imm4 = 0b0000
3332 q = 0b1
3333
3334 switch arr {
3335 case VectorArrangementB:
3336 imm5 |= 0b1
3337 imm5 |= byte(srcIndex) << 1
3338 case VectorArrangementH:
3339 imm5 |= 0b10
3340 imm5 |= byte(srcIndex) << 2
3341 case VectorArrangementS:
3342 imm5 |= 0b100
3343 imm5 |= byte(srcIndex) << 3
3344 case VectorArrangementD:
3345 imm5 |= 0b1000
3346 imm5 |= byte(srcIndex) << 4
3347 default:
3348 err = fmt.Errorf("unsupported arrangement for DUPELEM: %d", arr)
3349 }
3350
3351 return
3352 }},
3353
3354 DUPGEN: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
3355 imm4 = 0b0001
3356 switch arr {
3357 case VectorArrangement8B:
3358 imm5 = 0b1
3359 case VectorArrangement16B:
3360 imm5 = 0b1
3361 q = 0b1
3362 case VectorArrangement4H:
3363 imm5 = 0b10
3364 case VectorArrangement8H:
3365 imm5 = 0b10
3366 q = 0b1
3367 case VectorArrangement2S:
3368 imm5 = 0b100
3369 case VectorArrangement4S:
3370 imm5 = 0b100
3371 q = 0b1
3372 case VectorArrangement2D:
3373 imm5 = 0b1000
3374 q = 0b1
3375 default:
3376 err = fmt.Errorf("unsupported arrangement for DUPGEN: %s", arr)
3377 }
3378 return
3379 }},
3380
3381 INSGEN: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
3382 imm4, q = 0b0011, 0b1
3383 switch arr {
3384 case VectorArrangementB:
3385 imm5 |= 0b1
3386 imm5 |= byte(dstIndex) << 1
3387 case VectorArrangementH:
3388 imm5 |= 0b10
3389 imm5 |= byte(dstIndex) << 2
3390 case VectorArrangementS:
3391 imm5 |= 0b100
3392 imm5 |= byte(dstIndex) << 3
3393 case VectorArrangementD:
3394 imm5 |= 0b1000
3395 imm5 |= byte(dstIndex) << 4
3396 default:
3397 err = fmt.Errorf("unsupported arrangement for INSGEN: %s", arr)
3398 }
3399 return
3400 }},
3401
3402 UMOV: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
3403 imm4 = 0b0111
3404 switch arr {
3405 case VectorArrangementB:
3406 imm5 |= 0b1
3407 imm5 |= byte(srcIndex) << 1
3408 case VectorArrangementH:
3409 imm5 |= 0b10
3410 imm5 |= byte(srcIndex) << 2
3411 case VectorArrangementS:
3412 imm5 |= 0b100
3413 imm5 |= byte(srcIndex) << 3
3414 case VectorArrangementD:
3415 imm5 |= 0b1000
3416 imm5 |= byte(srcIndex) << 4
3417 q = 0b1
3418 default:
3419 err = fmt.Errorf("unsupported arrangement for UMOV: %s", arr)
3420 }
3421 return
3422 }},
3423
3424 SMOV32: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
3425 imm4 = 0b0101
3426 switch arr {
3427 case VectorArrangementB:
3428 imm5 |= 0b1
3429 imm5 |= byte(srcIndex) << 1
3430 case VectorArrangementH:
3431 imm5 |= 0b10
3432 imm5 |= byte(srcIndex) << 2
3433 default:
3434 err = fmt.Errorf("unsupported arrangement for SMOV32: %s", arr)
3435 }
3436 return
3437 }},
3438
3439 INSELEM: {op: 0b1, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
3440 q = 0b1
3441 switch arr {
3442 case VectorArrangementB:
3443 imm5 |= 0b1
3444 imm5 |= byte(dstIndex) << 1
3445 imm4 = byte(srcIndex)
3446 case VectorArrangementH:
3447 imm5 |= 0b10
3448 imm5 |= byte(dstIndex) << 2
3449 imm4 = byte(srcIndex) << 1
3450 case VectorArrangementS:
3451 imm5 |= 0b100
3452 imm5 |= byte(dstIndex) << 3
3453 imm4 = byte(srcIndex) << 2
3454 case VectorArrangementD:
3455 imm5 |= 0b1000
3456 imm5 |= byte(dstIndex) << 4
3457 imm4 = byte(srcIndex) << 3
3458 default:
3459 err = fmt.Errorf("unsupported arrangement for INSELEM: %d", arr)
3460 }
3461 return
3462 }},
3463 }
3464
3465
3466
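// advancedSIMDTableLookup maps each instruction in the "Advanced SIMD table lookup"
// encoding group to its op/op2/Len bits and per-arrangement Q field.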
3467 var advancedSIMDTableLookup = map[asm.Instruction]struct {
3468 q map[VectorArrangement]byte
3469 op, op2, Len byte
3470 }{
3471 TBL1: {op: 0, op2: 0, Len: 0b00, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}},
3472 TBL2: {op: 0, op2: 0, Len: 0b01, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}},
3473 }
3474
3475
3476
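// advancedSIMDShiftByImmediate maps each instruction in the "Advanced SIMD shift by
// immediate" encoding group to its U/opcode bits, the per-arrangement Q field, and a
// resolver that computes the immh/immb fields from the shift amount.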
3477 var advancedSIMDShiftByImmediate = map[asm.Instruction]struct {
3478 q map[VectorArrangement]byte
3479 immResolver func(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error)
3480 U, opcode byte
3481 }{
3482
3483 SSHLL: {
3484 U: 0b0, opcode: 0b10100,
3485 q: map[VectorArrangement]byte{VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0},
immResolver: immResolverForSIMDShiftLeftByImmediate,
3487 },
3488
3489 SSHLL2: {
3490 U: 0b0, opcode: 0b10100,
3491 q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1},
immResolver: immResolverForSIMDShiftLeftByImmediate,
3493 },
3494
3495 USHLL: {
3496 U: 0b1, opcode: 0b10100,
3497 q: map[VectorArrangement]byte{VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0},
immResolver: immResolverForSIMDShiftLeftByImmediate,
3499 },
3500
3501 USHLL2: {
3502 U: 0b1, opcode: 0b10100,
3503 q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1},
immResolver: immResolverForSIMDShiftLeftByImmediate,
3505 },
3506
3507 SSHR: {
3508 U: 0b0, opcode: 0b00000,
3509 q: map[VectorArrangement]byte{
3510 VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1, VectorArrangement2D: 0b1,
3511 VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0,
3512 },
3513 immResolver: func(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error) {
3514 switch arr {
3515 case VectorArrangement16B, VectorArrangement8B:
3516 immh = 0b0001
3517 immb = 8 - byte(shiftAmount&0b111)
3518 case VectorArrangement8H, VectorArrangement4H:
3519 v := 16 - byte(shiftAmount&0b1111)
3520 immb = v & 0b111
3521 immh = 0b0010 | (v >> 3)
3522 case VectorArrangement4S, VectorArrangement2S:
3523 v := 32 - byte(shiftAmount&0b11111)
3524 immb = v & 0b111
3525 immh = 0b0100 | (v >> 3)
3526 case VectorArrangement2D:
3527 v := 64 - byte(shiftAmount&0b111111)
3528 immb = v & 0b111
3529 immh = 0b1000 | (v >> 3)
3530 default:
3531 err = fmt.Errorf("unsupported arrangement %s", arr)
3532 }
3533 return
3534 },
3535 },
3536 }
3537
3538
3539
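// advancedSIMDPermute maps each instruction in the "Advanced SIMD permute" encoding
// group to its opcode bits.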
3540 var advancedSIMDPermute = map[asm.Instruction]struct {
3541 opcode byte
3542 }{
3543 ZIP1: {opcode: 0b011},
3544 }
3545
// immResolverForSIMDShiftLeftByImmediate computes the immh/immb fields for the left-shift
// (SSHLL/USHLL) forms, where the encoded value is the element size plus the shift amount.
func immResolverForSIMDShiftLeftByImmediate(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error) {
3547 switch arr {
3548 case VectorArrangement16B, VectorArrangement8B:
3549 immb = byte(shiftAmount)
3550 immh = 0b0001
3551 case VectorArrangement8H, VectorArrangement4H:
3552 immb = byte(shiftAmount) & 0b111
3553 immh = 0b0010 | byte(shiftAmount>>3)
3554 case VectorArrangement4S, VectorArrangement2S:
3555 immb = byte(shiftAmount) & 0b111
3556 immh = 0b0100 | byte(shiftAmount>>3)
3557 default:
3558 err = fmt.Errorf("unsupported arrangement %s", arr)
3559 }
3560 return
3561 }
3562
3563
3564
3565 func (a *AssemblerImpl) encodeAdvancedSIMDCopy(buf asm.Buffer, srcRegBits, dstRegBits, op, imm5, imm4, q byte) {
3566 buf.Append4Bytes(
3567 (srcRegBits<<5)|dstRegBits,
3568 imm4<<3|0b1<<2|srcRegBits>>3,
3569 imm5,
3570 q<<6|op<<5|0b1110,
3571 )
3572 }
3573
3574
3575
3576 func (a *AssemblerImpl) encodeAdvancedSIMDThreeSame(buf asm.Buffer, src1, src2, dst, opcode, size, q, u byte) {
3577 buf.Append4Bytes(
3578 (src2<<5)|dst,
3579 opcode<<3|1<<2|src2>>3,
3580 size<<6|0b1<<5|src1,
3581 q<<6|u<<5|0b1110,
3582 )
3583 }
3584
3585
3586
3587 func (a *AssemblerImpl) encodeAdvancedSIMDThreeDifferent(buf asm.Buffer, src1, src2, dst, opcode, size, q, u byte) {
3588 buf.Append4Bytes(
3589 (src2<<5)|dst,
3590 opcode<<4|src2>>3,
3591 size<<6|0b1<<5|src1,
3592 q<<6|u<<5|0b1110,
3593 )
3594 }
3595
3596
3597
3598 func (a *AssemblerImpl) encodeAdvancedSIMDPermute(buf asm.Buffer, src1, src2, dst, opcode, size, q byte) {
3599 buf.Append4Bytes(
3600 (src2<<5)|dst,
3601 opcode<<4|0b1<<3|src2>>3,
3602 size<<6|src1,
3603 q<<6|0b1110,
3604 )
3605 }
3606
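// encodeVectorRegisterToVectorRegister dispatches a vector-to-vector instruction to the
// Advanced SIMD encoding group it belongs to (copy, scalar pairwise, two-register
// miscellaneous, three same, three different, across lanes, table lookup, shift by
// immediate or permute).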
3607 func (a *AssemblerImpl) encodeVectorRegisterToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
3608 var srcVectorRegBits byte
3609 if n.srcReg != RegRZR {
3610 srcVectorRegBits, err = vectorRegisterBits(n.srcReg)
3611 } else if n.instruction == CMEQZERO {
3612
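// CMEQZERO compares a vector against zero: the zero register is passed as srcReg only to
// signal the zero-comparison form, and the actual vector operand is the destination register.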
3613 srcVectorRegBits, err = vectorRegisterBits(n.dstReg)
3614 }
3615
3616 if err != nil {
3617 return err
3618 }
3619
3620 dstVectorRegBits, err := vectorRegisterBits(n.dstReg)
3621 if err != nil {
3622 return err
3623 }
3624
3625 if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok {
3626 imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement)
3627 if err != nil {
3628 return err
3629 }
3630 a.encodeAdvancedSIMDCopy(buf, srcVectorRegBits, dstVectorRegBits, simdCopy.op, imm5, imm4, q)
3631 return nil
3632 }
3633
3634 if scalarPairwise, ok := advancedSIMDScalarPairwise[n.instruction]; ok {
3635
3636
3637 size, ok := scalarPairwise.size[n.vectorArrangement]
3638 if !ok {
3639 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
3640 }
3641 buf.Append4Bytes(
3642 (srcVectorRegBits<<5)|dstVectorRegBits,
3643 scalarPairwise.opcode<<4|1<<3|srcVectorRegBits>>3,
3644 size<<6|0b11<<4|scalarPairwise.opcode>>4,
3645 0b1<<6|scalarPairwise.u<<5|0b11110,
3646 )
3647 return
3648 }
3649
3650 if twoRegMisc, ok := advancedSIMDTwoRegisterMisc[n.instruction]; ok {
3651
3652
3653 qs, ok := twoRegMisc.qAndSize[n.vectorArrangement]
3654 if !ok {
3655 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
3656 }
3657 buf.Append4Bytes(
3658 (srcVectorRegBits<<5)|dstVectorRegBits,
3659 twoRegMisc.opcode<<4|0b1<<3|srcVectorRegBits>>3,
3660 qs.size<<6|0b1<<5|twoRegMisc.opcode>>4,
3661 qs.q<<6|twoRegMisc.u<<5|0b01110,
3662 )
3663 return nil
3664 }
3665
3666 if threeSame, ok := advancedSIMDThreeSame[n.instruction]; ok {
3667 qs, ok := threeSame.qAndSize[n.vectorArrangement]
3668 if !ok {
3669 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
3670 }
3671 a.encodeAdvancedSIMDThreeSame(buf, srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeSame.opcode, qs.size, qs.q, threeSame.u)
3672 return nil
3673 }
3674
3675 if threeDifferent, ok := advancedSIMDThreeDifferent[n.instruction]; ok {
3676 qs, ok := threeDifferent.qAndSize[n.vectorArrangement]
3677 if !ok {
3678 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
3679 }
3680 a.encodeAdvancedSIMDThreeDifferent(buf, srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeDifferent.opcode, qs.size, qs.q, threeDifferent.u)
3681 return nil
3682 }
3683
3684 if acrossLanes, ok := advancedSIMDAcrossLanes[n.instruction]; ok {
3685
3686
3687 qs, ok := acrossLanes.qAndSize[n.vectorArrangement]
3688 if !ok {
3689 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
3690 }
3691 buf.Append4Bytes(
3692 (srcVectorRegBits<<5)|dstVectorRegBits,
3693 acrossLanes.opcode<<4|0b1<<3|srcVectorRegBits>>3,
3694 qs.size<<6|0b11000<<1|acrossLanes.opcode>>4,
3695 qs.q<<6|acrossLanes.u<<5|0b01110,
3696 )
3697 return nil
3698 }
3699
3700 if lookup, ok := advancedSIMDTableLookup[n.instruction]; ok {
3701 q, ok := lookup.q[n.vectorArrangement]
3702 if !ok {
3703 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
3704 }
3705 buf.Append4Bytes(
3706 (srcVectorRegBits<<5)|dstVectorRegBits,
3707 lookup.Len<<5|lookup.op<<4|srcVectorRegBits>>3,
3708 lookup.op2<<6|dstVectorRegBits,
3709 q<<6|0b1110,
3710 )
3711 return
3712 }
3713
3714 if shiftByImmediate, ok := advancedSIMDShiftByImmediate[n.instruction]; ok {
3715 immh, immb, err := shiftByImmediate.immResolver(n.srcConst, n.vectorArrangement)
3716 if err != nil {
3717 return err
3718 }
3719
3720 q, ok := shiftByImmediate.q[n.vectorArrangement]
3721 if !ok {
3722 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
3723 }
3724
3725 buf.Append4Bytes(
3726 (srcVectorRegBits<<5)|dstVectorRegBits,
3727 shiftByImmediate.opcode<<3|0b1<<2|srcVectorRegBits>>3,
3728 immh<<3|immb,
3729 q<<6|shiftByImmediate.U<<5|0b1111,
3730 )
3731 return nil
3732 }
3733
3734 if permute, ok := advancedSIMDPermute[n.instruction]; ok {
3735 size, q := arrangementSizeQ(n.vectorArrangement)
3736 a.encodeAdvancedSIMDPermute(buf, srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, permute.opcode, size, q)
3737 return
3738 }
3739 return errorEncodingUnsupported(n)
3740 }
3741
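// encodeTwoVectorRegistersToVectorRegister encodes instructions with two vector source
// registers and a vector destination (three same, three different, permute and EXT).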
3742 func (a *AssemblerImpl) encodeTwoVectorRegistersToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
3743 var srcRegBits, srcRegBits2, dstRegBits byte
3744 srcRegBits, err = vectorRegisterBits(n.srcReg)
3745 if err != nil {
3746 return err
3747 }
3748
3749 srcRegBits2, err = vectorRegisterBits(n.srcReg2)
3750 if err != nil {
3751 return err
3752 }
3753
3754 dstRegBits, err = vectorRegisterBits(n.dstReg)
3755 if err != nil {
3756 return err
3757 }
3758
3759 if threeSame, ok := advancedSIMDThreeSame[n.instruction]; ok {
3760 qs, ok := threeSame.qAndSize[n.vectorArrangement]
3761 if !ok {
3762 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
3763 }
3764 a.encodeAdvancedSIMDThreeSame(buf, srcRegBits, srcRegBits2, dstRegBits, threeSame.opcode, qs.size, qs.q, threeSame.u)
3765 return nil
3766 }
3767
3768 if threeDifferent, ok := advancedSIMDThreeDifferent[n.instruction]; ok {
3769 qs, ok := threeDifferent.qAndSize[n.vectorArrangement]
3770 if !ok {
3771 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
3772 }
3773 a.encodeAdvancedSIMDThreeDifferent(buf, srcRegBits, srcRegBits2, dstRegBits, threeDifferent.opcode, qs.size, qs.q, threeDifferent.u)
3774 return nil
3775 }
3776
3777 if permute, ok := advancedSIMDPermute[n.instruction]; ok {
3778 size, q := arrangementSizeQ(n.vectorArrangement)
3779 a.encodeAdvancedSIMDPermute(buf, srcRegBits, srcRegBits2, dstRegBits, permute.opcode, size, q)
3780 return
3781 }
3782
3783 if n.instruction == EXT {
3784
3785
3786 var q, imm4 byte
3787 switch n.vectorArrangement {
3788 case VectorArrangement16B:
3789 imm4 = 0b1111 & byte(n.srcConst)
3790 q = 0b1
3791 case VectorArrangement8B:
3792 imm4 = 0b111 & byte(n.srcConst)
3793 default:
3794 return fmt.Errorf("invalid arrangement %s for EXT", n.vectorArrangement)
3795 }
3796 buf.Append4Bytes(
3797 (srcRegBits2<<5)|dstRegBits,
3798 imm4<<3|srcRegBits2>>3,
3799 srcRegBits,
3800 q<<6|0b101110,
3801 )
3802 return
3803 }
3804 return
3805 }
3806
3807 func (a *AssemblerImpl) encodeVectorRegisterToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
3808 if err = checkArrangementIndexPair(n.vectorArrangement, n.srcVectorIndex); err != nil {
3809 return
3810 }
3811
3812 srcVecRegBits, err := vectorRegisterBits(n.srcReg)
3813 if err != nil {
3814 return err
3815 }
3816
3817 dstRegBits, err := intRegisterBits(n.dstReg)
3818 if err != nil {
3819 return err
3820 }
3821
3822 if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok {
3823 imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement)
3824 if err != nil {
3825 return err
3826 }
3827 a.encodeAdvancedSIMDCopy(buf, srcVecRegBits, dstRegBits, simdCopy.op, imm5, imm4, q)
3828 return nil
3829 }
3830 return errorEncodingUnsupported(n)
3831 }
3832
3833 func (a *AssemblerImpl) encodeRegisterToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
3834 srcRegBits, err := intRegisterBits(n.srcReg)
3835 if err != nil {
3836 return err
3837 }
3838
3839 dstVectorRegBits, err := vectorRegisterBits(n.dstReg)
3840 if err != nil {
3841 return err
3842 }
3843
3844 if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok {
3845 imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement)
3846 if err != nil {
3847 return err
3848 }
3849 a.encodeAdvancedSIMDCopy(buf, srcRegBits, dstVectorRegBits, simdCopy.op, imm5, imm4, q)
3850 return nil
3851 }
3852 return errorEncodingUnsupported(n)
3853 }
3854
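// zeroRegisterBits is register number 31, which encodes XZR/WZR (and SP in some
// addressing forms).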
3855 var zeroRegisterBits byte = 0b11111
3856
3857 func isIntRegister(r asm.Register) bool {
3858 return RegR0 <= r && r <= RegSP
3859 }
3860
3861 func isVectorRegister(r asm.Register) bool {
3862 return RegV0 <= r && r <= RegV31
3863 }
3864
3865 func isConditionalRegister(r asm.Register) bool {
3866 return RegCondEQ <= r && r <= RegCondNV
3867 }
3868
3869 func intRegisterBits(r asm.Register) (ret byte, err error) {
3870 if !isIntRegister(r) {
3871 err = fmt.Errorf("%s is not integer", RegisterName(r))
3872 } else if r == RegSP {
3873
3874 r = RegRZR
3875 }
3876 ret = byte(r - RegR0)
3877 return
3878 }
3879
3880 func vectorRegisterBits(r asm.Register) (ret byte, err error) {
3881 if !isVectorRegister(r) {
3882 err = fmt.Errorf("%s is not vector", RegisterName(r))
3883 } else {
3884 ret = byte(r - RegV0)
3885 }
3886 return
3887 }
3888
3889 func registerBits(r asm.Register) (ret byte) {
3890 if isIntRegister(r) {
3891 if r == RegSP {
3892
3893 r = RegRZR
3894 }
3895 ret = byte(r - RegR0)
3896 } else {
3897 ret = byte(r - RegV0)
3898 }
3899 return
3900 }
3901