1 package amd64
2
3 import (
4 "encoding/binary"
5 "errors"
6 "fmt"
7 "math"
8
9 "github.com/tetratelabs/wazero/internal/asm"
10 )
11
12
// nodeImpl is one instruction in the doubly-linked list that AssemblerImpl builds and
// later encodes. Methods such as AssignJumpTarget accept asm.Node and assert it to *nodeImpl.
//
// NOTE(review): the field order looks chosen for struct packing (pointers first, single
// bytes last) — confirm before reordering.
type nodeImpl struct {
	// jumpTarget is the destination node of a jump. A non-nil value is what makes
	// isJumpNode report true.
	jumpTarget *nodeImpl

	// prev and next link this node into the assembler's instruction list (see addNode).
	prev, next *nodeImpl

	// forwardJumpOrigins is the head of a singly-linked list, chained through this same
	// field, of the forward jumps that target this node. It is built by
	// initializeNodesForEncoding and walked by resolveForwardRelativeJumps.
	forwardJumpOrigins *nodeImpl

	// staticConst is the constant referenced by the static-const operand types. Assemble
	// reads its OffsetInBinary to patch the displacement of the referring instruction.
	staticConst *asm.StaticConst

	// dstConst is the destination-side constant (an immediate or a memory offset,
	// depending on types).
	dstConst asm.ConstantValue
	// offsetInBinary is the offset of this node's encoding in the output buffer; it is
	// assigned while encoding.
	offsetInBinary asm.NodeOffsetInBinary
	// srcConst is the source-side constant (an immediate or a memory offset, depending
	// on types).
	srcConst asm.ConstantValue
	// instruction is the instruction this node encodes.
	instruction asm.Instruction

	// readInstructionAddressBeforeTargetInstruction is set by CompileReadInstructionAddress.
	// NOTE(review): presumably identifies the instruction after which the address is
	// taken — confirm against the code that consumes it.
	readInstructionAddressBeforeTargetInstruction asm.Instruction
	// flag holds the nodeFlag bits for this node.
	flag nodeFlag
	// types selects the operand combination and therefore which encoder encodeNode calls.
	types operandTypes
	// srcReg and dstReg are the source and destination registers; for memory operands
	// they hold the base register.
	srcReg, dstReg asm.Register
	// srcMemIndex and dstMemIndex are the optional index registers of memory operands;
	// asm.NilRegister means there is no index.
	srcMemIndex, dstMemIndex asm.Register
	// srcMemScale and dstMemScale are the scales applied to the index registers.
	srcMemScale, dstMemScale byte
	// arg is an extra one-byte argument set by the Compile*WithArg / Compile*AndArg methods.
	arg byte

	// staticConstReferrersAdded is not used in the visible part of this file.
	// NOTE(review): presumably records that this node was already appended to
	// AssemblerImpl.staticConstReferrers — confirm.
	staticConstReferrersAdded bool
}
46
// nodeFlag is a bit set of per-node state used while preparing and encoding nodes.
type nodeFlag byte

const (
	// nodeFlagInitializedForEncoding is set on each node as initializeNodesForEncoding
	// visits it.
	nodeFlagInitializedForEncoding nodeFlag = 1 << 0
	// nodeFlagBackwardJump marks a jump whose target had already been visited when the
	// jump itself was initialized, i.e. the target precedes the jump.
	nodeFlagBackwardJump nodeFlag = 1 << 1
	// nodeFlagShortForwardJump marks a forward jump that is currently encoded in its
	// short form. resolveForwardRelativeJumps flips it when the offset does not fit.
	nodeFlagShortForwardJump nodeFlag = 1 << 2
)
59
60 func (n *nodeImpl) isInitializedForEncoding() bool {
61 return n.flag&nodeFlagInitializedForEncoding != 0
62 }
63
64 func (n *nodeImpl) isJumpNode() bool {
65 return n.jumpTarget != nil
66 }
67
68 func (n *nodeImpl) isBackwardJump() bool {
69 return n.isJumpNode() && (n.flag&nodeFlagBackwardJump != 0)
70 }
71
72 func (n *nodeImpl) isForwardJump() bool {
73 return n.isJumpNode() && (n.flag&nodeFlagBackwardJump == 0)
74 }
75
76 func (n *nodeImpl) isForwardShortJump() bool {
77 return n.isForwardJump() && n.flag&nodeFlagShortForwardJump != 0
78 }
79
80
81 func (n *nodeImpl) AssignJumpTarget(target asm.Node) {
82 n.jumpTarget = target.(*nodeImpl)
83 }
84
85
86 func (n *nodeImpl) AssignDestinationConstant(value asm.ConstantValue) {
87 n.dstConst = value
88 }
89
90
91 func (n *nodeImpl) AssignSourceConstant(value asm.ConstantValue) {
92 n.srcConst = value
93 }
94
95
96 func (n *nodeImpl) OffsetInBinary() asm.NodeOffsetInBinary {
97 return n.offsetInBinary
98 }
99
100
101
102
103
104
105 func (n *nodeImpl) String() (ret string) {
106 instName := InstructionName(n.instruction)
107 switch n.types {
108 case operandTypesNoneToNone:
109 ret = instName
110 case operandTypesNoneToRegister:
111 ret = fmt.Sprintf("%s %s", instName, RegisterName(n.dstReg))
112 case operandTypesNoneToMemory:
113 if n.dstMemIndex != asm.NilRegister {
114 ret = fmt.Sprintf("%s [%s + 0x%x + %s*0x%x]", instName,
115 RegisterName(n.dstReg), n.dstConst, RegisterName(n.dstMemIndex), n.dstMemScale)
116 } else {
117 ret = fmt.Sprintf("%s [%s + 0x%x]", instName, RegisterName(n.dstReg), n.dstConst)
118 }
119 case operandTypesNoneToBranch:
120 ret = fmt.Sprintf("%s {%v}", instName, n.jumpTarget)
121 case operandTypesRegisterToNone:
122 ret = fmt.Sprintf("%s %s", instName, RegisterName(n.srcReg))
123 case operandTypesRegisterToRegister:
124 ret = fmt.Sprintf("%s %s, %s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg))
125 case operandTypesRegisterToMemory:
126 if n.dstMemIndex != asm.NilRegister {
127 ret = fmt.Sprintf("%s %s, [%s + 0x%x + %s*0x%x]", instName, RegisterName(n.srcReg),
128 RegisterName(n.dstReg), n.dstConst, RegisterName(n.dstMemIndex), n.dstMemScale)
129 } else {
130 ret = fmt.Sprintf("%s %s, [%s + 0x%x]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.dstConst)
131 }
132 case operandTypesRegisterToConst:
133 ret = fmt.Sprintf("%s %s, 0x%x", instName, RegisterName(n.srcReg), n.dstConst)
134 case operandTypesMemoryToRegister:
135 if n.srcMemIndex != asm.NilRegister {
136 ret = fmt.Sprintf("%s [%s + %#x + %s*%#x], %s", instName,
137 RegisterName(n.srcReg), n.srcConst, RegisterName(n.srcMemIndex), n.srcMemScale, RegisterName(n.dstReg))
138 } else {
139 ret = fmt.Sprintf("%s [%s + 0x%x], %s", instName, RegisterName(n.srcReg), n.srcConst, RegisterName(n.dstReg))
140 }
141 case operandTypesMemoryToConst:
142 if n.srcMemIndex != asm.NilRegister {
143 ret = fmt.Sprintf("%s [%s + %#x + %s*0x%x], 0x%x", instName,
144 RegisterName(n.srcReg), n.srcConst, RegisterName(n.srcMemIndex), n.srcMemScale, n.dstConst)
145 } else {
146 ret = fmt.Sprintf("%s [%s + %#x], 0x%x", instName, RegisterName(n.srcReg), n.srcConst, n.dstConst)
147 }
148 case operandTypesConstToMemory:
149 if n.dstMemIndex != asm.NilRegister {
150 ret = fmt.Sprintf("%s 0x%x, [%s + 0x%x + %s*0x%x]", instName, n.srcConst,
151 RegisterName(n.dstReg), n.dstConst, RegisterName(n.dstMemIndex), n.dstMemScale)
152 } else {
153 ret = fmt.Sprintf("%s 0x%x, [%s + 0x%x]", instName, n.srcConst, RegisterName(n.dstReg), n.dstConst)
154 }
155 case operandTypesConstToRegister:
156 ret = fmt.Sprintf("%s 0x%x, %s", instName, n.srcConst, RegisterName(n.dstReg))
157 case operandTypesStaticConstToRegister:
158 ret = fmt.Sprintf("%s $%#x, %s", instName, n.staticConst.Raw, RegisterName(n.dstReg))
159 case operandTypesRegisterToStaticConst:
160 ret = fmt.Sprintf("%s %s, $%#x", instName, RegisterName(n.srcReg), n.staticConst.Raw)
161 }
162 return
163 }
164
// operandTypes identifies the combination of source and destination operand kinds of a
// node, and thereby which encoder handles it.
type operandTypes byte

const (
	operandTypesNoneToNone operandTypes = iota
	operandTypesNoneToRegister
	operandTypesNoneToMemory
	operandTypesNoneToBranch
	operandTypesRegisterToNone
	operandTypesRegisterToRegister
	operandTypesRegisterToMemory
	operandTypesRegisterToConst
	operandTypesMemoryToRegister
	operandTypesMemoryToConst
	operandTypesConstToRegister
	operandTypesConstToMemory
	operandTypesStaticConstToRegister
	operandTypesRegisterToStaticConst
)

// String returns the "<Source>To<Destination>" name of the operand type, or the empty
// string for a value that is not one of the declared constants.
func (o operandTypes) String() (ret string) {
	names := [...]string{
		operandTypesNoneToNone:            "NoneToNone",
		operandTypesNoneToRegister:        "NoneToRegister",
		operandTypesNoneToMemory:          "NoneToMemory",
		operandTypesNoneToBranch:          "NoneToBranch",
		operandTypesRegisterToNone:        "RegisterToNone",
		operandTypesRegisterToRegister:    "RegisterToRegister",
		operandTypesRegisterToMemory:      "RegisterToMemory",
		operandTypesRegisterToConst:       "RegisterToConst",
		operandTypesMemoryToRegister:      "MemoryToRegister",
		operandTypesMemoryToConst:         "MemoryToConst",
		operandTypesConstToRegister:       "ConstToRegister",
		operandTypesConstToMemory:         "ConstToMemory",
		operandTypesStaticConstToRegister: "StaticConstToRegister",
		operandTypesRegisterToStaticConst: "RegisterToStaticConst",
	}
	if int(o) < len(names) {
		ret = names[o]
	}
	return ret
}
218
type (
	// AssemblerImpl collects instructions as a linked list of nodeImpl through the
	// Compile* methods and encodes them into an asm.Buffer with Assemble.
	AssemblerImpl struct {
		// root and current are the first and the most recently added node of the
		// instruction list (see addNode).
		root    *nodeImpl
		current *nodeImpl
		asm.BaseAssemblerImpl
		// readInstructionAddressNodes are the nodes that Assemble passes to
		// finalizeReadInstructionAddressNode once the whole binary has been encoded.
		readInstructionAddressNodes []*nodeImpl

		// staticConstReferrers are the instructions whose trailing 4-byte displacement
		// Assemble patches to point at their static constant after layout is final.
		staticConstReferrers []staticConstReferrer

		// nodePool is the allocator for the nodes of this assembler.
		nodePool nodePool
		// pool holds the static constants referenced by the nodes.
		pool asm.StaticConstPool

		// MaxDisplacementForConstantPool is initialized by NewAssembler to
		// defaultMaxDisplacementForConstantPool.
		// NOTE(review): presumably bounds how far the constant pool may be placed from
		// its first use before it is flushed — confirm against maybeFlushConstants.
		MaxDisplacementForConstantPool int

		// forceReAssemble is set during encoding when a short forward jump turns out
		// not to fit, which makes Assemble discard the output and encode again.
		forceReAssemble bool
	}

	// staticConstReferrer is an instruction that refers to a static constant.
	staticConstReferrer struct {
		// n is the referring node.
		n *nodeImpl
		// instLen is the encoded length of the instruction in bytes; the displacement
		// occupies its last four bytes.
		instLen int
	}
)
248
249 func NewAssembler() *AssemblerImpl {
250 return &AssemblerImpl{
251 nodePool: nodePool{index: nodePageSize},
252 pool: asm.NewStaticConstPool(),
253 MaxDisplacementForConstantPool: defaultMaxDisplacementForConstantPool,
254 }
255 }
256
// nodePageSize is the number of nodes held by a single nodePage.
const nodePageSize = 128

// nodePage is the fixed-size array of nodes that nodePool allocates as one unit.
type nodePage = [nodePageSize]nodeImpl

// nodePool hands out nodeImpl values from fixed-size pages. reset keeps the pages in
// the backing array of the slice so that later allocations can reuse them.
type nodePool struct {
	// pages are the pages currently in use; nodes are taken from the last one.
	pages []*nodePage
	// index is the position of the next free node in the last page. The value
	// nodePageSize means the next allocation needs another page.
	index int
}
267
268
269
270 func (n *nodePool) allocNode() *nodeImpl {
271 if n.index == nodePageSize {
272 if len(n.pages) == cap(n.pages) {
273 n.pages = append(n.pages, new(nodePage))
274 } else {
275 i := len(n.pages)
276 n.pages = n.pages[:i+1]
277 if n.pages[i] == nil {
278 n.pages[i] = new(nodePage)
279 }
280 }
281 n.index = 0
282 }
283 ret := &n.pages[len(n.pages)-1][n.index]
284 n.index++
285 return ret
286 }
287
288 func (n *nodePool) reset() {
289 for _, ns := range n.pages {
290 pages := ns[:]
291 for i := range pages {
292 pages[i] = nodeImpl{}
293 }
294 }
295 n.pages = n.pages[:0]
296 n.index = nodePageSize
297 }
298
299
300 func (a *AssemblerImpl) AllocateNOP() asm.Node {
301 n := a.nodePool.allocNode()
302 n.instruction = NOP
303 n.types = operandTypesNoneToNone
304 return n
305 }
306
307
308 func (a *AssemblerImpl) Add(n asm.Node) {
309 a.addNode(n.(*nodeImpl))
310 }
311
312
313 func (a *AssemblerImpl) Reset() {
314 pool := a.pool
315 pool.Reset()
316 *a = AssemblerImpl{
317 nodePool: a.nodePool,
318 pool: pool,
319 readInstructionAddressNodes: a.readInstructionAddressNodes[:0],
320 staticConstReferrers: a.staticConstReferrers[:0],
321 BaseAssemblerImpl: asm.BaseAssemblerImpl{
322 SetBranchTargetOnNextNodes: a.SetBranchTargetOnNextNodes[:0],
323 JumpTableEntries: a.JumpTableEntries[:0],
324 },
325 }
326 a.nodePool.reset()
327 }
328
329
330 func (a *AssemblerImpl) newNode(instruction asm.Instruction, types operandTypes) *nodeImpl {
331 n := a.nodePool.allocNode()
332 n.instruction = instruction
333 n.types = types
334 a.addNode(n)
335 return n
336 }
337
338
339 func (a *AssemblerImpl) addNode(node *nodeImpl) {
340 if a.root == nil {
341 a.root = node
342 a.current = node
343 } else {
344 parent := a.current
345 parent.next = node
346 node.prev = parent
347 a.current = node
348 }
349
350 for _, o := range a.SetBranchTargetOnNextNodes {
351 origin := o.(*nodeImpl)
352 origin.jumpTarget = node
353 }
354
355 a.SetBranchTargetOnNextNodes = a.SetBranchTargetOnNextNodes[:0]
356 }
357
358
359 func (a *AssemblerImpl) encodeNode(buf asm.Buffer, n *nodeImpl) (err error) {
360 switch n.types {
361 case operandTypesNoneToNone:
362 err = a.encodeNoneToNone(buf, n)
363 case operandTypesNoneToRegister:
364 err = a.encodeNoneToRegister(buf, n)
365 case operandTypesNoneToMemory:
366 err = a.encodeNoneToMemory(buf, n)
367 case operandTypesNoneToBranch:
368
369 err = a.encodeRelativeJump(buf, n)
370 case operandTypesRegisterToNone:
371 err = a.encodeRegisterToNone(buf, n)
372 case operandTypesRegisterToRegister:
373 err = a.encodeRegisterToRegister(buf, n)
374 case operandTypesRegisterToMemory:
375 err = a.encodeRegisterToMemory(buf, n)
376 case operandTypesRegisterToConst:
377 err = a.encodeRegisterToConst(buf, n)
378 case operandTypesMemoryToRegister:
379 err = a.encodeMemoryToRegister(buf, n)
380 case operandTypesMemoryToConst:
381 err = a.encodeMemoryToConst(buf, n)
382 case operandTypesConstToRegister:
383 err = a.encodeConstToRegister(buf, n)
384 case operandTypesConstToMemory:
385 err = a.encodeConstToMemory(buf, n)
386 case operandTypesStaticConstToRegister:
387 err = a.encodeStaticConstToRegister(buf, n)
388 case operandTypesRegisterToStaticConst:
389 err = a.encodeRegisterToStaticConst(buf, n)
390 default:
391 err = fmt.Errorf("encoder undefined for [%s] operand type", n.types)
392 }
393 if err != nil {
394 err = fmt.Errorf("%w: %s", err, n)
395 }
396 return
397 }
398
399
400 func (a *AssemblerImpl) Assemble(buf asm.Buffer) error {
401 a.initializeNodesForEncoding()
402
403
404
405 for {
406 err := a.encode(buf)
407 if err != nil {
408 return err
409 }
410
411 if !a.forceReAssemble {
412 break
413 } else {
414
415
416 buf.Reset()
417
418 a.forceReAssemble = false
419 }
420 }
421
422 code := buf.Bytes()
423 for _, n := range a.readInstructionAddressNodes {
424 if err := a.finalizeReadInstructionAddressNode(code, n); err != nil {
425 return err
426 }
427 }
428
429
430 for i := range a.staticConstReferrers {
431 ref := &a.staticConstReferrers[i]
432 n, instLen := ref.n, ref.instLen
433
434 displacement := int(n.staticConst.OffsetInBinary) - int(n.OffsetInBinary()) - instLen
435
436 displacementOffsetInInstruction := n.OffsetInBinary() + uint64(instLen-4)
437 binary.LittleEndian.PutUint32(code[displacementOffsetInInstruction:], uint32(int32(displacement)))
438 }
439
440 return a.FinalizeJumpTableEntry(code)
441 }
442
443
444
445 func (a *AssemblerImpl) initializeNodesForEncoding() {
446 for n := a.root; n != nil; n = n.next {
447 n.flag |= nodeFlagInitializedForEncoding
448 if target := n.jumpTarget; target != nil {
449 if target.isInitializedForEncoding() {
450
451 n.flag |= nodeFlagBackwardJump
452 } else {
453
454
455
456 n.flag |= nodeFlagShortForwardJump
457
458
459
460 if target.types == operandTypesNoneToBranch {
461
462 nop := a.nodePool.allocNode()
463 nop.instruction = NOP
464 nop.types = operandTypesNoneToNone
465
466 prev := target.prev
467 nop.prev = prev
468 prev.next = nop
469 nop.next = target
470 target.prev = nop
471 n.jumpTarget = nop
472 target = nop
473 }
474
475
476
477 n.forwardJumpOrigins = target.forwardJumpOrigins
478 target.forwardJumpOrigins = n
479 }
480 }
481 }
482 }
483
484 func (a *AssemblerImpl) encode(buf asm.Buffer) error {
485 for n := a.root; n != nil; n = n.next {
486
487
488
489
490
491
492
493
494
495
496
497 switch info := nopPaddingInfo[n.instruction]; {
498 case info.jmp:
499 if err := a.encodeJmpNOPPadding(buf, n); err != nil {
500 return err
501 }
502 case info.onNextJmp:
503 if err := a.encodeOnNextJmpNOPPAdding(buf, n); err != nil {
504 return err
505 }
506 }
507
508
509 n.offsetInBinary = uint64(buf.Len())
510
511 if err := a.encodeNode(buf, n); err != nil {
512 return err
513 }
514
515 if n.forwardJumpOrigins != nil {
516 if err := a.resolveForwardRelativeJumps(buf, n); err != nil {
517 return fmt.Errorf("invalid relative forward jumps: %w", err)
518 }
519 }
520
521 a.maybeFlushConstants(buf, n.next == nil)
522 }
523 return nil
524 }
525
526 var nopPaddingInfo = [instructionEnd]struct {
527 jmp, onNextJmp bool
528 }{
529 RET: {jmp: true},
530 JMP: {jmp: true},
531 JCC: {jmp: true},
532 JCS: {jmp: true},
533 JEQ: {jmp: true},
534 JGE: {jmp: true},
535 JGT: {jmp: true},
536 JHI: {jmp: true},
537 JLE: {jmp: true},
538 JLS: {jmp: true},
539 JLT: {jmp: true},
540 JMI: {jmp: true},
541 JNE: {jmp: true},
542 JPC: {jmp: true},
543 JPS: {jmp: true},
544
545 CMPL: {onNextJmp: true},
546 CMPQ: {onNextJmp: true},
547 TESTL: {onNextJmp: true},
548 TESTQ: {onNextJmp: true},
549 ADDL: {onNextJmp: true},
550 ADDQ: {onNextJmp: true},
551 SUBL: {onNextJmp: true},
552 SUBQ: {onNextJmp: true},
553 ANDL: {onNextJmp: true},
554 ANDQ: {onNextJmp: true},
555 INCQ: {onNextJmp: true},
556 DECQ: {onNextJmp: true},
557 }
558
559 func (a *AssemblerImpl) encodeJmpNOPPadding(buf asm.Buffer, n *nodeImpl) error {
560
561
562 prevLen := buf.Len()
563
564
565 n.offsetInBinary = uint64(prevLen)
566
567
568 if err := a.encodeNode(buf, n); err != nil {
569 return err
570 }
571 instructionLen := int32(buf.Len() - prevLen)
572
573
574 buf.Truncate(prevLen)
575 return a.encodeNOPPadding(buf, instructionLen)
576 }
577
578 func (a *AssemblerImpl) encodeOnNextJmpNOPPAdding(buf asm.Buffer, n *nodeImpl) error {
579 instructionLen, err := a.fusedInstructionLength(buf, n)
580 if err != nil {
581 return err
582 }
583 return a.encodeNOPPadding(buf, instructionLen)
584 }
585
586
587
588
589 func (a *AssemblerImpl) encodeNOPPadding(buf asm.Buffer, instructionLen int32) error {
590 const boundaryInBytes int32 = 32
591 const mask = boundaryInBytes - 1
592 var padNum int
593 currentPos := int32(buf.Len())
594 if used := currentPos & mask; used+instructionLen >= boundaryInBytes {
595 padNum = int(boundaryInBytes - used)
596 }
597 a.padNOP(buf, padNum)
598 return nil
599 }
600
601
602
603
// fusedInstructionLength returns the combined encoded length of n and the jump that
// follows it when the two are to be padded as one unit, and zero when they are not.
//
// buf is used as scratch space: both instructions are encoded at the current position
// to measure them and the bytes are removed again before returning.
//
// NOTE(review): the conditions below presumably mirror the CPU's macro-fusion rules for
// flag-setting instructions followed by a conditional jump — confirm against the vendor
// documentation before changing them.
func (a *AssemblerImpl) fusedInstructionLength(buf asm.Buffer, n *nodeImpl) (ret int32, err error) {
	// Find the next instruction that is not a NOP node.
	next := n.next
	for ; next != nil && next.instruction == NOP; next = next.next {
	}

	if next == nil {
		// n is the last instruction, so there is nothing to pair it with.
		return
	}

	inst, jmpInst := n.instruction, next.instruction

	if !nopPaddingInfo[jmpInst].jmp {
		// The following instruction is not marked as a jump in nopPaddingInfo.
		return
	}

	// TEST and CMP that combine a memory operand with a constant are not paired.
	isTest := inst == TESTL || inst == TESTQ
	isCmp := inst == CMPQ || inst == CMPL
	isTestCmp := isTest || isCmp
	if isTestCmp && (n.types == operandTypesMemoryToConst || n.types == operandTypesConstToMemory) {
		return
	}

	// TEST and AND pair with every jump. The remaining instructions are restricted.
	isAnd := inst == ANDL || inst == ANDQ
	if !isTest && !isAnd {
		if jmpInst == JMI || jmpInst == JPL || jmpInst == JPS || jmpInst == JPC {
			// Not paired with these jumps.
			return
		}
		isAdd := inst == ADDL || inst == ADDQ
		isSub := inst == SUBL || inst == SUBQ
		if !isCmp && !isAdd && !isSub {
			// What is left are INCQ and DECQ (per nopPaddingInfo's onNextJmp set).
			if jmpInst == JCS || jmpInst == JCC || jmpInst == JHI || jmpInst == JLS {
				// Not paired with these jumps either.
				return
			}
		}
	}

	// Measure the pair by encoding both instructions at the current position.
	savedLen := uint64(buf.Len())

	// On error the partially written bytes are left in buf; callers abort assembling.
	if err = a.encodeNode(buf, n); err != nil {
		return
	}
	if err = a.encodeNode(buf, next); err != nil {
		return
	}

	ret = int32(uint64(buf.Len()) - savedLen)

	// Remove the trial encoding again.
	buf.Truncate(int(savedLen))
	return
}
668
669
670
// nopOpcodes holds NOP encodings of increasing length: entry i is a single NOP
// instruction that is i+1 bytes long, stored in the leading bytes of an 11-byte array.
// padNOP slices entry i to its first i+1 bytes, so the longest NOP it emits is 11 bytes.
var nopOpcodes = [][11]byte{
	{0x90},
	{0x66, 0x90},
	{0x0f, 0x1f, 0x00},
	{0x0f, 0x1f, 0x40, 0x00},
	{0x0f, 0x1f, 0x44, 0x00, 0x00},
	{0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
	{0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
}
684
685 func (a *AssemblerImpl) padNOP(buf asm.Buffer, num int) {
686 for num > 0 {
687 singleNopNum := num
688 if singleNopNum > len(nopOpcodes) {
689 singleNopNum = len(nopOpcodes)
690 }
691 buf.AppendBytes(nopOpcodes[singleNopNum-1][:singleNopNum])
692 num -= singleNopNum
693 }
694 }
695
696
697 func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node {
698 return a.newNode(instruction, operandTypesNoneToNone)
699 }
700
701
702 func (a *AssemblerImpl) CompileConstToRegister(
703 instruction asm.Instruction,
704 value asm.ConstantValue,
705 destinationReg asm.Register,
706 ) (inst asm.Node) {
707 n := a.newNode(instruction, operandTypesConstToRegister)
708 n.srcConst = value
709 n.dstReg = destinationReg
710 return n
711 }
712
713
714 func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) {
715 n := a.newNode(instruction, operandTypesRegisterToRegister)
716 n.srcReg = from
717 n.dstReg = to
718 }
719
720
721 func (a *AssemblerImpl) CompileMemoryToRegister(
722 instruction asm.Instruction,
723 sourceBaseReg asm.Register,
724 sourceOffsetConst asm.ConstantValue,
725 destinationReg asm.Register,
726 ) {
727 n := a.newNode(instruction, operandTypesMemoryToRegister)
728 n.srcReg = sourceBaseReg
729 n.srcConst = sourceOffsetConst
730 n.dstReg = destinationReg
731 }
732
733
734 func (a *AssemblerImpl) CompileRegisterToMemory(
735 instruction asm.Instruction,
736 sourceRegister, destinationBaseRegister asm.Register,
737 destinationOffsetConst asm.ConstantValue,
738 ) {
739 n := a.newNode(instruction, operandTypesRegisterToMemory)
740 n.srcReg = sourceRegister
741 n.dstReg = destinationBaseRegister
742 n.dstConst = destinationOffsetConst
743 }
744
745
746 func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node {
747 return a.newNode(jmpInstruction, operandTypesNoneToBranch)
748 }
749
750
751 func (a *AssemblerImpl) CompileJumpToMemory(
752 jmpInstruction asm.Instruction,
753 baseReg asm.Register,
754 offset asm.ConstantValue,
755 ) {
756 n := a.newNode(jmpInstruction, operandTypesNoneToMemory)
757 n.dstReg = baseReg
758 n.dstConst = offset
759 }
760
761
762 func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) {
763 n := a.newNode(jmpInstruction, operandTypesNoneToRegister)
764 n.dstReg = reg
765 }
766
767
768 func (a *AssemblerImpl) CompileReadInstructionAddress(
769 destinationRegister asm.Register,
770 beforeAcquisitionTargetInstruction asm.Instruction,
771 ) {
772 n := a.newNode(LEAQ, operandTypesMemoryToRegister)
773 n.dstReg = destinationRegister
774 n.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction
775 }
776
777
778 func (a *AssemblerImpl) CompileRegisterToRegisterWithArg(
779 instruction asm.Instruction,
780 from, to asm.Register,
781 arg byte,
782 ) {
783 n := a.newNode(instruction, operandTypesRegisterToRegister)
784 n.srcReg = from
785 n.dstReg = to
786 n.arg = arg
787 }
788
789
790 func (a *AssemblerImpl) CompileMemoryWithIndexToRegister(
791 instruction asm.Instruction,
792 srcBaseReg asm.Register,
793 srcOffsetConst asm.ConstantValue,
794 srcIndex asm.Register,
795 srcScale int16,
796 dstReg asm.Register,
797 ) {
798 n := a.newNode(instruction, operandTypesMemoryToRegister)
799 n.srcReg = srcBaseReg
800 n.srcConst = srcOffsetConst
801 n.srcMemIndex = srcIndex
802 n.srcMemScale = byte(srcScale)
803 n.dstReg = dstReg
804 }
805
806
807 func (a *AssemblerImpl) CompileMemoryWithIndexAndArgToRegister(
808 instruction asm.Instruction,
809 srcBaseReg asm.Register,
810 srcOffsetConst asm.ConstantValue,
811 srcIndex asm.Register,
812 srcScale int16,
813 dstReg asm.Register,
814 arg byte,
815 ) {
816 n := a.newNode(instruction, operandTypesMemoryToRegister)
817 n.srcReg = srcBaseReg
818 n.srcConst = srcOffsetConst
819 n.srcMemIndex = srcIndex
820 n.srcMemScale = byte(srcScale)
821 n.dstReg = dstReg
822 n.arg = arg
823 }
824
825
826 func (a *AssemblerImpl) CompileRegisterToMemoryWithIndex(
827 instruction asm.Instruction,
828 srcReg, dstBaseReg asm.Register,
829 dstOffsetConst asm.ConstantValue,
830 dstIndex asm.Register,
831 dstScale int16,
832 ) {
833 n := a.newNode(instruction, operandTypesRegisterToMemory)
834 n.srcReg = srcReg
835 n.dstReg = dstBaseReg
836 n.dstConst = dstOffsetConst
837 n.dstMemIndex = dstIndex
838 n.dstMemScale = byte(dstScale)
839 }
840
841
842 func (a *AssemblerImpl) CompileRegisterToMemoryWithIndexAndArg(
843 instruction asm.Instruction,
844 srcReg, dstBaseReg asm.Register,
845 dstOffsetConst asm.ConstantValue,
846 dstIndex asm.Register,
847 dstScale int16,
848 arg byte,
849 ) {
850 n := a.newNode(instruction, operandTypesRegisterToMemory)
851 n.srcReg = srcReg
852 n.dstReg = dstBaseReg
853 n.dstConst = dstOffsetConst
854 n.dstMemIndex = dstIndex
855 n.dstMemScale = byte(dstScale)
856 n.arg = arg
857 }
858
859
860 func (a *AssemblerImpl) CompileRegisterToConst(
861 instruction asm.Instruction,
862 srcRegister asm.Register,
863 value asm.ConstantValue,
864 ) asm.Node {
865 n := a.newNode(instruction, operandTypesRegisterToConst)
866 n.srcReg = srcRegister
867 n.dstConst = value
868 return n
869 }
870
871
872 func (a *AssemblerImpl) CompileRegisterToNone(instruction asm.Instruction, register asm.Register) {
873 n := a.newNode(instruction, operandTypesRegisterToNone)
874 n.srcReg = register
875 }
876
877
878 func (a *AssemblerImpl) CompileNoneToRegister(instruction asm.Instruction, register asm.Register) {
879 n := a.newNode(instruction, operandTypesNoneToRegister)
880 n.dstReg = register
881 }
882
883
884 func (a *AssemblerImpl) CompileNoneToMemory(
885 instruction asm.Instruction,
886 baseReg asm.Register,
887 offset asm.ConstantValue,
888 ) {
889 n := a.newNode(instruction, operandTypesNoneToMemory)
890 n.dstReg = baseReg
891 n.dstConst = offset
892 }
893
894
895 func (a *AssemblerImpl) CompileConstToMemory(
896 instruction asm.Instruction,
897 value asm.ConstantValue,
898 dstbaseReg asm.Register,
899 dstOffset asm.ConstantValue,
900 ) asm.Node {
901 n := a.newNode(instruction, operandTypesConstToMemory)
902 n.srcConst = value
903 n.dstReg = dstbaseReg
904 n.dstConst = dstOffset
905 return n
906 }
907
908
909 func (a *AssemblerImpl) CompileMemoryToConst(
910 instruction asm.Instruction,
911 srcBaseReg asm.Register,
912 srcOffset, value asm.ConstantValue,
913 ) asm.Node {
914 n := a.newNode(instruction, operandTypesMemoryToConst)
915 n.srcReg = srcBaseReg
916 n.srcConst = srcOffset
917 n.dstConst = value
918 return n
919 }
920
921 func errorEncodingUnsupported(n *nodeImpl) error {
922 return fmt.Errorf("%s is unsupported for %s type", InstructionName(n.instruction), n.types)
923 }
924
925 func (a *AssemblerImpl) encodeNoneToNone(buf asm.Buffer, n *nodeImpl) (err error) {
926
927
928
929
930
931
932
933
934
935
936
937
938
939 base := buf.Len()
940 code := buf.Append(4)[:0]
941
942 switch n.instruction {
943 case CDQ:
944
945 code = append(code, 0x99)
946 case CQO:
947
948 code = append(code, rexPrefixW, 0x99)
949 case NOP:
950
951 case RET:
952
953 code = append(code, 0xc3)
954 case UD2:
955
956 code = append(code, 0x0f, 0x0b)
957 case REPMOVSQ:
958 code = append(code, 0xf3, rexPrefixW, 0xa5)
959 case REPSTOSQ:
960 code = append(code, 0xf3, rexPrefixW, 0xab)
961 case STD:
962 code = append(code, 0xfd)
963 case CLD:
964 code = append(code, 0xfc)
965 default:
966 err = errorEncodingUnsupported(n)
967 }
968
969 buf.Truncate(base + len(code))
970 return
971 }
972
973 func (a *AssemblerImpl) encodeNoneToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
974 regBits, prefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldRM)
975
976
977 modRM := 0b11_000_000 |
978 regBits
979 if n.instruction == JMP {
980
981
982 modRM |= 0b00_100_000
983 } else if n.instruction == NEGQ {
984 prefix |= rexPrefixW
985 modRM |= 0b00_011_000
986 } else if n.instruction == INCQ {
987 prefix |= rexPrefixW
988 } else if n.instruction == DECQ {
989 prefix |= rexPrefixW
990 modRM |= 0b00_001_000
991 } else {
992 if RegSP <= n.dstReg && n.dstReg <= RegDI {
993
994
995 prefix |= rexPrefixDefault
996 }
997 }
998
999 base := buf.Len()
1000 code := buf.Append(4)[:0]
1001
1002 if prefix != rexPrefixNone {
1003
1004 code = append(code, prefix)
1005 }
1006
1007 switch n.instruction {
1008 case JMP:
1009
1010 code = append(code, 0xff, modRM)
1011 case SETCC:
1012
1013 code = append(code, 0x0f, 0x93, modRM)
1014 case SETCS:
1015
1016 code = append(code, 0x0f, 0x92, modRM)
1017 case SETEQ:
1018
1019 code = append(code, 0x0f, 0x94, modRM)
1020 case SETGE:
1021
1022 code = append(code, 0x0f, 0x9d, modRM)
1023 case SETGT:
1024
1025 code = append(code, 0x0f, 0x9f, modRM)
1026 case SETHI:
1027
1028 code = append(code, 0x0f, 0x97, modRM)
1029 case SETLE:
1030
1031 code = append(code, 0x0f, 0x9e, modRM)
1032 case SETLS:
1033
1034 code = append(code, 0x0f, 0x96, modRM)
1035 case SETLT:
1036
1037 code = append(code, 0x0f, 0x9c, modRM)
1038 case SETNE:
1039
1040 code = append(code, 0x0f, 0x95, modRM)
1041 case SETPC:
1042
1043 code = append(code, 0x0f, 0x9b, modRM)
1044 case SETPS:
1045
1046 code = append(code, 0x0f, 0x9a, modRM)
1047 case NEGQ:
1048
1049 code = append(code, 0xf7, modRM)
1050 case INCQ:
1051
1052 code = append(code, 0xff, modRM)
1053 case DECQ:
1054
1055 code = append(code, 0xff, modRM)
1056 default:
1057 err = errorEncodingUnsupported(n)
1058 }
1059
1060 buf.Truncate(base + len(code))
1061 return
1062 }
1063
1064 func (a *AssemblerImpl) encodeNoneToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
1065 rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(true)
1066 if err != nil {
1067 return err
1068 }
1069
1070 var opcode byte
1071 switch n.instruction {
1072 case INCQ:
1073
1074 rexPrefix |= rexPrefixW
1075 opcode = 0xff
1076 case DECQ:
1077
1078 rexPrefix |= rexPrefixW
1079 modRM |= 0b00_001_000
1080 opcode = 0xff
1081 case JMP:
1082
1083 modRM |= 0b00_100_000
1084 opcode = 0xff
1085 default:
1086 return errorEncodingUnsupported(n)
1087 }
1088
1089 base := buf.Len()
1090 code := buf.Append(12)[:0]
1091
1092 if rexPrefix != rexPrefixNone {
1093 code = append(code, rexPrefix)
1094 }
1095
1096 code = append(code, opcode, modRM)
1097
1098 if sbiExist {
1099 code = append(code, sbi)
1100 }
1101
1102 if displacementWidth != 0 {
1103 code = appendConst(code, n.dstConst, displacementWidth)
1104 }
1105
1106 buf.Truncate(base + len(code))
1107 return
1108 }
1109
// relativeJumpOpcode holds the opcode bytes of a relative jump in its short form
// (followed by a 1-byte offset) and in its long form (followed by a 4-byte offset).
type relativeJumpOpcode struct{ short, long []byte }

// instructionLen returns the total encoded length in bytes of the jump, opcode plus
// offset, for the short or the long form.
func (o relativeJumpOpcode) instructionLen(short bool) int64 {
	opcode, offsetWidth := o.long, int64(4)
	if short {
		opcode, offsetWidth = o.short, 1
	}
	return int64(len(opcode)) + offsetWidth
}
1119
// relativeJumpOpcodes maps each relative jump instruction to its opcode bytes in short
// form (1-byte offset follows) and long form (4-byte offset follows). The array is
// indexed by instruction; instructions that are not listed have an empty entry.
var relativeJumpOpcodes = [...]relativeJumpOpcode{
	// Conditional jumps: one opcode byte in short form, 0x0f plus one byte in long form.
	JCC: {short: []byte{0x73}, long: []byte{0x0f, 0x83}},
	JCS: {short: []byte{0x72}, long: []byte{0x0f, 0x82}},
	JEQ: {short: []byte{0x74}, long: []byte{0x0f, 0x84}},
	JGE: {short: []byte{0x7d}, long: []byte{0x0f, 0x8d}},
	JGT: {short: []byte{0x7f}, long: []byte{0x0f, 0x8f}},
	JHI: {short: []byte{0x77}, long: []byte{0x0f, 0x87}},
	JLE: {short: []byte{0x7e}, long: []byte{0x0f, 0x8e}},
	JLS: {short: []byte{0x76}, long: []byte{0x0f, 0x86}},
	JLT: {short: []byte{0x7c}, long: []byte{0x0f, 0x8c}},
	JMI: {short: []byte{0x78}, long: []byte{0x0f, 0x88}},
	JPL: {short: []byte{0x79}, long: []byte{0x0f, 0x89}},
	JNE: {short: []byte{0x75}, long: []byte{0x0f, 0x85}},
	JPC: {short: []byte{0x7b}, long: []byte{0x0f, 0x8b}},
	JPS: {short: []byte{0x7a}, long: []byte{0x0f, 0x8a}},
	// Unconditional jump: a single opcode byte in both forms.
	JMP: {short: []byte{0xeb}, long: []byte{0xe9}},
}
1139
1140 func (a *AssemblerImpl) resolveForwardRelativeJumps(buf asm.Buffer, target *nodeImpl) (err error) {
1141 offsetInBinary := int64(target.OffsetInBinary())
1142 origin := target.forwardJumpOrigins
1143 for ; origin != nil; origin = origin.forwardJumpOrigins {
1144 shortJump := origin.isForwardShortJump()
1145 op := relativeJumpOpcodes[origin.instruction]
1146 instructionLen := op.instructionLen(shortJump)
1147
1148
1149
1150 offset := offsetInBinary - (int64(origin.OffsetInBinary()) + instructionLen)
1151 if shortJump {
1152 if offset > math.MaxInt8 {
1153
1154 a.forceReAssemble = true
1155
1156
1157
1158 origin.flag ^= nodeFlagShortForwardJump
1159 } else {
1160 buf.Bytes()[origin.OffsetInBinary()+uint64(instructionLen)-1] = byte(offset)
1161 }
1162 } else {
1163 if offset > math.MaxInt32 {
1164 return fmt.Errorf("too large jump offset %d for encoding %s", offset, InstructionName(origin.instruction))
1165 }
1166 binary.LittleEndian.PutUint32(buf.Bytes()[origin.OffsetInBinary()+uint64(instructionLen)-4:], uint32(offset))
1167 }
1168 }
1169 return nil
1170 }
1171
1172 func (a *AssemblerImpl) encodeRelativeJump(buf asm.Buffer, n *nodeImpl) (err error) {
1173 if n.jumpTarget == nil {
1174 err = fmt.Errorf("jump target must not be nil for relative %s", InstructionName(n.instruction))
1175 return
1176 }
1177
1178 op := relativeJumpOpcodes[n.instruction]
1179 var isShortJump bool
1180
1181
1182 var offsetOfEIP int64 = 0
1183 if n.isBackwardJump() {
1184
1185 offsetOfJumpInstruction := int64(n.jumpTarget.OffsetInBinary()) - int64(n.OffsetInBinary())
1186 isShortJump = offsetOfJumpInstruction-2 >= math.MinInt8
1187 offsetOfEIP = offsetOfJumpInstruction - op.instructionLen(isShortJump)
1188 } else {
1189
1190 isShortJump = n.isForwardShortJump()
1191 }
1192
1193 if offsetOfEIP < math.MinInt32 {
1194 return fmt.Errorf("too large jump offset %d for encoding %s", offsetOfEIP, InstructionName(n.instruction))
1195 }
1196
1197 base := buf.Len()
1198 code := buf.Append(6)[:0]
1199
1200 if isShortJump {
1201 code = append(code, op.short...)
1202 code = append(code, byte(offsetOfEIP))
1203 } else {
1204 code = append(code, op.long...)
1205 code = appendUint32(code, uint32(offsetOfEIP))
1206 }
1207
1208 buf.Truncate(base + len(code))
1209 return
1210 }
1211
1212 func (a *AssemblerImpl) encodeRegisterToNone(buf asm.Buffer, n *nodeImpl) (err error) {
1213 regBits, prefix := register3bits(n.srcReg, registerSpecifierPositionModRMFieldRM)
1214
1215
1216 modRM := 0b11_000_000 |
1217 regBits
1218
1219 var opcode byte
1220 switch n.instruction {
1221 case DIVL:
1222
1223 modRM |= 0b00_110_000
1224 opcode = 0xf7
1225 case DIVQ:
1226
1227 prefix |= rexPrefixW
1228 modRM |= 0b00_110_000
1229 opcode = 0xf7
1230 case IDIVL:
1231
1232 modRM |= 0b00_111_000
1233 opcode = 0xf7
1234 case IDIVQ:
1235
1236 prefix |= rexPrefixW
1237 modRM |= 0b00_111_000
1238 opcode = 0xf7
1239 case MULL:
1240
1241 modRM |= 0b00_100_000
1242 opcode = 0xf7
1243 case MULQ:
1244
1245 prefix |= rexPrefixW
1246 modRM |= 0b00_100_000
1247 opcode = 0xf7
1248 default:
1249 err = errorEncodingUnsupported(n)
1250 }
1251
1252 base := buf.Len()
1253 code := buf.Append(3)[:0]
1254
1255 if prefix != rexPrefixNone {
1256 code = append(code, prefix)
1257 }
1258
1259 code = append(code, opcode, modRM)
1260
1261 buf.Truncate(base + len(code))
1262 return
1263 }
1264
// registerToRegisterOpcode is the encoding table consulted by
// AssemblerImpl.encodeRegisterToRegister for register-to-register
// instructions. It is indexed by instruction; a nil entry means the
// instruction is not handled through this table.
var registerToRegisterOpcode = [instructionEnd]*struct {
	// opcode holds the opcode bytes, emitted after any prefixes and right before the ModRM byte.
	opcode []byte
	// rPrefix is OR'ed into the REX prefix derived from the operands (e.g. rexPrefixW for 64-bit forms).
	rPrefix rexPrefix
	// mandatoryPrefix, when non-zero, is emitted first, ahead of the REX prefix.
	mandatoryPrefix byte
	// srcOnModRMReg is forwarded to nodeImpl.getRegisterToRegisterModRM.
	// NOTE(review): presumably true places the source register in ModRM:reg
	// and the destination in ModRM:r/m — confirm against that helper.
	srcOnModRMReg bool
	// isSrc8bit makes the encoder add rexPrefixDefault when the source
	// register lies in [RegSP, RegDI].
	isSrc8bit bool
	// needArg makes the encoder append nodeImpl.arg as a trailing byte after ModRM.
	needArg bool
}{
	// ADD.
	ADDL: {opcode: []byte{0x1}, srcOnModRMReg: true},
	ADDQ: {opcode: []byte{0x1}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// AND.
	ANDL: {opcode: []byte{0x21}, srcOnModRMReg: true},
	ANDQ: {opcode: []byte{0x21}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// CMP.
	CMPL: {opcode: []byte{0x39}},
	CMPQ: {opcode: []byte{0x39}, rPrefix: rexPrefixW},
	// CMOVcc.
	CMOVQCS: {opcode: []byte{0x0f, 0x42}, rPrefix: rexPrefixW},
	// ADDSD.
	ADDSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x58}},
	// ADDSS.
	ADDSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x58}},
	// ANDPD.
	ANDPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x54}},
	// ANDPS.
	ANDPS: {opcode: []byte{0x0f, 0x54}},
	// BSR.
	BSRL: {opcode: []byte{0xf, 0xbd}},
	BSRQ: {opcode: []byte{0xf, 0xbd}, rPrefix: rexPrefixW},
	// COMISD.
	COMISD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x2f}},
	// COMISS.
	COMISS: {opcode: []byte{0x0f, 0x2f}},
	// CVTSD2SS.
	CVTSD2SS: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5a}},
	// CVTSI2SD, 32-bit source.
	CVTSL2SD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2a}},
	// CVTSI2SD, 64-bit source.
	CVTSQ2SD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2a}, rPrefix: rexPrefixW},
	// CVTSI2SS, 32-bit source.
	CVTSL2SS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2a}},
	// CVTSI2SS, 64-bit source.
	CVTSQ2SS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2a}, rPrefix: rexPrefixW},
	// CVTSS2SD.
	CVTSS2SD: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5a}},
	// CVTTSD2SI.
	CVTTSD2SL: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2c}},
	CVTTSD2SQ: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2c}, rPrefix: rexPrefixW},
	// CVTTSS2SI.
	CVTTSS2SL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2c}},
	CVTTSS2SQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2c}, rPrefix: rexPrefixW},
	// DIVSD.
	DIVSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5e}},
	// DIVSS.
	DIVSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5e}},
	// LZCNT.
	LZCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbd}},
	LZCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbd}, rPrefix: rexPrefixW},
	// MAXSD.
	MAXSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5f}},
	// MAXSS.
	MAXSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5f}},
	// MINSD.
	MINSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5d}},
	// MINSS.
	MINSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5d}},
	// MOVSX, byte source.
	MOVBLSX: {opcode: []byte{0x0f, 0xbe}, isSrc8bit: true},
	// MOVZX, byte source.
	MOVBLZX: {opcode: []byte{0x0f, 0xb6}, isSrc8bit: true},
	// MOVZX, word source.
	// NOTE(review): flagged isSrc8bit although the mnemonic suggests a
	// 16-bit source — confirm this is intended.
	MOVWLZX: {opcode: []byte{0x0f, 0xb7}, isSrc8bit: true},
	// MOVSX, byte source to 64-bit destination.
	MOVBQSX: {opcode: []byte{0x0f, 0xbe}, rPrefix: rexPrefixW, isSrc8bit: true},
	// MOVSXD.
	MOVLQSX: {opcode: []byte{0x63}, rPrefix: rexPrefixW},
	// MOVSX, word source to 64-bit destination.
	MOVWQSX: {opcode: []byte{0x0f, 0xbf}, rPrefix: rexPrefixW},
	// MOVSX, word source to 32-bit destination.
	MOVWLSX: {opcode: []byte{0x0f, 0xbf}},
	// IMUL.
	IMULQ: {opcode: []byte{0x0f, 0xaf}, rPrefix: rexPrefixW},
	// MULSS.
	MULSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x59}},
	// MULSD.
	MULSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x59}},
	// OR.
	ORL: {opcode: []byte{0x09}, srcOnModRMReg: true},
	ORQ: {opcode: []byte{0x09}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// ORPD.
	ORPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x56}},
	// ORPS.
	ORPS: {opcode: []byte{0x0f, 0x56}},
	// POPCNT.
	POPCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xb8}},
	POPCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xb8}, rPrefix: rexPrefixW},
	// ROUNDSS; the trailing byte comes from nodeImpl.arg.
	ROUNDSS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0a}, needArg: true},
	// ROUNDSD; the trailing byte comes from nodeImpl.arg.
	ROUNDSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0b}, needArg: true},
	// SQRTSS.
	SQRTSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x51}},
	// SQRTSD.
	SQRTSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x51}},
	// SUB.
	SUBL: {opcode: []byte{0x29}, srcOnModRMReg: true},
	SUBQ: {opcode: []byte{0x29}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// SUBSS.
	SUBSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5c}},
	// SUBSD.
	SUBSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5c}},
	// TEST.
	TESTL: {opcode: []byte{0x85}, srcOnModRMReg: true},
	TESTQ: {opcode: []byte{0x85}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// TZCNT.
	TZCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbc}},
	TZCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbc}, rPrefix: rexPrefixW},
	// UCOMISD.
	UCOMISD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x2e}},
	// UCOMISS.
	UCOMISS: {opcode: []byte{0x0f, 0x2e}},
	// XCHG.
	XCHGQ: {opcode: []byte{0x87}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// XOR.
	XORL: {opcode: []byte{0x31}, srcOnModRMReg: true},
	XORQ: {opcode: []byte{0x31}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// XORPD / XORPS.
	XORPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x57}},
	XORPS: {opcode: []byte{0x0f, 0x57}},
	// PINSRB / PINSRW / PINSRD / PINSRQ; the trailing byte comes from nodeImpl.arg.
	PINSRB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x20}, needArg: true},
	PINSRW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc4}, needArg: true},
	PINSRD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x22}, needArg: true},
	PINSRQ: {mandatoryPrefix: 0x66, rPrefix: rexPrefixW, opcode: []byte{0x0f, 0x3a, 0x22}, needArg: true},
	// MOVDQU.
	MOVDQU: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x6f}},
	// MOVDQA.
	MOVDQA: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x6f}},
	// PADDB / PADDW / PADDD / PADDQ.
	PADDB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfc}},
	PADDW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfd}},
	PADDD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfe}},
	PADDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd4}},
	// PSUBB / PSUBW / PSUBD.
	PSUBB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf8}},
	PSUBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf9}},
	PSUBD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfa}},
	// PSUBQ.
	PSUBQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfb}},
	// ADDPS.
	ADDPS: {opcode: []byte{0x0f, 0x58}},
	// ADDPD.
	ADDPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x58}},
	// SUBPS.
	SUBPS: {opcode: []byte{0x0f, 0x5c}},
	// SUBPD.
	SUBPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5c}},
	// PXOR.
	PXOR: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xef}},
	// PAND.
	PAND: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdb}},
	// POR.
	POR: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xeb}},
	// PANDN.
	PANDN: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdf}},
	// PSHUFB.
	PSHUFB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x0}},
	// PSHUFD; the trailing byte comes from nodeImpl.arg.
	PSHUFD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x70}, needArg: true},
	// PEXTRB; the trailing byte comes from nodeImpl.arg.
	PEXTRB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x14}, needArg: true, srcOnModRMReg: true},
	// PEXTRW; unlike the other PEXTR* rows this one leaves srcOnModRMReg unset.
	PEXTRW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc5}, needArg: true},
	// PEXTRD.
	PEXTRD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x16}, needArg: true, srcOnModRMReg: true},
	// PEXTRQ.
	PEXTRQ: {rPrefix: rexPrefixW, mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x16}, needArg: true, srcOnModRMReg: true},
	// INSERTPS; the trailing byte comes from nodeImpl.arg.
	INSERTPS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x21}, needArg: true},
	// MOVLHPS.
	MOVLHPS: {opcode: []byte{0x0f, 0x16}},
	// PTEST.
	PTEST: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x17}},
	// PCMPEQB / PCMPEQW / PCMPEQD.
	PCMPEQB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x74}},
	PCMPEQW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x75}},
	PCMPEQD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x76}},
	// PCMPEQQ.
	PCMPEQQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x29}},
	// PADDUSB.
	PADDUSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdc}},
	// MOVSD.
	MOVSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x10}},
	// PACKSSWB.
	PACKSSWB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x63}},
	// PMOVMSKB.
	PMOVMSKB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd7}},
	// MOVMSKPS.
	MOVMSKPS: {opcode: []byte{0x0f, 0x50}},
	// MOVMSKPD.
	MOVMSKPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x50}},
	// PSRAD.
	PSRAD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe2}},
	// PSRAW.
	PSRAW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe1}},
	// PSRLQ.
	PSRLQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd3}},
	// PSRLD.
	PSRLD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd2}},
	// PSRLW.
	PSRLW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd1}},
	// PSLLW.
	PSLLW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf1}},
	// PSLLD.
	PSLLD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf2}},
	// PSLLQ.
	PSLLQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf3}},
	// PUNPCKLBW.
	PUNPCKLBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x60}},
	// PUNPCKHBW.
	PUNPCKHBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x68}},
	// CMPPS; the trailing byte comes from nodeImpl.arg.
	CMPPS: {opcode: []byte{0x0f, 0xc2}, needArg: true},
	// CMPPD; the trailing byte comes from nodeImpl.arg.
	CMPPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc2}, needArg: true},
	// PCMPGTQ.
	PCMPGTQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x37}},
	// PCMPGTD.
	PCMPGTD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x66}},
	// PCMPGTW.
	PCMPGTW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x65}},
	// PCMPGTB.
	PCMPGTB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x64}},
	// PMINSD.
	PMINSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x39}},
	// PMAXSD.
	PMAXSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3d}},
	// PMAXSW.
	PMAXSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xee}},
	// PMAXSB.
	PMAXSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3c}},
	// PMINSW.
	PMINSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xea}},
	// PMINSB.
	PMINSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x38}},
	// PMINUD.
	PMINUD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3b}},
	// PMINUW.
	PMINUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3a}},
	// PMINUB.
	PMINUB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xda}},
	// PMAXUD.
	PMAXUD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3f}},
	// PMAXUW.
	PMAXUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3e}},
	// PMAXUB.
	PMAXUB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xde}},
	// PMULLW.
	PMULLW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd5}},
	// PMULLD.
	PMULLD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x40}},
	// PMULUDQ.
	PMULUDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf4}},
	// PSUBSB.
	PSUBSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe8}},
	// PSUBSW.
	PSUBSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe9}},
	// PSUBUSB.
	PSUBUSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd8}},
	// PSUBUSW.
	PSUBUSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd9}},
	// PADDSW.
	PADDSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xed}},
	// PADDSB.
	PADDSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xec}},
	// PADDUSW.
	PADDUSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdd}},
	// PAVGB.
	PAVGB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe0}},
	// PAVGW.
	PAVGW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe3}},
	// PABSB.
	PABSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x1c}},
	// PABSW.
	PABSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x1d}},
	// PABSD.
	PABSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x1e}},
	// BLENDVPD.
	BLENDVPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x15}},
	// MAXPD.
	MAXPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5f}},
	// MAXPS.
	MAXPS: {opcode: []byte{0x0f, 0x5f}},
	// MINPD.
	MINPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5d}},
	// MINPS.
	MINPS: {opcode: []byte{0x0f, 0x5d}},
	// ANDNPD.
	ANDNPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x55}},
	// ANDNPS.
	ANDNPS: {opcode: []byte{0x0f, 0x55}},
	// MULPS.
	MULPS: {opcode: []byte{0x0f, 0x59}},
	// MULPD.
	MULPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x59}},
	// DIVPS.
	DIVPS: {opcode: []byte{0x0f, 0x5e}},
	// DIVPD.
	DIVPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5e}},
	// SQRTPS.
	SQRTPS: {opcode: []byte{0x0f, 0x51}},
	// SQRTPD.
	SQRTPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x51}},
	// ROUNDPS; the trailing byte comes from nodeImpl.arg.
	ROUNDPS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x08}, needArg: true},
	// ROUNDPD; the trailing byte comes from nodeImpl.arg.
	ROUNDPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x09}, needArg: true},
	// PALIGNR; the trailing byte comes from nodeImpl.arg.
	PALIGNR: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0f}, needArg: true},
	// PUNPCKLWD.
	PUNPCKLWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x61}},
	// PUNPCKHWD.
	PUNPCKHWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x69}},
	// PMULHUW.
	PMULHUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe4}},
	// PMULDQ.
	PMULDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x28}},
	// PMULHRSW.
	PMULHRSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x0b}},
	// PMOVSXBW.
	PMOVSXBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x20}},
	// PMOVSXWD.
	PMOVSXWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x23}},
	// PMOVSXDQ.
	PMOVSXDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x25}},
	// PMOVZXBW.
	PMOVZXBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x30}},
	// PMOVZXWD.
	PMOVZXWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x33}},
	// PMOVZXDQ.
	PMOVZXDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x35}},
	// PMULHW.
	PMULHW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe5}},
	// CMPEQPS shares the CMPPS opcode bytes; the trailing byte comes from nodeImpl.arg.
	CMPEQPS: {opcode: []byte{0x0f, 0xc2}, needArg: true},
	// CMPEQPD shares the CMPPD opcode bytes; the trailing byte comes from nodeImpl.arg.
	CMPEQPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc2}, needArg: true},
	// CVTTPS2DQ.
	CVTTPS2DQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5b}},
	// CVTDQ2PS.
	CVTDQ2PS: {opcode: []byte{0x0f, 0x5b}},
	// CVTDQ2PD.
	CVTDQ2PD: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xe6}},
	// CVTPD2PS.
	CVTPD2PS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5a}},
	// CVTPS2PD.
	CVTPS2PD: {opcode: []byte{0x0f, 0x5a}},
	// MOVUPD.
	MOVUPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x10}},
	// SHUFPS; the trailing byte comes from nodeImpl.arg.
	SHUFPS: {opcode: []byte{0x0f, 0xc6}, needArg: true},
	// PMADDWD.
	PMADDWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf5}},
	// UNPCKLPS.
	UNPCKLPS: {opcode: []byte{0x0f, 0x14}},
	// PACKUSWB.
	PACKUSWB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x67}},
	// PACKSSDW.
	PACKSSDW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x6b}},
	// PACKUSDW.
	PACKUSDW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x2b}},
	// PMADDUBSW.
	PMADDUBSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x04}},
	// CVTTPD2DQ.
	CVTTPD2DQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe6}},
}
1645
1646 var registerToRegisterShiftOpcode = [instructionEnd]*struct {
1647 opcode []byte
1648 rPrefix rexPrefix
1649 modRMExtension byte
1650 }{
1651
1652 ROLL: {opcode: []byte{0xd3}},
1653 ROLQ: {opcode: []byte{0xd3}, rPrefix: rexPrefixW},
1654 RORL: {opcode: []byte{0xd3}, modRMExtension: 0b00_001_000},
1655 RORQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_001_000, rPrefix: rexPrefixW},
1656
1657 SARL: {opcode: []byte{0xd3}, modRMExtension: 0b00_111_000},
1658 SARQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_111_000, rPrefix: rexPrefixW},
1659 SHLL: {opcode: []byte{0xd3}, modRMExtension: 0b00_100_000},
1660 SHLQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_100_000, rPrefix: rexPrefixW},
1661 SHRL: {opcode: []byte{0xd3}, modRMExtension: 0b00_101_000},
1662 SHRQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_101_000, rPrefix: rexPrefixW},
1663 }
1664
1665 func (a *AssemblerImpl) encodeRegisterToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
1666
1667 inst := n.instruction
1668 base := buf.Len()
1669 code := buf.Append(8)[:0]
1670
1671 switch inst {
1672 case MOVL, MOVQ:
1673 var (
1674 opcode []byte
1675 mandatoryPrefix byte
1676 srcOnModRMReg bool
1677 rPrefix rexPrefix
1678 )
1679 srcIsFloat, dstIsFloat := isVectorRegister(n.srcReg), isVectorRegister(n.dstReg)
1680 f2f := srcIsFloat && dstIsFloat
1681 if f2f {
1682
1683 opcode, mandatoryPrefix = []byte{0x0f, 0x7e}, 0xf3
1684 } else if srcIsFloat && !dstIsFloat {
1685
1686 opcode, mandatoryPrefix, srcOnModRMReg = []byte{0x0f, 0x7e}, 0x66, true
1687 } else if !srcIsFloat && dstIsFloat {
1688
1689 opcode, mandatoryPrefix, srcOnModRMReg = []byte{0x0f, 0x6e}, 0x66, false
1690 } else {
1691
1692 opcode, srcOnModRMReg = []byte{0x89}, true
1693 }
1694
1695 rexPrefix, modRM, err := n.getRegisterToRegisterModRM(srcOnModRMReg)
1696 if err != nil {
1697 return err
1698 }
1699 rexPrefix |= rPrefix
1700
1701 if inst == MOVQ && !f2f {
1702 rexPrefix |= rexPrefixW
1703 }
1704 if mandatoryPrefix != 0 {
1705 code = append(code, mandatoryPrefix)
1706 }
1707 if rexPrefix != rexPrefixNone {
1708 code = append(code, rexPrefix)
1709 }
1710 code = append(code, opcode...)
1711 code = append(code, modRM)
1712 buf.Truncate(base + len(code))
1713 return nil
1714 }
1715
1716 if op := registerToRegisterOpcode[inst]; op != nil {
1717 rexPrefix, modRM, err := n.getRegisterToRegisterModRM(op.srcOnModRMReg)
1718 if err != nil {
1719 return err
1720 }
1721 rexPrefix |= op.rPrefix
1722
1723 if op.isSrc8bit && RegSP <= n.srcReg && n.srcReg <= RegDI {
1724
1725
1726 rexPrefix |= rexPrefixDefault
1727 }
1728
1729 if op.mandatoryPrefix != 0 {
1730 code = append(code, op.mandatoryPrefix)
1731 }
1732
1733 if rexPrefix != rexPrefixNone {
1734 code = append(code, rexPrefix)
1735 }
1736 code = append(code, op.opcode...)
1737 code = append(code, modRM)
1738
1739 if op.needArg {
1740 code = append(code, n.arg)
1741 }
1742 } else if op := registerToRegisterShiftOpcode[inst]; op != nil {
1743 reg3bits, rexPrefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldRM)
1744 rexPrefix |= op.rPrefix
1745 if rexPrefix != rexPrefixNone {
1746 code = append(code, rexPrefix)
1747 }
1748
1749
1750 modRM := 0b11_000_000 |
1751 (op.modRMExtension) |
1752 reg3bits
1753 code = append(code, op.opcode...)
1754 code = append(code, modRM)
1755 } else {
1756 return errorEncodingUnsupported(n)
1757 }
1758
1759 buf.Truncate(base + len(code))
1760 return nil
1761 }
1762
1763 func (a *AssemblerImpl) encodeRegisterToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
1764 rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(true)
1765 if err != nil {
1766 return err
1767 }
1768
1769 var opcode []byte
1770 var mandatoryPrefix byte
1771 var isShiftInstruction bool
1772 var needArg bool
1773 switch n.instruction {
1774 case CMPL:
1775
1776 opcode = []byte{0x3b}
1777 case CMPQ:
1778
1779 rexPrefix |= rexPrefixW
1780 opcode = []byte{0x3b}
1781 case MOVB:
1782
1783 opcode = []byte{0x88}
1784
1785 if n.srcReg >= RegSP && n.srcReg <= RegDI {
1786 rexPrefix |= rexPrefixDefault
1787 }
1788 case MOVL:
1789 if isVectorRegister(n.srcReg) {
1790
1791 opcode = []byte{0x0f, 0x7e}
1792 mandatoryPrefix = 0x66
1793 } else {
1794
1795 opcode = []byte{0x89}
1796 }
1797 case MOVQ:
1798 if isVectorRegister(n.srcReg) {
1799
1800 opcode = []byte{0x0f, 0xd6}
1801 mandatoryPrefix = 0x66
1802 } else {
1803
1804 rexPrefix |= rexPrefixW
1805 opcode = []byte{0x89}
1806 }
1807 case MOVW:
1808
1809
1810
1811 mandatoryPrefix = 0x66
1812 opcode = []byte{0x89}
1813 case SARL:
1814
1815 modRM |= 0b00_111_000
1816 opcode = []byte{0xd3}
1817 isShiftInstruction = true
1818 case SARQ:
1819
1820 rexPrefix |= rexPrefixW
1821 modRM |= 0b00_111_000
1822 opcode = []byte{0xd3}
1823 isShiftInstruction = true
1824 case SHLL:
1825
1826 modRM |= 0b00_100_000
1827 opcode = []byte{0xd3}
1828 isShiftInstruction = true
1829 case SHLQ:
1830
1831 rexPrefix |= rexPrefixW
1832 modRM |= 0b00_100_000
1833 opcode = []byte{0xd3}
1834 isShiftInstruction = true
1835 case SHRL:
1836
1837 modRM |= 0b00_101_000
1838 opcode = []byte{0xd3}
1839 isShiftInstruction = true
1840 case SHRQ:
1841
1842 rexPrefix |= rexPrefixW
1843 modRM |= 0b00_101_000
1844 opcode = []byte{0xd3}
1845 isShiftInstruction = true
1846 case ROLL:
1847
1848 opcode = []byte{0xd3}
1849 isShiftInstruction = true
1850 case ROLQ:
1851
1852 rexPrefix |= rexPrefixW
1853 opcode = []byte{0xd3}
1854 isShiftInstruction = true
1855 case RORL:
1856
1857 modRM |= 0b00_001_000
1858 opcode = []byte{0xd3}
1859 isShiftInstruction = true
1860 case RORQ:
1861
1862 rexPrefix |= rexPrefixW
1863 opcode = []byte{0xd3}
1864 modRM |= 0b00_001_000
1865 isShiftInstruction = true
1866 case MOVDQU:
1867
1868 mandatoryPrefix = 0xf3
1869 opcode = []byte{0x0f, 0x7f}
1870 case PEXTRB:
1871 mandatoryPrefix = 0x66
1872 opcode = []byte{0x0f, 0x3a, 0x14}
1873 needArg = true
1874 case PEXTRW:
1875 mandatoryPrefix = 0x66
1876 opcode = []byte{0x0f, 0x3a, 0x15}
1877 needArg = true
1878 case PEXTRD:
1879 mandatoryPrefix = 0x66
1880 opcode = []byte{0x0f, 0x3a, 0x16}
1881 needArg = true
1882 case PEXTRQ:
1883 mandatoryPrefix = 0x66
1884 rexPrefix |= rexPrefixW
1885 opcode = []byte{0x0f, 0x3a, 0x16}
1886 needArg = true
1887 default:
1888 return errorEncodingUnsupported(n)
1889 }
1890
1891 if !isShiftInstruction {
1892 srcReg3Bits, prefix := register3bits(n.srcReg, registerSpecifierPositionModRMFieldReg)
1893
1894 rexPrefix |= prefix
1895 modRM |= srcReg3Bits << 3
1896 } else {
1897 if n.srcReg != RegCX {
1898 return fmt.Errorf("shifting instruction %s require CX register as src but got %s", InstructionName(n.instruction), RegisterName(n.srcReg))
1899 }
1900 }
1901
1902 base := buf.Len()
1903 code := buf.Append(16)[:0]
1904
1905 if mandatoryPrefix != 0 {
1906
1907 code = append(code, mandatoryPrefix)
1908 }
1909
1910 if rexPrefix != rexPrefixNone {
1911 code = append(code, rexPrefix)
1912 }
1913
1914 code = append(code, opcode...)
1915 code = append(code, modRM)
1916
1917 if sbiExist {
1918 code = append(code, sbi)
1919 }
1920
1921 if displacementWidth != 0 {
1922 code = appendConst(code, n.dstConst, displacementWidth)
1923 }
1924
1925 if needArg {
1926 code = append(code, n.arg)
1927 }
1928
1929 buf.Truncate(base + len(code))
1930 return
1931 }
1932
1933 func (a *AssemblerImpl) encodeRegisterToConst(buf asm.Buffer, n *nodeImpl) (err error) {
1934 regBits, prefix := register3bits(n.srcReg, registerSpecifierPositionModRMFieldRM)
1935
1936 base := buf.Len()
1937 code := buf.Append(10)[:0]
1938
1939 switch n.instruction {
1940 case CMPL, CMPQ:
1941 if n.instruction == CMPQ {
1942 prefix |= rexPrefixW
1943 }
1944 if prefix != rexPrefixNone {
1945 code = append(code, prefix)
1946 }
1947 is8bitConst := fitInSigned8bit(n.dstConst)
1948
1949 if n.srcReg == RegAX && !is8bitConst {
1950 code = append(code, 0x3d)
1951 } else {
1952
1953 modRM := 0b11_000_000 |
1954 0b00_111_000 |
1955 regBits
1956 if is8bitConst {
1957 code = append(code, 0x83, modRM)
1958 } else {
1959 code = append(code, 0x81, modRM)
1960 }
1961 }
1962 default:
1963 err = errorEncodingUnsupported(n)
1964 }
1965
1966 if fitInSigned8bit(n.dstConst) {
1967 code = append(code, byte(n.dstConst))
1968 } else {
1969 code = appendUint32(code, uint32(n.dstConst))
1970 }
1971
1972 buf.Truncate(base + len(code))
1973 return
1974 }
1975
1976 func (a *AssemblerImpl) finalizeReadInstructionAddressNode(code []byte, n *nodeImpl) (err error) {
1977
1978 targetNode := n
1979 for ; targetNode != nil; targetNode = targetNode.next {
1980 if targetNode.instruction == n.readInstructionAddressBeforeTargetInstruction {
1981 targetNode = targetNode.next
1982 break
1983 }
1984 }
1985
1986 if targetNode == nil {
1987 return errors.New("BUG: target instruction not found for read instruction address")
1988 }
1989
1990 offset := targetNode.OffsetInBinary() - (n.OffsetInBinary() + 7 )
1991 if offset >= math.MaxInt32 {
1992 return errors.New("BUG: too large offset for LEAQ instruction")
1993 }
1994
1995 binary.LittleEndian.PutUint32(code[n.OffsetInBinary()+3:], uint32(int32(offset)))
1996 return nil
1997 }
1998
1999 func (a *AssemblerImpl) encodeReadInstructionAddress(buf asm.Buffer, n *nodeImpl) error {
2000 dstReg3Bits, rexPrefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldReg)
2001
2002 a.readInstructionAddressNodes = append(a.readInstructionAddressNodes, n)
2003
2004
2005 opcode := byte(0x8d)
2006 rexPrefix |= rexPrefixW
2007
2008
2009 modRM := 0b00_000_101 |
2010 (dstReg3Bits << 3)
2011
2012 code := buf.Append(7)
2013 code[0] = rexPrefix
2014 code[1] = opcode
2015 code[2] = modRM
2016 binary.LittleEndian.PutUint32(code[3:], 0)
2017 return nil
2018 }
2019
2020 func (a *AssemblerImpl) encodeMemoryToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
2021 if n.instruction == LEAQ && n.readInstructionAddressBeforeTargetInstruction != NONE {
2022 return a.encodeReadInstructionAddress(buf, n)
2023 }
2024
2025 rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(false)
2026 if err != nil {
2027 return err
2028 }
2029
2030 dstReg3Bits, prefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldReg)
2031 rexPrefix |= prefix
2032 modRM |= dstReg3Bits << 3
2033
2034 var mandatoryPrefix byte
2035 var opcode []byte
2036 var needArg bool
2037
2038 switch n.instruction {
2039 case ADDL:
2040
2041 opcode = []byte{0x03}
2042 case ADDQ:
2043
2044 rexPrefix |= rexPrefixW
2045 opcode = []byte{0x03}
2046 case CMPL:
2047
2048 opcode = []byte{0x39}
2049 case CMPQ:
2050
2051 rexPrefix |= rexPrefixW
2052 opcode = []byte{0x39}
2053 case LEAQ:
2054
2055 rexPrefix |= rexPrefixW
2056 opcode = []byte{0x8d}
2057 case MOVBLSX:
2058
2059 opcode = []byte{0x0f, 0xbe}
2060 case MOVBLZX:
2061
2062 opcode = []byte{0x0f, 0xb6}
2063 case MOVBQSX:
2064
2065 rexPrefix |= rexPrefixW
2066 opcode = []byte{0x0f, 0xbe}
2067 case MOVBQZX:
2068
2069 rexPrefix |= rexPrefixW
2070 opcode = []byte{0x0f, 0xb6}
2071 case MOVLQSX:
2072
2073 rexPrefix |= rexPrefixW
2074 opcode = []byte{0x63}
2075 case MOVLQZX:
2076
2077
2078
2079 opcode = []byte{0x8B}
2080 case MOVL:
2081
2082
2083
2084 if isVectorRegister(n.dstReg) {
2085
2086 opcode = []byte{0x0f, 0x6e}
2087 mandatoryPrefix = 0x66
2088 } else {
2089
2090 opcode = []byte{0x8B}
2091 }
2092 case MOVQ:
2093 if isVectorRegister(n.dstReg) {
2094
2095 opcode = []byte{0x0f, 0x7e}
2096 mandatoryPrefix = 0xf3
2097 } else {
2098
2099 rexPrefix |= rexPrefixW
2100 opcode = []byte{0x8B}
2101 }
2102 case MOVWLSX:
2103
2104 opcode = []byte{0x0f, 0xbf}
2105 case MOVWLZX:
2106
2107 opcode = []byte{0x0f, 0xb7}
2108 case MOVWQSX:
2109
2110 rexPrefix |= rexPrefixW
2111 opcode = []byte{0x0f, 0xbf}
2112 case MOVWQZX:
2113
2114 rexPrefix |= rexPrefixW
2115 opcode = []byte{0x0f, 0xb7}
2116 case SUBQ:
2117
2118 rexPrefix |= rexPrefixW
2119 opcode = []byte{0x2b}
2120 case SUBSD:
2121
2122 opcode = []byte{0x0f, 0x5c}
2123 mandatoryPrefix = 0xf2
2124 case SUBSS:
2125
2126 opcode = []byte{0x0f, 0x5c}
2127 mandatoryPrefix = 0xf3
2128 case UCOMISD:
2129
2130 opcode = []byte{0x0f, 0x2e}
2131 mandatoryPrefix = 0x66
2132 case UCOMISS:
2133
2134 opcode = []byte{0x0f, 0x2e}
2135 case MOVDQU:
2136
2137 mandatoryPrefix = 0xf3
2138 opcode = []byte{0x0f, 0x6f}
2139 case PMOVSXBW:
2140 mandatoryPrefix = 0x66
2141 opcode = []byte{0x0f, 0x38, 0x20}
2142 case PMOVSXWD:
2143 mandatoryPrefix = 0x66
2144 opcode = []byte{0x0f, 0x38, 0x23}
2145 case PMOVSXDQ:
2146 mandatoryPrefix = 0x66
2147 opcode = []byte{0x0f, 0x38, 0x25}
2148 case PMOVZXBW:
2149 mandatoryPrefix = 0x66
2150 opcode = []byte{0x0f, 0x38, 0x30}
2151 case PMOVZXWD:
2152 mandatoryPrefix = 0x66
2153 opcode = []byte{0x0f, 0x38, 0x33}
2154 case PMOVZXDQ:
2155 mandatoryPrefix = 0x66
2156 opcode = []byte{0x0f, 0x38, 0x35}
2157 case PINSRB:
2158 mandatoryPrefix = 0x66
2159 opcode = []byte{0x0f, 0x3a, 0x20}
2160 needArg = true
2161 case PINSRW:
2162 mandatoryPrefix = 0x66
2163 opcode = []byte{0x0f, 0xc4}
2164 needArg = true
2165 case PINSRD:
2166 mandatoryPrefix = 0x66
2167 opcode = []byte{0x0f, 0x3a, 0x22}
2168 needArg = true
2169 case PINSRQ:
2170 rexPrefix |= rexPrefixW
2171 mandatoryPrefix = 0x66
2172 opcode = []byte{0x0f, 0x3a, 0x22}
2173 needArg = true
2174 default:
2175 return errorEncodingUnsupported(n)
2176 }
2177
2178 base := buf.Len()
2179 code := buf.Append(16)[:0]
2180
2181 if mandatoryPrefix != 0 {
2182
2183 code = append(code, mandatoryPrefix)
2184 }
2185
2186 if rexPrefix != rexPrefixNone {
2187 code = append(code, rexPrefix)
2188 }
2189
2190 code = append(code, opcode...)
2191 code = append(code, modRM)
2192
2193 if sbiExist {
2194 code = append(code, sbi)
2195 }
2196
2197 if displacementWidth != 0 {
2198 code = appendConst(code, n.srcConst, displacementWidth)
2199 }
2200
2201 if needArg {
2202 code = append(code, n.arg)
2203 }
2204
2205 buf.Truncate(base + len(code))
2206 return
2207 }
2208
2209 func (a *AssemblerImpl) encodeConstToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
2210 regBits, rexPrefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldRM)
2211
2212 isFloatReg := isVectorRegister(n.dstReg)
2213 switch n.instruction {
2214 case PSLLD, PSLLQ, PSRLD, PSRLQ, PSRAW, PSRLW, PSLLW, PSRAD:
2215 if !isFloatReg {
2216 return fmt.Errorf("%s needs float register but got %s", InstructionName(n.instruction), RegisterName(n.dstReg))
2217 }
2218 default:
2219 if isFloatReg {
2220 return fmt.Errorf("%s needs int register but got %s", InstructionName(n.instruction), RegisterName(n.dstReg))
2221 }
2222 }
2223
2224 if n.instruction != MOVQ && !fitIn32bit(n.srcConst) {
2225 return fmt.Errorf("constant must fit in 32-bit integer for %s, but got %d", InstructionName(n.instruction), n.srcConst)
2226 } else if (n.instruction == SHLQ || n.instruction == SHRQ) && (n.srcConst < 0 || n.srcConst > math.MaxUint8) {
2227 return fmt.Errorf("constant must fit in positive 8-bit integer for %s, but got %d", InstructionName(n.instruction), n.srcConst)
2228 } else if (n.instruction == PSLLD ||
2229 n.instruction == PSLLQ ||
2230 n.instruction == PSRLD ||
2231 n.instruction == PSRLQ) && (n.srcConst < math.MinInt8 || n.srcConst > math.MaxInt8) {
2232 return fmt.Errorf("constant must fit in signed 8-bit integer for %s, but got %d", InstructionName(n.instruction), n.srcConst)
2233 }
2234
2235 base := buf.Len()
2236 code := buf.Append(32)[:0]
2237
2238 isSigned8bitConst := fitInSigned8bit(n.srcConst)
2239 switch inst := n.instruction; inst {
2240 case ADDQ:
2241
2242 rexPrefix |= rexPrefixW
2243 if n.dstReg == RegAX && !isSigned8bitConst {
2244 code = append(code, rexPrefix, 0x05)
2245 } else {
2246 modRM := 0b11_000_000 |
2247 regBits
2248 if isSigned8bitConst {
2249 code = append(code, rexPrefix, 0x83, modRM)
2250 } else {
2251 code = append(code, rexPrefix, 0x81, modRM)
2252 }
2253 }
2254 if isSigned8bitConst {
2255 code = append(code, byte(n.srcConst))
2256 } else {
2257 code = appendUint32(code, uint32(n.srcConst))
2258 }
2259 case ANDQ:
2260
2261 rexPrefix |= rexPrefixW
2262 if n.dstReg == RegAX && !isSigned8bitConst {
2263 code = append(code, rexPrefix, 0x25)
2264 } else {
2265 modRM := 0b11_000_000 |
2266 0b00_100_000 |
2267 regBits
2268 if isSigned8bitConst {
2269 code = append(code, rexPrefix, 0x83, modRM)
2270 } else {
2271 code = append(code, rexPrefix, 0x81, modRM)
2272 }
2273 }
2274 if fitInSigned8bit(n.srcConst) {
2275 code = append(code, byte(n.srcConst))
2276 } else {
2277 code = appendUint32(code, uint32(n.srcConst))
2278 }
2279 case TESTQ:
2280
2281 rexPrefix |= rexPrefixW
2282 if n.dstReg == RegAX && !isSigned8bitConst {
2283 code = append(code, rexPrefix, 0xa9)
2284 } else {
2285 modRM := 0b11_000_000 |
2286 regBits
2287 code = append(code, rexPrefix, 0xf7, modRM)
2288 }
2289 code = appendUint32(code, uint32(n.srcConst))
2290 case MOVL:
2291
2292 if rexPrefix != rexPrefixNone {
2293 code = append(code, rexPrefix)
2294 }
2295 code = append(code, 0xb8|regBits)
2296 code = appendUint32(code, uint32(n.srcConst))
2297 case MOVQ:
2298
2299 if fitIn32bit(n.srcConst) {
2300 if n.srcConst > math.MaxInt32 {
2301 if rexPrefix != rexPrefixNone {
2302 code = append(code, rexPrefix)
2303 }
2304 code = append(code, 0xb8|regBits)
2305 } else {
2306 rexPrefix |= rexPrefixW
2307 modRM := 0b11_000_000 |
2308 regBits
2309 code = append(code, rexPrefix, 0xc7, modRM)
2310 }
2311 code = appendUint32(code, uint32(n.srcConst))
2312 } else {
2313 rexPrefix |= rexPrefixW
2314 code = append(code, rexPrefix, 0xb8|regBits)
2315 code = appendUint64(code, uint64(n.srcConst))
2316 }
2317 case SHLQ:
2318
2319 rexPrefix |= rexPrefixW
2320 modRM := 0b11_000_000 |
2321 0b00_100_000 |
2322 regBits
2323 if n.srcConst == 1 {
2324 code = append(code, rexPrefix, 0xd1, modRM)
2325 } else {
2326 code = append(code, rexPrefix, 0xc1, modRM, byte(n.srcConst))
2327 }
2328 case SHRQ:
2329
2330 rexPrefix |= rexPrefixW
2331 modRM := 0b11_000_000 |
2332 0b00_101_000 |
2333 regBits
2334 if n.srcConst == 1 {
2335 code = append(code, rexPrefix, 0xd1, modRM)
2336 } else {
2337 code = append(code, rexPrefix, 0xc1, modRM, byte(n.srcConst))
2338 }
2339 case PSLLD:
2340
2341 modRM := 0b11_000_000 |
2342 0b00_110_000 |
2343 regBits
2344 if rexPrefix != rexPrefixNone {
2345 code = append(code, 0x66, rexPrefix, 0x0f, 0x72, modRM, byte(n.srcConst))
2346 } else {
2347 code = append(code, 0x66, 0x0f, 0x72, modRM, byte(n.srcConst))
2348 }
2349 case PSLLQ:
2350
2351 modRM := 0b11_000_000 |
2352 0b00_110_000 |
2353 regBits
2354 if rexPrefix != rexPrefixNone {
2355 code = append(code, 0x66, rexPrefix, 0x0f, 0x73, modRM, byte(n.srcConst))
2356 } else {
2357 code = append(code, 0x66, 0x0f, 0x73, modRM, byte(n.srcConst))
2358 }
2359 case PSRLD:
2360
2361
2362 modRM := 0b11_000_000 |
2363 0b00_010_000 |
2364 regBits
2365 if rexPrefix != rexPrefixNone {
2366 code = append(code, 0x66, rexPrefix, 0x0f, 0x72, modRM, byte(n.srcConst))
2367 } else {
2368 code = append(code, 0x66, 0x0f, 0x72, modRM, byte(n.srcConst))
2369 }
2370 case PSRLQ:
2371
2372 modRM := 0b11_000_000 |
2373 0b00_010_000 |
2374 regBits
2375 if rexPrefix != rexPrefixNone {
2376 code = append(code, 0x66, rexPrefix, 0x0f, 0x73, modRM, byte(n.srcConst))
2377 } else {
2378 code = append(code, 0x66, 0x0f, 0x73, modRM, byte(n.srcConst))
2379 }
2380 case PSRAW, PSRAD:
2381
2382 modRM := 0b11_000_000 |
2383 0b00_100_000 |
2384 regBits
2385 code = append(code, 0x66)
2386 if rexPrefix != rexPrefixNone {
2387 code = append(code, rexPrefix)
2388 }
2389
2390 var op byte
2391 if inst == PSRAD {
2392 op = 0x72
2393 } else {
2394 op = 0x71
2395 }
2396
2397 code = append(code, 0x0f, op, modRM, byte(n.srcConst))
2398 case PSRLW:
2399
2400 modRM := 0b11_000_000 |
2401 0b00_010_000 |
2402 regBits
2403 code = append(code, 0x66)
2404 if rexPrefix != rexPrefixNone {
2405 code = append(code, rexPrefix)
2406 }
2407 code = append(code, 0x0f, 0x71, modRM, byte(n.srcConst))
2408 case PSLLW:
2409
2410 modRM := 0b11_000_000 |
2411 0b00_110_000 |
2412 regBits
2413 code = append(code, 0x66)
2414 if rexPrefix != rexPrefixNone {
2415 code = append(code, rexPrefix)
2416 }
2417 code = append(code, 0x0f, 0x71, modRM, byte(n.srcConst))
2418 case XORL, XORQ:
2419
2420 if inst == XORQ {
2421 rexPrefix |= rexPrefixW
2422 }
2423 if rexPrefix != rexPrefixNone {
2424 code = append(code, rexPrefix)
2425 }
2426 if n.dstReg == RegAX && !isSigned8bitConst {
2427 code = append(code, 0x35)
2428 } else {
2429 modRM := 0b11_000_000 |
2430 0b00_110_000 |
2431 regBits
2432 if isSigned8bitConst {
2433 code = append(code, 0x83, modRM)
2434 } else {
2435 code = append(code, 0x81, modRM)
2436 }
2437 }
2438 if fitInSigned8bit(n.srcConst) {
2439 code = append(code, byte(n.srcConst))
2440 } else {
2441 code = appendUint32(code, uint32(n.srcConst))
2442 }
2443 default:
2444 err = errorEncodingUnsupported(n)
2445 }
2446
2447 buf.Truncate(base + len(code))
2448 return
2449 }
2450
2451 func (a *AssemblerImpl) encodeMemoryToConst(buf asm.Buffer, n *nodeImpl) (err error) {
2452 if !fitIn32bit(n.dstConst) {
2453 return fmt.Errorf("too large target const %d for %s", n.dstConst, InstructionName(n.instruction))
2454 }
2455
2456 rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(false)
2457 if err != nil {
2458 return err
2459 }
2460
2461
2462 c := n.dstConst
2463
2464 var opcode, constWidth byte
2465 switch n.instruction {
2466 case CMPL:
2467
2468 if fitInSigned8bit(c) {
2469 opcode = 0x83
2470 constWidth = 8
2471 } else {
2472 opcode = 0x81
2473 constWidth = 32
2474 }
2475 modRM |= 0b00_111_000
2476 default:
2477 return errorEncodingUnsupported(n)
2478 }
2479
2480 base := buf.Len()
2481 code := buf.Append(20)[:0]
2482
2483 if rexPrefix != rexPrefixNone {
2484 code = append(code, rexPrefix)
2485 }
2486
2487 code = append(code, opcode, modRM)
2488
2489 if sbiExist {
2490 code = append(code, sbi)
2491 }
2492
2493 if displacementWidth != 0 {
2494 code = appendConst(code, n.srcConst, displacementWidth)
2495 }
2496
2497 code = appendConst(code, c, constWidth)
2498 buf.Truncate(base + len(code))
2499 return
2500 }
2501
2502 func (a *AssemblerImpl) encodeConstToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
2503 rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(true)
2504 if err != nil {
2505 return err
2506 }
2507
2508
2509 inst := n.instruction
2510 c := n.srcConst
2511
2512 if inst == MOVB && !fitInSigned8bit(c) {
2513 return fmt.Errorf("too large load target const %d for MOVB", c)
2514 } else if !fitIn32bit(c) {
2515 return fmt.Errorf("too large load target const %d for %s", c, InstructionName(n.instruction))
2516 }
2517
2518 var constWidth, opcode byte
2519 switch inst {
2520 case MOVB:
2521 opcode = 0xc6
2522 constWidth = 8
2523 case MOVL:
2524 opcode = 0xc7
2525 constWidth = 32
2526 case MOVQ:
2527 rexPrefix |= rexPrefixW
2528 opcode = 0xc7
2529 constWidth = 32
2530 default:
2531 return errorEncodingUnsupported(n)
2532 }
2533
2534 base := buf.Len()
2535 code := buf.Append(20)[:0]
2536
2537 if rexPrefix != rexPrefixNone {
2538 code = append(code, rexPrefix)
2539 }
2540
2541 code = append(code, opcode, modRM)
2542
2543 if sbiExist {
2544 code = append(code, sbi)
2545 }
2546
2547 if displacementWidth != 0 {
2548 code = appendConst(code, n.dstConst, displacementWidth)
2549 }
2550
2551 code = appendConst(code, c, constWidth)
2552
2553 buf.Truncate(base + len(code))
2554 return
2555 }
2556
// appendUint32 appends v to code as four bytes in little-endian order and
// returns the extended slice.
func appendUint32(code []byte, v uint32) []byte {
	var le [4]byte
	binary.LittleEndian.PutUint32(le[:], v)
	return append(code, le[0], le[1], le[2], le[3])
}
2562
// appendUint64 appends v to code as eight bytes in little-endian order and
// returns the extended slice.
func appendUint64(code []byte, v uint64) []byte {
	var le [8]byte
	binary.LittleEndian.PutUint64(le[:], v)
	return append(code, le[0], le[1], le[2], le[3], le[4], le[5], le[6], le[7])
}
2568
2569 func appendConst(code []byte, v int64, length byte) []byte {
2570 switch length {
2571 case 8:
2572 return append(code, byte(v))
2573 case 32:
2574 return appendUint32(code, uint32(v))
2575 default:
2576 return appendUint64(code, uint64(v))
2577 }
2578 }
2579
2580 func (n *nodeImpl) getMemoryLocation(dstMem bool) (p rexPrefix, modRM byte, sbi byte, sbiExist bool, displacementWidth byte, err error) {
2581 var baseReg, indexReg asm.Register
2582 var offset asm.ConstantValue
2583 var scale byte
2584 if dstMem {
2585 baseReg, offset, indexReg, scale = n.dstReg, n.dstConst, n.dstMemIndex, n.dstMemScale
2586 } else {
2587 baseReg, offset, indexReg, scale = n.srcReg, n.srcConst, n.srcMemIndex, n.srcMemScale
2588 }
2589
2590 if !fitIn32bit(offset) {
2591 err = errors.New("offset does not fit in 32-bit integer")
2592 return
2593 }
2594
2595 if baseReg == asm.NilRegister && indexReg != asm.NilRegister {
2596
2597 err = errors.New("addressing without base register but with index is not implemented")
2598 } else if baseReg == asm.NilRegister {
2599 modRM = 0b00_000_100
2600 sbi, sbiExist = byte(0b00_100_101), true
2601 displacementWidth = 32
2602 } else if indexReg == asm.NilRegister {
2603 modRM, p = register3bits(baseReg, registerSpecifierPositionModRMFieldRM)
2604
2605
2606
2607 withoutDisplacement := offset == 0 &&
2608
2609
2610
2611 baseReg != RegR13 && baseReg != RegBP
2612 if withoutDisplacement {
2613
2614 modRM |= 0b00_000_000
2615 displacementWidth = 0
2616 } else if fitInSigned8bit(offset) {
2617
2618 modRM |= 0b01_000_000
2619 displacementWidth = 8
2620 } else {
2621
2622 modRM |= 0b10_000_000
2623 displacementWidth = 32
2624 }
2625
2626
2627
2628
2629
2630
2631 if baseReg == RegSP || baseReg == RegR12 {
2632 sbi, sbiExist = byte(0b00_100_100), true
2633 }
2634 } else {
2635 if indexReg == RegSP {
2636 err = errors.New("SP cannot be used for SIB index")
2637 return
2638 }
2639
2640 modRM = 0b00_000_100
2641
2642 withoutDisplacement := offset == 0 &&
2643
2644 baseReg != RegR13 && baseReg != RegBP
2645 if withoutDisplacement {
2646
2647 modRM |= 0b00_000_000
2648 displacementWidth = 0
2649 } else if fitInSigned8bit(offset) {
2650
2651 modRM |= 0b01_000_000
2652 displacementWidth = 8
2653 } else {
2654
2655 modRM |= 0b10_000_000
2656 displacementWidth = 32
2657 }
2658
2659 var baseRegBits byte
2660 baseRegBits, p = register3bits(baseReg, registerSpecifierPositionModRMFieldRM)
2661
2662 var indexRegBits byte
2663 var indexRegPrefix rexPrefix
2664 indexRegBits, indexRegPrefix = register3bits(indexReg, registerSpecifierPositionSIBIndex)
2665 p |= indexRegPrefix
2666
2667 sbi, sbiExist = baseRegBits|(indexRegBits<<3), true
2668 switch scale {
2669 case 1:
2670 sbi |= 0b00_000_000
2671 case 2:
2672 sbi |= 0b01_000_000
2673 case 4:
2674 sbi |= 0b10_000_000
2675 case 8:
2676 sbi |= 0b11_000_000
2677 default:
2678 err = fmt.Errorf("scale in SIB must be one of 1, 2, 4, 8 but got %d", scale)
2679 return
2680 }
2681
2682 }
2683 return
2684 }
2685
2686
2687
2688
2689
2690 func (n *nodeImpl) getRegisterToRegisterModRM(srcOnModRMReg bool) (rexPrefix, modRM byte, err error) {
2691 var reg3bits, rm3bits byte
2692 if srcOnModRMReg {
2693 reg3bits, rexPrefix = register3bits(n.srcReg,
2694
2695 registerSpecifierPositionModRMFieldReg)
2696
2697 var dstRexPrefix byte
2698 rm3bits, dstRexPrefix = register3bits(n.dstReg,
2699
2700 registerSpecifierPositionModRMFieldRM)
2701 rexPrefix |= dstRexPrefix
2702 } else {
2703 rm3bits, rexPrefix = register3bits(n.srcReg,
2704
2705 registerSpecifierPositionModRMFieldRM)
2706
2707 var dstRexPrefix byte
2708 reg3bits, dstRexPrefix = register3bits(n.dstReg,
2709
2710 registerSpecifierPositionModRMFieldReg)
2711 rexPrefix |= dstRexPrefix
2712 }
2713
2714
2715 modRM = 0b11_000_000 |
2716 (reg3bits << 3) |
2717 rm3bits
2718
2719 return
2720 }
2721
2722
// rexPrefix is the one-byte REX prefix of an x86-64 instruction, or
// rexPrefixNone when no prefix is emitted.
type rexPrefix = byte

// REX prefix values. Each flag already includes the fixed 0100 upper nibble,
// so flags can be OR-ed together and compared against rexPrefixNone.
const (
	// rexPrefixNone means that no REX prefix byte is needed.
	rexPrefixNone rexPrefix = 0
	// rexPrefixDefault is the REX prefix with no flag bits set.
	rexPrefixDefault rexPrefix = 0x40
	// rexPrefixW is set by the 64-bit (Q-suffixed) encodings in this file.
	rexPrefixW = rexPrefixDefault | 1<<3
	// rexPrefixR extends the ModR/M reg field.
	rexPrefixR = rexPrefixDefault | 1<<2
	// rexPrefixX extends the SIB index field.
	rexPrefixX = rexPrefixDefault | 1<<1
	// rexPrefixB extends the ModR/M r/m field.
	rexPrefixB = rexPrefixDefault | 1<<0
)
2734
2735
// registerSpecifierPosition identifies the instruction field in which a
// register is encoded. register3bits uses it to choose the matching REX bit.
type registerSpecifierPosition byte

const (
	// registerSpecifierPositionModRMFieldReg is the reg field of ModR/M.
	registerSpecifierPositionModRMFieldReg = registerSpecifierPosition(iota)
	// registerSpecifierPositionModRMFieldRM is the r/m field of ModR/M.
	registerSpecifierPositionModRMFieldRM
	// registerSpecifierPositionSIBIndex is the index field of the SIB byte.
	registerSpecifierPositionSIBIndex
)
2743
2744 var regInfo = [...]struct {
2745 bits byte
2746 needRex bool
2747 }{
2748 RegAX: {bits: 0b000},
2749 RegCX: {bits: 0b001},
2750 RegDX: {bits: 0b010},
2751 RegBX: {bits: 0b011},
2752 RegSP: {bits: 0b100},
2753 RegBP: {bits: 0b101},
2754 RegSI: {bits: 0b110},
2755 RegDI: {bits: 0b111},
2756 RegR8: {bits: 0b000, needRex: true},
2757 RegR9: {bits: 0b001, needRex: true},
2758 RegR10: {bits: 0b010, needRex: true},
2759 RegR11: {bits: 0b011, needRex: true},
2760 RegR12: {bits: 0b100, needRex: true},
2761 RegR13: {bits: 0b101, needRex: true},
2762 RegR14: {bits: 0b110, needRex: true},
2763 RegR15: {bits: 0b111, needRex: true},
2764 RegX0: {bits: 0b000},
2765 RegX1: {bits: 0b001},
2766 RegX2: {bits: 0b010},
2767 RegX3: {bits: 0b011},
2768 RegX4: {bits: 0b100},
2769 RegX5: {bits: 0b101},
2770 RegX6: {bits: 0b110},
2771 RegX7: {bits: 0b111},
2772 RegX8: {bits: 0b000, needRex: true},
2773 RegX9: {bits: 0b001, needRex: true},
2774 RegX10: {bits: 0b010, needRex: true},
2775 RegX11: {bits: 0b011, needRex: true},
2776 RegX12: {bits: 0b100, needRex: true},
2777 RegX13: {bits: 0b101, needRex: true},
2778 RegX14: {bits: 0b110, needRex: true},
2779 RegX15: {bits: 0b111, needRex: true},
2780 }
2781
2782 func register3bits(
2783 reg asm.Register,
2784 registerSpecifierPosition registerSpecifierPosition,
2785 ) (bits byte, prefix rexPrefix) {
2786 info := regInfo[reg]
2787 bits = info.bits
2788 if info.needRex {
2789
2790 switch registerSpecifierPosition {
2791 case registerSpecifierPositionModRMFieldReg:
2792 prefix = rexPrefixR
2793 case registerSpecifierPositionModRMFieldRM:
2794 prefix = rexPrefixB
2795 case registerSpecifierPositionSIBIndex:
2796 prefix = rexPrefixX
2797 }
2798 }
2799 return
2800 }
2801
// fitIn32bit reports whether v can be written as a 32-bit value, either
// signed or unsigned: math.MinInt32 <= v <= math.MaxUint32.
func fitIn32bit(v int64) bool {
	if v < math.MinInt32 {
		return false
	}
	return v <= math.MaxUint32
}
2805
// fitInSigned8bit reports whether v is representable as a signed 8-bit
// integer: math.MinInt8 <= v <= math.MaxInt8.
func fitInSigned8bit(v int64) bool {
	outOfRange := v < math.MinInt8 || v > math.MaxInt8
	return !outOfRange
}
2809
2810 func isVectorRegister(r asm.Register) bool {
2811 return RegX0 <= r && r <= RegX15
2812 }
2813
View as plain text