// This file implements the compiler for arm64 target. // Please refer to https://developer.arm.com/documentation/102374/latest/ // if unfamiliar with arm64 instructions and semantics. package compiler import ( "bytes" "errors" "fmt" "math" "github.com/tetratelabs/wazero/internal/asm" "github.com/tetratelabs/wazero/internal/asm/arm64" "github.com/tetratelabs/wazero/internal/wasm" "github.com/tetratelabs/wazero/internal/wazeroir" ) type arm64Compiler struct { assembler arm64.Assembler ir *wazeroir.CompilationResult // locationStack holds the state of wazeroir virtual stack. // and each item is either placed in register or the actual memory stack. locationStack *runtimeValueLocationStack // labels maps a label (e.g. ".L1_then") to *arm64LabelInfo. labels [wazeroir.LabelKindNum][]arm64LabelInfo // stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation. stackPointerCeil uint64 // assignStackPointerCeilNeeded holds an asm.Node whose AssignDestinationConstant must be called with the determined stack pointer ceiling. assignStackPointerCeilNeeded asm.Node compiledTrapTargets [nativeCallStatusModuleClosed]asm.Node withListener bool typ *wasm.FunctionType br *bytes.Reader // locationStackForEntrypoint is the initial location stack for all functions. To reuse the allocated stack, // we cache it here, and reset and set to .locationStack in the Init method. locationStackForEntrypoint runtimeValueLocationStack // frameIDMax tracks the maximum value of frame id per function. frameIDMax int brTableTmp []runtimeValueLocation } func newArm64Compiler() compiler { return &arm64Compiler{ assembler: arm64.NewAssembler(arm64ReservedRegisterForTemporary), locationStackForEntrypoint: newRuntimeValueLocationStack(), br: bytes.NewReader(nil), } } // Init implements compiler.Init. func (c *arm64Compiler) Init(typ *wasm.FunctionType, ir *wazeroir.CompilationResult, withListener bool) { c.assembler.Reset() c.locationStackForEntrypoint.reset() c.resetLabels() *c = arm64Compiler{ ir: ir, withListener: withListener, typ: typ, assembler: c.assembler, labels: c.labels, br: c.br, brTableTmp: c.brTableTmp, locationStackForEntrypoint: c.locationStackForEntrypoint, } // Reuses the initial location stack for the compilation of subsequent functions. c.locationStack = &c.locationStackForEntrypoint } // resetLabels resets the existing content in arm64Compiler.labels so that // we could reuse the allocated slices and stacks in the subsequent compilations. func (c *arm64Compiler) resetLabels() { for i := range c.labels { for j := range c.labels[i] { if j > c.frameIDMax { // Only need to reset until the maximum frame id. This makes the compilation faster for large binary. break } l := &c.labels[i][j] l.initialInstruction = nil l.stackInitialized = false l.initialStack.reset() } } } var ( arm64UnreservedVectorRegisters = []asm.Register{ arm64.RegV0, arm64.RegV1, arm64.RegV2, arm64.RegV3, arm64.RegV4, arm64.RegV5, arm64.RegV6, arm64.RegV7, arm64.RegV8, arm64.RegV9, arm64.RegV10, arm64.RegV11, arm64.RegV12, arm64.RegV13, arm64.RegV14, arm64.RegV15, arm64.RegV16, arm64.RegV17, arm64.RegV18, arm64.RegV19, arm64.RegV20, arm64.RegV21, arm64.RegV22, arm64.RegV23, arm64.RegV24, arm64.RegV25, arm64.RegV26, arm64.RegV27, arm64.RegV28, arm64.RegV29, arm64.RegV30, arm64.RegV31, } // Note (see arm64 section in https://go.dev/doc/asm): // * RegR18 is reserved as a platform register, and we don't use it in Compiler. 
// * RegR28 is reserved for Goroutine by Go runtime, and we don't use it in Compiler. arm64UnreservedGeneralPurposeRegisters = []asm.Register{ //nolint arm64.RegR3, arm64.RegR4, arm64.RegR5, arm64.RegR6, arm64.RegR7, arm64.RegR8, arm64.RegR9, arm64.RegR10, arm64.RegR11, arm64.RegR12, arm64.RegR13, arm64.RegR14, arm64.RegR15, arm64.RegR16, arm64.RegR17, arm64.RegR19, arm64.RegR20, arm64.RegR21, arm64.RegR22, arm64.RegR23, arm64.RegR24, arm64.RegR25, arm64.RegR26, arm64.RegR29, arm64.RegR30, } ) const ( // arm64ReservedRegisterForCallEngine holds the pointer to the callEngine instance (i.e. *callEngine as uintptr) arm64ReservedRegisterForCallEngine = arm64.RegR0 // arm64ReservedRegisterForStackBasePointerAddress holds the stack base pointer's address (callEngine.stackBasePointer) in the current function call. arm64ReservedRegisterForStackBasePointerAddress = arm64.RegR1 // arm64ReservedRegisterForMemory holds the pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr). arm64ReservedRegisterForMemory = arm64.RegR2 // arm64ReservedRegisterForTemporary is the temporary register which is available at any point of execution, but its content is not expected to live beyond a single operation. // Note: we choose R27 as that is the temporary register used in Go's assembler. arm64ReservedRegisterForTemporary = arm64.RegR27 ) var arm64CallingConventionModuleInstanceAddressRegister = arm64.RegR29 const ( // arm64CallEngineArchContextCompilerCallReturnAddressOffset is the offset of archContext.nativeCallReturnAddress in callEngine. arm64CallEngineArchContextCompilerCallReturnAddressOffset = 144 // arm64CallEngineArchContextMinimum32BitSignedIntOffset is the offset of archContext.minimum32BitSignedIntAddress in callEngine. arm64CallEngineArchContextMinimum32BitSignedIntOffset = 152 // arm64CallEngineArchContextMinimum64BitSignedIntOffset is the offset of archContext.minimum64BitSignedIntAddress in callEngine. arm64CallEngineArchContextMinimum64BitSignedIntOffset = 160 ) func isZeroRegister(r asm.Register) bool { return r == arm64.RegRZR } // compileNOP implements compiler.compileNOP for the arm64 architecture. func (c *arm64Compiler) compileNOP() asm.Node { return c.assembler.CompileStandAlone(arm64.NOP) } // compile implements compiler.compile for the arm64 architecture. func (c *arm64Compiler) compile(buf asm.Buffer) (stackPointerCeil uint64, err error) { // c.stackPointerCeil tracks the stack pointer ceiling (max seen) value across all runtimeValueLocationStack(s) // used for all labels (via setLocationStack), excluding the current one. // Hence, we check here whether the final block's maximum exceeds the current c.stackPointerCeil. stackPointerCeil = c.stackPointerCeil if stackPointerCeil < c.locationStack.stackPointerCeil { stackPointerCeil = c.locationStack.stackPointerCeil } // Now that the stack pointer ceiling is determined, we invoke the callback. // Note: this must be called before Assemble() below. c.assignStackPointerCeil(stackPointerCeil) err = c.assembler.Assemble(buf) return } // arm64LabelInfo holds wazeroir label-specific information in this function. type arm64LabelInfo struct { // initialInstruction is the initial instruction for this label so other blocks can branch into it. initialInstruction asm.Node // initialStack is the initial value location stack from which we start compiling this label. initialStack runtimeValueLocationStack stackInitialized bool } // assignStackPointerCeil implements compilerImpl.assignStackPointerCeil for the arm64 architecture.
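// The ceiling is measured in 64-bit stack slots, while compileMaybeGrowStack compares
// byte counts, hence the shift by 3 (i.e. *8) below. A minimal sketch of the conversion,
// assuming a slot size of 8 bytes (illustrative only, not part of the compiler):
//
//	const slotSizeInBytes = 8
//	ceilInBytes := ceil * slotSizeInBytes // the value patched into the placeholder MOVD.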
func (c *arm64Compiler) assignStackPointerCeil(ceil uint64) { if c.assignStackPointerCeilNeeded != nil { c.assignStackPointerCeilNeeded.AssignSourceConstant(int64(ceil) << 3) } } func (c *arm64Compiler) label(label wazeroir.Label) *arm64LabelInfo { kind := label.Kind() frames := c.labels[kind] frameID := label.FrameID() if c.frameIDMax < frameID { c.frameIDMax = frameID } // If the frameID is not allocated yet, expand the slice so that it covers the frameID, // which reduces allocations in subsequent compilations. if diff := frameID - len(frames) + 1; diff > 0 { for i := 0; i < diff; i++ { frames = append(frames, arm64LabelInfo{initialStack: newRuntimeValueLocationStack()}) } c.labels[kind] = frames } return &frames[frameID] } // runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the arm64 architecture. func (c *arm64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack { return c.locationStack } // pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for arm64. func (c *arm64Compiler) pushRuntimeValueLocationOnRegister(reg asm.Register, vt runtimeValueType) (ret *runtimeValueLocation) { ret = c.locationStack.pushRuntimeValueLocationOnRegister(reg, vt) c.markRegisterUsed(reg) return } // pushVectorRuntimeValueLocationOnRegister implements compiler.pushVectorRuntimeValueLocationOnRegister for arm64. func (c *arm64Compiler) pushVectorRuntimeValueLocationOnRegister(reg asm.Register) (lowerBitsLocation *runtimeValueLocation) { lowerBitsLocation = c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Lo) c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Hi) c.markRegisterUsed(reg) return } func (c *arm64Compiler) markRegisterUsed(regs ...asm.Register) { for _, reg := range regs { if !isZeroRegister(reg) && reg != asm.NilRegister { c.locationStack.markRegisterUsed(reg) } } } func (c *arm64Compiler) markRegisterUnused(regs ...asm.Register) { for _, reg := range regs { if !isZeroRegister(reg) && reg != asm.NilRegister { c.locationStack.markRegisterUnused(reg) } } } func (c *arm64Compiler) String() (ret string) { return c.locationStack.String() } // compilePreamble implements compiler.compilePreamble for the arm64 architecture. func (c *arm64Compiler) compilePreamble() error { c.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister) defer c.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister) c.locationStack.init(c.typ) // Check if it's necessary to grow the value stack before entering the function body. if err := c.compileMaybeGrowStack(); err != nil { return err } if err := c.compileModuleContextInitialization(); err != nil { return err } if c.withListener { if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerBefore); err != nil { return err } } // We must initialize the stack base pointer register so that we can manipulate the stack properly. c.compileReservedStackBasePointerRegisterInitialization() c.compileReservedMemoryRegisterInitialization() return nil } // compileMaybeGrowStack adds instructions to check whether the value stack needs to grow, // and if so, to call the builtin function that grows it. These instructions are emitted in the function's // preamble.
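// In plain Go, the emitted check is roughly equivalent to the following sketch
// (field names abbreviated, illustrative only):
//
//	remainingBytes := ce.stackLenInBytes - ce.stackBasePointerInBytes
//	if remainingBytes < stackPointerCeil*8 {
//		// call the builtinFunctionIndexGrowStack Go function, then continue.
//	}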
func (c *arm64Compiler) compileMaybeGrowStack() error { tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) if !found { panic("BUG: all the registers should be free at this point") } c.markRegisterUsed(tmpX) tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) if !found { panic("BUG: all the registers should be free at this point") } c.markRegisterUsed(tmpY) // "tmpX = ce.stackLenInBytes" c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineStackContextStackLenInBytesOffset, tmpX, ) // "tmpY = ce.stackBasePointerInBytes" c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset, tmpY, ) // "tmpX = tmpX - tmpY", in other words "tmpX = ce.stackLenInBytes - ce.stackBasePointerInBytes" c.assembler.CompileRegisterToRegister( arm64.SUB, tmpY, tmpX, ) // "tmpY = stackPointerCeil" loadStackPointerCeil := c.assembler.CompileConstToRegister( arm64.MOVD, math.MaxInt32, tmpY, ) // At this point of compilation, we don't know the value of the stack pointer ceiling, // so we lazily resolve the value later. c.assignStackPointerCeilNeeded = loadStackPointerCeil // Compare tmpX (ce.stackLenInBytes - ce.stackBasePointerInBytes) and tmpY (the stack pointer ceiling in bytes) c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmpX, tmpY) // If ceil > stackLen - stackBasePointer, we need to grow the stack by calling the builtin Go function. brIfStackOK := c.assembler.CompileJump(arm64.BCONDLS) if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexGrowStack); err != nil { return err } // Otherwise, skip calling it. c.assembler.SetJumpTargetOnNext(brIfStackOK) c.markRegisterUnused(tmpX, tmpY) return nil } // compileReturnFunction emits instructions to return from the current function frame. // If the current frame is the bottom, the code goes back to the Go code with nativeCallStatusCodeReturned status. // Otherwise, we branch into the caller's return address. func (c *arm64Compiler) compileReturnFunction() error { // Release all the registers to the stack, as our calling convention is caller-save. if err := c.compileReleaseAllRegistersToStack(); err != nil { return err } if c.withListener { if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerAfter); err != nil { return err } // After return, we re-initialize the stack base pointer as that is used to return to the caller below. c.compileReservedStackBasePointerRegisterInitialization() } // arm64CallingConventionModuleInstanceAddressRegister holds the module instance's address, // so mark it used so that it won't be handed out as a free register. c.locationStack.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister) defer c.locationStack.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister) returnAddress, callerStackBasePointerInBytes, callerFunction := c.locationStack.getCallFrameLocations(c.typ) // If the return address is zero, we are at the bottom frame and return from the execution to Go. returnAddress.setRegister(arm64ReservedRegisterForTemporary) c.compileLoadValueOnStackToRegister(returnAddress) c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, arm64.RegRZR) // If the address doesn't equal zero, we branch into the caller's return address below; // otherwise, we exit the execution with the nativeCallStatusCodeReturned status. c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeReturned) // Alias for readability.
tmp := arm64CallingConventionModuleInstanceAddressRegister // First, restore the stackContext.stackBasePointerInBytesOffset from callerStackBasePointerInBytes. callerStackBasePointerInBytes.setRegister(tmp) c.compileLoadValueOnStackToRegister(callerStackBasePointerInBytes) c.assembler.CompileRegisterToMemory(arm64.STRD, tmp, arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset) // Next, restore moduleContext.fn from callerFunction. callerFunction.setRegister(tmp) c.compileLoadValueOnStackToRegister(callerFunction) c.assembler.CompileRegisterToMemory(arm64.STRD, tmp, arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset) // Also, we have to put the target function's *wasm.ModuleInstance into arm64CallingConventionModuleInstanceAddressRegister. c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, functionModuleInstanceOffset, arm64CallingConventionModuleInstanceAddressRegister) c.assembler.CompileJumpToRegister(arm64.B, returnAddress.register) return nil } func (c *arm64Compiler) compileMaybeExitFromNativeCode(skipCondition asm.Instruction, status nativeCallStatusCode) { skip := c.assembler.CompileJump(skipCondition) c.compileExitFromNativeCode(status) c.assembler.SetJumpTargetOnNext(skip) } // compileExitFromNativeCode adds instructions to give the control back to ce.exec with the given status code. func (c *arm64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) { if target := c.compiledTrapTargets[status]; target != nil { c.assembler.CompileJump(arm64.B).AssignJumpTarget(target) return } switch status { case nativeCallStatusCodeReturned: // Save the target for reuse. c.compiledTrapTargets[status] = c.compileNOP() case nativeCallStatusCodeCallGoHostFunction, nativeCallStatusCodeCallBuiltInFunction: // Read the return address, and write it to callEngine.exitContext.returnAddress. c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.RET) c.assembler.CompileRegisterToMemory( arm64.STRD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset, ) default: if c.ir.IROperationSourceOffsetsInWasmBinary != nil { // This case, the execution traps, and we want the top frame's source position in the stack trace. // We store the instruction address onto callEngine.returnAddress. c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.STRD) c.assembler.CompileRegisterToMemory( arm64.STRD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset, ) } else { // We won't use the source position, so just save the target for reuse. c.compiledTrapTargets[status] = c.compileNOP() } } // Write the current stack pointer to the ce.stackPointer. c.assembler.CompileConstToRegister(arm64.MOVD, int64(c.locationStack.sp), arm64ReservedRegisterForTemporary) c.assembler.CompileRegisterToMemory(arm64.STRD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine, callEngineStackContextStackPointerOffset) // Write the status to callEngine.exitContext.statusCode. if status != 0 { c.assembler.CompileConstToRegister(arm64.MOVW, int64(status), arm64ReservedRegisterForTemporary) c.assembler.CompileRegisterToMemory(arm64.STRW, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset) } else { // If the status == 0, we use zero register to store zero. 
c.assembler.CompileRegisterToMemory(arm64.STRW, arm64.RegRZR, arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset) } // The return address to the Go code is stored in archContext.nativeCallReturnAddress, which // is embedded in ce. We load the value into the temporary register, and then // invoke RET with that register. c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, arm64CallEngineArchContextCompilerCallReturnAddressOffset, arm64ReservedRegisterForTemporary) c.assembler.CompileJumpToRegister(arm64.RET, arm64ReservedRegisterForTemporary) } // compileGoDefinedHostFunction implements compiler.compileGoDefinedHostFunction for the arm64 architecture. func (c *arm64Compiler) compileGoDefinedHostFunction() error { // First we must update the location stack to reflect the number of host function inputs. c.locationStack.init(c.typ) if c.withListener { if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerBefore); err != nil { return err } } // The host function needs access to the caller's module instance, and the caller's information is stored in the stack // (as described in the doc of callEngine.stack). Here, we get the caller's *function from the stack, // load its *wasm.ModuleInstance, and save it in callEngine.exitContext.callerModuleInstance so we can pass it to the host function // without sacrificing performance. c.compileReservedStackBasePointerRegisterInitialization() // Alias for readability. tmp := arm64CallingConventionModuleInstanceAddressRegister // Get the location of the callerFunction (*function) in the stack, which depends on the signature. _, _, callerFunction := c.locationStack.getCallFrameLocations(c.typ) // Load the value into the tmp register: tmp = &function{..} callerFunction.setRegister(tmp) c.compileLoadValueOnStackToRegister(callerFunction) // tmp = *(tmp+functionModuleInstanceOffset) = &wasm.ModuleInstance{...} c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, functionModuleInstanceOffset, tmp) // Store it into callEngine.exitContext.callerModuleInstance. c.assembler.CompileRegisterToMemory(arm64.STRD, tmp, arm64ReservedRegisterForCallEngine, callEngineExitContextCallerModuleInstanceOffset) // Reset the state of the callerFunction value location so that we won't mess up the subsequent code generation below. c.locationStack.releaseRegister(callerFunction) if err := c.compileCallGoFunction(nativeCallStatusCodeCallGoHostFunction, 0); err != nil { return err } // Initialize the reserved stack base pointer register, which is used to retrieve the call frame stack. c.compileReservedStackBasePointerRegisterInitialization() // A Go function can change the module state in arbitrary ways, so we have to force // the callEngine.moduleContext initialization on the function return. To do so, // we zero out callEngine.moduleContext.moduleInstance. c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset) return c.compileReturnFunction() } // setLocationStack sets the given runtimeValueLocationStack to the .locationStack field, // while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks. // This is called when we branch into a different block.
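// For example, if the block compiled so far needed at most 3 value slots while the block
// we are about to compile needs 5, the function-wide ceiling must end up as 5. That is why
// the outgoing stack's ceiling is folded into c.stackPointerCeil before switching stacks.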
func (c *arm64Compiler) setLocationStack(newStack *runtimeValueLocationStack) { if c.stackPointerCeil < c.locationStack.stackPointerCeil { c.stackPointerCeil = c.locationStack.stackPointerCeil } c.locationStack = newStack } // compileBuiltinFunctionCheckExitCode implements compiler.compileBuiltinFunctionCheckExitCode for the arm64 architecture. func (c *arm64Compiler) compileBuiltinFunctionCheckExitCode() error { if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexCheckExitCode); err != nil { return err } // After return, we re-initialize reserved registers just like preamble of functions. c.compileReservedStackBasePointerRegisterInitialization() c.compileReservedMemoryRegisterInitialization() return nil } // compileLabel implements compiler.compileLabel for the arm64 architecture. func (c *arm64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipThisLabel bool) { labelKey := wazeroir.Label(o.U1) labelInfo := c.label(labelKey) // If initialStack is not set, that means this label has never been reached. if !labelInfo.stackInitialized { skipThisLabel = true return } if labelBegin := labelInfo.initialInstruction; labelBegin == nil { // We use NOP as a beginning of instructions in a label. // This should be eventually optimized out by assembler. labelInfo.initialInstruction = c.assembler.CompileStandAlone(arm64.NOP) } else { c.assembler.Add(labelBegin) } // Set the initial stack. c.setLocationStack(&labelInfo.initialStack) return false } // compileUnreachable implements compiler.compileUnreachable for the arm64 architecture. func (c *arm64Compiler) compileUnreachable() error { c.compileExitFromNativeCode(nativeCallStatusCodeUnreachable) return nil } // compileSet implements compiler.compileSet for the arm64 architecture. func (c *arm64Compiler) compileSet(o *wazeroir.UnionOperation) error { depth := int(o.U1) isTargetVector := o.B3 setTargetIndex := int(c.locationStack.sp) - 1 - depth if isTargetVector { _ = c.locationStack.pop() } v := c.locationStack.pop() if err := c.compileEnsureOnRegister(v); err != nil { return err } targetLocation := &c.locationStack.stack[setTargetIndex] if targetLocation.onRegister() { // We no longer need the register previously used by the target location. c.markRegisterUnused(targetLocation.register) } reg := v.register targetLocation.setRegister(reg) targetLocation.valueType = v.valueType if isTargetVector { hi := &c.locationStack.stack[setTargetIndex+1] hi.setRegister(reg) } return nil } // compileGlobalGet implements compiler.compileGlobalGet for the arm64 architecture. func (c *arm64Compiler) compileGlobalGet(o *wazeroir.UnionOperation) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } index := uint32(o.U1) wasmValueType := c.ir.Globals[index].ValType isV128 := wasmValueType == wasm.ValueTypeV128 // Get the address of globals[index] into globalAddressReg. 
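// In plain Go, the whole global.get lowers to roughly the following sketch
// (see compileReadGlobalAddress below for the address computation; field names are
// illustrative only):
//
//	g := moduleInstance.Globals[index] // a *GlobalInstance
//	value := g.Val                     // the 32/64-bit case; v128 also reads the upper 64 bits.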
globalAddressReg, err := c.compileReadGlobalAddress(index) if err != nil { return err } if isV128 { resultReg, err := c.allocateRegister(registerTypeVector) if err != nil { return err } c.assembler.CompileConstToRegister(arm64.ADD, globalInstanceValueOffset, globalAddressReg) c.assembler.CompileMemoryToVectorRegister(arm64.VMOV, globalAddressReg, 0, resultReg, arm64.VectorArrangementQ) c.pushVectorRuntimeValueLocationOnRegister(resultReg) } else { ldr := arm64.NOP var result asm.Register var vt runtimeValueType switch wasmValueType { case wasm.ValueTypeI32: ldr = arm64.LDRW vt = runtimeValueTypeI32 result = globalAddressReg case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref: ldr = arm64.LDRD vt = runtimeValueTypeI64 result = globalAddressReg case wasm.ValueTypeF32: result, err = c.allocateRegister(registerTypeVector) if err != nil { return err } ldr = arm64.FLDRS vt = runtimeValueTypeF32 case wasm.ValueTypeF64: result, err = c.allocateRegister(registerTypeVector) if err != nil { return err } ldr = arm64.FLDRD vt = runtimeValueTypeF64 } // "result = [globalAddressReg + globalInstanceValueOffset] (== globals[index].Val)" c.assembler.CompileMemoryToRegister( ldr, globalAddressReg, globalInstanceValueOffset, result, ) c.pushRuntimeValueLocationOnRegister(result, vt) } return nil } // compileGlobalSet implements compiler.compileGlobalSet for the arm64 architecture. func (c *arm64Compiler) compileGlobalSet(o *wazeroir.UnionOperation) error { index := uint32(o.U1) wasmValueType := c.ir.Globals[index].ValType isV128 := wasmValueType == wasm.ValueTypeV128 var val *runtimeValueLocation if isV128 { val = c.locationStack.popV128() } else { val = c.locationStack.pop() } if err := c.compileEnsureOnRegister(val); err != nil { return err } globalInstanceAddressRegister, err := c.compileReadGlobalAddress(index) if err != nil { return err } if isV128 { c.assembler.CompileVectorRegisterToMemory(arm64.VMOV, val.register, globalInstanceAddressRegister, globalInstanceValueOffset, arm64.VectorArrangementQ) } else { var str asm.Instruction switch c.ir.Globals[index].ValType { case wasm.ValueTypeI32: str = arm64.STRW case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref: str = arm64.STRD case wasm.ValueTypeF32: str = arm64.FSTRS case wasm.ValueTypeF64: str = arm64.FSTRD } // At this point "globalInstanceAddressRegister = globals[index]". // Therefore, this means "globals[index].Val = val.register" c.assembler.CompileRegisterToMemory( str, val.register, globalInstanceAddressRegister, globalInstanceValueOffset, ) } c.markRegisterUnused(val.register) return nil } // compileReadGlobalAddress adds instructions to store the absolute address of the global instance at globalIndex into a register func (c *arm64Compiler) compileReadGlobalAddress(globalIndex uint32) (destinationRegister asm.Register, err error) { // TODO: rethink about the type used in store `globals []*GlobalInstance`. // If we use `[]GlobalInstance` instead, we could reduce one MOV instruction here. destinationRegister, err = c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return } // "destinationRegister = globalIndex * 8" c.assembler.CompileConstToRegister( // globalIndex is an index to []*GlobalInstance, therefore // we have to multiply it by the size of *GlobalInstance == the pointer size == 8. 
arm64.MOVD, int64(globalIndex)*8, destinationRegister, ) // "arm64ReservedRegisterForTemporary = &globals[0]" c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset, arm64ReservedRegisterForTemporary, ) // "destinationRegister = [arm64ReservedRegisterForTemporary + destinationRegister] (== globals[globalIndex])". c.assembler.CompileMemoryWithRegisterOffsetToRegister( arm64.LDRD, arm64ReservedRegisterForTemporary, destinationRegister, destinationRegister, ) return } // compileBr implements compiler.compileBr for the arm64 architecture. func (c *arm64Compiler) compileBr(o *wazeroir.UnionOperation) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } return c.compileBranchInto(wazeroir.Label(o.U1)) } // compileBrIf implements compiler.compileBrIf for the arm64 architecture. func (c *arm64Compiler) compileBrIf(o *wazeroir.UnionOperation) error { cond := c.locationStack.pop() var conditionalBR asm.Node if cond.onConditionalRegister() { // If the cond is on a conditional register, it corresponds to one of "conditional codes" // https://developer.arm.com/documentation/dui0801/a/Condition-Codes/Condition-code-suffixes // Here we represent the conditional codes by using arm64.COND_** registers, and that means the // conditional jump can be performed if we use arm64.B**. // For example, if we have arm64.CondEQ on cond, that means we performed compileEq right before // this compileBrIf and BrIf can be achieved by arm64.BCONDEQ. var brInst asm.Instruction switch cond.conditionalRegister { case arm64.CondEQ: brInst = arm64.BCONDEQ case arm64.CondNE: brInst = arm64.BCONDNE case arm64.CondHS: brInst = arm64.BCONDHS case arm64.CondLO: brInst = arm64.BCONDLO case arm64.CondMI: brInst = arm64.BCONDMI case arm64.CondHI: brInst = arm64.BCONDHI case arm64.CondLS: brInst = arm64.BCONDLS case arm64.CondGE: brInst = arm64.BCONDGE case arm64.CondLT: brInst = arm64.BCONDLT case arm64.CondGT: brInst = arm64.BCONDGT case arm64.CondLE: brInst = arm64.BCONDLE default: // BUG: This means that we use the cond.conditionalRegister somewhere in this file, // but not covered in switch ^. That shouldn't happen. return fmt.Errorf("unsupported condition for br_if: %v", cond.conditionalRegister) } conditionalBR = c.assembler.CompileJump(brInst) } else { // If the value is not on the conditional register, we compare the value with the zero register, // and then do the conditional BR if the value doesn't equal zero. if err := c.compileEnsureOnRegister(cond); err != nil { return err } // Compare the value with zero register. Note that the value is ensured to be i32 by function validation phase, // so we use CMPW (32-bit compare) here. c.assembler.CompileTwoRegistersToNone(arm64.CMPW, cond.register, arm64.RegRZR) conditionalBR = c.assembler.CompileJump(arm64.BCONDNE) c.markRegisterUnused(cond.register) } // Emit the code for branching into else branch. elseTarget := wazeroir.Label(o.U2) if err := c.compileBranchInto(elseTarget); err != nil { return err } // We branch into here from the original conditional BR (conditionalBR). 
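// At this point, the emitted layout is roughly the following Go-flavored pseudocode
// (illustrative only):
//
//	if cond != 0 {
//		goto THEN // conditionalBR
//	}
//	/* branch into the else label */
//	THEN:
//	/* drop the o.U3 range, then branch into the then label */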
c.assembler.SetJumpTargetOnNext(conditionalBR) thenTarget := wazeroir.Label(o.U1) if err := compileDropRange(c, o.U3); err != nil { return err } return c.compileBranchInto(thenTarget) } func (c *arm64Compiler) compileBranchInto(target wazeroir.Label) error { if target.IsReturnTarget() { return c.compileReturnFunction() } else { if c.ir.LabelCallers[target] > 1 { // We can only re-use the register state when there's a single call-site. // If there are multiple call-sites, release the values on registers to the stack so that // the value location state is consistent at the beginning of the label. if err := c.compileReleaseAllRegistersToStack(); err != nil { return err } } // Set the initial stack of the target label, so we can start compiling the label // with the appropriate value locations. Note we clone the stack here as we may // manipulate the stack before the compiler reaches the label. targetLabel := c.label(target) if !targetLabel.stackInitialized { targetLabel.initialStack.cloneFrom(*c.locationStack) targetLabel.stackInitialized = true } br := c.assembler.CompileJump(arm64.B) c.assignBranchTarget(target, br) return nil } } // assignBranchTarget assigns the given label's initial instruction to the destination of br. func (c *arm64Compiler) assignBranchTarget(label wazeroir.Label, br asm.Node) { target := c.label(label) targetInst := target.initialInstruction if targetInst == nil { // If the label isn't compiled yet, allocate a NOP node and set it as the initial instruction. targetInst = c.assembler.AllocateNOP() target.initialInstruction = targetInst } br.AssignJumpTarget(targetInst) } // compileBrTable implements compiler.compileBrTable for the arm64 architecture. func (c *arm64Compiler) compileBrTable(o *wazeroir.UnionOperation) error { // If the operation only consists of the default target, we branch into it and return early. if len(o.Us) == 2 { loc := c.locationStack.pop() if loc.onRegister() { c.markRegisterUnused(loc.register) } if err := compileDropRange(c, o.Us[1]); err != nil { return err } return c.compileBranchInto(wazeroir.Label(o.Us[0])) } index := c.locationStack.pop() if err := c.compileEnsureOnRegister(index); err != nil { return err } if isZeroRegister(index.register) { reg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } index.setRegister(reg) c.markRegisterUsed(reg) // Zero the value on a picked register. c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, reg) } tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } // Load the number of non-default targets. // "tmpReg = len(targets) (== len(o.Us)/2 - 1)" c.assembler.CompileConstToRegister(arm64.MOVW, int64(len(o.Us)/2-1), tmpReg) // Compare the length with the index. c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmpReg, index.register) // If the index exceeds the number of targets, we branch into the default target (the last entry in the jump table). brDefaultIndex := c.assembler.CompileJump(arm64.BCONDLO) c.assembler.CompileRegisterToRegister(arm64.MOVW, tmpReg, index.register) c.assembler.SetJumpTargetOnNext(brDefaultIndex) // We prepare the asm.StaticConst which holds the offset of // each target's first instruction (incl. default) // relative to the beginning of the label table. // // For example, if we have targets=[L0, L1] and default=L_DEFAULT, // we emit the code like this at [Emit the code for each target and default branch] below. // // L0: // 0x123001: XXXX, ... // ..... // L1: // 0x123006: YYY, ... // ..... // L_DEFAULT: // 0x123009: ZZZ, ...
// // then offsetData becomes like [0x0, 0x5, 0x8]. // By using this offset list, we could jump into the label for the index by // "jmp offsetData[index]+0x123001" and "0x123001" can be acquired by ADR instruction. // // Note: We store each offset of 32-bit unsigned integer as 4 consecutive bytes. So more precisely, // the above example's offsetData would be [0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0]. // // Note: this is similar to how GCC implements Switch statements in C. offsetData := asm.NewStaticConst(make([]byte, 4*(len(o.Us)/2))) // "tmpReg = &offsetData[0]" c.assembler.CompileStaticConstToRegister(arm64.ADR, offsetData, tmpReg) // "index.register = tmpReg + (index.register << 2) (== &offsetData[offset])" c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, index.register, 2, tmpReg, index.register) // "index.register = *index.register (== offsetData[offset])" c.assembler.CompileMemoryToRegister(arm64.LDRW, index.register, 0, index.register) // Now we read the address of the beginning of the jump table. // In the above example, this corresponds to reading the address of 0x123001. c.assembler.CompileReadInstructionAddress(tmpReg, arm64.B) // Now we have the address of L0 in tmp register, and the offset to the target label in the index.register. // So we could achieve the br_table jump by adding them and jump into the resulting address. c.assembler.CompileRegisterToRegister(arm64.ADD, tmpReg, index.register) c.assembler.CompileJumpToRegister(arm64.B, index.register) // We no longer need the index's register, so mark it unused. c.markRegisterUnused(index.register) // [Emit the code for each targets and default branch] labelInitialInstructions := make([]asm.Node, len(o.Us)/2) // Since we might end up having the different stack state in each branch, // we need to save the initial stack state here, and use the same initial state // for each iteration. initialLocationStack := c.getSavedTemporaryLocationStack() for i := range labelInitialInstructions { // Emit the initial instruction of each target where // we use NOP as we don't yet know the next instruction in each label. init := c.assembler.CompileStandAlone(arm64.NOP) labelInitialInstructions[i] = init targetLabel := wazeroir.Label(o.Us[i*2]) targetToDrop := o.Us[i*2+1] if err = compileDropRange(c, targetToDrop); err != nil { return err } if err = c.compileBranchInto(targetLabel); err != nil { return err } // After the iteration, reset the stack's state with initialLocationStack. c.locationStack.cloneFrom(initialLocationStack) } c.assembler.BuildJumpTable(offsetData, labelInitialInstructions) return nil } func (c *arm64Compiler) getSavedTemporaryLocationStack() runtimeValueLocationStack { initialLocationStack := *c.locationStack // Take copy! // Use c.brTableTmp for the underlying stack so that we could reduce the allocations. if diff := int(initialLocationStack.sp) - len(c.brTableTmp); diff > 0 { c.brTableTmp = append(c.brTableTmp, make([]runtimeValueLocation, diff)...) } copy(c.brTableTmp, initialLocationStack.stack[:initialLocationStack.sp]) initialLocationStack.stack = c.brTableTmp return initialLocationStack } // compileCall implements compiler.compileCall for the arm64 architecture. 
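// The callee's *function entry is located by indexing the engine's function table,
// roughly (illustrative only; the real code materializes this with LDRD/ADD below):
//
//	targetAddr := functionsElement0Address + uintptr(functionIndex)*functionSize
//
// and the resulting address is then handed to compileCallImpl.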
func (c *arm64Compiler) compileCall(o *wazeroir.UnionOperation) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } functionIndex := o.U1 tp := &c.ir.Types[c.ir.Functions[functionIndex]] targetFunctionAddressReg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.markRegisterUsed(targetFunctionAddressReg) defer c.markRegisterUnused(targetFunctionAddressReg) // Compute the address of the target *function into targetFunctionAddressReg. // // First, we read the address of the first item of the ce.functions slice (= &ce.functions[0]) // into targetFunctionAddressReg. c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset, targetFunctionAddressReg) c.assembler.CompileConstToRegister( arm64.ADD, int64(functionIndex)*functionSize, // functionSize is the size in bytes of one entry in ce.functions. targetFunctionAddressReg) return c.compileCallImpl(targetFunctionAddressReg, tp) } // compileCallImpl implements compiler.compileCall and compiler.compileCallIndirect for the arm64 architecture. func (c *arm64Compiler) compileCallImpl(targetFunctionAddressRegister asm.Register, functype *wasm.FunctionType) error { // Release all the registers to the stack, as our calling convention is caller-save. if err := c.compileReleaseAllRegistersToStack(); err != nil { return err } tmp, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) if !ok { panic("BUG: cannot take a free register") } // The stack should look like: // // reserved slots for results (if len(results) > len(args)) // | | // ,arg0, ..., argN, ..., _, .returnAddress, .returnStackBasePointerInBytes, .function, .... // | | | // | callFrame{^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^} // | // nextStackBasePointerOffset // // where callFrame is used to return to this currently executed function. nextStackBasePointerOffset := int64(c.locationStack.sp) - int64(functype.ParamNumInUint64) callFrameReturnAddressLoc, callFrameStackBasePointerInBytesLoc, callFrameFunctionLoc := c.locationStack.pushCallFrame(functype) // Save the current stack base pointer at callFrameStackBasePointerInBytesLoc. c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset, tmp) callFrameStackBasePointerInBytesLoc.setRegister(tmp) c.compileReleaseRegisterToStack(callFrameStackBasePointerInBytesLoc) // Set callEngine.stackContext.stackBasePointer for the next function. c.assembler.CompileConstToRegister(arm64.ADD, nextStackBasePointerOffset<<3, tmp) c.assembler.CompileRegisterToMemory(arm64.STRD, tmp, arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset) // Save the currently executed *function (placed at callEngine.moduleContext.fn) into callFrameFunctionLoc. c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset, tmp) callFrameFunctionLoc.setRegister(tmp) c.compileReleaseRegisterToStack(callFrameFunctionLoc) // Set callEngine.moduleContext.fn to the next *function. c.assembler.CompileRegisterToMemory(arm64.STRD, targetFunctionAddressRegister, arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset) // Write the return address into callFrameReturnAddressLoc.
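// CompileReadInstructionAddress(tmp, arm64.B) resolves, once assembled, to the address of
// the instruction that follows the next B emitted below (i.e. the continuation after the
// call), so the callee's compileReturnFunction can branch straight back there. Roughly:
//
//	returnAddress := addressOfInstructionAfterNextB // resolved at assembly time
//	stack[callFrameReturnAddressLoc] = returnAddress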
c.assembler.CompileReadInstructionAddress(tmp, arm64.B) callFrameReturnAddressLoc.setRegister(tmp) c.compileReleaseRegisterToStack(callFrameReturnAddressLoc) if targetFunctionAddressRegister == arm64CallingConventionModuleInstanceAddressRegister { // This case we must move the value on targetFunctionAddressRegister to another register, otherwise // the address (jump target below) will be modified and result in segfault. // See #526. c.assembler.CompileRegisterToRegister(arm64.MOVD, targetFunctionAddressRegister, tmp) targetFunctionAddressRegister = tmp } // Also, we have to put the code's moduleInstance address into arm64CallingConventionModuleInstanceAddressRegister. c.assembler.CompileMemoryToRegister(arm64.LDRD, targetFunctionAddressRegister, functionModuleInstanceOffset, arm64CallingConventionModuleInstanceAddressRegister, ) // Then, br into the target function's initial address. c.assembler.CompileMemoryToRegister(arm64.LDRD, targetFunctionAddressRegister, functionCodeInitialAddressOffset, targetFunctionAddressRegister) c.assembler.CompileJumpToRegister(arm64.B, targetFunctionAddressRegister) // We consumed the function parameters, the call frame stack and reserved slots during the call. c.locationStack.sp = uint64(nextStackBasePointerOffset) // Also, the function results were pushed by the call. for _, t := range functype.Results { loc := c.locationStack.pushRuntimeValueLocationOnStack() switch t { case wasm.ValueTypeI32: loc.valueType = runtimeValueTypeI32 case wasm.ValueTypeI64, wasm.ValueTypeFuncref, wasm.ValueTypeExternref: loc.valueType = runtimeValueTypeI64 case wasm.ValueTypeF32: loc.valueType = runtimeValueTypeF32 case wasm.ValueTypeF64: loc.valueType = runtimeValueTypeF64 case wasm.ValueTypeV128: loc.valueType = runtimeValueTypeV128Lo hi := c.locationStack.pushRuntimeValueLocationOnStack() hi.valueType = runtimeValueTypeV128Hi } } if err := c.compileModuleContextInitialization(); err != nil { return err } // On the function return, we initialize the state for this function. c.compileReservedStackBasePointerRegisterInitialization() c.compileReservedMemoryRegisterInitialization() return nil } // compileCallIndirect implements compiler.compileCallIndirect for the arm64 architecture. func (c *arm64Compiler) compileCallIndirect(o *wazeroir.UnionOperation) (err error) { offset := c.locationStack.pop() if err = c.compileEnsureOnRegister(offset); err != nil { return err } typeIndex := o.U1 tableIndex := o.U2 offsetReg := offset.register if isZeroRegister(offsetReg) { offsetReg, err = c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.markRegisterUsed(offsetReg) // Zero the value on a picked register. c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetReg) } tmp, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.markRegisterUsed(tmp) tmp2, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.markRegisterUsed(tmp2) // First, we need to check if the offset doesn't exceed the length of table. 
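// In plain Go, the guards emitted below are roughly equivalent to the following sketch
// (names abbreviated, illustrative only):
//
//	table := moduleInstance.Tables[tableIndex]
//	if offset >= len(table.table) {
//		trap(nativeCallStatusCodeInvalidTableAccess)
//	}
//	fn := table.table[offset] // nil (zero) for an uninitialized element
//	if fn == nil {
//		trap(nativeCallStatusCodeInvalidTableAccess)
//	}
//	if fn.typeID != typeIDs[typeIndex] {
//		trap(nativeCallStatusCodeTypeMismatchOnIndirectCall)
//	}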
// "tmp = &Tables[0]" c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp, ) // tmp = [tmp + TableIndex*8] = [&Tables[0] + TableIndex*sizeOf(*tableInstance)] = Tables[tableIndex] c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, int64(tableIndex)*8, tmp, ) // tmp2 = [tmp + tableInstanceTableLenOffset] = len(Tables[tableIndex]) c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, tableInstanceTableLenOffset, tmp2) // "cmp tmp2, offset" c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp2, offsetReg) // If it exceeds len(table), we trap. c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess) // Otherwise, we proceed to do function type check. // We need to obtain the absolute address of table element. // "tmp = &Tables[tableIndex].table[0]" c.assembler.CompileMemoryToRegister( arm64.LDRD, tmp, tableInstanceTableOffset, tmp, ) // "offset = tmp + (offset << pointerSizeLog2) (== &table[offset])" // Here we left shifting by 3 in order to get the offset in bytes, // and the table element type is uintptr which is 8 bytes. c.assembler.CompileLeftShiftedRegisterToRegister( arm64.ADD, offsetReg, pointerSizeLog2, tmp, offsetReg, ) // "offset = (*offset) (== table[offset])" c.assembler.CompileMemoryToRegister(arm64.LDRD, offsetReg, 0, offsetReg) // Check if the value of table[offset] equals zero, meaning that the target element is uninitialized. c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, offsetReg) // Skipped if the target is initialized. c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeInvalidTableAccess) // next we check the type matches, i.e. table[offset].source.TypeID == targetFunctionType. // "tmp = table[offset].typeID" c.assembler.CompileMemoryToRegister( arm64.LDRD, offsetReg, functionTypeIDOffset, tmp, ) // "tmp2 = ModuleInstance.TypeIDs[index]" c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset, tmp2) c.assembler.CompileMemoryToRegister(arm64.LDRW, tmp2, int64(typeIndex)*4, tmp2) // Compare these two values, and if they equal, we are ready to make function call. c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmp, tmp2) // Skipped if the type matches. c.compileMaybeExitFromNativeCode(arm64.BCONDEQ, nativeCallStatusCodeTypeMismatchOnIndirectCall) targetFunctionType := &c.ir.Types[typeIndex] if err := c.compileCallImpl(offsetReg, targetFunctionType); err != nil { return err } // The offset register should be marked as un-used as we consumed in the function call. c.markRegisterUnused(offsetReg, tmp, tmp2) return nil } // compileDrop implements compiler.compileDrop for the arm64 architecture. func (c *arm64Compiler) compileDrop(o *wazeroir.UnionOperation) error { return compileDropRange(c, o.U1) } func (c *arm64Compiler) compileSelectV128Impl(selectorRegister asm.Register) error { x2 := c.locationStack.popV128() if err := c.compileEnsureOnRegister(x2); err != nil { return err } x1 := c.locationStack.popV128() if err := c.compileEnsureOnRegister(x1); err != nil { return err } c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, selectorRegister) brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE) // In this branch, we select the value of x2, so we move the value into x1.register so that // we can have the result in x1.register regardless of the selection. 
c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR, x2.register, x2.register, x1.register, arm64.VectorArrangement16B) c.assembler.SetJumpTargetOnNext(brIfNotZero) // As noted, the result exists in x1.register regardless of the selector. c.pushVectorRuntimeValueLocationOnRegister(x1.register) // Plus, x2.register is no longer used. c.markRegisterUnused(x2.register) return nil } // compileSelect implements compiler.compileSelect for the arm64 architecture. func (c *arm64Compiler) compileSelect(o *wazeroir.UnionOperation) error { cv, err := c.popValueOnRegister() if err != nil { return err } isTargetVector := o.B3 if isTargetVector { return c.compileSelectV128Impl(cv.register) } c.markRegisterUsed(cv.register) x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } if isZeroRegister(x1.register) && isZeroRegister(x2.register) { // If both values are zero, the result is always zero. c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType) c.markRegisterUnused(cv.register) return nil } // In the following, we emit the code so that x1's register contains the chosen value // no matter which of original x1 or x2 is selected. // // If x1 is currently on the zero register, we cannot place the result there because // writes to arm64.RegRZR are discarded (it always reads as zero) regardless of the value. // So we explicitly assign a general purpose register to x1 here. if isZeroRegister(x1.register) { // Mark x2's and cv's registers as used so they won't be chosen. c.markRegisterUsed(x2.register) // Pick a non-zero register for x1. x1Reg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } x1.setRegister(x1Reg) // And zero out the picked register. c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, x1Reg) } // At this point, x1 is on a non-zero register, and x2 is on either a general purpose or the zero register. c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, cv.register) brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE) // If cv == 0, we move the value of x2 to the x1.register. switch x1.valueType { case runtimeValueTypeI32: // TODO: use 32-bit mov c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register) case runtimeValueTypeI64: c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register) case runtimeValueTypeF32: // TODO: use 32-bit mov c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register) case runtimeValueTypeF64: c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register) default: return errors.New("TODO: implement vector type select") } c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) // Otherwise, there's nothing to do for select. c.assembler.SetJumpTargetOnNext(brIfNotZero) // Only x1.register is reused. c.markRegisterUnused(cv.register, x2.register) return nil } // compilePick implements compiler.compilePick for the arm64 architecture. func (c *arm64Compiler) compilePick(o *wazeroir.UnionOperation) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } depth := o.U1 isTargetVector := o.B3 pickTarget := &c.locationStack.stack[c.locationStack.sp-1-uint64(depth)] pickedRegister, err := c.allocateRegister(pickTarget.getRegisterType()) if err != nil { return err } if pickTarget.onRegister() { // Copy the value to the pickedRegister.
switch pickTarget.valueType { case runtimeValueTypeI32: c.assembler.CompileRegisterToRegister(arm64.MOVW, pickTarget.register, pickedRegister) case runtimeValueTypeI64: c.assembler.CompileRegisterToRegister(arm64.MOVD, pickTarget.register, pickedRegister) case runtimeValueTypeF32: c.assembler.CompileRegisterToRegister(arm64.FMOVS, pickTarget.register, pickedRegister) case runtimeValueTypeF64: c.assembler.CompileRegisterToRegister(arm64.FMOVD, pickTarget.register, pickedRegister) case runtimeValueTypeV128Lo: c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR, pickTarget.register, pickTarget.register, pickedRegister, arm64.VectorArrangement16B) case runtimeValueTypeV128Hi: panic("BUG") // since pick target must point to the lower 64-bits of vectors. } } else if pickTarget.onStack() { // Temporarily assign a register to the pick target, and then load the value. pickTarget.setRegister(pickedRegister) c.compileLoadValueOnStackToRegister(pickTarget) // After the load, we revert the register assignment to the pick target. pickTarget.setRegister(asm.NilRegister) if isTargetVector { hi := &c.locationStack.stack[pickTarget.stackPointer+1] hi.setRegister(asm.NilRegister) } } // Now we have the value of the target on the pickedRegister, // so push the location. c.pushRuntimeValueLocationOnRegister(pickedRegister, pickTarget.valueType) if isTargetVector { c.pushRuntimeValueLocationOnRegister(pickedRegister, runtimeValueTypeV128Hi) } return nil } // compileAdd implements compiler.compileAdd for the arm64 architecture. func (c *arm64Compiler) compileAdd(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } // Addition can be nop if one of operands is zero. if isZeroRegister(x1.register) { c.pushRuntimeValueLocationOnRegister(x2.register, x1.valueType) return nil } else if isZeroRegister(x2.register) { c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } var inst asm.Instruction unsignedType := wazeroir.UnsignedType(o.B1) switch unsignedType { case wazeroir.UnsignedTypeI32: inst = arm64.ADDW case wazeroir.UnsignedTypeI64: inst = arm64.ADD case wazeroir.UnsignedTypeF32: inst = arm64.FADDS case wazeroir.UnsignedTypeF64: inst = arm64.FADDD } c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register) // The result is placed on a register for x1, so record it. c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } // compileSub implements compiler.compileSub for the arm64 architecture. func (c *arm64Compiler) compileSub(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } // If both of registers are zeros, this can be nop and push the zero register. if isZeroRegister(x1.register) && isZeroRegister(x2.register) { c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType) return nil } // At this point, at least one of x1 or x2 registers is non zero. // Choose the non-zero register as destination. 
destinationReg := x1.register if isZeroRegister(x1.register) { destinationReg = x2.register } var inst asm.Instruction var vt runtimeValueType unsignedType := wazeroir.UnsignedType(o.B1) switch unsignedType { case wazeroir.UnsignedTypeI32: inst = arm64.SUBW vt = runtimeValueTypeI32 case wazeroir.UnsignedTypeI64: inst = arm64.SUB vt = runtimeValueTypeI64 case wazeroir.UnsignedTypeF32: inst = arm64.FSUBS vt = runtimeValueTypeF32 case wazeroir.UnsignedTypeF64: inst = arm64.FSUBD vt = runtimeValueTypeF64 } c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg) c.pushRuntimeValueLocationOnRegister(destinationReg, vt) return nil } // compileMul implements compiler.compileMul for the arm64 architecture. func (c *arm64Compiler) compileMul(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } // Multiplication can be done by putting a zero register if one of operands is zero. if isZeroRegister(x1.register) || isZeroRegister(x2.register) { c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType) return nil } var inst asm.Instruction var vt runtimeValueType unsignedType := wazeroir.UnsignedType(o.B1) switch unsignedType { case wazeroir.UnsignedTypeI32: inst = arm64.MULW vt = runtimeValueTypeI32 case wazeroir.UnsignedTypeI64: inst = arm64.MUL vt = runtimeValueTypeI64 case wazeroir.UnsignedTypeF32: inst = arm64.FMULS vt = runtimeValueTypeF32 case wazeroir.UnsignedTypeF64: inst = arm64.FMULD vt = runtimeValueTypeF64 } c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register) // The result is placed on a register for x1, so record it. c.pushRuntimeValueLocationOnRegister(x1.register, vt) return nil } // compileClz implements compiler.compileClz for the arm64 architecture. func (c *arm64Compiler) compileClz(o *wazeroir.UnionOperation) error { v, err := c.popValueOnRegister() if err != nil { return err } unsignedInt := wazeroir.UnsignedInt(o.B1) if isZeroRegister(v.register) { // If the target is zero register, the result is always 32 (or 64 for 64-bits), // so we allocate a register and put the const on it. reg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } var vt runtimeValueType if unsignedInt == wazeroir.UnsignedInt32 { vt = runtimeValueTypeI32 c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg) } else { vt = runtimeValueTypeI64 c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg) } c.pushRuntimeValueLocationOnRegister(reg, vt) return nil } reg := v.register var vt runtimeValueType if unsignedInt == wazeroir.UnsignedInt32 { vt = runtimeValueTypeI32 c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg) } else { vt = runtimeValueTypeI64 c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg) } c.pushRuntimeValueLocationOnRegister(reg, vt) return nil } // compileCtz implements compiler.compileCtz for the arm64 architecture. func (c *arm64Compiler) compileCtz(o *wazeroir.UnionOperation) error { v, err := c.popValueOnRegister() if err != nil { return err } unsignedInt := wazeroir.UnsignedInt(o.B1) reg := v.register if isZeroRegister(reg) { // If the target is zero register, the result is always 32 (or 64 for 64-bits), // so we allocate a register and put the const on it. 
reg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } var vt runtimeValueType if unsignedInt == wazeroir.UnsignedInt32 { vt = runtimeValueTypeI32 c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg) } else { vt = runtimeValueTypeI64 c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg) } c.pushRuntimeValueLocationOnRegister(reg, vt) return nil } // Since arm64 doesn't have an instruction directly counting trailing zeros, // we reverse the bits first, and then do CLZ, which is exactly how // gcc implements __builtin_ctz for arm64. var vt runtimeValueType if unsignedInt == wazeroir.UnsignedInt32 { vt = runtimeValueTypeI32 c.assembler.CompileRegisterToRegister(arm64.RBITW, reg, reg) c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg) } else { vt = runtimeValueTypeI64 c.assembler.CompileRegisterToRegister(arm64.RBIT, reg, reg) c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg) } c.pushRuntimeValueLocationOnRegister(reg, vt) return nil } // compilePopcnt implements compiler.compilePopcnt for the arm64 architecture. func (c *arm64Compiler) compilePopcnt(o *wazeroir.UnionOperation) error { v, err := c.popValueOnRegister() if err != nil { return err } reg := v.register if isZeroRegister(reg) { c.pushRuntimeValueLocationOnRegister(reg, v.valueType) return nil } freg, err := c.allocateRegister(registerTypeVector) if err != nil { return err } // arm64 doesn't have an instruction for population count on a scalar register, // so we use the vector one (VCNT). // This is exactly how the official Go compiler implements bits.OnesCount. // For example, "func() int { return bits.OnesCount(10) }" is compiled as // // MOVD $10, R0 ;; Load 10. // FMOVD R0, F0 // VCNT V0.B8, V0.B8 // UADDLV V0.B8, V0 // var movInst asm.Instruction unsignedInt := wazeroir.UnsignedInt(o.B1) if unsignedInt == wazeroir.UnsignedInt32 { movInst = arm64.FMOVS } else { movInst = arm64.FMOVD } c.assembler.CompileRegisterToRegister(movInst, reg, freg) c.assembler.CompileVectorRegisterToVectorRegister(arm64.VCNT, freg, freg, arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone) c.assembler.CompileVectorRegisterToVectorRegister(arm64.UADDLV, freg, freg, arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone) c.assembler.CompileRegisterToRegister(movInst, freg, reg) c.pushRuntimeValueLocationOnRegister(reg, v.valueType) return nil } // compileDiv implements compiler.compileDiv for the arm64 architecture. func (c *arm64Compiler) compileDiv(o *wazeroir.UnionOperation) error { dividend, divisor, err := c.popTwoValuesOnRegisters() if err != nil { return err } signedType := wazeroir.SignedType(o.B1) // If the divisor is on the zero register, exit from the function deterministically. if isZeroRegister(divisor.register) { // Push any value so that the subsequent instruction can have a consistent location stack state.
v := c.locationStack.pushRuntimeValueLocationOnStack() switch signedType { case wazeroir.SignedTypeInt32, wazeroir.SignedTypeUint32: v.valueType = runtimeValueTypeI32 case wazeroir.SignedTypeUint64, wazeroir.SignedTypeInt64: v.valueType = runtimeValueTypeI64 } c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero) return nil } var inst asm.Instruction var vt runtimeValueType switch signedType { case wazeroir.SignedTypeUint32: inst = arm64.UDIVW if err := c.compileIntegerDivPrecheck(true, false, dividend.register, divisor.register); err != nil { return err } vt = runtimeValueTypeI32 case wazeroir.SignedTypeUint64: if err := c.compileIntegerDivPrecheck(false, false, dividend.register, divisor.register); err != nil { return err } inst = arm64.UDIV vt = runtimeValueTypeI64 case wazeroir.SignedTypeInt32: if err := c.compileIntegerDivPrecheck(true, true, dividend.register, divisor.register); err != nil { return err } inst = arm64.SDIVW vt = runtimeValueTypeI32 case wazeroir.SignedTypeInt64: if err := c.compileIntegerDivPrecheck(false, true, dividend.register, divisor.register); err != nil { return err } inst = arm64.SDIV vt = runtimeValueTypeI64 case wazeroir.SignedTypeFloat32: inst = arm64.FDIVS vt = runtimeValueTypeF32 case wazeroir.SignedTypeFloat64: inst = arm64.FDIVD vt = runtimeValueTypeF64 } c.assembler.CompileRegisterToRegister(inst, divisor.register, dividend.register) c.pushRuntimeValueLocationOnRegister(dividend.register, vt) return nil } // compileIntegerDivPrecheck adds instructions to check if the divisor and dividend are sound for the division operation. // First, this adds instructions to check if the divisor equals zero, and if so, exits the function. // In addition, for signed divisions, this checks whether the result would overflow. func (c *arm64Compiler) compileIntegerDivPrecheck(is32Bit, isSigned bool, dividend, divisor asm.Register) error { // We check whether the divisor value equals zero. var cmpInst, movInst, loadInst asm.Instruction var minValueOffsetInVM int64 if is32Bit { cmpInst = arm64.CMPW movInst = arm64.MOVW loadInst = arm64.LDRW minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset } else { cmpInst = arm64.CMP movInst = arm64.MOVD loadInst = arm64.LDRD minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset } c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisor) // If it is zero, we exit with nativeCallStatusIntegerDivisionByZero. c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero) // Otherwise, we proceed. // If the operation is a signed integer div, we have to do an additional check on overflow. if isSigned { // For signed division, we have to add branches for the "math.MinInt{32,64} / -1" // case, which results in overflow. // First, we compare the divisor with -1. c.assembler.CompileConstToRegister(movInst, -1, arm64ReservedRegisterForTemporary) c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, divisor) // If they are not equal, we skip the following check. brIfDivisorNonMinusOne := c.assembler.CompileJump(arm64.BCONDNE) // Otherwise, we further check if the dividend equals math.MinInt32 or MinInt64. c.assembler.CompileMemoryToRegister( loadInst, arm64ReservedRegisterForCallEngine, minValueOffsetInVM, arm64ReservedRegisterForTemporary, ) c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, dividend) // If they are not equal, we are safe to execute the division. // Otherwise, we raise the overflow error.
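// For example, in the 32-bit case math.MinInt32 / -1 would be +2147483648, which does not fit in int32, so the Wasm spec requires an integer overflow trap instead of wrapping.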
c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerOverflow) c.assembler.SetJumpTargetOnNext(brIfDivisorNonMinusOne) } return nil } // compileRem implements compiler.compileRem for the arm64 architecture. func (c *arm64Compiler) compileRem(o *wazeroir.UnionOperation) error { dividend, divisor, err := c.popTwoValuesOnRegisters() if err != nil { return err } dividendReg := dividend.register divisorReg := divisor.register // If the divisor is on the zero register, exit from the function deterministically. if isZeroRegister(divisor.register) { // Push any value so that the subsequent instruction can have a consistent location stack state. v := c.locationStack.pushRuntimeValueLocationOnStack() v.valueType = runtimeValueTypeI32 c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero) return nil } var divInst, msubInst, cmpInst asm.Instruction signedInt := wazeroir.SignedInt(o.B1) switch signedInt { case wazeroir.SignedUint32: divInst = arm64.UDIVW msubInst = arm64.MSUBW cmpInst = arm64.CMPW case wazeroir.SignedUint64: divInst = arm64.UDIV msubInst = arm64.MSUB cmpInst = arm64.CMP case wazeroir.SignedInt32: divInst = arm64.SDIVW msubInst = arm64.MSUBW cmpInst = arm64.CMPW case wazeroir.SignedInt64: divInst = arm64.SDIV msubInst = arm64.MSUB cmpInst = arm64.CMP } // We check whether the divisor value equals zero. c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisorReg) // If it is zero, we exit with nativeCallStatusIntegerDivisionByZero. c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero) // Otherwise, we proceed. // Temporarily mark them as used so that we can allocate a result register while keeping these values. c.markRegisterUsed(dividend.register, divisor.register) resultReg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } // arm64 doesn't have an instruction for rem, so we calculate it with two instructions: UDIV (SDIV for signed) and MSUB. // This is exactly the same code that Clang emits. // [input: x0=dividend, x1=divisor] // >> UDIV x2, x0, x1 // >> MSUB x3, x2, x1, x0 // [result: x2=quotient, x3=remainder] // c.assembler.CompileTwoRegistersToRegister(divInst, divisorReg, dividendReg, resultReg) // resultReg = dividendReg - (divisorReg * resultReg) c.assembler.CompileThreeRegistersToRegister(msubInst, divisorReg, dividendReg, resultReg, resultReg) c.markRegisterUnused(dividend.register, divisor.register) c.pushRuntimeValueLocationOnRegister(resultReg, dividend.valueType) return nil } // compileAnd implements compiler.compileAnd for the arm64 architecture. func (c *arm64Compiler) compileAnd(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } // If either of the registers x1 or x2 is zero, // the result will always be zero. if isZeroRegister(x1.register) || isZeroRegister(x2.register) { c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType) return nil } // At this point, at least one of x1 or x2 registers is non zero. // Choose the non-zero register as destination.
destinationReg := x1.register if isZeroRegister(x1.register) { destinationReg = x2.register } var inst asm.Instruction unsignedInt := wazeroir.UnsignedInt(o.B1) switch unsignedInt { case wazeroir.UnsignedInt32: inst = arm64.ANDW case wazeroir.UnsignedInt64: inst = arm64.AND } c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg) c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } // compileOr implements compiler.compileOr for the arm64 architecture. func (c *arm64Compiler) compileOr(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } if isZeroRegister(x1.register) { c.pushRuntimeValueLocationOnRegister(x2.register, x2.valueType) return nil } if isZeroRegister(x2.register) { c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } var inst asm.Instruction unsignedInt := wazeroir.UnsignedInt(o.B1) switch unsignedInt { case wazeroir.UnsignedInt32: inst = arm64.ORRW case wazeroir.UnsignedInt64: inst = arm64.ORR } c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } // compileXor implements compiler.compileXor for the arm64 architecture. func (c *arm64Compiler) compileXor(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } // At this point, at least one of x1 or x2 registers is non zero. // Choose the non-zero register as destination. destinationReg := x1.register if isZeroRegister(x1.register) { destinationReg = x2.register } var inst asm.Instruction unsignedInt := wazeroir.UnsignedInt(o.B1) switch unsignedInt { case wazeroir.UnsignedInt32: inst = arm64.EORW case wazeroir.UnsignedInt64: inst = arm64.EOR } c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg) c.pushRuntimeValueLocationOnRegister(destinationReg, x1.valueType) return nil } // compileShl implements compiler.compileShl for the arm64 architecture. func (c *arm64Compiler) compileShl(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } if isZeroRegister(x1.register) || isZeroRegister(x2.register) { c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } var inst asm.Instruction unsignedInt := wazeroir.UnsignedInt(o.B1) switch unsignedInt { case wazeroir.UnsignedInt32: inst = arm64.LSLW case wazeroir.UnsignedInt64: inst = arm64.LSL } c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } // compileShr implements compiler.compileShr for the arm64 architecture. 
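// Note: Wasm interprets the shift amount modulo the bit width (e.g. shifting an i32 by 33 behaves like shifting by 1); the AArch64 variable shift instructions used here likewise consume only the low bits of the shift-amount register, so no explicit masking should be necessary.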
func (c *arm64Compiler) compileShr(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } if isZeroRegister(x1.register) || isZeroRegister(x2.register) { c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } var inst asm.Instruction signedInt := wazeroir.SignedInt(o.B1) switch signedInt { case wazeroir.SignedInt32: inst = arm64.ASRW case wazeroir.SignedInt64: inst = arm64.ASR case wazeroir.SignedUint32: inst = arm64.LSRW case wazeroir.SignedUint64: inst = arm64.LSR } c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } // compileRotl implements compiler.compileRotl for the arm64 architecture. func (c *arm64Compiler) compileRotl(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } if isZeroRegister(x1.register) || isZeroRegister(x2.register) { c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } var inst, neginst asm.Instruction unsignedInt := wazeroir.UnsignedInt(o.B1) switch unsignedInt { case wazeroir.UnsignedInt32: inst = arm64.RORW neginst = arm64.NEGW case wazeroir.UnsignedInt64: inst = arm64.ROR neginst = arm64.NEG } // Arm64 doesn't have rotate left instruction. // The shift amount needs to be converted to a negative number, similar to assembly output of bits.RotateLeft. c.assembler.CompileRegisterToRegister(neginst, x2.register, x2.register) c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } // compileRotr implements compiler.compileRotr for the arm64 architecture. func (c *arm64Compiler) compileRotr(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } if isZeroRegister(x1.register) || isZeroRegister(x2.register) { c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } var inst asm.Instruction unsignedInt := wazeroir.UnsignedInt(o.B1) switch unsignedInt { case wazeroir.UnsignedInt32: inst = arm64.RORW case wazeroir.UnsignedInt64: inst = arm64.ROR } c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } // compileAbs implements compiler.compileAbs for the arm64 architecture. func (c *arm64Compiler) compileAbs(o *wazeroir.UnionOperation) error { if wazeroir.Float(o.B1) == wazeroir.Float32 { return c.compileSimpleUnop(arm64.FABSS, runtimeValueTypeF32) } else { return c.compileSimpleUnop(arm64.FABSD, runtimeValueTypeF64) } } // compileNeg implements compiler.compileNeg for the arm64 architecture. func (c *arm64Compiler) compileNeg(o *wazeroir.UnionOperation) error { if wazeroir.Float(o.B1) == wazeroir.Float32 { return c.compileSimpleUnop(arm64.FNEGS, runtimeValueTypeF32) } else { return c.compileSimpleUnop(arm64.FNEGD, runtimeValueTypeF64) } } // compileCeil implements compiler.compileCeil for the arm64 architecture. func (c *arm64Compiler) compileCeil(o *wazeroir.UnionOperation) error { if wazeroir.Float(o.B1) == wazeroir.Float32 { return c.compileSimpleUnop(arm64.FRINTPS, runtimeValueTypeF32) } else { return c.compileSimpleUnop(arm64.FRINTPD, runtimeValueTypeF64) } } // compileFloor implements compiler.compileFloor for the arm64 architecture. 
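// FRINTM rounds toward negative infinity, which matches the semantics of f32.floor/f64.floor.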
func (c *arm64Compiler) compileFloor(o *wazeroir.UnionOperation) error { if wazeroir.Float(o.B1) == wazeroir.Float32 { return c.compileSimpleUnop(arm64.FRINTMS, runtimeValueTypeF32) } else { return c.compileSimpleUnop(arm64.FRINTMD, runtimeValueTypeF64) } } // compileTrunc implements compiler.compileTrunc for the arm64 architecture. func (c *arm64Compiler) compileTrunc(o *wazeroir.UnionOperation) error { if wazeroir.Float(o.B1) == wazeroir.Float32 { return c.compileSimpleUnop(arm64.FRINTZS, runtimeValueTypeF32) } else { return c.compileSimpleUnop(arm64.FRINTZD, runtimeValueTypeF64) } } // compileNearest implements compiler.compileNearest for the arm64 architecture. func (c *arm64Compiler) compileNearest(o *wazeroir.UnionOperation) error { if wazeroir.Float(o.B1) == wazeroir.Float32 { return c.compileSimpleUnop(arm64.FRINTNS, runtimeValueTypeF32) } else { return c.compileSimpleUnop(arm64.FRINTND, runtimeValueTypeF64) } } // compileSqrt implements compiler.compileSqrt for the arm64 architecture. func (c *arm64Compiler) compileSqrt(o *wazeroir.UnionOperation) error { if wazeroir.Float(o.B1) == wazeroir.Float32 { return c.compileSimpleUnop(arm64.FSQRTS, runtimeValueTypeF32) } else { return c.compileSimpleUnop(arm64.FSQRTD, runtimeValueTypeF64) } } // compileMin implements compiler.compileMin for the arm64 architecture. func (c *arm64Compiler) compileMin(o *wazeroir.UnionOperation) error { if wazeroir.Float(o.B1) == wazeroir.Float32 { return c.compileSimpleFloatBinop(arm64.FMINS) } else { return c.compileSimpleFloatBinop(arm64.FMIND) } } // compileMax implements compiler.compileMax for the arm64 architecture. func (c *arm64Compiler) compileMax(o *wazeroir.UnionOperation) error { if wazeroir.Float(o.B1) == wazeroir.Float32 { return c.compileSimpleFloatBinop(arm64.FMAXS) } else { return c.compileSimpleFloatBinop(arm64.FMAXD) } } func (c *arm64Compiler) compileSimpleFloatBinop(inst asm.Instruction) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register) c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } // compileCopysign implements compiler.compileCopysign for the arm64 architecture. func (c *arm64Compiler) compileCopysign(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } var ldr asm.Instruction var minValueOffsetInVM int64 if wazeroir.Float(o.B1) == wazeroir.Float32 { ldr = arm64.FLDRS minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset } else { ldr = arm64.FLDRD minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset } c.markRegisterUsed(x1.register, x2.register) freg, err := c.allocateRegister(registerTypeVector) if err != nil { return err } // This is exactly the same code emitted by GCC for "__builtin_copysign": // // mov x0, -9223372036854775808 // fmov d2, x0 // vbit v0.8b, v1.8b, v2.8b // // "mov freg, -9223372036854775808 (stored at ce.minimum64BitSignedInt)" c.assembler.CompileMemoryToRegister( ldr, arm64ReservedRegisterForCallEngine, minValueOffsetInVM, freg, ) // VBIT inserts each bit from the first operand into the destination if the corresponding bit of the second operand is 1, // otherwise it leaves the destination bit unchanged. // See https://developer.arm.com/documentation/dui0801/g/Advanced-SIMD-Instructions--32-bit-/VBIT // // "vbit vreg.8b, x2vreg.8b, x1vreg.8b" == "inserting 64th bit of x2 into x1". 
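// Because freg holds only the sign bit (the minimum signed integer constant loaded above), VBIT transfers exactly the sign bit of x2 into x1 and leaves every other bit of x1 unchanged, e.g. copysign(1.0, -2.0) == -1.0.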
c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VBIT, freg, x2.register, x1.register, arm64.VectorArrangement16B) c.markRegisterUnused(x2.register) c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) return nil } // compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the arm64 architecture. func (c *arm64Compiler) compileI32WrapFromI64() error { return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI32) } // compileITruncFromF implements compiler.compileITruncFromF for the arm64 architecture. func (c *arm64Compiler) compileITruncFromF(o *wazeroir.UnionOperation) error { // Clear the floating point status register (FPSR). c.assembler.CompileRegisterToRegister(arm64.MSR, arm64.RegRZR, arm64.RegFPSR) var vt runtimeValueType var convinst asm.Instruction inputType := wazeroir.Float(o.B1) outputType := wazeroir.SignedInt(o.B2) nonTrapping := o.B3 is32bitFloat := inputType == wazeroir.Float32 if is32bitFloat && outputType == wazeroir.SignedInt32 { convinst = arm64.FCVTZSSW vt = runtimeValueTypeI32 } else if is32bitFloat && outputType == wazeroir.SignedInt64 { convinst = arm64.FCVTZSS vt = runtimeValueTypeI64 } else if !is32bitFloat && outputType == wazeroir.SignedInt32 { convinst = arm64.FCVTZSDW vt = runtimeValueTypeI32 } else if !is32bitFloat && outputType == wazeroir.SignedInt64 { convinst = arm64.FCVTZSD vt = runtimeValueTypeI64 } else if is32bitFloat && outputType == wazeroir.SignedUint32 { convinst = arm64.FCVTZUSW vt = runtimeValueTypeI32 } else if is32bitFloat && outputType == wazeroir.SignedUint64 { convinst = arm64.FCVTZUS vt = runtimeValueTypeI64 } else if !is32bitFloat && outputType == wazeroir.SignedUint32 { convinst = arm64.FCVTZUDW vt = runtimeValueTypeI32 } else if !is32bitFloat && outputType == wazeroir.SignedUint64 { convinst = arm64.FCVTZUD vt = runtimeValueTypeI64 } source, err := c.popValueOnRegister() if err != nil { return err } sourceReg := source.register destinationReg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.assembler.CompileRegisterToRegister(convinst, sourceReg, destinationReg) c.pushRuntimeValueLocationOnRegister(destinationReg, vt) if !nonTrapping { // Obtain the floating point status register value into the general purpose register, // so that we can check if the conversion resulted in undefined behavior. c.assembler.CompileRegisterToRegister(arm64.MRS, arm64.RegFPSR, arm64ReservedRegisterForTemporary) // Check if the conversion was undefined by comparing the status with 1. // See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register c.assembler.CompileRegisterAndConstToNone(arm64.CMP, arm64ReservedRegisterForTemporary, 1) brOK := c.assembler.CompileJump(arm64.BCONDNE) // If so, exit the execution with errors depending on whether or not the source value is NaN. var floatcmp asm.Instruction if is32bitFloat { floatcmp = arm64.FCMPS } else { floatcmp = arm64.FCMPD } c.assembler.CompileTwoRegistersToNone(floatcmp, sourceReg, sourceReg) // VS flag is set if at least one of values for FCMP is NaN. // https://developer.arm.com/documentation/dui0801/g/Condition-Codes/Comparison-of-condition-code-meanings-in-integer-and-floating-point-code // If the source value is not NaN, the operation was overflow. c.compileMaybeExitFromNativeCode(arm64.BCONDVS, nativeCallStatusIntegerOverflow) // Otherwise, the operation was invalid as this is trying to convert NaN to integer. 
c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion) // Otherwise, we branch into the next instruction. c.assembler.SetJumpTargetOnNext(brOK) } return nil } // compileFConvertFromI implements compiler.compileFConvertFromI for the arm64 architecture. func (c *arm64Compiler) compileFConvertFromI(o *wazeroir.UnionOperation) error { var convinst asm.Instruction inputType := wazeroir.SignedInt(o.B1) outputType := wazeroir.Float(o.B2) if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt32 { convinst = arm64.SCVTFWS } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt64 { convinst = arm64.SCVTFS } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt32 { convinst = arm64.SCVTFWD } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt64 { convinst = arm64.SCVTFD } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint32 { convinst = arm64.UCVTFWS } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint64 { convinst = arm64.UCVTFS } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint32 { convinst = arm64.UCVTFWD } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint64 { convinst = arm64.UCVTFD } var vt runtimeValueType if outputType == wazeroir.Float32 { vt = runtimeValueTypeF32 } else { vt = runtimeValueTypeF64 } return c.compileSimpleConversion(convinst, registerTypeVector, vt) } // compileF32DemoteFromF64 implements compiler.compileF32DemoteFromF64 for the arm64 architecture. func (c *arm64Compiler) compileF32DemoteFromF64() error { return c.compileSimpleUnop(arm64.FCVTDS, runtimeValueTypeF32) } // compileF64PromoteFromF32 implements compiler.compileF64PromoteFromF32 for the arm64 architecture. func (c *arm64Compiler) compileF64PromoteFromF32() error { return c.compileSimpleUnop(arm64.FCVTSD, runtimeValueTypeF64) } // compileI32ReinterpretFromF32 implements compiler.compileI32ReinterpretFromF32 for the arm64 architecture. func (c *arm64Compiler) compileI32ReinterpretFromF32() error { if peek := c.locationStack.peek(); peek.onStack() { // If the value is on the stack, this is no-op as there is nothing to do for converting type. peek.valueType = runtimeValueTypeI32 return nil } return c.compileSimpleConversion(arm64.FMOVS, registerTypeGeneralPurpose, runtimeValueTypeI32) } // compileI64ReinterpretFromF64 implements compiler.compileI64ReinterpretFromF64 for the arm64 architecture. func (c *arm64Compiler) compileI64ReinterpretFromF64() error { if peek := c.locationStack.peek(); peek.onStack() { // If the value is on the stack, this is no-op as there is nothing to do for converting type. peek.valueType = runtimeValueTypeI64 return nil } return c.compileSimpleConversion(arm64.FMOVD, registerTypeGeneralPurpose, runtimeValueTypeI64) } // compileF32ReinterpretFromI32 implements compiler.compileF32ReinterpretFromI32 for the arm64 architecture. func (c *arm64Compiler) compileF32ReinterpretFromI32() error { if peek := c.locationStack.peek(); peek.onStack() { // If the value is on the stack, this is no-op as there is nothing to do for converting type. peek.valueType = runtimeValueTypeF32 return nil } return c.compileSimpleConversion(arm64.FMOVS, registerTypeVector, runtimeValueTypeF32) } // compileF64ReinterpretFromI64 implements compiler.compileF64ReinterpretFromI64 for the arm64 architecture. 
func (c *arm64Compiler) compileF64ReinterpretFromI64() error { if peek := c.locationStack.peek(); peek.onStack() { // If the value is on the stack, this is no-op as there is nothing to do for converting type. peek.valueType = runtimeValueTypeF64 return nil } return c.compileSimpleConversion(arm64.FMOVD, registerTypeVector, runtimeValueTypeF64) } func (c *arm64Compiler) compileSimpleConversion(inst asm.Instruction, destinationRegType registerType, resultRuntimeValueType runtimeValueType) error { source, err := c.popValueOnRegister() if err != nil { return err } destinationReg, err := c.allocateRegister(destinationRegType) if err != nil { return err } c.assembler.CompileRegisterToRegister(inst, source.register, destinationReg) c.pushRuntimeValueLocationOnRegister(destinationReg, resultRuntimeValueType) return nil } // compileExtend implements compiler.compileExtend for the arm64 architecture. func (c *arm64Compiler) compileExtend(o *wazeroir.UnionOperation) error { signed := o.B1 != 0 if signed { return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64) } else { return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI64) } } // compileSignExtend32From8 implements compiler.compileSignExtend32From8 for the arm64 architecture. func (c *arm64Compiler) compileSignExtend32From8() error { return c.compileSimpleUnop(arm64.SXTBW, runtimeValueTypeI32) } // compileSignExtend32From16 implements compiler.compileSignExtend32From16 for the arm64 architecture. func (c *arm64Compiler) compileSignExtend32From16() error { return c.compileSimpleUnop(arm64.SXTHW, runtimeValueTypeI32) } // compileSignExtend64From8 implements compiler.compileSignExtend64From8 for the arm64 architecture. func (c *arm64Compiler) compileSignExtend64From8() error { return c.compileSimpleUnop(arm64.SXTB, runtimeValueTypeI64) } // compileSignExtend64From16 implements compiler.compileSignExtend64From16 for the arm64 architecture. func (c *arm64Compiler) compileSignExtend64From16() error { return c.compileSimpleUnop(arm64.SXTH, runtimeValueTypeI64) } // compileSignExtend64From32 implements compiler.compileSignExtend64From32 for the arm64 architecture. func (c *arm64Compiler) compileSignExtend64From32() error { return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64) } func (c *arm64Compiler) compileSimpleUnop(inst asm.Instruction, resultRuntimeValueType runtimeValueType) error { v, err := c.popValueOnRegister() if err != nil { return err } reg := v.register c.assembler.CompileRegisterToRegister(inst, reg, reg) c.pushRuntimeValueLocationOnRegister(reg, resultRuntimeValueType) return nil } // compileEq implements compiler.compileEq for the arm64 architecture. func (c *arm64Compiler) compileEq(o *wazeroir.UnionOperation) error { return c.emitEqOrNe(true, wazeroir.UnsignedType(o.B1)) } // compileNe implements compiler.compileNe for the arm64 architecture. func (c *arm64Compiler) compileNe(o *wazeroir.UnionOperation) error { return c.emitEqOrNe(false, wazeroir.UnsignedType(o.B1)) } // emitEqOrNe implements compiler.compileEq and compiler.compileNe for the arm64 architecture. 
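// Instead of materializing a 0/1 value right away, the comparison result is pushed as a conditional register value (CondEQ or CondNE); it is moved into a general purpose register lazily, only when a subsequent operation actually consumes it.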
func (c *arm64Compiler) emitEqOrNe(isEq bool, unsignedType wazeroir.UnsignedType) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } var inst asm.Instruction switch unsignedType { case wazeroir.UnsignedTypeI32: inst = arm64.CMPW case wazeroir.UnsignedTypeI64: inst = arm64.CMP case wazeroir.UnsignedTypeF32: inst = arm64.FCMPS case wazeroir.UnsignedTypeF64: inst = arm64.FCMPD } c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) // Push the comparison result as a conditional register value. cond := arm64.CondNE if isEq { cond = arm64.CondEQ } c.locationStack.pushRuntimeValueLocationOnConditionalRegister(cond) return nil } // compileEqz implements compiler.compileEqz for the arm64 architecture. func (c *arm64Compiler) compileEqz(o *wazeroir.UnionOperation) error { x1, err := c.popValueOnRegister() if err != nil { return err } var inst asm.Instruction unsignedInt := wazeroir.UnsignedInt(o.B1) switch unsignedInt { case wazeroir.UnsignedInt32: inst = arm64.CMPW case wazeroir.UnsignedInt64: inst = arm64.CMP } c.assembler.CompileTwoRegistersToNone(inst, arm64.RegRZR, x1.register) // Push the comparison result as a conditional register value. c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ) return nil } // compileLt implements compiler.compileLt for the arm64 architecture. func (c *arm64Compiler) compileLt(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } var inst asm.Instruction var conditionalRegister asm.ConditionalRegisterState signedType := wazeroir.SignedType(o.B1) switch signedType { case wazeroir.SignedTypeUint32: inst = arm64.CMPW conditionalRegister = arm64.CondLO case wazeroir.SignedTypeUint64: inst = arm64.CMP conditionalRegister = arm64.CondLO case wazeroir.SignedTypeInt32: inst = arm64.CMPW conditionalRegister = arm64.CondLT case wazeroir.SignedTypeInt64: inst = arm64.CMP conditionalRegister = arm64.CondLT case wazeroir.SignedTypeFloat32: inst = arm64.FCMPS conditionalRegister = arm64.CondMI case wazeroir.SignedTypeFloat64: inst = arm64.FCMPD conditionalRegister = arm64.CondMI } c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) // Push the comparison result as a conditional register value. c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) return nil } // compileGt implements compiler.compileGt for the arm64 architecture. func (c *arm64Compiler) compileGt(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } var inst asm.Instruction var conditionalRegister asm.ConditionalRegisterState signedType := wazeroir.SignedType(o.B1) switch signedType { case wazeroir.SignedTypeUint32: inst = arm64.CMPW conditionalRegister = arm64.CondHI case wazeroir.SignedTypeUint64: inst = arm64.CMP conditionalRegister = arm64.CondHI case wazeroir.SignedTypeInt32: inst = arm64.CMPW conditionalRegister = arm64.CondGT case wazeroir.SignedTypeInt64: inst = arm64.CMP conditionalRegister = arm64.CondGT case wazeroir.SignedTypeFloat32: inst = arm64.FCMPS conditionalRegister = arm64.CondGT case wazeroir.SignedTypeFloat64: inst = arm64.FCMPD conditionalRegister = arm64.CondGT } c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) // Push the comparison result as a conditional register value. c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) return nil } // compileLe implements compiler.compileLe for the arm64 architecture. 
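// As in compileLt and compileGt, the floating-point cases pick condition codes that evaluate to false when FCMP reports an unordered (NaN) result, so comparisons involving NaN yield 0 as the Wasm spec requires.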
func (c *arm64Compiler) compileLe(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } var inst asm.Instruction var conditionalRegister asm.ConditionalRegisterState signedType := wazeroir.SignedType(o.B1) switch signedType { case wazeroir.SignedTypeUint32: inst = arm64.CMPW conditionalRegister = arm64.CondLS case wazeroir.SignedTypeUint64: inst = arm64.CMP conditionalRegister = arm64.CondLS case wazeroir.SignedTypeInt32: inst = arm64.CMPW conditionalRegister = arm64.CondLE case wazeroir.SignedTypeInt64: inst = arm64.CMP conditionalRegister = arm64.CondLE case wazeroir.SignedTypeFloat32: inst = arm64.FCMPS conditionalRegister = arm64.CondLS case wazeroir.SignedTypeFloat64: inst = arm64.FCMPD conditionalRegister = arm64.CondLS } c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) // Push the comparison result as a conditional register value. c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) return nil } // compileGe implements compiler.compileGe for the arm64 architecture. func (c *arm64Compiler) compileGe(o *wazeroir.UnionOperation) error { x1, x2, err := c.popTwoValuesOnRegisters() if err != nil { return err } var inst asm.Instruction var conditionalRegister asm.ConditionalRegisterState signedType := wazeroir.SignedType(o.B1) switch signedType { case wazeroir.SignedTypeUint32: inst = arm64.CMPW conditionalRegister = arm64.CondHS case wazeroir.SignedTypeUint64: inst = arm64.CMP conditionalRegister = arm64.CondHS case wazeroir.SignedTypeInt32: inst = arm64.CMPW conditionalRegister = arm64.CondGE case wazeroir.SignedTypeInt64: inst = arm64.CMP conditionalRegister = arm64.CondGE case wazeroir.SignedTypeFloat32: inst = arm64.FCMPS conditionalRegister = arm64.CondGE case wazeroir.SignedTypeFloat64: inst = arm64.FCMPD conditionalRegister = arm64.CondGE } c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) // Push the comparison result as a conditional register value. c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) return nil } // compileLoad implements compiler.compileLoad for the arm64 architecture. func (c *arm64Compiler) compileLoad(o *wazeroir.UnionOperation) error { var ( isFloat bool loadInst asm.Instruction targetSizeInBytes int64 vt runtimeValueType ) unsignedType := wazeroir.UnsignedType(o.B1) offset := uint32(o.U2) switch unsignedType { case wazeroir.UnsignedTypeI32: loadInst = arm64.LDRW targetSizeInBytes = 32 / 8 vt = runtimeValueTypeI32 case wazeroir.UnsignedTypeI64: loadInst = arm64.LDRD targetSizeInBytes = 64 / 8 vt = runtimeValueTypeI64 case wazeroir.UnsignedTypeF32: loadInst = arm64.FLDRS isFloat = true targetSizeInBytes = 32 / 8 vt = runtimeValueTypeF32 case wazeroir.UnsignedTypeF64: loadInst = arm64.FLDRD isFloat = true targetSizeInBytes = 64 / 8 vt = runtimeValueTypeF64 } return c.compileLoadImpl(offset, loadInst, targetSizeInBytes, isFloat, vt) } // compileLoad8 implements compiler.compileLoad8 for the arm64 architecture. 
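// The signed cases use LDRSB (a sign-extending load) while the unsigned cases use LDRB (zero-extending), e.g. loading the byte 0xff yields -1 with i32.load8_s but 255 with i32.load8_u; compileLoad16 below follows the same pattern with LDRSH/LDRH.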
func (c *arm64Compiler) compileLoad8(o *wazeroir.UnionOperation) error { var loadInst asm.Instruction var vt runtimeValueType signedInt := wazeroir.SignedInt(o.B1) offset := uint32(o.U2) switch signedInt { case wazeroir.SignedInt32: loadInst = arm64.LDRSBW vt = runtimeValueTypeI32 case wazeroir.SignedInt64: loadInst = arm64.LDRSBD vt = runtimeValueTypeI64 case wazeroir.SignedUint32: loadInst = arm64.LDRB vt = runtimeValueTypeI32 case wazeroir.SignedUint64: loadInst = arm64.LDRB vt = runtimeValueTypeI64 } return c.compileLoadImpl(offset, loadInst, 1, false, vt) } // compileLoad16 implements compiler.compileLoad16 for the arm64 architecture. func (c *arm64Compiler) compileLoad16(o *wazeroir.UnionOperation) error { var loadInst asm.Instruction var vt runtimeValueType signedInt := wazeroir.SignedInt(o.B1) offset := uint32(o.U2) switch signedInt { case wazeroir.SignedInt32: loadInst = arm64.LDRSHW vt = runtimeValueTypeI32 case wazeroir.SignedInt64: loadInst = arm64.LDRSHD vt = runtimeValueTypeI64 case wazeroir.SignedUint32: loadInst = arm64.LDRH vt = runtimeValueTypeI32 case wazeroir.SignedUint64: loadInst = arm64.LDRH vt = runtimeValueTypeI64 } return c.compileLoadImpl(offset, loadInst, 16/8, false, vt) } // compileLoad32 implements compiler.compileLoad32 for the arm64 architecture. func (c *arm64Compiler) compileLoad32(o *wazeroir.UnionOperation) error { var loadInst asm.Instruction signed := o.B1 == 1 offset := uint32(o.U2) if signed { loadInst = arm64.LDRSW } else { loadInst = arm64.LDRW } return c.compileLoadImpl(offset, loadInst, 32/8, false, runtimeValueTypeI64) } // compileLoadImpl implements compileLoadImpl* variants for arm64 architecture. func (c *arm64Compiler) compileLoadImpl(offsetArg uint32, loadInst asm.Instruction, targetSizeInBytes int64, isFloat bool, resultRuntimeValueType runtimeValueType, ) error { offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes) if err != nil { return err } resultRegister := offsetReg if isFloat { resultRegister, err = c.allocateRegister(registerTypeVector) if err != nil { return err } } // "resultRegister = [arm64ReservedRegisterForMemory + offsetReg]" // In other words, "resultRegister = memory.Buffer[offset: offset+targetSizeInBytes]" c.assembler.CompileMemoryWithRegisterOffsetToRegister( loadInst, arm64ReservedRegisterForMemory, offsetReg, resultRegister, ) c.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType) return nil } // compileStore implements compiler.compileStore for the arm64 architecture. func (c *arm64Compiler) compileStore(o *wazeroir.UnionOperation) error { var movInst asm.Instruction var targetSizeInBytes int64 unsignedType := wazeroir.UnsignedType(o.B1) offset := uint32(o.U2) switch unsignedType { case wazeroir.UnsignedTypeI32: movInst = arm64.STRW targetSizeInBytes = 32 / 8 case wazeroir.UnsignedTypeI64: movInst = arm64.STRD targetSizeInBytes = 64 / 8 case wazeroir.UnsignedTypeF32: movInst = arm64.FSTRS targetSizeInBytes = 32 / 8 case wazeroir.UnsignedTypeF64: movInst = arm64.FSTRD targetSizeInBytes = 64 / 8 } return c.compileStoreImpl(offset, movInst, targetSizeInBytes) } // compileStore8 implements compiler.compileStore8 for the arm64 architecture. func (c *arm64Compiler) compileStore8(o *wazeroir.UnionOperation) error { return c.compileStoreImpl(uint32(o.U2), arm64.STRB, 1) } // compileStore16 implements compiler.compileStore16 for the arm64 architecture. 
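// A narrowing store writes only the low bits of the value register (STRH stores the low 16 bits), so no explicit masking of the operand is needed.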
func (c *arm64Compiler) compileStore16(o *wazeroir.UnionOperation) error { return c.compileStoreImpl(uint32(o.U2), arm64.STRH, 16/8) } // compileStore32 implements compiler.compileStore32 for the arm64 architecture. func (c *arm64Compiler) compileStore32(o *wazeroir.UnionOperation) error { return c.compileStoreImpl(uint32(o.U2), arm64.STRW, 32/8) } // compileStoreImpl implements compileStore* variants for the arm64 architecture. func (c *arm64Compiler) compileStoreImpl(offsetArg uint32, storeInst asm.Instruction, targetSizeInBytes int64) error { val, err := c.popValueOnRegister() if err != nil { return err } // Mark it temporarily used as compileMemoryAccessOffsetSetup might try allocating a register. c.markRegisterUsed(val.register) offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes) if err != nil { return err } // "[arm64ReservedRegisterForMemory + offsetReg] = val.register" // In other words, "memory.Buffer[offset: offset+targetSizeInBytes] = val.register" c.assembler.CompileRegisterToMemoryWithRegisterOffset( storeInst, val.register, arm64ReservedRegisterForMemory, offsetReg, ) c.markRegisterUnused(val.register) return nil } // compileMemoryAccessOffsetSetup pops the top value from the stack (called "base"), stores "base + offsetArg" // into a register, and returns the stored register. We call the result "offset" because we access the memory // as memory.Buffer[offset: offset+targetSizeInBytes]. // // Note: this also emits the instructions to check for out of bounds memory access. // In other words, if the offset+targetSizeInBytes exceeds the memory size, the code exits with nativeCallStatusCodeMemoryOutOfBounds status. func (c *arm64Compiler) compileMemoryAccessOffsetSetup(offsetArg uint32, targetSizeInBytes int64) (offsetRegister asm.Register, err error) { base, err := c.popValueOnRegister() if err != nil { return 0, err } offsetRegister = base.register if isZeroRegister(base.register) { offsetRegister, err = c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return } c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetRegister) } if offsetConst := int64(offsetArg) + targetSizeInBytes; offsetConst <= math.MaxUint32 { // "offsetRegister = base + offsetArg + targetSizeInBytes" c.assembler.CompileConstToRegister(arm64.ADD, offsetConst, offsetRegister) } else { // If the offset const is too large, we exit with nativeCallStatusCodeMemoryOutOfBounds. c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds) return } // "arm64ReservedRegisterForTemporary = len(memory.Buffer)" c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, arm64ReservedRegisterForTemporary) // Check if offsetRegister(= base+offsetArg+targetSizeInBytes) > len(memory.Buffer). c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, offsetRegister) // If offsetRegister(= base+offsetArg+targetSizeInBytes) exceeds the memory length, // we exit the function with nativeCallStatusCodeMemoryOutOfBounds. c.compileMaybeExitFromNativeCode(arm64.BCONDLS, nativeCallStatusCodeMemoryOutOfBounds) // Otherwise, we subtract targetSizeInBytes from offsetRegister. c.assembler.CompileConstToRegister(arm64.SUB, targetSizeInBytes, offsetRegister) return offsetRegister, nil } // compileMemoryGrow implements compiler.compileMemoryGrow for the arm64 architecture.
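// Growing the memory may reallocate the backing buffer, so memory.grow is handled by exiting to the Go runtime via the built-in function call mechanism; afterwards the reserved stack base pointer and memory registers are re-initialized just like in the function preamble.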
func (c *arm64Compiler) compileMemoryGrow() error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexMemoryGrow); err != nil { return err } // After return, we re-initialize reserved registers just like preamble of functions. c.compileReservedStackBasePointerRegisterInitialization() c.compileReservedMemoryRegisterInitialization() return nil } // compileMemorySize implements compileMemorySize variants for arm64 architecture. func (c *arm64Compiler) compileMemorySize() error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } reg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } // "reg = len(memory.Buffer)" c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, reg, ) // memory.size loads the page size of memory, so we have to divide by the page size. // "reg = reg >> wasm.MemoryPageSizeInBits (== reg / wasm.MemoryPageSize) " c.assembler.CompileConstToRegister( arm64.LSR, wasm.MemoryPageSizeInBits, reg, ) c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32) return nil } // compileCallGoFunction adds instructions to call a Go function whose address equals the addr parameter. // compilerStatus is set before making call, and it should be either nativeCallStatusCodeCallBuiltInFunction or // nativeCallStatusCodeCallGoHostFunction. func (c *arm64Compiler) compileCallGoFunction(compilerStatus nativeCallStatusCode, builtinFunction wasm.Index) error { // Release all the registers as our calling convention requires the caller-save. if err := c.compileReleaseAllRegistersToStack(); err != nil { return err } if compilerStatus == nativeCallStatusCodeCallBuiltInFunction { // Set the target function address to ce.functionCallAddress // "tmp = $index" c.assembler.CompileConstToRegister( arm64.MOVD, int64(builtinFunction), arm64ReservedRegisterForTemporary, ) // "[arm64ReservedRegisterForCallEngine + callEngineExitContextFunctionCallAddressOffset] = tmp" // In other words, "ce.functionCallAddress = tmp (== $addr)" c.assembler.CompileRegisterToMemory( arm64.STRW, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine, callEngineExitContextBuiltinFunctionCallIndexOffset, ) } c.compileExitFromNativeCode(compilerStatus) return nil } // compileConstI32 implements compiler.compileConstI32 for the arm64 architecture. func (c *arm64Compiler) compileConstI32(o *wazeroir.UnionOperation) error { return c.compileIntConstant(true, o.U1) } // compileConstI64 implements compiler.compileConstI64 for the arm64 architecture. func (c *arm64Compiler) compileConstI64(o *wazeroir.UnionOperation) error { return c.compileIntConstant(false, o.U1) } // compileIntConstant adds instructions to load an integer constant. // is32bit is true if the target value is originally 32-bit const, false otherwise. // value holds the (zero-extended for 32-bit case) load target constant. func (c *arm64Compiler) compileIntConstant(is32bit bool, value uint64) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } var inst asm.Instruction var vt runtimeValueType if is32bit { inst = arm64.MOVW vt = runtimeValueTypeI32 } else { inst = arm64.MOVD vt = runtimeValueTypeI64 } if value == 0 { c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, vt) } else { // Take a register to load the value. 
reg, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.assembler.CompileConstToRegister(inst, int64(value), reg) c.pushRuntimeValueLocationOnRegister(reg, vt) } return nil } // compileConstF32 implements compiler.compileConstF32 for the arm64 architecture. func (c *arm64Compiler) compileConstF32(o *wazeroir.UnionOperation) error { return c.compileFloatConstant(true, o.U1 /*uint64(math.Float32bits(o.Value))*/) } // compileConstF64 implements compiler.compileConstF64 for the arm64 architecture. func (c *arm64Compiler) compileConstF64(o *wazeroir.UnionOperation) error { return c.compileFloatConstant(false, o.U1 /*math.Float64bits(o.Value)*/) } // compileFloatConstant adds instructions to load a float constant. // is32bit is true if the target value is originally 32-bit const, false otherwise. // value holds the (zero-extended for 32-bit case) bit representation of load target float constant. func (c *arm64Compiler) compileFloatConstant(is32bit bool, value uint64) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } // Take a register to load the value. reg, err := c.allocateRegister(registerTypeVector) if err != nil { return err } tmpReg := arm64.RegRZR if value != 0 { tmpReg = arm64ReservedRegisterForTemporary var inst asm.Instruction if is32bit { inst = arm64.MOVW } else { inst = arm64.MOVD } c.assembler.CompileConstToRegister(inst, int64(value), tmpReg) } // Use FMOV instruction to move the value on integer register into the float one. var inst asm.Instruction var vt runtimeValueType if is32bit { vt = runtimeValueTypeF32 inst = arm64.FMOVS } else { vt = runtimeValueTypeF64 inst = arm64.FMOVD } c.assembler.CompileRegisterToRegister(inst, tmpReg, reg) c.pushRuntimeValueLocationOnRegister(reg, vt) return nil } // compileMemoryInit implements compiler.compileMemoryInit for the arm64 architecture. func (c *arm64Compiler) compileMemoryInit(o *wazeroir.UnionOperation) error { dataIndex := uint32(o.U1) return c.compileInitImpl(false, dataIndex, 0) } // compileInitImpl implements compileTableInit and compileMemoryInit. // // TODO: the compiled code in this function should be reused and compile at once as // the code is independent of any module. 
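//
// The generated code bounds-checks both the source (data/element instance) and the destination (memory/table) against offset+size, then copies one element at a time: bytes for memory.init and 8-byte references for table.init.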
func (c *arm64Compiler) compileInitImpl(isTable bool, index, tableIndex uint32) error { outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds if isTable { outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess } copySize, err := c.popValueOnRegister() if err != nil { return err } c.markRegisterUsed(copySize.register) sourceOffset, err := c.popValueOnRegister() if err != nil { return err } if isZeroRegister(sourceOffset.register) { sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register) } c.markRegisterUsed(sourceOffset.register) destinationOffset, err := c.popValueOnRegister() if err != nil { return err } if isZeroRegister(destinationOffset.register) { destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register) } c.markRegisterUsed(destinationOffset.register) tableInstanceAddressReg := asm.NilRegister if isTable { tableInstanceAddressReg, err = c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.markRegisterUsed(tableInstanceAddressReg) } if !isZeroRegister(copySize.register) { // sourceOffset += size. c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register) // destinationOffset += size. c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register) } instanceAddr, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } if isTable { c.compileLoadElemInstanceAddress(index, instanceAddr) } else { c.compileLoadDataInstanceAddress(index, instanceAddr) } // Check data instance bounds. c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 8, // DataInstance and Element instance holds the length is stored at offset 8. arm64ReservedRegisterForTemporary) c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register) // If not, raise out of bounds memory access error. c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus) // Otherwise, ready to copy the value from destination to source. // Check destination bounds. if isTable { // arm64ReservedRegisterForTemporary = &tables[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) // tableInstanceAddressReg = arm64ReservedRegisterForTemporary + tableIndex*8 // = &tables[0] + sizeOf(*tableInstance)*8 // = &tables[tableIndex] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, int64(tableIndex)*8, tableInstanceAddressReg) // arm64ReservedRegisterForTemporary = [tableInstanceAddressReg+tableInstanceTableLenOffset] = len(tables[tableIndex]) c.assembler.CompileMemoryToRegister(arm64.LDRD, tableInstanceAddressReg, tableInstanceTableLenOffset, arm64ReservedRegisterForTemporary) } else { c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, arm64ReservedRegisterForTemporary) } c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register) // If not, raise out of bounds memory access error. 
c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus) // Otherwise, ready to copy the value from source to destination. if !isZeroRegister(copySize.register) { // If the size equals zero, we can skip the instructions below entirely. c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register) skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ) var ldr, str asm.Instruction var movSize int64 if isTable { ldr, str = arm64.LDRD, arm64.STRD movSize = 8 // arm64ReservedRegisterForTemporary = &Table[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, tableInstanceAddressReg, tableInstanceTableOffset, arm64ReservedRegisterForTemporary) // destinationOffset = (destinationOffset<< pointerSizeLog2) + arm64ReservedRegisterForTemporary c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, destinationOffset.register, pointerSizeLog2, arm64ReservedRegisterForTemporary, destinationOffset.register) // arm64ReservedRegisterForTemporary = &ElementInstance.References[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary) // sourceOffset = (sourceOffset<< pointerSizeLog2) + arm64ReservedRegisterForTemporary c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, sourceOffset.register, pointerSizeLog2, arm64ReservedRegisterForTemporary, sourceOffset.register) // copySize = copySize << pointerSizeLog2 c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register) } else { ldr, str = arm64.LDRB, arm64.STRB movSize = 1 // destinationOffset += memory buffer's absolute address. c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register) // sourceOffset += data buffer's absolute address. c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary) c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, sourceOffset.register) } // Negate the counter. c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register) beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP) // arm64ReservedRegisterForTemporary = [sourceOffset + (size.register)] c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr, sourceOffset.register, copySize.register, arm64ReservedRegisterForTemporary) // [destinationOffset + (size.register)] = arm64ReservedRegisterForTemporary. c.assembler.CompileRegisterToMemoryWithRegisterOffset(str, arm64ReservedRegisterForTemporary, destinationOffset.register, copySize.register, ) // Advance the negated counter by one element, and if the value is still negative, continue the loop. c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register) c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop) c.assembler.SetJumpTargetOnNext(skipCopyJump) } c.markRegisterUnused(copySize.register, sourceOffset.register, destinationOffset.register, instanceAddr, tableInstanceAddressReg) return nil } // compileDataDrop implements compiler.compileDataDrop for the arm64 architecture. func (c *arm64Compiler) compileDataDrop(o *wazeroir.UnionOperation) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } tmp, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } dataIndex := uint32(o.U1) c.compileLoadDataInstanceAddress(dataIndex, tmp) // Clears the content of DataInstance[o.DataIndex] (== []byte type).
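// A Go slice header occupies three 8-byte words (data pointer, length and capacity), so storing the zero register at offsets 0, 8 and 16 below resets the instance to an empty slice.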
c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0) c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8) c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16) return nil } func (c *arm64Compiler) compileLoadDataInstanceAddress(dataIndex uint32, dst asm.Register) { // dst = dataIndex * dataInstanceStructSize c.assembler.CompileConstToRegister(arm64.MOVD, int64(dataIndex)*dataInstanceStructSize, dst) // arm64ReservedRegisterForTemporary = &moduleInstance.DataInstances[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset, arm64ReservedRegisterForTemporary, ) // dst = arm64ReservedRegisterForTemporary + dst // = &moduleInstance.DataInstances[0] + dataIndex*dataInstanceStructSize // = &moduleInstance.DataInstances[dataIndex] c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst) } // compileMemoryCopy implements compiler.compileMemoryCopy for the arm64 architecture. func (c *arm64Compiler) compileMemoryCopy() error { return c.compileCopyImpl(false, 0, 0) } // compileCopyImpl implements compileTableCopy and compileMemoryCopy. // // TODO: the compiled code in this function should be reused and compile at once as // the code is independent of any module. func (c *arm64Compiler) compileCopyImpl(isTable bool, srcTableIndex, dstTableIndex uint32) error { outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds if isTable { outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess } copySize, err := c.popValueOnRegister() if err != nil { return err } c.markRegisterUsed(copySize.register) sourceOffset, err := c.popValueOnRegister() if err != nil { return err } if isZeroRegister(sourceOffset.register) { sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register) } c.markRegisterUsed(sourceOffset.register) destinationOffset, err := c.popValueOnRegister() if err != nil { return err } if isZeroRegister(destinationOffset.register) { destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register) } c.markRegisterUsed(destinationOffset.register) if !isZeroRegister(copySize.register) { // sourceOffset += size. c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register) // destinationOffset += size. 
c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register) } if isTable { // arm64ReservedRegisterForTemporary = &tables[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) // arm64ReservedRegisterForTemporary = arm64ReservedRegisterForTemporary + srcTableIndex*8 // = &tables[0] + sizeOf(*tableInstance)*8 // = &tables[srcTableIndex] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8, arm64ReservedRegisterForTemporary) // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[srcTableIndex]) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset, arm64ReservedRegisterForTemporary) } else { // arm64ReservedRegisterForTemporary = len(memoryInst.Buffer). c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, arm64ReservedRegisterForTemporary) } // Check memory len >= sourceOffset. c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register) // If not, raise out of bounds memory access error. c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus) // Otherwise, check memory len >= destinationOffset. if isTable { // arm64ReservedRegisterForTemporary = &tables[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) // arm64ReservedRegisterForTemporary = arm64ReservedRegisterForTemporary + dstTableIndex*8 // = &tables[0] + sizeOf(*tableInstance)*8 // = &tables[dstTableIndex] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8, arm64ReservedRegisterForTemporary) // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[dstTableIndex]) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset, arm64ReservedRegisterForTemporary) } c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register) // If not, raise out of bounds memory access error. c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus) // Otherwise, ready to copy the value from source to destination. var ldr, str asm.Instruction var movSize int64 if isTable { ldr, str = arm64.LDRD, arm64.STRD movSize = 8 } else { ldr, str = arm64.LDRB, arm64.STRB movSize = 1 } // If the size equals zero, we can skip the instructions below entirely. if !isZeroRegister(copySize.register) { c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register) skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ) // If source offset < destination offset: for (i = size-1; i >= 0; i--) dst[i] = src[i]; c.assembler.CompileTwoRegistersToNone(arm64.CMP, sourceOffset.register, destinationOffset.register) destLowerThanSourceJump := c.assembler.CompileJump(arm64.BCONDLS) var endJump asm.Node { // sourceOffset -= size. c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, sourceOffset.register) // destinationOffset -= size.
c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, destinationOffset.register) if isTable { // arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableOffset, arm64ReservedRegisterForTemporary) // destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0] c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, destinationOffset.register, pointerSizeLog2, arm64ReservedRegisterForTemporary, destinationOffset.register) // arm64ReservedRegisterForTemporary = &Tables[srcTableIndex].Table[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableOffset, arm64ReservedRegisterForTemporary) // sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0] c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, sourceOffset.register, pointerSizeLog2, arm64ReservedRegisterForTemporary, sourceOffset.register) // copySize = copySize << pointerSizeLog2 as each element has 8 bytes and we copy one by one. c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register) } else { // sourceOffset += memory buffer's absolute address. c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register) // destinationOffset += memory buffer's absolute address. c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register) } beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP) // size -= movSize. c.assembler.CompileConstToRegister(arm64.SUBS, movSize, copySize.register) // arm64ReservedRegisterForTemporary = [sourceOffset + (size.register)] c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr, sourceOffset.register, copySize.register, arm64ReservedRegisterForTemporary) // [destinationOffset + (size.register)] = arm64ReservedRegisterForTemporary. c.assembler.CompileRegisterToMemoryWithRegisterOffset(str, arm64ReservedRegisterForTemporary, destinationOffset.register, copySize.register, ) // If the value in copySize.register is not zero, continue the loop. c.assembler.CompileJump(arm64.BCONDNE).AssignJumpTarget(beginCopyLoop) // Otherwise, exit the loop.
endJump = c.assembler.CompileJump(arm64.B) } // Else (destination offset < source offset): for (i = 0; i < size; i++) dst[i] = src[i]; c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump) { if isTable { // arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableOffset, arm64ReservedRegisterForTemporary) // destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0] c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, destinationOffset.register, pointerSizeLog2, arm64ReservedRegisterForTemporary, destinationOffset.register) // arm64ReservedRegisterForTemporary = &Tables[srcTableIndex].Table[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableOffset, arm64ReservedRegisterForTemporary) // sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0] c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, sourceOffset.register, pointerSizeLog2, arm64ReservedRegisterForTemporary, sourceOffset.register) // copySize = copySize << pointerSizeLog2 as each element has 8 bytes and we copy one by one. c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register) } else { // sourceOffset += memory buffer's absolute address. c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register) // destinationOffset += memory buffer's absolute address. c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register) } // Negate the counter. c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register) beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP) // arm64ReservedRegisterForTemporary = [sourceOffset + (size.register)] c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr, sourceOffset.register, copySize.register, arm64ReservedRegisterForTemporary) // [destinationOffset + (size.register)] = arm64ReservedRegisterForTemporary. c.assembler.CompileRegisterToMemoryWithRegisterOffset(str, arm64ReservedRegisterForTemporary, destinationOffset.register, copySize.register, ) // size += movSize. c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register) c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop) } c.assembler.SetJumpTargetOnNext(skipCopyJump) c.assembler.SetJumpTargetOnNext(endJump) } // Mark all of the operand registers as unused. c.markRegisterUnused(copySize.register, sourceOffset.register, destinationOffset.register) return nil } // compileMemoryFill implements compiler.compileMemoryFill for the arm64 architecture. func (c *arm64Compiler) compileMemoryFill() error { return c.compileFillImpl(false, 0) } // compileFillImpl implements TableFill and MemoryFill.
// // TODO: the compiled code in this function should be reused and compiled at once as // the code is independent of any module. func (c *arm64Compiler) compileFillImpl(isTable bool, tableIndex uint32) error { outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds if isTable { outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess } fillSize, err := c.popValueOnRegister() if err != nil { return err } c.markRegisterUsed(fillSize.register) value, err := c.popValueOnRegister() if err != nil { return err } c.markRegisterUsed(value.register) destinationOffset, err := c.popValueOnRegister() if err != nil { return err } if isZeroRegister(destinationOffset.register) { destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register) } c.markRegisterUsed(destinationOffset.register) // destinationOffset += size. c.assembler.CompileRegisterToRegister(arm64.ADD, fillSize.register, destinationOffset.register) if isTable { // arm64ReservedRegisterForTemporary = &tables[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) // arm64ReservedRegisterForTemporary = arm64ReservedRegisterForTemporary + tableIndex*8 // = &tables[0] + tableIndex*sizeOf(*tableInstance) // = &tables[tableIndex] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, int64(tableIndex)*8, arm64ReservedRegisterForTemporary) // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[tableIndex]) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset, arm64ReservedRegisterForTemporary) } else { // arm64ReservedRegisterForTemporary = len(memoryInst.Buffer). c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, arm64ReservedRegisterForTemporary) } // Check length >= destinationOffset (which now holds the original destination offset + fill size). c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register) // If not, raise the runtime error. c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus) // Otherwise, we are ready to fill the destination region with the value. // If the size equals zero, we can skip the instructions below entirely. c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, fillSize.register) skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ) // destinationOffset -= size.
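// (destinationOffset had size added to it for the bounds check above; the SUB below
// restores the start of the region to be filled. The loop that follows then stores
// the value once per element, stepping the counter down from size to zero.)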
c.assembler.CompileRegisterToRegister(arm64.SUB, fillSize.register, destinationOffset.register) var str asm.Instruction var movSize int64 if isTable { str = arm64.STRD movSize = 8 // arm64ReservedRegisterForTemporary = &Tables[tableIndex].Table[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, int64(tableIndex)*8, arm64ReservedRegisterForTemporary) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableOffset, arm64ReservedRegisterForTemporary) // destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[tableIndex].Table[0] c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, destinationOffset.register, pointerSizeLog2, arm64ReservedRegisterForTemporary, destinationOffset.register) // fillSize = fillSize << pointerSizeLog2 as each element has 8 bytes and we fill them one by one. c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, fillSize.register) } else { str = arm64.STRB movSize = 1 // destinationOffset += memory buffer's absolute address. c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register) } // Naively implement the fill with a loop, storing the value one element at a time. beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP) // size -= movSize. c.assembler.CompileConstToRegister(arm64.SUBS, movSize, fillSize.register) // [destinationOffset + (fillSize.register)] = value.register. c.assembler.CompileRegisterToMemoryWithRegisterOffset(str, value.register, destinationOffset.register, fillSize.register, ) // If the value in fillSize.register is not zero, continue the loop. continueJump := c.assembler.CompileJump(arm64.BCONDNE) continueJump.AssignJumpTarget(beginCopyLoop) // Mark all of the operand registers as unused. c.markRegisterUnused(fillSize.register, value.register, destinationOffset.register) c.assembler.SetJumpTargetOnNext(skipCopyJump) return nil } // compileTableInit implements compiler.compileTableInit for the arm64 architecture. func (c *arm64Compiler) compileTableInit(o *wazeroir.UnionOperation) error { elemIndex := uint32(o.U1) tableIndex := uint32(o.U2) return c.compileInitImpl(true, elemIndex, tableIndex) } // compileTableCopy implements compiler.compileTableCopy for the arm64 architecture. func (c *arm64Compiler) compileTableCopy(o *wazeroir.UnionOperation) error { return c.compileCopyImpl(true, uint32(o.U1), uint32(o.U2)) } // compileElemDrop implements compiler.compileElemDrop for the arm64 architecture. func (c *arm64Compiler) compileElemDrop(o *wazeroir.UnionOperation) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } tmp, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } elemIndex := uint32(o.U1) c.compileLoadElemInstanceAddress(elemIndex, tmp) // Clear the content of ElementInstances[elemIndex] (== []interface{} type).
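// A Go slice header occupies three 8-byte words (data pointer, length, capacity), so
// storing the zero register at offsets 0, 8 and 16 below is equivalent to assigning
// the nil slice to moduleInstance.ElementInstances[elemIndex].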
c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0) c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8) c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16) return nil } func (c *arm64Compiler) compileLoadElemInstanceAddress(elemIndex uint32, dst asm.Register) { // dst = elemIndex * elementInstanceStructSize c.assembler.CompileConstToRegister(arm64.MOVD, int64(elemIndex)*elementInstanceStructSize, dst) // arm64ReservedRegisterForTemporary = &moduleInstance.ElementInstances[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset, arm64ReservedRegisterForTemporary, ) // dst = arm64ReservedRegisterForTemporary + dst // = &moduleInstance.ElementInstances[0] + elemIndex*elementInstanceStructSize // = &moduleInstance.ElementInstances[elemIndex] c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst) } // compileRefFunc implements compiler.compileRefFunc for the arm64 architecture. func (c *arm64Compiler) compileRefFunc(o *wazeroir.UnionOperation) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } ref, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForCallEngine + callEngineModuleContextFunctionsElement0AddressOffset] // = &moduleEngine.functions[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset, ref) // ref = ref + functionIndex*sizeOf(function) // = &moduleEngine.functions[functionIndex] functionIndex := int64(o.U1) c.assembler.CompileConstToRegister(arm64.ADD, functionIndex*functionSize, ref, ) c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) return nil } // compileTableGet implements compiler.compileTableGet for the arm64 architecture. func (c *arm64Compiler) compileTableGet(o *wazeroir.UnionOperation) error { ref, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.markRegisterUsed(ref) offset, err := c.popValueOnRegister() if err != nil { return err } // arm64ReservedRegisterForTemporary = &tables[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8] // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] // = [&tables[TableIndex]] = tables[TableIndex]. tableIndex := int64(o.U1) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableIndex*8, arm64ReservedRegisterForTemporary) // Out of bounds check. // ref = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex]) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset, ref, ) // "cmp ref, offset" c.assembler.CompileTwoRegistersToNone(arm64.CMP, ref, offset.register) // If it exceeds len(table), we exit the execution.
c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess) // ref = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableOffset, ref, ) // ref = (offset << pointerSizeLog2) + ref // = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset // = &tables[TableIndex].References[offset] c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, offset.register, pointerSizeLog2, ref, ref) // ref = [&tables[TableIndex].References[offset]] = load the reference's raw pointer as uint64. c.assembler.CompileMemoryToRegister(arm64.LDRD, ref, 0, ref) c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) // table elements are opaque 64-bit at runtime. return nil } // compileTableSet implements compiler.compileTableSet for the arm64 architecture. func (c *arm64Compiler) compileTableSet(o *wazeroir.UnionOperation) error { ref := c.locationStack.pop() if err := c.compileEnsureOnRegister(ref); err != nil { return err } offset := c.locationStack.pop() if err := c.compileEnsureOnRegister(offset); err != nil { return err } tmp, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } // arm64ReservedRegisterForTemporary = &tables[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) // arm64ReservedRegisterForTemporary = arm64ReservedRegisterForTemporary + TableIndex*8 // = &tables[0] + TableIndex*sizeOf(*tableInstance) // = &tables[TableIndex] tableIndex := int64(o.U1) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableIndex*8, arm64ReservedRegisterForTemporary) // Out of bounds check. // tmp = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex]) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset, tmp, ) // "cmp tmp, offset" c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp, offset.register) // If it exceeds len(table), we exit the execution. c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess) // tmp = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableOffset, tmp, ) // tmp = (offset << pointerSizeLog2) + tmp // = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset // = &tables[TableIndex].References[offset] c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, offset.register, pointerSizeLog2, tmp, tmp) // Set the reference's raw pointer. c.assembler.CompileRegisterToMemory(arm64.STRD, ref.register, tmp, 0) c.markRegisterUnused(offset.register, ref.register, tmp) return nil } // compileTableGrow implements compiler.compileTableGrow for the arm64 architecture. func (c *arm64Compiler) compileTableGrow(o *wazeroir.UnionOperation) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } // Pushes the table index. tableIndex := o.U1 if err := c.compileIntConstant(true, tableIndex); err != nil { return err } // Table grow cannot be done entirely in assembly, just like memory grow, as it involves allocation in Go. // Therefore, call out to the built-in function for this purpose.
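// The builtin call below exits native code with nativeCallStatusCodeCallBuiltInFunction,
// the Go side of the engine performs the grow (which may reallocate the table's
// underlying slice), and execution then resumes right after the call; that is also why
// the reserved registers are re-initialized further down.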
if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexTableGrow); err != nil { return err } // TableGrow consumes three values (table index, number of items, initial value). for i := 0; i < 3; i++ { c.locationStack.pop() } // Then, the previous length is pushed as the result. v := c.locationStack.pushRuntimeValueLocationOnStack() v.valueType = runtimeValueTypeI32 // After return, we re-initialize reserved registers just like the preamble of functions. c.compileReservedStackBasePointerRegisterInitialization() c.compileReservedMemoryRegisterInitialization() return nil } // compileTableSize implements compiler.compileTableSize for the arm64 architecture. func (c *arm64Compiler) compileTableSize(o *wazeroir.UnionOperation) error { if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { return err } result, err := c.allocateRegister(registerTypeGeneralPurpose) if err != nil { return err } c.markRegisterUsed(result) // arm64ReservedRegisterForTemporary = &tables[0] c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary) // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8] // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] // = [&tables[TableIndex]] = tables[TableIndex]. tableIndex := int64(o.U1) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableIndex*8, arm64ReservedRegisterForTemporary) // result = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex]) c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset, result, ) c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32) return nil } // compileTableFill implements compiler.compileTableFill for the arm64 architecture. func (c *arm64Compiler) compileTableFill(o *wazeroir.UnionOperation) error { tableIndex := uint32(o.U1) return c.compileFillImpl(true, tableIndex) } // popTwoValuesOnRegisters pops two values from the location stack, ensures // these two values are located on registers, and marks them unused. // // TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions, // but the name seems awkward. func (c *arm64Compiler) popTwoValuesOnRegisters() (x1, x2 *runtimeValueLocation, err error) { x2 = c.locationStack.pop() if err = c.compileEnsureOnRegister(x2); err != nil { return } x1 = c.locationStack.pop() if err = c.compileEnsureOnRegister(x1); err != nil { return } c.markRegisterUnused(x2.register) c.markRegisterUnused(x1.register) return } // popValueOnRegister pops one value from the location stack, ensures // that it is located on a register, and marks it unused. // // TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions, // but the name seems awkward. func (c *arm64Compiler) popValueOnRegister() (v *runtimeValueLocation, err error) { v = c.locationStack.pop() if err = c.compileEnsureOnRegister(v); err != nil { return } c.markRegisterUnused(v.register) return } // compileEnsureOnRegister emits instructions to ensure that a value is located on a register. func (c *arm64Compiler) compileEnsureOnRegister(loc *runtimeValueLocation) (err error) { if loc.onStack() { reg, err := c.allocateRegister(loc.getRegisterType()) if err != nil { return err } // Record that the value now lives in the register, and mark the register as used.
loc.setRegister(reg) c.markRegisterUsed(reg) c.compileLoadValueOnStackToRegister(loc) } else if loc.onConditionalRegister() { err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc) } return } // maybeCompileMoveTopConditionalToGeneralPurposeRegister moves the top value on the stack to a general purpose register // if the value is located on a conditional register. // // This is usually called at the beginning of methods on the compiler interface where we might // compile instructions without saving the conditional register value. // A compile* function that does not call this must itself save the conditional // value to the stack or a register by invoking compileEnsureOnRegister for the top value. func (c *arm64Compiler) maybeCompileMoveTopConditionalToGeneralPurposeRegister() (err error) { if c.locationStack.sp > 0 { if loc := c.locationStack.peek(); loc.onConditionalRegister() { err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc) } } return } // compileLoadConditionalRegisterToGeneralPurposeRegister saves the conditional register value // to a general purpose register. func (c *arm64Compiler) compileLoadConditionalRegisterToGeneralPurposeRegister(loc *runtimeValueLocation) error { reg, err := c.allocateRegister(loc.getRegisterType()) if err != nil { return err } c.markRegisterUsed(reg) c.assembler.CompileConditionalRegisterSet(loc.conditionalRegister, reg) // Record that now the value is located on a general purpose register. loc.setRegister(reg) return nil } // compileLoadValueOnStackToRegister implements compiler.compileLoadValueOnStackToRegister for arm64. func (c *arm64Compiler) compileLoadValueOnStackToRegister(loc *runtimeValueLocation) { switch loc.valueType { case runtimeValueTypeI32: c.assembler.CompileMemoryToRegister(arm64.LDRW, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register) case runtimeValueTypeI64: c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register) case runtimeValueTypeF32: c.assembler.CompileMemoryToRegister(arm64.FLDRS, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register) case runtimeValueTypeF64: c.assembler.CompileMemoryToRegister(arm64.FLDRD, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register) case runtimeValueTypeV128Lo: c.assembler.CompileMemoryToVectorRegister(arm64.VMOV, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register, arm64.VectorArrangementQ) // Higher 64-bits are loaded as well ^^. hi := &c.locationStack.stack[loc.stackPointer+1] hi.setRegister(loc.register) case runtimeValueTypeV128Hi: panic("BUG: V128Hi must be loaded to a register along with V128Lo") } } // allocateRegister implements compiler.allocateRegister for arm64. func (c *arm64Compiler) allocateRegister(t registerType) (reg asm.Register, err error) { var ok bool // Try to get an unused register. reg, ok = c.locationStack.takeFreeRegister(t) if ok { return } // If not found, we have to steal a register. stealTarget, ok := c.locationStack.takeStealTargetFromUsedRegister(t) if !ok { err = fmt.Errorf("cannot steal register") return } // Release the steal target's value onto its stack location. reg = stealTarget.register c.compileReleaseRegisterToStack(stealTarget) return } // compileReleaseAllRegistersToStack adds instructions to store all the values located on // either general purpose or conditional registers onto the memory stack. // See compileReleaseRegisterToStack.
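// This is typically needed right before branching to another label or making a call:
// at such points every value must live in its canonical stack slot, since register
// assignments are not tracked across those boundaries.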
func (c *arm64Compiler) compileReleaseAllRegistersToStack() (err error) { for i := uint64(0); i < c.locationStack.sp; i++ { if loc := &c.locationStack.stack[i]; loc.onRegister() { c.compileReleaseRegisterToStack(loc) } else if loc.onConditionalRegister() { if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil { return } c.compileReleaseRegisterToStack(loc) } } return } // compileReleaseRegisterToStack adds an instruction to write the value on a register back to the memory stack region. func (c *arm64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) { switch loc.valueType { case runtimeValueTypeI32: c.assembler.CompileRegisterToMemory(arm64.STRW, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8) case runtimeValueTypeI64: c.assembler.CompileRegisterToMemory(arm64.STRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8) case runtimeValueTypeF32: c.assembler.CompileRegisterToMemory(arm64.FSTRS, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8) case runtimeValueTypeF64: c.assembler.CompileRegisterToMemory(arm64.FSTRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8) case runtimeValueTypeV128Lo: c.assembler.CompileVectorRegisterToMemory(arm64.VMOV, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, arm64.VectorArrangementQ) // Higher 64-bits are released as well ^^. hi := &c.locationStack.stack[loc.stackPointer+1] c.locationStack.releaseRegister(hi) case runtimeValueTypeV128Hi: panic("BUG: V128Hi must be released to the stack along with V128Lo") default: panic("BUG") } // Mark the register as free. c.locationStack.releaseRegister(loc) } // compileReservedStackBasePointerRegisterInitialization adds instructions to initialize arm64ReservedRegisterForStackBasePointerAddress // so that it points to the absolute address of the stack base for this function. func (c *arm64Compiler) compileReservedStackBasePointerRegisterInitialization() { // First, load the address of the first element in the value stack into arm64ReservedRegisterForStackBasePointerAddress temporarily. c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineStackContextStackElement0AddressOffset, arm64ReservedRegisterForStackBasePointerAddress) // Next, we move the stack base pointer in bytes (ce.stackBasePointerInBytes) to arm64ReservedRegisterForTemporary. c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset, arm64ReservedRegisterForTemporary) // Finally, we calculate "arm64ReservedRegisterForStackBasePointerAddress += arm64ReservedRegisterForTemporary" so that it holds the absolute address of the stack base for this function. c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForStackBasePointerAddress) } func (c *arm64Compiler) compileReservedMemoryRegisterInitialization() { if c.ir.HasMemory || c.ir.UsesMemory { // "arm64ReservedRegisterForMemory = ce.MemoryElement0Address" c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset, arm64ReservedRegisterForMemory, ) } } // compileModuleContextInitialization adds instructions to initialize ce.moduleContext's fields based on // ce.moduleContext.ModuleInstanceAddress. // This is called in two cases: in the function preamble, and on the return from (non-Go) function calls.
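// A call may cross a module boundary and thereby change the module instance whose
// memory, tables, globals and functions the cached moduleContext fields describe, so
// those fields must be refreshed whenever the instance address may have changed. The
// equality check at the top of the emitted code makes the refresh a no-op when the
// module is unchanged.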
func (c *arm64Compiler) compileModuleContextInitialization() error { tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) if !found { panic("BUG: all the registers should be free at this point") } c.markRegisterUsed(tmpX) tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) if !found { panic("BUG: all the registers should be free at this point") } c.markRegisterUsed(tmpY) // "tmpX = ce.ModuleInstanceAddress" c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset, tmpX) // If the module instance address stays the same, we can skip the entire code below. c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64CallingConventionModuleInstanceAddressRegister, tmpX) brIfModuleUnchanged := c.assembler.CompileJump(arm64.BCONDEQ) // Otherwise, update the moduleEngine.moduleContext.ModuleInstanceAddress. c.assembler.CompileRegisterToMemory(arm64.STRD, arm64CallingConventionModuleInstanceAddressRegister, arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset, ) // Also, we have to update the following fields: // * callEngine.moduleContext.globalElement0Address // * callEngine.moduleContext.memoryElement0Address // * callEngine.moduleContext.memorySliceLen // * callEngine.moduleContext.memoryInstance // * callEngine.moduleContext.tableElement0Address // * callEngine.moduleContext.tableSliceLen // * callEngine.moduleContext.functionsElement0Address // * callEngine.moduleContext.typeIDsElement0Address // * callEngine.moduleContext.dataInstancesElement0Address // * callEngine.moduleContext.elementInstancesElement0Address // Update globalElement0Address. // // Note: if there's a global.get or global.set instruction in the function, the existence of the globals // is ensured by function validation at the module instantiation phase, and that's why it is ok to // skip the initialization if the module's globals slice is empty. if len(c.ir.Globals) > 0 { // "tmpX = &moduleInstance.Globals[0]" c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceGlobalsOffset, tmpX, ) // "ce.GlobalElement0Address = tmpX (== &moduleInstance.Globals[0])" c.assembler.CompileRegisterToMemory( arm64.STRD, tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset, ) } // Update memoryElement0Address and memorySliceLen. // // Note: if there's a memory instruction in the function, the memory instance must be non-nil. // That is ensured by function validation at the module instantiation phase, and that's // why it is ok to skip the initialization if the module's memory instance is nil. if c.ir.HasMemory { // "tmpX = moduleInstance.Memory" c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceMemoryOffset, tmpX, ) // First, set ce.memoryInstance c.assembler.CompileRegisterToMemory( arm64.STRD, tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset, ) // Next, we write the memory length into ce.MemorySliceLen. // // "tmpY = [tmpX + memoryInstanceBufferLenOffset] (== len(memory.Buffer))" c.assembler.CompileMemoryToRegister( arm64.LDRD, tmpX, memoryInstanceBufferLenOffset, tmpY, ) // "ce.MemorySliceLen = tmpY". c.assembler.CompileRegisterToMemory( arm64.STRD, tmpY, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, ) // Finally, we write ce.memoryElement0Address.
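// (Caching the buffer's base address and length inside callEngine lets the memory
// access and bounds-check code emitted elsewhere in this file load them with a single
// LDR via arm64ReservedRegisterForCallEngine, rather than dereferencing
// moduleInstance.Memory on every access.)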
// // "tmpY = *tmpX (== &memory.Buffer[0])" c.assembler.CompileMemoryToRegister( arm64.LDRD, tmpX, memoryInstanceBufferOffset, tmpY, ) // "ce.memoryElement0Address = tmpY". c.assembler.CompileRegisterToMemory( arm64.STRD, tmpY, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset, ) } // Update tableElement0Address, tableSliceLen and typeIDsElement0Address. // // Note: if there's table instruction in the function, the existence of the table // is ensured by function validation at module instantiation phase, and that's // why it is ok to skip the initialization if the module's table doesn't exist. if c.ir.HasTable { // "tmpX = &tables[0] (type of **wasm.Table)" c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTablesOffset, tmpX, ) // Update ce.tableElement0Address. // "ce.tableElement0Address = tmpX". c.assembler.CompileRegisterToMemory( arm64.STRD, tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, ) // Finally, we put &ModuleInstance.TypeIDs[0] into moduleContext.typeIDsElement0Address. c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTypeIDsOffset, tmpX) c.assembler.CompileRegisterToMemory(arm64.STRD, tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset) } // Update callEngine.moduleContext.functionsElement0Address { // "tmpX = [moduleInstanceAddressRegister + moduleInstanceEngineOffset + interfaceDataOffset] (== *moduleEngine)" // // Go's interface is laid out on memory as two quad words as struct {tab, data uintptr} // where tab points to the interface table, and the latter points to the actual // implementation of interface. This case, we extract "data" pointer as *moduleEngine. // See the following references for detail: // * https://research.swtch.com/interfaces // * https://github.com/golang/go/blob/release-branch.go1.20/src/runtime/runtime2.go#L207-L210 c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceEngineOffset+interfaceDataOffset, tmpX, ) // "tmpY = [tmpX + moduleEngineFunctionsOffset] (== &moduleEngine.functions[0])" c.assembler.CompileMemoryToRegister( arm64.LDRD, tmpX, moduleEngineFunctionsOffset, tmpY, ) // "callEngine.moduleContext.functionsElement0Address = tmpY". c.assembler.CompileRegisterToMemory( arm64.STRD, tmpY, arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset, ) } // Update dataInstancesElement0Address. if c.ir.HasDataInstances { // "tmpX = &moduleInstance.DataInstances[0]" c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset, tmpX, ) // "callEngine.moduleContext.dataInstancesElement0Address = tmpX". c.assembler.CompileRegisterToMemory( arm64.STRD, tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset, ) } // Update callEngine.moduleContext.elementInstancesElement0Address if c.ir.HasElementInstances { // "tmpX = &moduleInstance.DataInstances[0]" c.assembler.CompileMemoryToRegister( arm64.LDRD, arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset, tmpX, ) // "callEngine.moduleContext.dataInstancesElement0Address = tmpX". 
c.assembler.CompileRegisterToMemory( arm64.STRD, tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset, ) } c.assembler.SetJumpTargetOnNext(brIfModuleUnchanged) c.markRegisterUnused(tmpX, tmpY) return nil }
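// For reference, the instruction sequence emitted by compileModuleContextInitialization
// corresponds roughly to the following Go-level sketch (illustrative only: the real code
// works on raw offsets into callEngine, and the names below simply follow the comments
// above; currentModuleInstance stands for the instance held in
// arm64CallingConventionModuleInstanceAddressRegister):
//
//	if ce.moduleContext.moduleInstance != currentModuleInstance {
//		ce.moduleContext.moduleInstance = currentModuleInstance
//		ce.moduleContext.globalElement0Address = &currentModuleInstance.Globals[0]
//		ce.moduleContext.memoryInstance = currentModuleInstance.Memory
//		ce.moduleContext.memorySliceLen = uint64(len(currentModuleInstance.Memory.Buffer))
//		ce.moduleContext.memoryElement0Address = &currentModuleInstance.Memory.Buffer[0]
//		ce.moduleContext.tablesElement0Address = &currentModuleInstance.Tables[0]
//		ce.moduleContext.typeIDsElement0Address = &currentModuleInstance.TypeIDs[0]
//		ce.moduleContext.functionsElement0Address = &moduleEngine.functions[0]
//		ce.moduleContext.dataInstancesElement0Address = &currentModuleInstance.DataInstances[0]
//		ce.moduleContext.elementInstancesElement0Address = &currentModuleInstance.ElementInstances[0]
//	}
//
// where each update is only emitted when the corresponding feature (globals, memory,
// tables, data/element instances) is actually used by the module.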