package compiler

import (
	"github.com/tetratelabs/wazero/internal/asm"
	"github.com/tetratelabs/wazero/internal/asm/arm64"
	"github.com/tetratelabs/wazero/internal/wazeroir"
)

// compileV128Const implements compiler.compileV128Const for arm64.
func (c *arm64Compiler) compileV128Const(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	lo, hi := o.U1, o.U2

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Moves the lower 64-bits as a scalar float.
	intReg := arm64ReservedRegisterForTemporary
	if lo == 0 {
		intReg = arm64.RegRZR
	} else {
		c.assembler.CompileConstToRegister(arm64.MOVD, int64(lo), arm64ReservedRegisterForTemporary)
	}
	c.assembler.CompileRegisterToRegister(arm64.FMOVD, intReg, result)

	// Then, insert the higher bits with INS(vector,general).
	intReg = arm64ReservedRegisterForTemporary
	if hi == 0 {
		intReg = arm64.RegRZR
	} else {
		c.assembler.CompileConstToRegister(arm64.MOVD, int64(hi), arm64ReservedRegisterForTemporary)
	}
	// "ins Vn.D[1], intReg"
	c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, intReg, result, arm64.VectorArrangementD, 1)

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}

// compileV128Add implements compiler.compileV128Add for arm64.
func (c *arm64Compiler) compileV128Add(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr arm64.VectorArrangement
	var inst asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		inst = arm64.VADD
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		inst = arm64.VADD
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		inst = arm64.VADD
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		inst = arm64.VADD
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		inst = arm64.VFADDS
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		inst = arm64.VFADDD
		arr = arm64.VectorArrangement2D
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x1r, x2r, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(x2r)
	c.markRegisterUnused(x1r)
	return nil
}
// compileV128Sub implements compiler.compileV128Sub for arm64.
func (c *arm64Compiler) compileV128Sub(o *wazeroir.UnionOperation) (err error) {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr arm64.VectorArrangement
	var inst asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		inst = arm64.VFSUBS
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		inst = arm64.VFSUBD
		arr = arm64.VectorArrangement2D
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x2r, x1r, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	c.markRegisterUnused(x2r)
	return
}

// compileV128Load implements compiler.compileV128Load for arm64.
func (c *arm64Compiler) compileV128Load(o *wazeroir.UnionOperation) (err error) {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	offset := uint32(o.U2)
	loadType := wazeroir.V128LoadType(o.B1)
	switch loadType {
	case wazeroir.V128LoadType128:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 16)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementQ,
		)
	case wazeroir.V128LoadType8x8s:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8x8u:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4s:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4u:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2s:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2u:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 1)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement16B)
	case wazeroir.V128LoadType16Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 2)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement8H)
	case wazeroir.V128LoadType32Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 4)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement4S)
	case wazeroir.V128LoadType64Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement2D)
	case wazeroir.V128LoadType32zero:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 4)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementS,
		)
	case wazeroir.V128LoadType64zero:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}
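// load8SplatRef is an added, purely illustrative sketch (arbitrary name, unused by the
// compiler): it states the v128.load8_splat semantics handled in compileV128Load above,
// where one byte is loaded and broadcast to every lane. The compiler emits an ADD to
// fold the base and offset first because LD1R only accepts a plain base-register
// address, not a register-offset form.
func load8SplatRef(b byte) (r [16]byte) {
	for i := range r {
		r[i] = b // broadcast the loaded byte to all 16 lanes.
	}
	return
}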
// compileV128LoadLane implements compiler.compileV128LoadLane for arm64.
func (c *arm64Compiler) compileV128LoadLane(o *wazeroir.UnionOperation) (err error) {
	targetVector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(targetVector); err != nil {
		return
	}

	laneSize, laneIndex := o.B1, o.B2
	offset := uint32(o.U2)

	targetSizeInBytes := int64(laneSize / 8)
	source, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	var loadInst asm.Instruction
	var arr arm64.VectorArrangement
	switch laneSize {
	case 8:
		arr = arm64.VectorArrangementB
		loadInst = arm64.LDRB
	case 16:
		arr = arm64.VectorArrangementH
		loadInst = arm64.LDRH
	case 32:
		loadInst = arm64.LDRW
		arr = arm64.VectorArrangementS
	case 64:
		loadInst = arm64.LDRD
		arr = arm64.VectorArrangementD
	}

	c.assembler.CompileMemoryWithRegisterOffsetToRegister(loadInst, arm64ReservedRegisterForMemory, source, source)
	c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, source, targetVector.register, arr, arm64.VectorIndex(laneIndex))

	c.pushVectorRuntimeValueLocationOnRegister(targetVector.register)
	c.locationStack.markRegisterUnused(source)
	return
}

// compileV128Store implements compiler.compileV128Store for arm64.
func (c *arm64Compiler) compileV128Store(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	const targetSizeInBytes = 16
	offset := uint32(o.U2)
	offsetReg, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToMemoryWithRegisterOffset(arm64.VMOV,
		v.register, arm64ReservedRegisterForMemory, offsetReg, arm64.VectorArrangementQ)

	c.markRegisterUnused(v.register)
	return
}

// compileV128StoreLane implements compiler.compileV128StoreLane for arm64.
func (c *arm64Compiler) compileV128StoreLane(o *wazeroir.UnionOperation) (err error) {
	var arr arm64.VectorArrangement
	var storeInst asm.Instruction
	laneSize := o.B1
	laneIndex := o.B2
	offset := uint32(o.U2)
	switch laneSize {
	case 8:
		storeInst = arm64.STRB
		arr = arm64.VectorArrangementB
	case 16:
		storeInst = arm64.STRH
		arr = arm64.VectorArrangementH
	case 32:
		storeInst = arm64.STRW
		arr = arm64.VectorArrangementS
	case 64:
		storeInst = arm64.STRD
		arr = arm64.VectorArrangementD
	}

	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	targetSizeInBytes := int64(laneSize / 8)
	offsetReg, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, arm64ReservedRegisterForTemporary, arr, arm64.VectorIndex(laneIndex))
	c.assembler.CompileRegisterToMemoryWithRegisterOffset(storeInst,
		arm64ReservedRegisterForTemporary, arm64ReservedRegisterForMemory, offsetReg)

	c.locationStack.markRegisterUnused(v.register)
	return
}
// compileV128ExtractLane implements compiler.compileV128ExtractLane for arm64.
func (c *arm64Compiler) compileV128ExtractLane(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	shape := o.B1
	laneIndex := o.B2
	signed := o.B3
	switch shape {
	case wazeroir.ShapeI8x16:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var inst asm.Instruction
		if signed {
			inst = arm64.SMOV32
		} else {
			inst = arm64.UMOV
		}
		c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
			arm64.VectorArrangementB, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI16x8:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var inst asm.Instruction
		if signed {
			inst = arm64.SMOV32
		} else {
			inst = arm64.UMOV
		}
		c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
			arm64.VectorArrangementH, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI32x4:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
			arm64.VectorArrangementS, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI64x2:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
			arm64.VectorArrangementD, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64)
	case wazeroir.ShapeF32x4:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
			arm64.VectorArrangementS, arm64.VectorIndex(laneIndex), 0)
		c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF32)
	case wazeroir.ShapeF64x2:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
			arm64.VectorArrangementD, arm64.VectorIndex(laneIndex), 0)
		c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF64)
	}
	return
}
// compileV128ReplaceLane implements compiler.compileV128ReplaceLane for arm64.
func (c *arm64Compiler) compileV128ReplaceLane(o *wazeroir.UnionOperation) (err error) {
	origin := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(origin); err != nil {
		return
	}

	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	shape := o.B1
	laneIndex := o.B2
	switch shape {
	case wazeroir.ShapeI8x16:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementB, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeI16x8:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementH, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeI32x4:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementS, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeI64x2:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementD, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeF32x4:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
			arm64.VectorArrangementS, 0, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeF64x2:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
			arm64.VectorArrangementD, 0, arm64.VectorIndex(laneIndex))
	}

	c.locationStack.markRegisterUnused(origin.register)
	c.pushVectorRuntimeValueLocationOnRegister(vector.register)
	return
}

// compileV128Splat implements compiler.compileV128Splat for arm64.
func (c *arm64Compiler) compileV128Splat(o *wazeroir.UnionOperation) (err error) {
	origin := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(origin); err != nil {
		return
	}

	var result asm.Register
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone)
	case wazeroir.ShapeI16x8:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement8H, arm64.VectorIndexNone)
	case wazeroir.ShapeI32x4:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement4S, arm64.VectorIndexNone)
	case wazeroir.ShapeI64x2:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement2D, arm64.VectorIndexNone)
	case wazeroir.ShapeF32x4:
		result = origin.register
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
			arm64.VectorArrangementS, 0, arm64.VectorIndexNone)
	case wazeroir.ShapeF64x2:
		result = origin.register
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
			arm64.VectorArrangementD, 0, arm64.VectorIndexNone)
	}

	c.locationStack.markRegisterUnused(origin.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}

// onValueReleaseRegisterToStack spills the value currently held in reg (if any)
// onto the stack so that reg becomes available for reuse.
func (c *arm64Compiler) onValueReleaseRegisterToStack(reg asm.Register) {
	for i := uint64(0); i < c.locationStack.sp; i++ {
		prevValue := &c.locationStack.stack[i]
		if prevValue.register == reg {
			c.compileReleaseRegisterToStack(prevValue)
			break
		}
	}
}
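// shuffleRef is an added, purely illustrative sketch (arbitrary name, unused by the
// compiler): it states in plain Go the i8x16.shuffle semantics that compileV128Shuffle
// below lowers to a TBL2 table lookup over the adjacent register pair V29/V30. Each of
// the 16 result bytes is picked from the 32-byte concatenation of the two operands
// using the static lane indices.
func shuffleRef(v, w, lanes [16]byte) (r [16]byte) {
	// v supplies bytes 0..15 of the lookup table and w supplies bytes 16..31.
	table := append(v[:], w[:]...)
	for i, l := range lanes {
		r[i] = table[l] // Wasm validation guarantees every lane index is < 32.
	}
	return
}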
// compileV128Shuffle implements compiler.compileV128Shuffle for arm64.
func (c *arm64Compiler) compileV128Shuffle(o *wazeroir.UnionOperation) (err error) {
	// Shuffle needs its two operands (v, w) to be placed in adjacent registers.
	// For simplicity, we use V29 for v and V30 for w values respectively.
	const vReg, wReg = arm64.RegV29, arm64.RegV30

	// Ensures that the w value is placed on wReg.
	w := c.locationStack.popV128()
	if w.register != wReg {
		// If wReg is already in use, save the value onto the stack.
		c.onValueReleaseRegisterToStack(wReg)

		if w.onRegister() {
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				w.register, w.register, wReg, arm64.VectorArrangement16B)
			// We no longer use the old register.
			c.markRegisterUnused(w.register)
		} else { // on stack
			w.setRegister(wReg)
			c.compileLoadValueOnStackToRegister(w)
		}
	}

	// Ensures that the v value is placed on vReg.
	v := c.locationStack.popV128()
	if v.register != vReg {
		// If vReg is already in use, save the value onto the stack.
		c.onValueReleaseRegisterToStack(vReg)

		if v.onRegister() {
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				v.register, v.register, vReg, arm64.VectorArrangement16B)
			// We no longer use the old register.
			c.markRegisterUnused(v.register)
		} else { // on stack
			v.setRegister(vReg)
			c.compileLoadValueOnStackToRegister(v)
		}
	}

	c.locationStack.markRegisterUsed(vReg, wReg)
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	lanes := make([]byte, len(o.Us))
	for i, lane := range o.Us {
		lanes[i] = byte(lane)
	}
	c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(lanes), result, arm64.VectorArrangementQ)
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL2, vReg, result, arm64.VectorArrangement16B,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.locationStack.markRegisterUnused(vReg, wReg)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}

// compileV128Swizzle implements compiler.compileV128Swizzle for arm64.
func (c *arm64Compiler) compileV128Swizzle(*wazeroir.UnionOperation) (err error) {
	indexVec := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(indexVec); err != nil {
		return
	}
	baseVec := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(baseVec); err != nil {
		return
	}

	c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL1, baseVec.register, indexVec.register,
		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(baseVec.register)
	c.pushVectorRuntimeValueLocationOnRegister(indexVec.register)
	return
}

// compileV128AnyTrue implements compiler.compileV128AnyTrue for arm64.
func (c *arm64Compiler) compileV128AnyTrue(*wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMAXP, v, v,
		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
		arm64.VectorArrangementD, 0)
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
	c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)

	c.locationStack.markRegisterUnused(v)
	return
}
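// anyTrueRef is an added, purely illustrative sketch (arbitrary name, unused by the
// compiler): it states the v128.any_true semantics that compileV128AnyTrue above lowers
// to UMAXP + UMOV + CMP. UMAXP folds the 16 bytes pairwise, so the low 64 bits extracted
// by UMOV are non-zero exactly when some byte of the input was non-zero.
func anyTrueRef(v [16]byte) uint32 {
	for _, b := range v {
		if b != 0 {
			return 1
		}
	}
	return 0
}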
// compileV128AllTrue implements compiler.compileV128AllTrue for arm64.
func (c *arm64Compiler) compileV128AllTrue(o *wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register
	shape := o.B1
	if shape == wazeroir.ShapeI64x2 {
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.CMEQZERO, arm64.RegRZR, v,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDP, v, v,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoRegistersToNone(arm64.FCMPD, v, v)
		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ)
	} else {
		var arr arm64.VectorArrangement
		switch shape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement16B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement8H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement4S
		}
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMINV, v, v, arr,
			arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
			arm64.VectorArrangementD, 0)
		c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)
	}
	c.markRegisterUnused(v)
	return
}

var (
	i8x16BitmaskConst = [16]byte{
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	}
	i16x8BitmaskConst = [16]byte{
		0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00,
		0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80, 0x00,
	}
	i32x4BitmaskConst = [16]byte{
		0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
		0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
	}
)

// compileV128BitMask implements compiler.compileV128BitMask for arm64.
func (c *arm64Compiler) compileV128BitMask(o *wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register

	result, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Right arithmetic shift on the original vector in place, so that we have:
		// v[i] = 0xff if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement16B, 7)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i8x16BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i8x16BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi<0, 0 otherwise.
		//
		// Below, we use the following notation:
		// wi := (1 << i) if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Swap the lower and higher 8 byte elements, and write it into vecTmp, meaning that we have
		// vecTmp[i] = w(i+8) if i < 8, w(i-8) otherwise.
		c.assembler.CompileTwoVectorRegistersToVectorRegisterWithConst(arm64.EXT, v, v, vecTmp, arm64.VectorArrangement16B, 0x8)

		// v = [w0, w8, ..., w7, w15]
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.ZIP1, vecTmp, v, v, arm64.VectorArrangement16B)
		// v.h[0] = w0 + ... + w15
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v, arm64.VectorArrangement8H,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Extract the v.h[0] as the result.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI16x8:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Right arithmetic shift on the original vector in place, so that we have:
		// v[i] = 0xffff if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement8H, 15)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i16x8BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i16x8BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi<0, 0 otherwise for i = 0..7.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v, arm64.VectorArrangement8H,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI32x4:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Right arithmetic shift on the original vector in place, so that we have:
		// v[i] = 0xffffffff if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement4S, 32)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i32x4BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i32x4BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi<0, 0 otherwise for i = 0..3.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v, arm64.VectorArrangement4S,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementS, 0)
	case wazeroir.ShapeI64x2:
		// Move the lower 64-bit int into result.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementD, 0)
		// Move the higher 64-bit int into arm64ReservedRegisterForTemporary.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
			arm64.VectorArrangementD, 1)

		// Move the sign bit into the least significant bit.
		c.assembler.CompileConstToRegister(arm64.LSR, 63, result)
		c.assembler.CompileConstToRegister(arm64.LSR, 63, arm64ReservedRegisterForTemporary)

		// result = (arm64ReservedRegisterForTemporary<<1) | result
		c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
			arm64ReservedRegisterForTemporary, 1, result, result)
	}

	c.markRegisterUnused(v)
	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	return
}
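// i8x16BitMaskRef is an added, purely illustrative sketch (arbitrary name, unused by
// the compiler): it describes the i8x16.bitmask value that the SSHR/AND/EXT/ZIP1/ADDV
// sequence above assembles, namely a 16-bit mask whose bit i is set exactly when lane i
// is negative.
func i8x16BitMaskRef(lanes [16]int8) (mask uint32) {
	for i, l := range lanes {
		if l < 0 {
			mask |= 1 << i
		}
	}
	return
}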
// compileV128And implements compiler.compileV128And for arm64.
func (c *arm64Compiler) compileV128And(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.VAND, arm64.VectorArrangement16B)
}

// compileV128Not implements compiler.compileV128Not for arm64.
func (c *arm64Compiler) compileV128Not(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.NOT, arm64.VectorArrangement16B)
}

// compileV128Or implements compiler.compileV128Or for arm64.
func (c *arm64Compiler) compileV128Or(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.VORR, arm64.VectorArrangement16B)
}

// compileV128Xor implements compiler.compileV128Xor for arm64.
func (c *arm64Compiler) compileV128Xor(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.EOR, arm64.VectorArrangement16B)
}

// compileV128Bitselect implements compiler.compileV128Bitselect for arm64.
func (c *arm64Compiler) compileV128Bitselect(*wazeroir.UnionOperation) error {
	selector := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(selector); err != nil {
		return err
	}

	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL,
		x2.register, x1.register, selector.register, arm64.VectorArrangement16B)

	c.markRegisterUnused(x1.register, x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(selector.register)
	return nil
}

// compileV128AndNot implements compiler.compileV128AndNot for arm64.
func (c *arm64Compiler) compileV128AndNot(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.BIC, arm64.VectorArrangement16B)
}

// compileV128UniOp compiles a unary vector operation: inst is applied to the top v128
// value in place, and the same register is pushed back as the result.
func (c *arm64Compiler) compileV128UniOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, v.register, v.register, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}

// compileV128x2BinOp compiles a binary vector operation: inst is applied to the two
// v128 values on top of the stack, and x1's register is pushed back as the result.
func (c *arm64Compiler) compileV128x2BinOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x2.register, x1.register, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	return nil
}

// compileV128Shr implements compiler.compileV128Shr for arm64.
func (c *arm64Compiler) compileV128Shr(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.SSHL
	} else {
		inst = arm64.USHL
	}
	return c.compileV128ShiftImpl(shape, inst, true)
}

// compileV128Shl implements compiler.compileV128Shl for arm64.
func (c *arm64Compiler) compileV128Shl(o *wazeroir.UnionOperation) error {
	return c.compileV128ShiftImpl(o.B1 /*shape*/, arm64.SSHL, false)
}
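// shiftAmountRef is an added, purely illustrative sketch (arbitrary name, unused by the
// compiler): it spells out how compileV128ShiftImpl below handles the shift amount.
// WebAssembly masks the amount by laneBits-1, and right shifts are realized by negating
// the masked amount, because SSHL/USHL shift each lane right when the per-lane shift
// value is negative.
func shiftAmountRef(amount, laneBits uint32, rightShift bool) int32 {
	s := int32(amount & (laneBits - 1)) // e.g. modulo 8 for i8x16, modulo 64 for i64x2.
	if rightShift {
		s = -s
	}
	return s
}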
// compileV128ShiftImpl implements the lane-wise shifts shared by compileV128Shr and compileV128Shl.
func (c *arm64Compiler) compileV128ShiftImpl(shape wazeroir.Shape, ins asm.Instruction, rightShift bool) error {
	s := c.locationStack.pop()
	if s.register == arm64.RegRZR {
		// If the shift amount is zero register, nothing to do here.
		return nil
	}

	var modulo asm.ConstantValue
	var arr arm64.VectorArrangement
	switch shape {
	case wazeroir.ShapeI8x16:
		modulo = 0x7 // modulo 8.
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		modulo = 0xf // modulo 16.
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		modulo = 0x1f // modulo 32.
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		modulo = 0x3f // modulo 64.
		arr = arm64.VectorArrangement2D
	}

	if err := c.compileEnsureOnRegister(s); err != nil {
		return err
	}

	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	c.assembler.CompileConstToRegister(arm64.ANDIMM32, modulo, s.register)

	if rightShift {
		// Negate the amount to make this a right shift.
		c.assembler.CompileRegisterToRegister(arm64.NEG, s.register, s.register)
	}

	// Copy the shift amount into a vector register as SSHL requires it to be there.
	c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, s.register, tmp,
		arr, arm64.VectorIndexNone)

	c.assembler.CompileVectorRegisterToVectorRegister(ins, tmp, v.register, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(s.register)
	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}

// compileV128Cmp implements compiler.compileV128Cmp for arm64.
func (c *arm64Compiler) compileV128Cmp(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	var arr arm64.VectorArrangement
	v128CmpType := o.B1
	if v128CmpType <= wazeroir.V128CmpTypeI8x16GeU {
		arr = arm64.VectorArrangement16B
	} else if v128CmpType <= wazeroir.V128CmpTypeI16x8GeU {
		arr = arm64.VectorArrangement8H
	} else if v128CmpType <= wazeroir.V128CmpTypeI32x4GeU {
		arr = arm64.VectorArrangement4S
	} else if v128CmpType <= wazeroir.V128CmpTypeI64x2GeS {
		arr = arm64.VectorArrangement2D
	} else if v128CmpType <= wazeroir.V128CmpTypeF32x4Ge {
		arr = arm64.VectorArrangement4S
	} else { // f64x2
		arr = arm64.VectorArrangement2D
	}

	result := x1.register
	switch v128CmpType {
	case wazeroir.V128CmpTypeI8x16Eq, wazeroir.V128CmpTypeI16x8Eq, wazeroir.V128CmpTypeI32x4Eq, wazeroir.V128CmpTypeI64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16Ne, wazeroir.V128CmpTypeI16x8Ne, wazeroir.V128CmpTypeI32x4Ne, wazeroir.V128CmpTypeI64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
		// Reverse the condition by flipping all bits.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128CmpTypeI8x16LtS, wazeroir.V128CmpTypeI16x8LtS, wazeroir.V128CmpTypeI32x4LtS, wazeroir.V128CmpTypeI64x2LtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LtU, wazeroir.V128CmpTypeI16x8LtU, wazeroir.V128CmpTypeI32x4LtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtS, wazeroir.V128CmpTypeI16x8GtS, wazeroir.V128CmpTypeI32x4GtS, wazeroir.V128CmpTypeI64x2GtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtU, wazeroir.V128CmpTypeI16x8GtU, wazeroir.V128CmpTypeI32x4GtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeS, wazeroir.V128CmpTypeI16x8LeS, wazeroir.V128CmpTypeI32x4LeS, wazeroir.V128CmpTypeI64x2LeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeU, wazeroir.V128CmpTypeI16x8LeU, wazeroir.V128CmpTypeI32x4LeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeS, wazeroir.V128CmpTypeI16x8GeS, wazeroir.V128CmpTypeI32x4GeS, wazeroir.V128CmpTypeI64x2GeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeU, wazeroir.V128CmpTypeI16x8GeU, wazeroir.V128CmpTypeI32x4GeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Eq, wazeroir.V128CmpTypeF64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ne, wazeroir.V128CmpTypeF64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
		// Reverse the condition by flipping all bits.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128CmpTypeF32x4Lt, wazeroir.V128CmpTypeF64x2Lt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Le, wazeroir.V128CmpTypeF64x2Le:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Gt, wazeroir.V128CmpTypeF64x2Gt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ge, wazeroir.V128CmpTypeF64x2Ge:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x2.register, x1.register, result, arr)
	}

	c.markRegisterUnused(x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}
// compileV128AddSat implements compiler.compileV128AddSat for arm64.
func (c *arm64Compiler) compileV128AddSat(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.VSQADD
	} else {
		inst = arm64.VUQADD
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// compileV128SubSat implements compiler.compileV128SubSat for arm64.
func (c *arm64Compiler) compileV128SubSat(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.VSQSUB
	} else {
		inst = arm64.VUQSUB
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// compileV128Mul implements compiler.compileV128Mul for arm64.
func (c *arm64Compiler) compileV128Mul(o *wazeroir.UnionOperation) (err error) {
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16, wazeroir.ShapeI16x8, wazeroir.ShapeI32x4:
		err = c.compileV128x2BinOp(arm64.VMUL, defaultArrangementForShape(shape))
	case wazeroir.ShapeF32x4, wazeroir.ShapeF64x2:
		err = c.compileV128x2BinOp(arm64.VFMUL, defaultArrangementForShape(shape))
	case wazeroir.ShapeI64x2:
		x2 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x2); err != nil {
			return
		}

		x1 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x1); err != nil {
			return
		}

		src1, src2 := x1.register, x2.register

		tmp1, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.markRegisterUsed(tmp1)

		tmp2, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.markRegisterUsed(tmp2)

		tmp3, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.REV64, src2, tmp2,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VMUL, src1, tmp2, tmp2, arm64.VectorArrangement4S)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src1, tmp1,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VADDP, tmp2, tmp2,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src2, tmp3,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SHLL, tmp2, src1,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VUMLAL, tmp3, tmp1, src1, arm64.VectorArrangement2S)

		c.markRegisterUnused(src2, tmp1, tmp2)
		c.pushVectorRuntimeValueLocationOnRegister(src1)
	}
	return
}

// compileV128Div implements compiler.compileV128Div for arm64.
func (c *arm64Compiler) compileV128Div(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	var inst asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
		inst = arm64.VFDIV
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
		inst = arm64.VFDIV
	}
	return c.compileV128x2BinOp(inst, arr)
}
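// i64x2MulRef is an added, purely illustrative sketch (arbitrary name, unused by the
// compiler): it shows the value the REV64/MUL/XTN/ADDP/SHLL/UMLAL sequence in
// compileV128Mul above produces for i64x2 lanes without a native 64x64 vector multiply.
// Splitting each 64-bit lane into 32-bit halves, a*b mod 2^64 equals
// ((aHi*bLo + aLo*bHi) << 32) + aLo*bLo.
func i64x2MulRef(a, b [2]uint64) (r [2]uint64) {
	for i := range a {
		aLo, aHi := a[i]&0xffffffff, a[i]>>32
		bLo, bHi := b[i]&0xffffffff, b[i]>>32
		r[i] = (aHi*bLo+aLo*bHi)<<32 + aLo*bLo // uint64 arithmetic wraps, matching mod 2^64.
	}
	return
}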
// compileV128Neg implements compiler.compileV128Neg for arm64.
func (c *arm64Compiler) compileV128Neg(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		inst = arm64.VNEG
	} else { // Floating point lanes
		inst = arm64.VFNEG
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(shape))
}

// compileV128Sqrt implements compiler.compileV128Sqrt for arm64.
func (c *arm64Compiler) compileV128Sqrt(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFSQRT, arr)
}

// compileV128Abs implements compiler.compileV128Abs for arm64.
func (c *arm64Compiler) compileV128Abs(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		inst = arm64.VABS
	} else { // Floating point lanes
		inst = arm64.VFABS
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(shape))
}

// compileV128Popcnt implements compiler.compileV128Popcnt for arm64.
func (c *arm64Compiler) compileV128Popcnt(o *wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.VCNT, defaultArrangementForShape(o.B1))
}

// compileV128Min implements compiler.compileV128Min for arm64.
func (c *arm64Compiler) compileV128Min(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		if signed {
			inst = arm64.SMIN
		} else {
			inst = arm64.UMIN
		}
	} else { // Floating point lanes
		inst = arm64.VFMIN
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// defaultArrangementForShape returns the arm64 vector arrangement matching the given shape.
func defaultArrangementForShape(s wazeroir.Shape) (arr arm64.VectorArrangement) {
	switch s {
	case wazeroir.ShapeI8x16:
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return
}

// compileV128Max implements compiler.compileV128Max for arm64.
func (c *arm64Compiler) compileV128Max(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		if signed {
			inst = arm64.SMAX
		} else {
			inst = arm64.UMAX
		}
	} else { // Floating point lanes
		inst = arm64.VFMAX
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// compileV128AvgrU implements compiler.compileV128AvgrU for arm64.
func (c *arm64Compiler) compileV128AvgrU(o *wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.URHADD, defaultArrangementForShape(o.B1))
}

// compileV128Pmin implements compiler.compileV128Pmin for arm64.
func (c *arm64Compiler) compileV128Pmin(o *wazeroir.UnionOperation) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.B1), false)
}

// compileV128Pmax implements compiler.compileV128Pmax for arm64.
func (c *arm64Compiler) compileV128Pmax(o *wazeroir.UnionOperation) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.B1), true)
}
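// pminRef and pmaxRef are added, purely illustrative sketches (arbitrary names, unused
// by the compiler): they state the WebAssembly pseudo-minimum/maximum semantics that
// compileV128PseudoMinOrMax below realizes with a single FCMGT comparison followed by
// BSL: pmin(x, y) = y < x ? y : x, and pmax(x, y) = x < y ? y : x.
func pminRef(x, y float32) float32 {
	if y < x {
		return y
	}
	return x
}

func pmaxRef(x, y float32) float32 {
	if x < y {
		return y
	}
	return x
}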
// compileV128PseudoMinOrMax implements compileV128Pmax and compileV128Pmin.
func (c *arm64Compiler) compileV128PseudoMinOrMax(arr arm64.VectorArrangement, max bool) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Sets all bits on each lane if x1r's lane satisfies the condition (min or max), zeros otherwise.
	if max {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1r, x2r, result, arr)
	} else {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2r, x1r, result, arr)
	}

	// Select each bit based on the result bits ^.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL,
		x1r, x2r, result, arm64.VectorArrangement16B)

	c.markRegisterUnused(x1r, x2r)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}

// compileV128Ceil implements compiler.compileV128Ceil for arm64.
func (c *arm64Compiler) compileV128Ceil(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTP, arr)
}

// compileV128Floor implements compiler.compileV128Floor for arm64.
func (c *arm64Compiler) compileV128Floor(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTM, arr)
}

// compileV128Trunc implements compiler.compileV128Trunc for arm64.
func (c *arm64Compiler) compileV128Trunc(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTZ, arr)
}

// compileV128Nearest implements compiler.compileV128Nearest for arm64.
func (c *arm64Compiler) compileV128Nearest(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTN, arr)
}

// compileV128Extend implements compiler.compileV128Extend for arm64.
func (c *arm64Compiler) compileV128Extend(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	var arr arm64.VectorArrangement
	originShape := o.B1
	signed := o.B2 == 1
	useLow := o.B3
	if useLow {
		if signed {
			inst = arm64.SSHLL
		} else {
			inst = arm64.USHLL
		}
		switch originShape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement8B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement4H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement2S
		}
	} else {
		if signed {
			inst = arm64.SSHLL2
		} else {
			inst = arm64.USHLL2
		}
		arr = defaultArrangementForShape(originShape)
	}
	return c.compileV128UniOp(inst, arr)
}
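// extendLowI16Ref is an added, purely illustrative sketch (arbitrary name, unused by
// the compiler): it states what compileV128Extend above emits for
// i32x4.extend_low_i16x8_s via SSHLL with a zero shift (the SXTL alias), namely
// sign-extending the low four 16-bit lanes to 32 bits; SSHLL2/USHLL2 handle the high
// half analogously.
func extendLowI16Ref(x [8]int16) (r [4]int32) {
	for i := 0; i < 4; i++ {
		r[i] = int32(x[i]) // sign-extend each of the low four lanes.
	}
	return
}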
// compileV128ExtMul implements compiler.compileV128ExtMul for arm64.
func (c *arm64Compiler) compileV128ExtMul(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	var arr arm64.VectorArrangement
	originShape := o.B1
	signed := o.B2 == 1
	useLow := o.B3
	if useLow {
		if signed {
			inst = arm64.SMULL
		} else {
			inst = arm64.UMULL
		}
		switch originShape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement8B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement4H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement2S
		}
	} else {
		if signed {
			inst = arm64.SMULL2
		} else {
			inst = arm64.UMULL2
		}
		arr = defaultArrangementForShape(originShape)
	}
	return c.compileV128x2BinOp(inst, arr)
}

// compileV128Q15mulrSatS implements compiler.compileV128Q15mulrSatS for arm64.
func (c *arm64Compiler) compileV128Q15mulrSatS(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.SQRDMULH, arm64.VectorArrangement8H)
}

// compileV128ExtAddPairwise implements compiler.compileV128ExtAddPairwise for arm64.
func (c *arm64Compiler) compileV128ExtAddPairwise(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	originShape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.SADDLP
	} else {
		inst = arm64.UADDLP
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(originShape))
}

// compileV128FloatPromote implements compiler.compileV128FloatPromote for arm64.
func (c *arm64Compiler) compileV128FloatPromote(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.FCVTL, arm64.VectorArrangement2S)
}

// compileV128FloatDemote implements compiler.compileV128FloatDemote for arm64.
func (c *arm64Compiler) compileV128FloatDemote(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.FCVTN, arm64.VectorArrangement2S)
}

// compileV128FConvertFromI implements compiler.compileV128FConvertFromI for arm64.
func (c *arm64Compiler) compileV128FConvertFromI(o *wazeroir.UnionOperation) (err error) {
	destinationShape := o.B1
	signed := o.B3

	if destinationShape == wazeroir.ShapeF32x4 {
		if signed {
			err = c.compileV128UniOp(arm64.VSCVTF, defaultArrangementForShape(destinationShape))
		} else {
			err = c.compileV128UniOp(arm64.VUCVTF, defaultArrangementForShape(destinationShape))
		}
		return
	} else { // f64x2
		v := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(v); err != nil {
			return
		}
		vr := v.register

		var expand, convert asm.Instruction
		if signed {
			expand, convert = arm64.SSHLL, arm64.VSCVTF
		} else {
			expand, convert = arm64.USHLL, arm64.VUCVTF
		}

		// Expand the lower two 32-bit lanes into two 64-bit lanes.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(expand, vr, vr, arm64.VectorArrangement2S, 0)
		// Convert these two 64-bit integer values on each lane to double precision values.
		c.assembler.CompileVectorRegisterToVectorRegister(convert, vr, vr, arm64.VectorArrangement2D,
			arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.pushVectorRuntimeValueLocationOnRegister(vr)
	}
	return
}
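// dotRef is an added, purely illustrative sketch (arbitrary name, unused by the
// compiler): it describes the i32x4.dot_i16x8_s semantics that compileV128Dot below
// assembles from SMULL/SMULL2 plus a pairwise ADDP: adjacent pairs of signed 16-bit
// products are summed into each 32-bit result lane.
func dotRef(x, y [8]int16) (r [4]int32) {
	for i := 0; i < 4; i++ {
		r[i] = int32(x[2*i])*int32(y[2*i]) + int32(x[2*i+1])*int32(y[2*i+1])
	}
	return
}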
// compileV128Dot implements compiler.compileV128Dot for arm64.
func (c *arm64Compiler) compileV128Dot(*wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Multiply lower integers and get the 32-bit results into tmp.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL, x1r, x2r, tmp, arm64.VectorArrangement4H)
	// Multiply higher integers and get the 32-bit results into x1r.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL2, x1r, x2r, x1r, arm64.VectorArrangement8H)
	// Adds these two results into x1r.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VADDP, x1r, tmp, x1r, arm64.VectorArrangement4S)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}

// compileV128Narrow implements compiler.compileV128Narrow for arm64.
func (c *arm64Compiler) compileV128Narrow(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr, arr2 arm64.VectorArrangement
	originShape := o.B1
	signed := o.B3
	switch originShape {
	case wazeroir.ShapeI16x8:
		arr = arm64.VectorArrangement8B
		arr2 = arm64.VectorArrangement16B
	case wazeroir.ShapeI32x4:
		arr = arm64.VectorArrangement4H
		arr2 = arm64.VectorArrangement8H
	}

	var lo, hi asm.Instruction
	if signed {
		lo, hi = arm64.SQXTN, arm64.SQXTN2
	} else {
		lo, hi = arm64.SQXTUN, arm64.SQXTUN2
	}

	// Narrow lanes on x1r and write them into lower-half of x1r.
	c.assembler.CompileVectorRegisterToVectorRegister(lo, x1r, x1r, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)
	// Narrow lanes on x2r and write them into higher-half of x1r.
	c.assembler.CompileVectorRegisterToVectorRegister(hi, x2r, x1r, arr2,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}

// compileV128ITruncSatFromF implements compiler.compileV128ITruncSatFromF for arm64.
func (c *arm64Compiler) compileV128ITruncSatFromF(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	originShape := o.B1
	signed := o.B3

	var cvt asm.Instruction
	if signed {
		cvt = arm64.VFCVTZS
	} else {
		cvt = arm64.VFCVTZU
	}

	c.assembler.CompileVectorRegisterToVectorRegister(cvt, v.register, v.register,
		defaultArrangementForShape(originShape), arm64.VectorIndexNone, arm64.VectorIndexNone,
	)

	if originShape == wazeroir.ShapeF64x2 {
		var narrow asm.Instruction
		if signed {
			narrow = arm64.SQXTN
		} else {
			narrow = arm64.UQXTN
		}
		c.assembler.CompileVectorRegisterToVectorRegister(narrow, v.register, v.register,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return
}
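// narrowI16x8ToI8x16SRef is an added, purely illustrative sketch (arbitrary name,
// unused by the compiler): it spells out the signed saturating narrowing that the
// SQXTN/SQXTN2 pair in compileV128Narrow above performs for i8x16.narrow_i16x8_s:
// every 16-bit lane is clamped to the int8 range, with x1 filling the low half of the
// result and x2 the high half.
func narrowI16x8ToI8x16SRef(x1, x2 [8]int16) (r [16]int8) {
	clamp := func(v int16) int8 {
		if v > 127 {
			return 127
		}
		if v < -128 {
			return -128
		}
		return int8(v)
	}
	for i, v := range x1 {
		r[i] = clamp(v)
	}
	for i, v := range x2 {
		r[i+8] = clamp(v)
	}
	return
}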