Text file src/github.com/golang/snappy/encode_arm64.s

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !appengine
// +build gc
// +build !noasm

#include "textflag.h"

// The asm code generally follows the pure Go code in encode_other.go, except
// where marked with a "!!!".
// ----------------------------------------------------------------------------

// func emitLiteral(dst, lit []byte) int
//
// All local variables fit into registers. The register allocation:
//	- R3	len(lit)
//	- R4	n
//	- R6	return value
//	- R8	&dst[i]
//	- R10	&lit[0]
//
// The 32 bytes of stack space is to call runtime·memmove.
//
// The unusual register allocation of local variables, such as R10 for the
// source pointer, matches the allocation used at the call site in encodeBlock,
// which makes it easier to manually inline this function.
TEXT ·emitLiteral(SB), NOSPLIT, $32-56
	MOVD dst_base+0(FP), R8
	MOVD lit_base+24(FP), R10
	MOVD lit_len+32(FP), R3
	MOVD R3, R6
	MOVW R3, R4
	SUBW $1, R4, R4

	CMPW $60, R4
	BLT  oneByte
	CMPW $256, R4
	BLT  twoBytes

threeBytes:
	MOVD $0xf4, R2
	MOVB R2, 0(R8)
	MOVW R4, 1(R8)
	ADD  $3, R8, R8
	ADD  $3, R6, R6
	B    memmove

twoBytes:
	MOVD $0xf0, R2
	MOVB R2, 0(R8)
	MOVB R4, 1(R8)
	ADD  $2, R8, R8
	ADD  $2, R6, R6
	B    memmove

oneByte:
	LSLW $2, R4, R4
	MOVB R4, 0(R8)
	ADD  $1, R8, R8
	ADD  $1, R6, R6

memmove:
	MOVD R6, ret+48(FP)

	// copy(dst[i:], lit)
	//
	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
	// R8, R10 and R3 as arguments.
	MOVD R8, 8(RSP)
	MOVD R10, 16(RSP)
	MOVD R3, 24(RSP)
	CALL runtime·memmove(SB)
	RET

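// For readers cross-checking against the Go version: a hedged sketch of the
// encoding above, following the shape of emitLiteral in encode_other.go
// (illustrative, not copied verbatim):
//
//	func emitLiteral(dst, lit []byte) int {
//		i, n := 0, uint32(len(lit)-1)
//		switch {
//		case n < 60:
//			dst[0] = uint8(n)<<2 | tagLiteral
//			i = 1
//		case n < 1<<8:
//			dst[0], dst[1] = 60<<2|tagLiteral, uint8(n) // 60<<2 is the 0xf0 above
//			i = 2
//		default:
//			dst[0], dst[1], dst[2] = 61<<2|tagLiteral, uint8(n), uint8(n>>8) // 61<<2 is the 0xf4 above
//			i = 3
//		}
//		return i + copy(dst[i:], lit)
//	}
//
// Note that the threeBytes case above writes n with a 4-byte MOVW but advances
// R8 by only 3; the stray bytes are harmless because the memmove that follows
// starts at the new R8 and overwrites them.
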
// ----------------------------------------------------------------------------

// func emitCopy(dst []byte, offset, length int) int
//
// All local variables fit into registers. The register allocation:
//	- R3	length
//	- R7	&dst[0]
//	- R8	&dst[i]
//	- R11	offset
//
// The unusual register allocation of local variables, such as R11 for the
// offset, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	MOVD dst_base+0(FP), R8
	MOVD R8, R7
	MOVD offset+24(FP), R11
	MOVD length+32(FP), R3

loop0:
	// for length >= 68 { etc }
	CMPW $68, R3
	BLT  step1

	// Emit a length 64 copy, encoded as 3 bytes.
	MOVD $0xfe, R2
	MOVB R2, 0(R8)
	MOVW R11, 1(R8)
	ADD  $3, R8, R8
	SUB  $64, R3, R3
	B    loop0

step1:
	// if length > 64 { etc }
	CMP $64, R3
	BLE step2

	// Emit a length 60 copy, encoded as 3 bytes.
	MOVD $0xee, R2
	MOVB R2, 0(R8)
	MOVW R11, 1(R8)
	ADD  $3, R8, R8
	SUB  $60, R3, R3

step2:
	// if length >= 12 || offset >= 2048 { goto step3 }
	CMP  $12, R3
	BGE  step3
	CMPW $2048, R11
	BGE  step3

	// Emit the remaining copy, encoded as 2 bytes.
	MOVB R11, 1(R8)
	LSRW $3, R11, R11
	AND  $0xe0, R11, R11
	SUB  $4, R3, R3
	AND  $0xff, R3, R3
	LSLW $2, R3, R3
	ORRW R3, R11, R11
	ORRW $1, R11, R11
	MOVB R11, 0(R8)
	ADD  $2, R8, R8

	// Return the number of bytes written.
	SUB  R7, R8, R8
	MOVD R8, ret+40(FP)
	RET

step3:
	// Emit the remaining copy, encoded as 3 bytes.
	SUB  $1, R3, R3
	AND  $0xff, R3, R3
	LSLW $2, R3, R3
	ORRW $2, R3, R3
	MOVB R3, 0(R8)
	MOVW R11, 1(R8)
	ADD  $3, R8, R8

	// Return the number of bytes written.
	SUB  R7, R8, R8
	MOVD R8, ret+40(FP)
	RET

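// Likewise, a hedged Go sketch of the encoding above, following the shape of
// emitCopy in encode_other.go (illustrative, not copied verbatim):
//
//	func emitCopy(dst []byte, offset, length int) int {
//		i := 0
//		for length >= 68 {
//			dst[i] = 63<<2 | tagCopy2 // the 0xfe above: a length-64 copy
//			dst[i+1], dst[i+2] = uint8(offset), uint8(offset>>8)
//			i, length = i+3, length-64
//		}
//		if length > 64 {
//			dst[i] = 59<<2 | tagCopy2 // the 0xee above: a length-60 copy
//			dst[i+1], dst[i+2] = uint8(offset), uint8(offset>>8)
//			i, length = i+3, length-60
//		}
//		if length >= 12 || offset >= 2048 {
//			dst[i] = uint8(length-1)<<2 | tagCopy2
//			dst[i+1], dst[i+2] = uint8(offset), uint8(offset>>8)
//			return i + 3
//		}
//		dst[i] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
//		dst[i+1] = uint8(offset)
//		return i + 2
//	}
//
// As in emitLiteral, the MOVW R11, 1(R8) stores write 4 bytes where the format
// consumes only 2, relying on dst having room to spare.
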
// ----------------------------------------------------------------------------

// func extendMatch(src []byte, i, j int) int
//
// All local variables fit into registers. The register allocation:
//	- R6	&src[0]
//	- R7	&src[j]
//	- R13	&src[len(src) - 8]
//	- R14	&src[len(src)]
//	- R15	&src[i]
//
// The unusual register allocation of local variables, such as R15 for a source
// pointer, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·extendMatch(SB), NOSPLIT, $0-48
	MOVD src_base+0(FP), R6
	MOVD src_len+8(FP), R14
	MOVD i+24(FP), R15
	MOVD j+32(FP), R7
	ADD  R6, R14, R14
	ADD  R6, R15, R15
	ADD  R6, R7, R7
	MOVD R14, R13
	SUB  $8, R13, R13

cmp8:
	// As long as we are 8 or more bytes before the end of src, we can load and
	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
	CMP  R13, R7
	BHI  cmp1
	MOVD (R15), R3
	MOVD (R7), R4
	CMP  R4, R3
	BNE  bsf
	ADD  $8, R15, R15
	ADD  $8, R7, R7
	B    cmp8

bsf:
	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
	// the index of the first byte that differs.
	// RBIT reverses the bit order, then CLZ counts the leading zeros, the
	// combination of which finds the least significant bit which is set.
	// The arm64 architecture is little-endian, and the shift by 3 converts
	// a bit index to a byte index.
	EOR  R3, R4, R4
	RBIT R4, R4
	CLZ  R4, R4
	ADD  R4>>3, R7, R7

	// Convert from &src[ret] to ret.
	SUB  R6, R7, R7
	MOVD R7, ret+40(FP)
	RET

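	// For intuition: RBIT followed by CLZ computes the count of trailing
	// zeros, i.e. math/bits.TrailingZeros64 in Go. A hedged sketch of the
	// byte-index computation above:
	//
	//	// x and y are the two 8-byte loads being compared.
	//	i := bits.TrailingZeros64(x^y) >> 3 // bit index / 8; loads are little-endian
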
cmp1:
	// In src's tail, compare 1 byte at a time.
	CMP  R7, R14
	BLS  extendMatchEnd
	MOVB (R15), R3
	MOVB (R7), R4
	CMP  R4, R3
	BNE  extendMatchEnd
	ADD  $1, R15, R15
	ADD  $1, R7, R7
	B    cmp1

extendMatchEnd:
	// Convert from &src[ret] to ret.
	SUB  R6, R7, R7
	MOVD R7, ret+40(FP)
	RET

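// Taken as a whole, the function above is the asm counterpart of a simple Go
// loop; a hedged sketch matching extendMatch's contract in encode_other.go:
//
//	// extendMatch returns the largest k such that k <= len(src) and that
//	// src[i:i+k-j] and src[j:k] have the same contents.
//	func extendMatch(src []byte, i, j int) int {
//		for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
//		}
//		return j
//	}
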
// ----------------------------------------------------------------------------

// func encodeBlock(dst, src []byte) (d int)
//
// All local variables fit into registers, other than "var table". The register
// allocation:
//	- R3	.	.
//	- R4	.	.
//	- R5	64	shift
//	- R6	72	&src[0], tableSize
//	- R7	80	&src[s]
//	- R8	88	&dst[d]
//	- R9	96	sLimit
//	- R10	.	&src[nextEmit]
//	- R11	104	prevHash, currHash, nextHash, offset
//	- R12	112	&src[base], skip
//	- R13	.	&src[nextS], &src[len(src) - 8]
//	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x
//	- R15	120	candidate
//	- R16	.	hash constant, 0x1e35a7bd
//	- R17	.	&table
//	- .  	128	table
//
// The second column (64, 72, etc) is the stack offset to spill the registers
// when calling other functions. We could pack this slightly tighter, but it's
// simpler to have a dedicated spill map independent of the function called.
//
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill
// local variables (registers) during calls gives 32768 + 64 + 64 = 32896.
TEXT ·encodeBlock(SB), 0, $32896-56
	MOVD dst_base+0(FP), R8
	MOVD src_base+24(FP), R7
	MOVD src_len+32(FP), R14

	// shift, tableSize := uint32(32-8), 1<<8
	MOVD  $24, R5
	MOVD  $256, R6
	MOVW  $0xa7bd, R16
	MOVKW $(0x1e35<<16), R16

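	// The two MOV instructions above materialize the 32-bit multiplier
	// 0x1e35a7bd in R16. As a hedged sketch, the hash being inlined throughout
	// this function is the one from encode_other.go:
	//
	//	func hash(u, shift uint32) uint32 {
	//		return (u * 0x1e35a7bd) >> shift
	//	}
	//
	// calcShift below picks shift so that the hash of any uint32 indexes the
	// first tableSize entries of table.
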
calcShift:
	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
	//	shift--
	// }
	MOVD $16384, R2
	CMP  R2, R6
	BGE  varTable
	CMP  R14, R6
	BGE  varTable
	SUB  $1, R5, R5
	LSL  $1, R6, R6
	B    calcShift

varTable:
	// var table [maxTableSize]uint16
	//
	// In the asm code, unlike the Go code, we can zero-initialize only the
	// first tableSize elements. Each uint16 element is 2 bytes and each
	// iteration writes 64 bytes, so we can do only tableSize/32 writes
	// instead of the 2048 writes that would zero-initialize all of table's
	// 32768 bytes. This clear could overrun the first tableSize elements, but
	// it won't overrun the allocated stack size.
	ADD  $128, RSP, R17
	MOVD R17, R4

	// !!! R6 = &table[tableSize]
	ADD R6<<1, R17, R6

memclr:
	STP.P (ZR, ZR), 64(R4)
	STP   (ZR, ZR), -48(R4)
	STP   (ZR, ZR), -32(R4)
	STP   (ZR, ZR), -16(R4)
	CMP   R4, R6
	BHI   memclr
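
	// (The loop above clears 64 bytes per iteration: STP.P stores one zero
	// pair at R4 and post-increments R4 by 64, and the three plain STPs fill
	// in the remaining 48 bytes at negative offsets from the updated R4.)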

	// !!! R6 = &src[0]
	MOVD R7, R6

	// sLimit := len(src) - inputMargin
	MOVD R14, R9
	SUB  $15, R9, R9

	// !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't
	// change for the rest of the function.
	MOVD R5, 64(RSP)
	MOVD R6, 72(RSP)
	MOVD R9, 96(RSP)

	// nextEmit := 0
	MOVD R6, R10

	// s := 1
	ADD $1, R7, R7

	// nextHash := hash(load32(src, s), shift)
	MOVW 0(R7), R11
	MULW R16, R11, R11
	LSRW R5, R11, R11

outer:
	// for { etc }

	// skip := 32
	MOVD $32, R12

	// nextS := s
	MOVD R7, R13

	// candidate := 0
	MOVD $0, R15

inner0:
	// for { etc }

	// s := nextS
	MOVD R13, R7

	// bytesBetweenHashLookups := skip >> 5
	MOVD R12, R14
	LSR  $5, R14, R14

	// nextS = s + bytesBetweenHashLookups
	ADD R14, R13, R13

	// skip += bytesBetweenHashLookups
	ADD R14, R12, R12

	// if nextS > sLimit { goto emitRemainder }
	MOVD R13, R3
	SUB  R6, R3, R3
	CMP  R9, R3
	BHI  emitRemainder

	// candidate = int(table[nextHash])
	MOVHU 0(R17)(R11<<1), R15

	// table[nextHash] = uint16(s)
	MOVD R7, R3
	SUB  R6, R3, R3

	MOVH R3, 0(R17)(R11<<1)

	// nextHash = hash(load32(src, nextS), shift)
	MOVW 0(R13), R11
	MULW R16, R11, R11
	LSRW R5, R11, R11

	// if load32(src, s) != load32(src, candidate) { continue } break
	MOVW 0(R7), R3
	MOVW (R6)(R15), R4
	CMPW R4, R3
	BNE  inner0

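	// As a hedged consolidated sketch, the inner0 loop above is the heuristic
	// match-skipping scan from encode_other.go (the &tableMask masking is
	// implicit here because shift keeps nextHash below tableSize):
	//
	//	for {
	//		s = nextS
	//		bytesBetweenHashLookups := skip >> 5
	//		nextS = s + bytesBetweenHashLookups
	//		skip += bytesBetweenHashLookups
	//		if nextS > sLimit {
	//			goto emitRemainder
	//		}
	//		candidate = int(table[nextHash&tableMask])
	//		table[nextHash&tableMask] = uint16(s)
	//		nextHash = hash(load32(src, nextS), shift)
	//		if load32(src, s) == load32(src, candidate) {
	//			break
	//		}
	//	}
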
fourByteMatch:
	// As per the encode_other.go code:
	//
	// A 4-byte match has been found. We'll later see etc.

	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
	// on inputMargin in encode.go.
	MOVD R7, R3
	SUB  R10, R3, R3
	CMP  $16, R3
	BLE  emitLiteralFastPath

	// ----------------------------------------
	// Begin inline of the emitLiteral call.
	//
	// d += emitLiteral(dst[d:], src[nextEmit:s])

	MOVW R3, R4
	SUBW $1, R4, R4

	MOVW $60, R2
	CMPW R2, R4
	BLT  inlineEmitLiteralOneByte
	MOVW $256, R2
	CMPW R2, R4
	BLT  inlineEmitLiteralTwoBytes

inlineEmitLiteralThreeBytes:
	MOVD $0xf4, R1
	MOVB R1, 0(R8)
	MOVW R4, 1(R8)
	ADD  $3, R8, R8
	B    inlineEmitLiteralMemmove

inlineEmitLiteralTwoBytes:
	MOVD $0xf0, R1
	MOVB R1, 0(R8)
	MOVB R4, 1(R8)
	ADD  $2, R8, R8
	B    inlineEmitLiteralMemmove

inlineEmitLiteralOneByte:
	LSLW $2, R4, R4
	MOVB R4, 0(R8)
	ADD  $1, R8, R8

inlineEmitLiteralMemmove:
	// Spill local variables (registers) onto the stack; call; unspill.
	//
	// copy(dst[i:], lit)
	//
	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
	// R8, R10 and R3 as arguments.
	MOVD R8, 8(RSP)
	MOVD R10, 16(RSP)
	MOVD R3, 24(RSP)

	// Finish the "d +=" part of "d += emitLiteral(etc)".
	ADD   R3, R8, R8
	MOVD  R7, 80(RSP)
	MOVD  R8, 88(RSP)
	MOVD  R15, 120(RSP)
	CALL  runtime·memmove(SB)
	MOVD  64(RSP), R5
	MOVD  72(RSP), R6
	MOVD  80(RSP), R7
	MOVD  88(RSP), R8
	MOVD  96(RSP), R9
	MOVD  120(RSP), R15
	ADD   $128, RSP, R17
	MOVW  $0xa7bd, R16
	MOVKW $(0x1e35<<16), R16
	B     inner1

inlineEmitLiteralEnd:
	// End inline of the emitLiteral call.
	// ----------------------------------------

emitLiteralFastPath:
	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
	MOVB R3, R4
	SUBW $1, R4, R4
	AND  $0xff, R4, R4
	LSLW $2, R4, R4
	MOVB R4, (R8)
	ADD  $1, R8, R8

	// !!! Implement the copy from lit to dst as a 16-byte load and store.
	// (Encode's documentation says that dst and src must not overlap.)
	//
	// This always copies 16 bytes, instead of only len(lit) bytes, but that's
	// OK. Subsequent iterations will fix up the overrun.
	//
	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
	// 16-byte loads and stores. This technique probably wouldn't be as
	// effective on architectures that are fussier about alignment.
	LDP 0(R10), (R0, R1)
	STP (R0, R1), 0(R8)
	ADD R3, R8, R8

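	// A hedged Go rendering of the fast path above, relying on the inputMargin
	// invariant described in encode.go to make the 16-byte load and store
	// safe:
	//
	//	dst[d] = uint8(len(lit)-1) << 2 // 1-byte literal tag; len(lit) <= 16
	//	copy(dst[d+1:d+17], src[nextEmit:nextEmit+16])
	//	d += 1 + len(lit) // any over-copied bytes are overwritten by later emits
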
inner1:
	// for { etc }

	// base := s
	MOVD R7, R12

	// !!! offset := base - candidate
	MOVD R12, R11
	SUB  R15, R11, R11
	SUB  R6, R11, R11

	// ----------------------------------------
	// Begin inline of the extendMatch call.
	//
	// s = extendMatch(src, candidate+4, s+4)

	// !!! R14 = &src[len(src)]
	MOVD src_len+32(FP), R14
	ADD  R6, R14, R14

	// !!! R13 = &src[len(src) - 8]
	MOVD R14, R13
	SUB  $8, R13, R13

	// !!! R15 = &src[candidate + 4]
	ADD $4, R15, R15
	ADD R6, R15, R15

	// !!! s += 4
	ADD $4, R7, R7

inlineExtendMatchCmp8:
	// As long as we are 8 or more bytes before the end of src, we can load and
	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
	CMP  R13, R7
	BHI  inlineExtendMatchCmp1
	MOVD (R15), R3
	MOVD (R7), R4
	CMP  R4, R3
	BNE  inlineExtendMatchBSF
	ADD  $8, R15, R15
	ADD  $8, R7, R7
	B    inlineExtendMatchCmp8

inlineExtendMatchBSF:
	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
	// the index of the first byte that differs.
	// RBIT reverses the bit order, then CLZ counts the leading zeros, the
	// combination of which finds the least significant bit which is set.
	// The arm64 architecture is little-endian, and the shift by 3 converts
	// a bit index to a byte index.
	EOR  R3, R4, R4
	RBIT R4, R4
	CLZ  R4, R4
	ADD  R4>>3, R7, R7
	B    inlineExtendMatchEnd

inlineExtendMatchCmp1:
	// In src's tail, compare 1 byte at a time.
	CMP  R7, R14
	BLS  inlineExtendMatchEnd
	MOVB (R15), R3
	MOVB (R7), R4
	CMP  R4, R3
	BNE  inlineExtendMatchEnd
	ADD  $1, R15, R15
	ADD  $1, R7, R7
	B    inlineExtendMatchCmp1

inlineExtendMatchEnd:
	// End inline of the extendMatch call.
	// ----------------------------------------

	// ----------------------------------------
	// Begin inline of the emitCopy call.
	//
	// d += emitCopy(dst[d:], base-candidate, s-base)

	// !!! length := s - base
	MOVD R7, R3
	SUB  R12, R3, R3

inlineEmitCopyLoop0:
	// for length >= 68 { etc }
	MOVW $68, R2
	CMPW R2, R3
	BLT  inlineEmitCopyStep1

	// Emit a length 64 copy, encoded as 3 bytes.
	MOVD $0xfe, R1
	MOVB R1, 0(R8)
	MOVW R11, 1(R8)
	ADD  $3, R8, R8
	SUBW $64, R3, R3
	B    inlineEmitCopyLoop0

inlineEmitCopyStep1:
	// if length > 64 { etc }
	MOVW $64, R2
	CMPW R2, R3
	BLE  inlineEmitCopyStep2

	// Emit a length 60 copy, encoded as 3 bytes.
	MOVD $0xee, R1
	MOVB R1, 0(R8)
	MOVW R11, 1(R8)
	ADD  $3, R8, R8
	SUBW $60, R3, R3

inlineEmitCopyStep2:
	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
	MOVW $12, R2
	CMPW R2, R3
	BGE  inlineEmitCopyStep3
	MOVW $2048, R2
	CMPW R2, R11
	BGE  inlineEmitCopyStep3

	// Emit the remaining copy, encoded as 2 bytes.
	MOVB R11, 1(R8)
	LSRW $8, R11, R11
	LSLW $5, R11, R11
	SUBW $4, R3, R3
	AND  $0xff, R3, R3
	LSLW $2, R3, R3
	ORRW R3, R11, R11
	ORRW $1, R11, R11
	MOVB R11, 0(R8)
	ADD  $2, R8, R8
	B    inlineEmitCopyEnd

inlineEmitCopyStep3:
	// Emit the remaining copy, encoded as 3 bytes.
	SUBW $1, R3, R3
	LSLW $2, R3, R3
	ORRW $2, R3, R3
	MOVB R3, 0(R8)
	MOVW R11, 1(R8)
	ADD  $3, R8, R8

inlineEmitCopyEnd:
	// End inline of the emitCopy call.
	// ----------------------------------------

	// nextEmit = s
	MOVD R7, R10

	// if s >= sLimit { goto emitRemainder }
	MOVD R7, R3
	SUB  R6, R3, R3
	CMP  R3, R9
	BLS  emitRemainder

	// As per the encode_other.go code:
	//
	// We could immediately etc.

	// x := load64(src, s-1)
	MOVD -1(R7), R14

	// prevHash := hash(uint32(x>>0), shift)
	MOVW R14, R11
	MULW R16, R11, R11
	LSRW R5, R11, R11

	// table[prevHash] = uint16(s-1)
	MOVD R7, R3
	SUB  R6, R3, R3
	SUB  $1, R3, R3

	MOVHU R3, 0(R17)(R11<<1)

	// currHash := hash(uint32(x>>8), shift)
	LSR  $8, R14, R14
	MOVW R14, R11
	MULW R16, R11, R11
	LSRW R5, R11, R11

	// candidate = int(table[currHash])
	MOVHU 0(R17)(R11<<1), R15

	// table[currHash] = uint16(s)
	ADD   $1, R3, R3
	MOVHU R3, 0(R17)(R11<<1)

	// if uint32(x>>8) == load32(src, candidate) { continue }
	MOVW (R6)(R15), R4
	CMPW R4, R14
	BEQ  inner1

	// nextHash = hash(uint32(x>>16), shift)
	LSR  $8, R14, R14
	MOVW R14, R11
	MULW R16, R11, R11
	LSRW R5, R11, R11

	// s++
	ADD $1, R7, R7

	// break out of the inner1 for loop, i.e. continue the outer loop.
	B outer

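	// As a hedged consolidated sketch, the block above chains three hash
	// probes out of one 8-byte load, as in encode_other.go:
	//
	//	x := load64(src, s-1)
	//	prevHash := hash(uint32(x>>0), shift)
	//	table[prevHash&tableMask] = uint16(s - 1)
	//	currHash := hash(uint32(x>>8), shift)
	//	candidate = int(table[currHash&tableMask])
	//	table[currHash&tableMask] = uint16(s)
	//	if uint32(x>>8) == load32(src, candidate) {
	//		continue // another match: stay in the inner1 loop
	//	}
	//	nextHash = hash(uint32(x>>16), shift)
	//	s++
	//	break // no match: continue the outer loop
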
emitRemainder:
	// if nextEmit < len(src) { etc }
	MOVD src_len+32(FP), R3
	ADD  R6, R3, R3
	CMP  R3, R10
	BEQ  encodeBlockEnd

	// d += emitLiteral(dst[d:], src[nextEmit:])
	//
	// Push args.
	MOVD R8, 8(RSP)
	MOVD $0, 16(RSP)  // Unnecessary, as the callee ignores it, but conservative.
	MOVD $0, 24(RSP)  // Unnecessary, as the callee ignores it, but conservative.
	MOVD R10, 32(RSP)
	SUB  R10, R3, R3
	MOVD R3, 40(RSP)
	MOVD R3, 48(RSP)  // Unnecessary, as the callee ignores it, but conservative.

	// Spill local variables (registers) onto the stack; call; unspill.
	MOVD R8, 88(RSP)
	CALL ·emitLiteral(SB)
	MOVD 88(RSP), R8

	// Finish the "d +=" part of "d += emitLiteral(etc)".
	MOVD 56(RSP), R1
	ADD  R1, R8, R8

encodeBlockEnd:
	MOVD dst_base+0(FP), R3
	SUB  R3, R8, R8
	MOVD R8, d+48(FP)
	RET
