...

Text file src/github.com/klauspost/compress/s2/encodeblock_amd64.s

Documentation: github.com/klauspost/compress/s2

     1// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
     2
     3//go:build !appengine && !noasm && gc && !noasm
     4
     5#include "textflag.h"
     6
     7// func _dummy_()
      // _dummy_ takes no arguments, declares a zero-byte frame and consists of a
      // single RET. The preprocessor directives inside its body define GOAMD64_v3
      // whenever GOAMD64_v4 is defined and GOAMD64_v3 is not already, so the
      // "#ifdef GOAMD64_v3" guards later in this file (which choose TZCNTQ over
      // BSFQ) are also taken when only the v4 macro is set.
     8TEXT ·_dummy_(SB), $0
     9#ifdef GOAMD64_v4
    10#ifndef GOAMD64_v3
    11#define GOAMD64_v3
    12#endif
    13#endif
    14	RET
    15
    16// func encodeBlockAsm(dst []byte, src []byte) int
    17// Requires: BMI, SSE2
    18TEXT ·encodeBlockAsm(SB), $65560-56
    19	MOVQ dst_base+0(FP), AX
    20	MOVQ $0x00000200, CX
    21	LEAQ 24(SP), DX
    22	PXOR X0, X0
    23
    24zero_loop_encodeBlockAsm:
    25	MOVOU X0, (DX)
    26	MOVOU X0, 16(DX)
    27	MOVOU X0, 32(DX)
    28	MOVOU X0, 48(DX)
    29	MOVOU X0, 64(DX)
    30	MOVOU X0, 80(DX)
    31	MOVOU X0, 96(DX)
    32	MOVOU X0, 112(DX)
    33	ADDQ  $0x80, DX
    34	DECQ  CX
    35	JNZ   zero_loop_encodeBlockAsm
    36	MOVL  $0x00000000, 12(SP)
    37	MOVQ  src_len+32(FP), CX
    38	LEAQ  -9(CX), DX
    39	LEAQ  -8(CX), BX
    40	MOVL  BX, 8(SP)
    41	SHRQ  $0x05, CX
    42	SUBL  CX, DX
    43	LEAQ  (AX)(DX*1), DX
    44	MOVQ  DX, (SP)
    45	MOVL  $0x00000001, CX
    46	MOVL  CX, 16(SP)
    47	MOVQ  src_base+24(FP), DX
    48
    49search_loop_encodeBlockAsm:
    50	MOVL  CX, BX
    51	SUBL  12(SP), BX
    52	SHRL  $0x06, BX
    53	LEAL  4(CX)(BX*1), BX
    54	CMPL  BX, 8(SP)
    55	JAE   emit_remainder_encodeBlockAsm
    56	MOVQ  (DX)(CX*1), SI
    57	MOVL  BX, 20(SP)
    58	MOVQ  $0x0000cf1bbcdcbf9b, R8
    59	MOVQ  SI, R9
    60	MOVQ  SI, R10
    61	SHRQ  $0x08, R10
    62	SHLQ  $0x10, R9
    63	IMULQ R8, R9
    64	SHRQ  $0x32, R9
    65	SHLQ  $0x10, R10
    66	IMULQ R8, R10
    67	SHRQ  $0x32, R10
    68	MOVL  24(SP)(R9*4), BX
    69	MOVL  24(SP)(R10*4), DI
    70	MOVL  CX, 24(SP)(R9*4)
    71	LEAL  1(CX), R9
    72	MOVL  R9, 24(SP)(R10*4)
    73	MOVQ  SI, R9
    74	SHRQ  $0x10, R9
    75	SHLQ  $0x10, R9
    76	IMULQ R8, R9
    77	SHRQ  $0x32, R9
    78	MOVL  CX, R8
    79	SUBL  16(SP), R8
    80	MOVL  1(DX)(R8*1), R10
    81	MOVQ  SI, R8
    82	SHRQ  $0x08, R8
    83	CMPL  R8, R10
    84	JNE   no_repeat_found_encodeBlockAsm
    85	LEAL  1(CX), SI
    86	MOVL  12(SP), DI
    87	MOVL  SI, BX
    88	SUBL  16(SP), BX
    89	JZ    repeat_extend_back_end_encodeBlockAsm
    90
    91repeat_extend_back_loop_encodeBlockAsm:
    92	CMPL SI, DI
    93	JBE  repeat_extend_back_end_encodeBlockAsm
    94	MOVB -1(DX)(BX*1), R8
    95	MOVB -1(DX)(SI*1), R9
    96	CMPB R8, R9
    97	JNE  repeat_extend_back_end_encodeBlockAsm
    98	LEAL -1(SI), SI
    99	DECL BX
   100	JNZ  repeat_extend_back_loop_encodeBlockAsm
   101
   102repeat_extend_back_end_encodeBlockAsm:
   103	MOVL SI, BX
   104	SUBL 12(SP), BX
   105	LEAQ 5(AX)(BX*1), BX
   106	CMPQ BX, (SP)
   107	JB   repeat_dst_size_check_encodeBlockAsm
   108	MOVQ $0x00000000, ret+48(FP)
   109	RET
   110
   111repeat_dst_size_check_encodeBlockAsm:
   112	MOVL 12(SP), BX
   113	CMPL BX, SI
   114	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm
   115	MOVL SI, R8
   116	MOVL SI, 12(SP)
   117	LEAQ (DX)(BX*1), R9
   118	SUBL BX, R8
   119	LEAL -1(R8), BX
   120	CMPL BX, $0x3c
   121	JB   one_byte_repeat_emit_encodeBlockAsm
   122	CMPL BX, $0x00000100
   123	JB   two_bytes_repeat_emit_encodeBlockAsm
   124	CMPL BX, $0x00010000
   125	JB   three_bytes_repeat_emit_encodeBlockAsm
   126	CMPL BX, $0x01000000
   127	JB   four_bytes_repeat_emit_encodeBlockAsm
   128	MOVB $0xfc, (AX)
   129	MOVL BX, 1(AX)
   130	ADDQ $0x05, AX
   131	JMP  memmove_long_repeat_emit_encodeBlockAsm
   132
   133four_bytes_repeat_emit_encodeBlockAsm:
   134	MOVL BX, R10
   135	SHRL $0x10, R10
   136	MOVB $0xf8, (AX)
   137	MOVW BX, 1(AX)
   138	MOVB R10, 3(AX)
   139	ADDQ $0x04, AX
   140	JMP  memmove_long_repeat_emit_encodeBlockAsm
   141
   142three_bytes_repeat_emit_encodeBlockAsm:
   143	MOVB $0xf4, (AX)
   144	MOVW BX, 1(AX)
   145	ADDQ $0x03, AX
   146	JMP  memmove_long_repeat_emit_encodeBlockAsm
   147
   148two_bytes_repeat_emit_encodeBlockAsm:
   149	MOVB $0xf0, (AX)
   150	MOVB BL, 1(AX)
   151	ADDQ $0x02, AX
   152	CMPL BX, $0x40
   153	JB   memmove_repeat_emit_encodeBlockAsm
   154	JMP  memmove_long_repeat_emit_encodeBlockAsm
   155
   156one_byte_repeat_emit_encodeBlockAsm:
   157	SHLB $0x02, BL
   158	MOVB BL, (AX)
   159	ADDQ $0x01, AX
   160
   161memmove_repeat_emit_encodeBlockAsm:
   162	LEAQ (AX)(R8*1), BX
   163
   164	// genMemMoveShort
   165	CMPQ R8, $0x08
   166	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
   167	CMPQ R8, $0x10
   168	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
   169	CMPQ R8, $0x20
   170	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
   171	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
   172
   173emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
   174	MOVQ (R9), R10
   175	MOVQ R10, (AX)
   176	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
   177
   178emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
   179	MOVQ (R9), R10
   180	MOVQ -8(R9)(R8*1), R9
   181	MOVQ R10, (AX)
   182	MOVQ R9, -8(AX)(R8*1)
   183	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
   184
   185emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
   186	MOVOU (R9), X0
   187	MOVOU -16(R9)(R8*1), X1
   188	MOVOU X0, (AX)
   189	MOVOU X1, -16(AX)(R8*1)
   190	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm
   191
   192emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
   193	MOVOU (R9), X0
   194	MOVOU 16(R9), X1
   195	MOVOU -32(R9)(R8*1), X2
   196	MOVOU -16(R9)(R8*1), X3
   197	MOVOU X0, (AX)
   198	MOVOU X1, 16(AX)
   199	MOVOU X2, -32(AX)(R8*1)
   200	MOVOU X3, -16(AX)(R8*1)
   201
   202memmove_end_copy_repeat_emit_encodeBlockAsm:
   203	MOVQ BX, AX
   204	JMP  emit_literal_done_repeat_emit_encodeBlockAsm
   205
   206memmove_long_repeat_emit_encodeBlockAsm:
   207	LEAQ (AX)(R8*1), BX
   208
   209	// genMemMoveLong
   210	MOVOU (R9), X0
   211	MOVOU 16(R9), X1
   212	MOVOU -32(R9)(R8*1), X2
   213	MOVOU -16(R9)(R8*1), X3
   214	MOVQ  R8, R11
   215	SHRQ  $0x05, R11
   216	MOVQ  AX, R10
   217	ANDL  $0x0000001f, R10
   218	MOVQ  $0x00000040, R12
   219	SUBQ  R10, R12
   220	DECQ  R11
   221	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
   222	LEAQ  -32(R9)(R12*1), R10
   223	LEAQ  -32(AX)(R12*1), R13
   224
   225emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
   226	MOVOU (R10), X4
   227	MOVOU 16(R10), X5
   228	MOVOA X4, (R13)
   229	MOVOA X5, 16(R13)
   230	ADDQ  $0x20, R13
   231	ADDQ  $0x20, R10
   232	ADDQ  $0x20, R12
   233	DECQ  R11
   234	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
   235
   236emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
   237	MOVOU -32(R9)(R12*1), X4
   238	MOVOU -16(R9)(R12*1), X5
   239	MOVOA X4, -32(AX)(R12*1)
   240	MOVOA X5, -16(AX)(R12*1)
   241	ADDQ  $0x20, R12
   242	CMPQ  R8, R12
   243	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
   244	MOVOU X0, (AX)
   245	MOVOU X1, 16(AX)
   246	MOVOU X2, -32(AX)(R8*1)
   247	MOVOU X3, -16(AX)(R8*1)
   248	MOVQ  BX, AX
   249
   250emit_literal_done_repeat_emit_encodeBlockAsm:
   251	ADDL $0x05, CX
   252	MOVL CX, BX
   253	SUBL 16(SP), BX
   254	MOVQ src_len+32(FP), R8
   255	SUBL CX, R8
   256	LEAQ (DX)(CX*1), R9
   257	LEAQ (DX)(BX*1), BX
   258
   259	// matchLen
   260	XORL R11, R11
   261
   262matchlen_loopback_16_repeat_extend_encodeBlockAsm:
   263	CMPL R8, $0x10
   264	JB   matchlen_match8_repeat_extend_encodeBlockAsm
   265	MOVQ (R9)(R11*1), R10
   266	MOVQ 8(R9)(R11*1), R12
   267	XORQ (BX)(R11*1), R10
   268	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm
   269	XORQ 8(BX)(R11*1), R12
   270	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm
   271	LEAL -16(R8), R8
   272	LEAL 16(R11), R11
   273	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm
   274
   275matchlen_bsf_16repeat_extend_encodeBlockAsm:
   276#ifdef GOAMD64_v3
   277	TZCNTQ R12, R12
   278
   279#else
   280	BSFQ R12, R12
   281
   282#endif
   283	SARQ $0x03, R12
   284	LEAL 8(R11)(R12*1), R11
   285	JMP  repeat_extend_forward_end_encodeBlockAsm
   286
   287matchlen_match8_repeat_extend_encodeBlockAsm:
   288	CMPL R8, $0x08
   289	JB   matchlen_match4_repeat_extend_encodeBlockAsm
   290	MOVQ (R9)(R11*1), R10
   291	XORQ (BX)(R11*1), R10
   292	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm
   293	LEAL -8(R8), R8
   294	LEAL 8(R11), R11
   295	JMP  matchlen_match4_repeat_extend_encodeBlockAsm
   296
   297matchlen_bsf_8_repeat_extend_encodeBlockAsm:
   298#ifdef GOAMD64_v3
   299	TZCNTQ R10, R10
   300
   301#else
   302	BSFQ R10, R10
   303
   304#endif
   305	SARQ $0x03, R10
   306	LEAL (R11)(R10*1), R11
   307	JMP  repeat_extend_forward_end_encodeBlockAsm
   308
   309matchlen_match4_repeat_extend_encodeBlockAsm:
   310	CMPL R8, $0x04
   311	JB   matchlen_match2_repeat_extend_encodeBlockAsm
   312	MOVL (R9)(R11*1), R10
   313	CMPL (BX)(R11*1), R10
   314	JNE  matchlen_match2_repeat_extend_encodeBlockAsm
   315	LEAL -4(R8), R8
   316	LEAL 4(R11), R11
   317
   318matchlen_match2_repeat_extend_encodeBlockAsm:
   319	CMPL R8, $0x01
   320	JE   matchlen_match1_repeat_extend_encodeBlockAsm
   321	JB   repeat_extend_forward_end_encodeBlockAsm
   322	MOVW (R9)(R11*1), R10
   323	CMPW (BX)(R11*1), R10
   324	JNE  matchlen_match1_repeat_extend_encodeBlockAsm
   325	LEAL 2(R11), R11
   326	SUBL $0x02, R8
   327	JZ   repeat_extend_forward_end_encodeBlockAsm
   328
   329matchlen_match1_repeat_extend_encodeBlockAsm:
   330	MOVB (R9)(R11*1), R10
   331	CMPB (BX)(R11*1), R10
   332	JNE  repeat_extend_forward_end_encodeBlockAsm
   333	LEAL 1(R11), R11
   334
   335repeat_extend_forward_end_encodeBlockAsm:
   336	ADDL  R11, CX
   337	MOVL  CX, BX
   338	SUBL  SI, BX
   339	MOVL  16(SP), SI
   340	TESTL DI, DI
   341	JZ    repeat_as_copy_encodeBlockAsm
   342
   343	// emitRepeat
   344emit_repeat_again_match_repeat_encodeBlockAsm:
   345	MOVL BX, DI
   346	LEAL -4(BX), BX
   347	CMPL DI, $0x08
   348	JBE  repeat_two_match_repeat_encodeBlockAsm
   349	CMPL DI, $0x0c
   350	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm
   351	CMPL SI, $0x00000800
   352	JB   repeat_two_offset_match_repeat_encodeBlockAsm
   353
   354cant_repeat_two_offset_match_repeat_encodeBlockAsm:
   355	CMPL BX, $0x00000104
   356	JB   repeat_three_match_repeat_encodeBlockAsm
   357	CMPL BX, $0x00010100
   358	JB   repeat_four_match_repeat_encodeBlockAsm
   359	CMPL BX, $0x0100ffff
   360	JB   repeat_five_match_repeat_encodeBlockAsm
   361	LEAL -16842747(BX), BX
   362	MOVL $0xfffb001d, (AX)
   363	MOVB $0xff, 4(AX)
   364	ADDQ $0x05, AX
   365	JMP  emit_repeat_again_match_repeat_encodeBlockAsm
   366
   367repeat_five_match_repeat_encodeBlockAsm:
   368	LEAL -65536(BX), BX
   369	MOVL BX, SI
   370	MOVW $0x001d, (AX)
   371	MOVW BX, 2(AX)
   372	SARL $0x10, SI
   373	MOVB SI, 4(AX)
   374	ADDQ $0x05, AX
   375	JMP  repeat_end_emit_encodeBlockAsm
   376
   377repeat_four_match_repeat_encodeBlockAsm:
   378	LEAL -256(BX), BX
   379	MOVW $0x0019, (AX)
   380	MOVW BX, 2(AX)
   381	ADDQ $0x04, AX
   382	JMP  repeat_end_emit_encodeBlockAsm
   383
   384repeat_three_match_repeat_encodeBlockAsm:
   385	LEAL -4(BX), BX
   386	MOVW $0x0015, (AX)
   387	MOVB BL, 2(AX)
   388	ADDQ $0x03, AX
   389	JMP  repeat_end_emit_encodeBlockAsm
   390
   391repeat_two_match_repeat_encodeBlockAsm:
   392	SHLL $0x02, BX
   393	ORL  $0x01, BX
   394	MOVW BX, (AX)
   395	ADDQ $0x02, AX
   396	JMP  repeat_end_emit_encodeBlockAsm
   397
   398repeat_two_offset_match_repeat_encodeBlockAsm:
   399	XORQ DI, DI
   400	LEAL 1(DI)(BX*4), BX
   401	MOVB SI, 1(AX)
   402	SARL $0x08, SI
   403	SHLL $0x05, SI
   404	ORL  SI, BX
   405	MOVB BL, (AX)
   406	ADDQ $0x02, AX
   407	JMP  repeat_end_emit_encodeBlockAsm
   408
   409repeat_as_copy_encodeBlockAsm:
   410	// emitCopy
   411	CMPL SI, $0x00010000
   412	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm
   413	CMPL BX, $0x40
   414	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm
   415	MOVB $0xff, (AX)
   416	MOVL SI, 1(AX)
   417	LEAL -64(BX), BX
   418	ADDQ $0x05, AX
   419	CMPL BX, $0x04
   420	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm
   421
   422	// emitRepeat
   423emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
   424	MOVL BX, DI
   425	LEAL -4(BX), BX
   426	CMPL DI, $0x08
   427	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
   428	CMPL DI, $0x0c
   429	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
   430	CMPL SI, $0x00000800
   431	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
   432
   433cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
   434	CMPL BX, $0x00000104
   435	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
   436	CMPL BX, $0x00010100
   437	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
   438	CMPL BX, $0x0100ffff
   439	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
   440	LEAL -16842747(BX), BX
   441	MOVL $0xfffb001d, (AX)
   442	MOVB $0xff, 4(AX)
   443	ADDQ $0x05, AX
   444	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
   445
   446repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
   447	LEAL -65536(BX), BX
   448	MOVL BX, SI
   449	MOVW $0x001d, (AX)
   450	MOVW BX, 2(AX)
   451	SARL $0x10, SI
   452	MOVB SI, 4(AX)
   453	ADDQ $0x05, AX
   454	JMP  repeat_end_emit_encodeBlockAsm
   455
   456repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
   457	LEAL -256(BX), BX
   458	MOVW $0x0019, (AX)
   459	MOVW BX, 2(AX)
   460	ADDQ $0x04, AX
   461	JMP  repeat_end_emit_encodeBlockAsm
   462
   463repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
   464	LEAL -4(BX), BX
   465	MOVW $0x0015, (AX)
   466	MOVB BL, 2(AX)
   467	ADDQ $0x03, AX
   468	JMP  repeat_end_emit_encodeBlockAsm
   469
   470repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
   471	SHLL $0x02, BX
   472	ORL  $0x01, BX
   473	MOVW BX, (AX)
   474	ADDQ $0x02, AX
   475	JMP  repeat_end_emit_encodeBlockAsm
   476
   477repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
   478	XORQ DI, DI
   479	LEAL 1(DI)(BX*4), BX
   480	MOVB SI, 1(AX)
   481	SARL $0x08, SI
   482	SHLL $0x05, SI
   483	ORL  SI, BX
   484	MOVB BL, (AX)
   485	ADDQ $0x02, AX
   486	JMP  repeat_end_emit_encodeBlockAsm
   487
   488four_bytes_remain_repeat_as_copy_encodeBlockAsm:
   489	TESTL BX, BX
   490	JZ    repeat_end_emit_encodeBlockAsm
   491	XORL  DI, DI
   492	LEAL  -1(DI)(BX*4), BX
   493	MOVB  BL, (AX)
   494	MOVL  SI, 1(AX)
   495	ADDQ  $0x05, AX
   496	JMP   repeat_end_emit_encodeBlockAsm
   497
   498two_byte_offset_repeat_as_copy_encodeBlockAsm:
   499	CMPL BX, $0x40
   500	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm
   501	CMPL SI, $0x00000800
   502	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm
   503	MOVL $0x00000001, DI
   504	LEAL 16(DI), DI
   505	MOVB SI, 1(AX)
   506	MOVL SI, R8
   507	SHRL $0x08, R8
   508	SHLL $0x05, R8
   509	ORL  R8, DI
   510	MOVB DI, (AX)
   511	ADDQ $0x02, AX
   512	SUBL $0x08, BX
   513
   514	// emitRepeat
   515	LEAL -4(BX), BX
   516	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
   517
   518emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
   519	MOVL BX, DI
   520	LEAL -4(BX), BX
   521	CMPL DI, $0x08
   522	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
   523	CMPL DI, $0x0c
   524	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
   525	CMPL SI, $0x00000800
   526	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
   527
   528cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
   529	CMPL BX, $0x00000104
   530	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
   531	CMPL BX, $0x00010100
   532	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
   533	CMPL BX, $0x0100ffff
   534	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
   535	LEAL -16842747(BX), BX
   536	MOVL $0xfffb001d, (AX)
   537	MOVB $0xff, 4(AX)
   538	ADDQ $0x05, AX
   539	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
   540
   541repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
   542	LEAL -65536(BX), BX
   543	MOVL BX, SI
   544	MOVW $0x001d, (AX)
   545	MOVW BX, 2(AX)
   546	SARL $0x10, SI
   547	MOVB SI, 4(AX)
   548	ADDQ $0x05, AX
   549	JMP  repeat_end_emit_encodeBlockAsm
   550
   551repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
   552	LEAL -256(BX), BX
   553	MOVW $0x0019, (AX)
   554	MOVW BX, 2(AX)
   555	ADDQ $0x04, AX
   556	JMP  repeat_end_emit_encodeBlockAsm
   557
   558repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
   559	LEAL -4(BX), BX
   560	MOVW $0x0015, (AX)
   561	MOVB BL, 2(AX)
   562	ADDQ $0x03, AX
   563	JMP  repeat_end_emit_encodeBlockAsm
   564
   565repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
   566	SHLL $0x02, BX
   567	ORL  $0x01, BX
   568	MOVW BX, (AX)
   569	ADDQ $0x02, AX
   570	JMP  repeat_end_emit_encodeBlockAsm
   571
   572repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
   573	XORQ DI, DI
   574	LEAL 1(DI)(BX*4), BX
   575	MOVB SI, 1(AX)
   576	SARL $0x08, SI
   577	SHLL $0x05, SI
   578	ORL  SI, BX
   579	MOVB BL, (AX)
   580	ADDQ $0x02, AX
   581	JMP  repeat_end_emit_encodeBlockAsm
   582
   583long_offset_short_repeat_as_copy_encodeBlockAsm:
   584	MOVB $0xee, (AX)
   585	MOVW SI, 1(AX)
   586	LEAL -60(BX), BX
   587	ADDQ $0x03, AX
   588
   589	// emitRepeat
   590emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
   591	MOVL BX, DI
   592	LEAL -4(BX), BX
   593	CMPL DI, $0x08
   594	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
   595	CMPL DI, $0x0c
   596	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
   597	CMPL SI, $0x00000800
   598	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
   599
   600cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
   601	CMPL BX, $0x00000104
   602	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
   603	CMPL BX, $0x00010100
   604	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
   605	CMPL BX, $0x0100ffff
   606	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
   607	LEAL -16842747(BX), BX
   608	MOVL $0xfffb001d, (AX)
   609	MOVB $0xff, 4(AX)
   610	ADDQ $0x05, AX
   611	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
   612
   613repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
   614	LEAL -65536(BX), BX
   615	MOVL BX, SI
   616	MOVW $0x001d, (AX)
   617	MOVW BX, 2(AX)
   618	SARL $0x10, SI
   619	MOVB SI, 4(AX)
   620	ADDQ $0x05, AX
   621	JMP  repeat_end_emit_encodeBlockAsm
   622
   623repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
   624	LEAL -256(BX), BX
   625	MOVW $0x0019, (AX)
   626	MOVW BX, 2(AX)
   627	ADDQ $0x04, AX
   628	JMP  repeat_end_emit_encodeBlockAsm
   629
   630repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
   631	LEAL -4(BX), BX
   632	MOVW $0x0015, (AX)
   633	MOVB BL, 2(AX)
   634	ADDQ $0x03, AX
   635	JMP  repeat_end_emit_encodeBlockAsm
   636
   637repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
   638	SHLL $0x02, BX
   639	ORL  $0x01, BX
   640	MOVW BX, (AX)
   641	ADDQ $0x02, AX
   642	JMP  repeat_end_emit_encodeBlockAsm
   643
   644repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
   645	XORQ DI, DI
   646	LEAL 1(DI)(BX*4), BX
   647	MOVB SI, 1(AX)
   648	SARL $0x08, SI
   649	SHLL $0x05, SI
   650	ORL  SI, BX
   651	MOVB BL, (AX)
   652	ADDQ $0x02, AX
   653	JMP  repeat_end_emit_encodeBlockAsm
   654
   655two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
   656	MOVL BX, DI
   657	SHLL $0x02, DI
   658	CMPL BX, $0x0c
   659	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
   660	CMPL SI, $0x00000800
   661	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
   662	LEAL -15(DI), DI
   663	MOVB SI, 1(AX)
   664	SHRL $0x08, SI
   665	SHLL $0x05, SI
   666	ORL  SI, DI
   667	MOVB DI, (AX)
   668	ADDQ $0x02, AX
   669	JMP  repeat_end_emit_encodeBlockAsm
   670
   671emit_copy_three_repeat_as_copy_encodeBlockAsm:
   672	LEAL -2(DI), DI
   673	MOVB DI, (AX)
   674	MOVW SI, 1(AX)
   675	ADDQ $0x03, AX
   676
   677repeat_end_emit_encodeBlockAsm:
   678	MOVL CX, 12(SP)
   679	JMP  search_loop_encodeBlockAsm
   680
   681no_repeat_found_encodeBlockAsm:
   682	CMPL (DX)(BX*1), SI
   683	JEQ  candidate_match_encodeBlockAsm
   684	SHRQ $0x08, SI
   685	MOVL 24(SP)(R9*4), BX
   686	LEAL 2(CX), R8
   687	CMPL (DX)(DI*1), SI
   688	JEQ  candidate2_match_encodeBlockAsm
   689	MOVL R8, 24(SP)(R9*4)
   690	SHRQ $0x08, SI
   691	CMPL (DX)(BX*1), SI
   692	JEQ  candidate3_match_encodeBlockAsm
   693	MOVL 20(SP), CX
   694	JMP  search_loop_encodeBlockAsm
   695
   696candidate3_match_encodeBlockAsm:
   697	ADDL $0x02, CX
   698	JMP  candidate_match_encodeBlockAsm
   699
   700candidate2_match_encodeBlockAsm:
   701	MOVL R8, 24(SP)(R9*4)
   702	INCL CX
   703	MOVL DI, BX
   704
   705candidate_match_encodeBlockAsm:
   706	MOVL  12(SP), SI
   707	TESTL BX, BX
   708	JZ    match_extend_back_end_encodeBlockAsm
   709
   710match_extend_back_loop_encodeBlockAsm:
   711	CMPL CX, SI
   712	JBE  match_extend_back_end_encodeBlockAsm
   713	MOVB -1(DX)(BX*1), DI
   714	MOVB -1(DX)(CX*1), R8
   715	CMPB DI, R8
   716	JNE  match_extend_back_end_encodeBlockAsm
   717	LEAL -1(CX), CX
   718	DECL BX
   719	JZ   match_extend_back_end_encodeBlockAsm
   720	JMP  match_extend_back_loop_encodeBlockAsm
   721
   722match_extend_back_end_encodeBlockAsm:
   723	MOVL CX, SI
   724	SUBL 12(SP), SI
   725	LEAQ 5(AX)(SI*1), SI
   726	CMPQ SI, (SP)
   727	JB   match_dst_size_check_encodeBlockAsm
   728	MOVQ $0x00000000, ret+48(FP)
   729	RET
   730
   731match_dst_size_check_encodeBlockAsm:
   732	MOVL CX, SI
   733	MOVL 12(SP), DI
   734	CMPL DI, SI
   735	JEQ  emit_literal_done_match_emit_encodeBlockAsm
   736	MOVL SI, R8
   737	MOVL SI, 12(SP)
   738	LEAQ (DX)(DI*1), SI
   739	SUBL DI, R8
   740	LEAL -1(R8), DI
   741	CMPL DI, $0x3c
   742	JB   one_byte_match_emit_encodeBlockAsm
   743	CMPL DI, $0x00000100
   744	JB   two_bytes_match_emit_encodeBlockAsm
   745	CMPL DI, $0x00010000
   746	JB   three_bytes_match_emit_encodeBlockAsm
   747	CMPL DI, $0x01000000
   748	JB   four_bytes_match_emit_encodeBlockAsm
   749	MOVB $0xfc, (AX)
   750	MOVL DI, 1(AX)
   751	ADDQ $0x05, AX
   752	JMP  memmove_long_match_emit_encodeBlockAsm
   753
   754four_bytes_match_emit_encodeBlockAsm:
   755	MOVL DI, R9
   756	SHRL $0x10, R9
   757	MOVB $0xf8, (AX)
   758	MOVW DI, 1(AX)
   759	MOVB R9, 3(AX)
   760	ADDQ $0x04, AX
   761	JMP  memmove_long_match_emit_encodeBlockAsm
   762
   763three_bytes_match_emit_encodeBlockAsm:
   764	MOVB $0xf4, (AX)
   765	MOVW DI, 1(AX)
   766	ADDQ $0x03, AX
   767	JMP  memmove_long_match_emit_encodeBlockAsm
   768
   769two_bytes_match_emit_encodeBlockAsm:
   770	MOVB $0xf0, (AX)
   771	MOVB DI, 1(AX)
   772	ADDQ $0x02, AX
   773	CMPL DI, $0x40
   774	JB   memmove_match_emit_encodeBlockAsm
   775	JMP  memmove_long_match_emit_encodeBlockAsm
   776
   777one_byte_match_emit_encodeBlockAsm:
   778	SHLB $0x02, DI
   779	MOVB DI, (AX)
   780	ADDQ $0x01, AX
   781
   782memmove_match_emit_encodeBlockAsm:
   783	LEAQ (AX)(R8*1), DI
   784
   785	// genMemMoveShort
   786	CMPQ R8, $0x08
   787	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
   788	CMPQ R8, $0x10
   789	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
   790	CMPQ R8, $0x20
   791	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
   792	JMP  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
   793
   794emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
   795	MOVQ (SI), R9
   796	MOVQ R9, (AX)
   797	JMP  memmove_end_copy_match_emit_encodeBlockAsm
   798
   799emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
   800	MOVQ (SI), R9
   801	MOVQ -8(SI)(R8*1), SI
   802	MOVQ R9, (AX)
   803	MOVQ SI, -8(AX)(R8*1)
   804	JMP  memmove_end_copy_match_emit_encodeBlockAsm
   805
   806emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
   807	MOVOU (SI), X0
   808	MOVOU -16(SI)(R8*1), X1
   809	MOVOU X0, (AX)
   810	MOVOU X1, -16(AX)(R8*1)
   811	JMP   memmove_end_copy_match_emit_encodeBlockAsm
   812
   813emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
   814	MOVOU (SI), X0
   815	MOVOU 16(SI), X1
   816	MOVOU -32(SI)(R8*1), X2
   817	MOVOU -16(SI)(R8*1), X3
   818	MOVOU X0, (AX)
   819	MOVOU X1, 16(AX)
   820	MOVOU X2, -32(AX)(R8*1)
   821	MOVOU X3, -16(AX)(R8*1)
   822
   823memmove_end_copy_match_emit_encodeBlockAsm:
   824	MOVQ DI, AX
   825	JMP  emit_literal_done_match_emit_encodeBlockAsm
   826
   827memmove_long_match_emit_encodeBlockAsm:
   828	LEAQ (AX)(R8*1), DI
   829
   830	// genMemMoveLong
   831	MOVOU (SI), X0
   832	MOVOU 16(SI), X1
   833	MOVOU -32(SI)(R8*1), X2
   834	MOVOU -16(SI)(R8*1), X3
   835	MOVQ  R8, R10
   836	SHRQ  $0x05, R10
   837	MOVQ  AX, R9
   838	ANDL  $0x0000001f, R9
   839	MOVQ  $0x00000040, R11
   840	SUBQ  R9, R11
   841	DECQ  R10
   842	JA    emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
   843	LEAQ  -32(SI)(R11*1), R9
   844	LEAQ  -32(AX)(R11*1), R12
   845
   846emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
   847	MOVOU (R9), X4
   848	MOVOU 16(R9), X5
   849	MOVOA X4, (R12)
   850	MOVOA X5, 16(R12)
   851	ADDQ  $0x20, R12
   852	ADDQ  $0x20, R9
   853	ADDQ  $0x20, R11
   854	DECQ  R10
   855	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
   856
   857emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
   858	MOVOU -32(SI)(R11*1), X4
   859	MOVOU -16(SI)(R11*1), X5
   860	MOVOA X4, -32(AX)(R11*1)
   861	MOVOA X5, -16(AX)(R11*1)
   862	ADDQ  $0x20, R11
   863	CMPQ  R8, R11
   864	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
   865	MOVOU X0, (AX)
   866	MOVOU X1, 16(AX)
   867	MOVOU X2, -32(AX)(R8*1)
   868	MOVOU X3, -16(AX)(R8*1)
   869	MOVQ  DI, AX
   870
   871emit_literal_done_match_emit_encodeBlockAsm:
   872match_nolit_loop_encodeBlockAsm:
   873	MOVL CX, SI
   874	SUBL BX, SI
   875	MOVL SI, 16(SP)
   876	ADDL $0x04, CX
   877	ADDL $0x04, BX
   878	MOVQ src_len+32(FP), SI
   879	SUBL CX, SI
   880	LEAQ (DX)(CX*1), DI
   881	LEAQ (DX)(BX*1), BX
   882
   883	// matchLen
   884	XORL R9, R9
   885
   886matchlen_loopback_16_match_nolit_encodeBlockAsm:
   887	CMPL SI, $0x10
   888	JB   matchlen_match8_match_nolit_encodeBlockAsm
   889	MOVQ (DI)(R9*1), R8
   890	MOVQ 8(DI)(R9*1), R10
   891	XORQ (BX)(R9*1), R8
   892	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm
   893	XORQ 8(BX)(R9*1), R10
   894	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm
   895	LEAL -16(SI), SI
   896	LEAL 16(R9), R9
   897	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm
   898
   899matchlen_bsf_16match_nolit_encodeBlockAsm:
   900#ifdef GOAMD64_v3
   901	TZCNTQ R10, R10
   902
   903#else
   904	BSFQ R10, R10
   905
   906#endif
   907	SARQ $0x03, R10
   908	LEAL 8(R9)(R10*1), R9
   909	JMP  match_nolit_end_encodeBlockAsm
   910
   911matchlen_match8_match_nolit_encodeBlockAsm:
   912	CMPL SI, $0x08
   913	JB   matchlen_match4_match_nolit_encodeBlockAsm
   914	MOVQ (DI)(R9*1), R8
   915	XORQ (BX)(R9*1), R8
   916	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm
   917	LEAL -8(SI), SI
   918	LEAL 8(R9), R9
   919	JMP  matchlen_match4_match_nolit_encodeBlockAsm
   920
   921matchlen_bsf_8_match_nolit_encodeBlockAsm:
   922#ifdef GOAMD64_v3
   923	TZCNTQ R8, R8
   924
   925#else
   926	BSFQ R8, R8
   927
   928#endif
   929	SARQ $0x03, R8
   930	LEAL (R9)(R8*1), R9
   931	JMP  match_nolit_end_encodeBlockAsm
   932
   933matchlen_match4_match_nolit_encodeBlockAsm:
   934	CMPL SI, $0x04
   935	JB   matchlen_match2_match_nolit_encodeBlockAsm
   936	MOVL (DI)(R9*1), R8
   937	CMPL (BX)(R9*1), R8
   938	JNE  matchlen_match2_match_nolit_encodeBlockAsm
   939	LEAL -4(SI), SI
   940	LEAL 4(R9), R9
   941
   942matchlen_match2_match_nolit_encodeBlockAsm:
   943	CMPL SI, $0x01
   944	JE   matchlen_match1_match_nolit_encodeBlockAsm
   945	JB   match_nolit_end_encodeBlockAsm
   946	MOVW (DI)(R9*1), R8
   947	CMPW (BX)(R9*1), R8
   948	JNE  matchlen_match1_match_nolit_encodeBlockAsm
   949	LEAL 2(R9), R9
   950	SUBL $0x02, SI
   951	JZ   match_nolit_end_encodeBlockAsm
   952
   953matchlen_match1_match_nolit_encodeBlockAsm:
   954	MOVB (DI)(R9*1), R8
   955	CMPB (BX)(R9*1), R8
   956	JNE  match_nolit_end_encodeBlockAsm
   957	LEAL 1(R9), R9
   958
   959match_nolit_end_encodeBlockAsm:
   960	ADDL R9, CX
   961	MOVL 16(SP), BX
   962	ADDL $0x04, R9
   963	MOVL CX, 12(SP)
   964
   965	// emitCopy
   966	CMPL BX, $0x00010000
   967	JB   two_byte_offset_match_nolit_encodeBlockAsm
   968	CMPL R9, $0x40
   969	JBE  four_bytes_remain_match_nolit_encodeBlockAsm
   970	MOVB $0xff, (AX)
   971	MOVL BX, 1(AX)
   972	LEAL -64(R9), R9
   973	ADDQ $0x05, AX
   974	CMPL R9, $0x04
   975	JB   four_bytes_remain_match_nolit_encodeBlockAsm
   976
   977	// emitRepeat
   978emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
   979	MOVL R9, SI
   980	LEAL -4(R9), R9
   981	CMPL SI, $0x08
   982	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy
   983	CMPL SI, $0x0c
   984	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
   985	CMPL BX, $0x00000800
   986	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
   987
   988cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
   989	CMPL R9, $0x00000104
   990	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy
   991	CMPL R9, $0x00010100
   992	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy
   993	CMPL R9, $0x0100ffff
   994	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy
   995	LEAL -16842747(R9), R9
   996	MOVL $0xfffb001d, (AX)
   997	MOVB $0xff, 4(AX)
   998	ADDQ $0x05, AX
   999	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
  1000
  1001repeat_five_match_nolit_encodeBlockAsm_emit_copy:
  1002	LEAL -65536(R9), R9
  1003	MOVL R9, BX
  1004	MOVW $0x001d, (AX)
  1005	MOVW R9, 2(AX)
  1006	SARL $0x10, BX
  1007	MOVB BL, 4(AX)
  1008	ADDQ $0x05, AX
  1009	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1010
  1011repeat_four_match_nolit_encodeBlockAsm_emit_copy:
  1012	LEAL -256(R9), R9
  1013	MOVW $0x0019, (AX)
  1014	MOVW R9, 2(AX)
  1015	ADDQ $0x04, AX
  1016	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1017
  1018repeat_three_match_nolit_encodeBlockAsm_emit_copy:
  1019	LEAL -4(R9), R9
  1020	MOVW $0x0015, (AX)
  1021	MOVB R9, 2(AX)
  1022	ADDQ $0x03, AX
  1023	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1024
  1025repeat_two_match_nolit_encodeBlockAsm_emit_copy:
  1026	SHLL $0x02, R9
  1027	ORL  $0x01, R9
  1028	MOVW R9, (AX)
  1029	ADDQ $0x02, AX
  1030	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1031
  1032repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
  1033	XORQ SI, SI
  1034	LEAL 1(SI)(R9*4), R9
  1035	MOVB BL, 1(AX)
  1036	SARL $0x08, BX
  1037	SHLL $0x05, BX
  1038	ORL  BX, R9
  1039	MOVB R9, (AX)
  1040	ADDQ $0x02, AX
  1041	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1042
  1043four_bytes_remain_match_nolit_encodeBlockAsm:
  1044	TESTL R9, R9
  1045	JZ    match_nolit_emitcopy_end_encodeBlockAsm
  1046	XORL  SI, SI
  1047	LEAL  -1(SI)(R9*4), R9
  1048	MOVB  R9, (AX)
  1049	MOVL  BX, 1(AX)
  1050	ADDQ  $0x05, AX
  1051	JMP   match_nolit_emitcopy_end_encodeBlockAsm
  1052
  1053two_byte_offset_match_nolit_encodeBlockAsm:
  1054	CMPL R9, $0x40
  1055	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm
  1056	CMPL BX, $0x00000800
  1057	JAE  long_offset_short_match_nolit_encodeBlockAsm
  1058	MOVL $0x00000001, SI
  1059	LEAL 16(SI), SI
  1060	MOVB BL, 1(AX)
  1061	MOVL BX, DI
  1062	SHRL $0x08, DI
  1063	SHLL $0x05, DI
  1064	ORL  DI, SI
  1065	MOVB SI, (AX)
  1066	ADDQ $0x02, AX
  1067	SUBL $0x08, R9
  1068
  1069	// emitRepeat
  1070	LEAL -4(R9), R9
  1071	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
  1072
  1073emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  1074	MOVL R9, SI
  1075	LEAL -4(R9), R9
  1076	CMPL SI, $0x08
  1077	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b
  1078	CMPL SI, $0x0c
  1079	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
  1080	CMPL BX, $0x00000800
  1081	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
  1082
  1083cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  1084	CMPL R9, $0x00000104
  1085	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b
  1086	CMPL R9, $0x00010100
  1087	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b
  1088	CMPL R9, $0x0100ffff
  1089	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b
  1090	LEAL -16842747(R9), R9
  1091	MOVL $0xfffb001d, (AX)
  1092	MOVB $0xff, 4(AX)
  1093	ADDQ $0x05, AX
  1094	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b
  1095
  1096repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  1097	LEAL -65536(R9), R9
  1098	MOVL R9, BX
  1099	MOVW $0x001d, (AX)
  1100	MOVW R9, 2(AX)
  1101	SARL $0x10, BX
  1102	MOVB BL, 4(AX)
  1103	ADDQ $0x05, AX
  1104	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1105
  1106repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  1107	LEAL -256(R9), R9
  1108	MOVW $0x0019, (AX)
  1109	MOVW R9, 2(AX)
  1110	ADDQ $0x04, AX
  1111	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1112
  1113repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  1114	LEAL -4(R9), R9
  1115	MOVW $0x0015, (AX)
  1116	MOVB R9, 2(AX)
  1117	ADDQ $0x03, AX
  1118	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1119
  1120repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  1121	SHLL $0x02, R9
  1122	ORL  $0x01, R9
  1123	MOVW R9, (AX)
  1124	ADDQ $0x02, AX
  1125	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1126
  1127repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  1128	XORQ SI, SI
  1129	LEAL 1(SI)(R9*4), R9
  1130	MOVB BL, 1(AX)
  1131	SARL $0x08, BX
  1132	SHLL $0x05, BX
  1133	ORL  BX, R9
  1134	MOVB R9, (AX)
  1135	ADDQ $0x02, AX
  1136	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1137
  1138long_offset_short_match_nolit_encodeBlockAsm:
  1139	MOVB $0xee, (AX)
  1140	MOVW BX, 1(AX)
  1141	LEAL -60(R9), R9
  1142	ADDQ $0x03, AX
  1143
  1144	// emitRepeat
  1145emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
  1146	MOVL R9, SI
  1147	LEAL -4(R9), R9
  1148	CMPL SI, $0x08
  1149	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
  1150	CMPL SI, $0x0c
  1151	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
  1152	CMPL BX, $0x00000800
  1153	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
  1154
  1155cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
  1156	CMPL R9, $0x00000104
  1157	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
  1158	CMPL R9, $0x00010100
  1159	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
  1160	CMPL R9, $0x0100ffff
  1161	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
  1162	LEAL -16842747(R9), R9
  1163	MOVL $0xfffb001d, (AX)
  1164	MOVB $0xff, 4(AX)
  1165	ADDQ $0x05, AX
  1166	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
  1167
  1168repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
  1169	LEAL -65536(R9), R9
  1170	MOVL R9, BX
  1171	MOVW $0x001d, (AX)
  1172	MOVW R9, 2(AX)
  1173	SARL $0x10, BX
  1174	MOVB BL, 4(AX)
  1175	ADDQ $0x05, AX
  1176	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1177
  1178repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
  1179	LEAL -256(R9), R9
  1180	MOVW $0x0019, (AX)
  1181	MOVW R9, 2(AX)
  1182	ADDQ $0x04, AX
  1183	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1184
  1185repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
  1186	LEAL -4(R9), R9
  1187	MOVW $0x0015, (AX)
  1188	MOVB R9, 2(AX)
  1189	ADDQ $0x03, AX
  1190	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1191
  1192repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
  1193	SHLL $0x02, R9
  1194	ORL  $0x01, R9
  1195	MOVW R9, (AX)
  1196	ADDQ $0x02, AX
  1197	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1198
  1199repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
  1200	XORQ SI, SI
  1201	LEAL 1(SI)(R9*4), R9
  1202	MOVB BL, 1(AX)
  1203	SARL $0x08, BX
  1204	SHLL $0x05, BX
  1205	ORL  BX, R9
  1206	MOVB R9, (AX)
  1207	ADDQ $0x02, AX
  1208	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1209
  1210two_byte_offset_short_match_nolit_encodeBlockAsm:
	// Copy with length R9 <= 0x40 (guaranteed by the JBE at
	// two_byte_offset_match_nolit_encodeBlockAsm) and offset BX.
	// SI = R9*4 is the length pre-shifted into tag position.
  1211	MOVL R9, SI
  1212	SHLL $0x02, SI
	// The two-byte form is used only when R9 < 12 and BX < 0x800;
	// everything else takes the three-byte form.
  1213	CMPL R9, $0x0c
  1214	JAE  emit_copy_three_match_nolit_encodeBlockAsm
  1215	CMPL BX, $0x00000800
  1216	JAE  emit_copy_three_match_nolit_encodeBlockAsm
	// Two bytes: R9*4-15 equals ((R9-4)<<2)|1; the bits of BX above the low
	// byte are OR-ed in at bit 5 of byte 0, and the low byte of BX is byte 1.
  1217	LEAL -15(SI), SI
  1218	MOVB BL, 1(AX)
  1219	SHRL $0x08, BX
  1220	SHLL $0x05, BX
  1221	ORL  BX, SI
  1222	MOVB SI, (AX)
  1223	ADDQ $0x02, AX
  1224	JMP  match_nolit_emitcopy_end_encodeBlockAsm
  1225
  1226emit_copy_three_match_nolit_encodeBlockAsm:
	// Three bytes: R9*4-2 equals ((R9-1)<<2)|2 as byte 0, then BX as a
	// 16-bit value. Falls through to match_nolit_emitcopy_end_encodeBlockAsm.
  1227	LEAL -2(SI), SI
  1228	MOVB SI, (AX)
  1229	MOVW BX, 1(AX)
  1230	ADDQ $0x03, AX
  1231
  1232match_nolit_emitcopy_end_encodeBlockAsm:
	// Common exit of every copy/repeat emitter above.
	// AX = output cursor, DX = source base, CX = current source index.
	// 8(SP) holds src_len-8 and (SP) the output limit, both stored by the
	// function prologue.
  1233	CMPL CX, 8(SP)
  1234	JAE  emit_remainder_encodeBlockAsm
	// SI = the 8 source bytes starting at index CX-2.
  1235	MOVQ -2(DX)(CX*1), SI
	// Give up and return 0 once the output cursor has reached the limit.
  1236	CMPQ AX, (SP)
  1237	JB   match_nolit_dst_ok_encodeBlockAsm
  1238	MOVQ $0x00000000, ret+48(FP)
  1239	RET
  1240
  1241match_nolit_dst_ok_encodeBlockAsm:
	// Two table indices are computed with the same recipe:
	// (value << 16) * 0x0000cf1bbcdcbf9b >> 50. The left shift drops the top
	// two bytes, so each index depends on six source bytes; the right shift by
	// 50 leaves a 14-bit index into the 4-byte-entry table at 24(SP).
	// DI indexes for the bytes at CX-2, BX for the bytes at CX.
  1242	MOVQ  $0x0000cf1bbcdcbf9b, R8
  1243	MOVQ  SI, DI
  1244	SHRQ  $0x10, SI
  1245	MOVQ  SI, BX
  1246	SHLQ  $0x10, DI
  1247	IMULQ R8, DI
  1248	SHRQ  $0x32, DI
  1249	SHLQ  $0x10, BX
  1250	IMULQ R8, BX
  1251	SHRQ  $0x32, BX
	// Read the previous entry for the bytes at CX into BX (the candidate),
	// then record CX-2 and CX in their respective slots.
  1252	LEAL  -2(CX), R8
  1253	LEAQ  24(SP)(BX*4), R9
  1254	MOVL  (R9), BX
  1255	MOVL  R8, 24(SP)(DI*4)
  1256	MOVL  CX, (R9)
	// SI was shifted right by 16 above, so its low 32 bits are the four bytes
	// at CX. If the candidate position holds the same four bytes, continue
	// matching; otherwise step one byte forward and resume the search.
  1257	CMPL  (DX)(BX*1), SI
  1258	JEQ   match_nolit_loop_encodeBlockAsm
  1259	INCL  CX
  1260	JMP   search_loop_encodeBlockAsm
  1261
  1262emit_remainder_encodeBlockAsm:
	// Writes the source bytes from index 12(SP) up to src_len as one final
	// literal run. First make sure it can fit: AX + (src_len - 12(SP)) + 5
	// must stay below the output limit kept in (SP); otherwise return 0.
  1263	MOVQ src_len+32(FP), CX
  1264	SUBL 12(SP), CX
  1265	LEAQ 5(AX)(CX*1), CX
  1266	CMPQ CX, (SP)
  1267	JB   emit_remainder_ok_encodeBlockAsm
  1268	MOVQ $0x00000000, ret+48(FP)
  1269	RET
  1270
  1271emit_remainder_ok_encodeBlockAsm:
	// Nothing pending when 12(SP) already equals src_len.
  1272	MOVQ src_len+32(FP), CX
  1273	MOVL 12(SP), BX
  1274	CMPL BX, CX
  1275	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm
	// From here on: CX = pointer to the first pending byte, SI = run length,
	// DX = run length - 1 (DX no longer holds the source base).
	// 12(SP) is advanced to src_len.
  1276	MOVL CX, SI
  1277	MOVL CX, 12(SP)
  1278	LEAQ (DX)(BX*1), CX
  1279	SUBL BX, SI
  1280	LEAL -1(SI), DX
	// Header width is chosen from length-1:
	//   < 0x3c      -> 1 byte
	//   < 0x100     -> 2 bytes
	//   < 0x10000   -> 3 bytes
	//   < 0x1000000 -> 4 bytes
	//   otherwise   -> 5 bytes
  1281	CMPL DX, $0x3c
  1282	JB   one_byte_emit_remainder_encodeBlockAsm
  1283	CMPL DX, $0x00000100
  1284	JB   two_bytes_emit_remainder_encodeBlockAsm
  1285	CMPL DX, $0x00010000
  1286	JB   three_bytes_emit_remainder_encodeBlockAsm
  1287	CMPL DX, $0x01000000
  1288	JB   four_bytes_emit_remainder_encodeBlockAsm
	// Five-byte header: fc followed by length-1 as a 32-bit value.
  1289	MOVB $0xfc, (AX)
  1290	MOVL DX, 1(AX)
  1291	ADDQ $0x05, AX
  1292	JMP  memmove_long_emit_remainder_encodeBlockAsm
  1293
  1294four_bytes_emit_remainder_encodeBlockAsm:
	// Four-byte header: f8 followed by the low 24 bits of length-1.
  1295	MOVL DX, BX
  1296	SHRL $0x10, BX
  1297	MOVB $0xf8, (AX)
  1298	MOVW DX, 1(AX)
  1299	MOVB BL, 3(AX)
  1300	ADDQ $0x04, AX
  1301	JMP  memmove_long_emit_remainder_encodeBlockAsm
  1302
  1303three_bytes_emit_remainder_encodeBlockAsm:
	// Three-byte header: f4 followed by length-1 as a 16-bit value.
  1304	MOVB $0xf4, (AX)
  1305	MOVW DX, 1(AX)
  1306	ADDQ $0x03, AX
  1307	JMP  memmove_long_emit_remainder_encodeBlockAsm
  1308
  1309two_bytes_emit_remainder_encodeBlockAsm:
	// Two-byte header: f0 followed by length-1 as one byte. Runs with
	// length-1 < 0x40 still use the short copy routine; longer ones the long.
  1310	MOVB $0xf0, (AX)
  1311	MOVB DL, 1(AX)
  1312	ADDQ $0x02, AX
  1313	CMPL DX, $0x40
  1314	JB   memmove_emit_remainder_encodeBlockAsm
  1315	JMP  memmove_long_emit_remainder_encodeBlockAsm
  1316
  1317one_byte_emit_remainder_encodeBlockAsm:
	// One-byte header: (length-1) << 2. Falls through to the short copy.
  1318	SHLB $0x02, DL
  1319	MOVB DL, (AX)
  1320	ADDQ $0x01, AX
  1321
  1322memmove_emit_remainder_encodeBlockAsm:
	// Short literal copy: SI bytes from (CX) to (AX). Both entry paths bound
	// the length to at most 64 (length-1 < 0x3c, or length-1 < 0x40).
	// DX = AX + length is the output cursor after the copy; BX = length.
  1323	LEAQ (AX)(SI*1), DX
  1324	MOVL SI, BX
  1325
  1326	// genMemMoveShort
	// Dispatch on length. Every case loads the first and the last chunk of the
	// run and stores both; the two stores may overlap, which is how one code
	// path serves a whole range of lengths.
  1327	CMPQ BX, $0x03
  1328	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
  1329	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
  1330	CMPQ BX, $0x08
  1331	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7
  1332	CMPQ BX, $0x10
  1333	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
  1334	CMPQ BX, $0x20
  1335	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
  1336	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
  1337
  1338emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
	// First byte and last byte (the same byte when the length is 1).
  1339	MOVB (CX), SI
  1340	MOVB -1(CX)(BX*1), CL
  1341	MOVB SI, (AX)
  1342	MOVB CL, -1(AX)(BX*1)
  1343	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
  1344
  1345emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
	// Exactly three bytes: one 16-bit move plus one byte.
  1346	MOVW (CX), SI
  1347	MOVB 2(CX), CL
  1348	MOVW SI, (AX)
  1349	MOVB CL, 2(AX)
  1350	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
  1351
  1352emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
	// First four and last four bytes.
  1353	MOVL (CX), SI
  1354	MOVL -4(CX)(BX*1), CX
  1355	MOVL SI, (AX)
  1356	MOVL CX, -4(AX)(BX*1)
  1357	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
  1358
  1359emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
	// First eight and last eight bytes.
  1360	MOVQ (CX), SI
  1361	MOVQ -8(CX)(BX*1), CX
  1362	MOVQ SI, (AX)
  1363	MOVQ CX, -8(AX)(BX*1)
  1364	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
  1365
  1366emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
	// First sixteen and last sixteen bytes, unaligned.
  1367	MOVOU (CX), X0
  1368	MOVOU -16(CX)(BX*1), X1
  1369	MOVOU X0, (AX)
  1370	MOVOU X1, -16(AX)(BX*1)
  1371	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm
  1372
  1373emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
	// First thirty-two and last thirty-two bytes, unaligned. All four loads
	// are issued before any store.
  1374	MOVOU (CX), X0
  1375	MOVOU 16(CX), X1
  1376	MOVOU -32(CX)(BX*1), X2
  1377	MOVOU -16(CX)(BX*1), X3
  1378	MOVOU X0, (AX)
  1379	MOVOU X1, 16(AX)
  1380	MOVOU X2, -32(AX)(BX*1)
  1381	MOVOU X3, -16(AX)(BX*1)
  1382
  1383memmove_end_copy_emit_remainder_encodeBlockAsm:
	// Move the output cursor past the copied run.
  1384	MOVQ DX, AX
  1385	JMP  emit_literal_done_emit_remainder_encodeBlockAsm
  1386
  1387memmove_long_emit_remainder_encodeBlockAsm:
	// Long literal copy: SI bytes from (CX) to (AX).
	// DX = AX + length is the output cursor after the copy; BX = length.
  1388	LEAQ (AX)(SI*1), DX
  1389	MOVL SI, BX
  1390
  1391	// genMemMoveLong
	// The first 32 and last 32 bytes are loaded into X0..X3 now and stored
	// only after the loops, with unaligned stores.
  1392	MOVOU (CX), X0
  1393	MOVOU 16(CX), X1
  1394	MOVOU -32(CX)(BX*1), X2
  1395	MOVOU -16(CX)(BX*1), X3
	// DI = length / 32. R8 = 64 - (AX & 31), so AX+R8-32 is a 32-byte aligned
	// destination address, which is what the MOVOA stores below require.
  1396	MOVQ  BX, DI
  1397	SHRQ  $0x05, DI
  1398	MOVQ  AX, SI
  1399	ANDL  $0x0000001f, SI
  1400	MOVQ  $0x00000040, R8
  1401	SUBQ  SI, R8
	// NOTE(review): DECQ does not write CF, so the JA here (and the JNA that
	// closes the loop below) combine ZF from DECQ with whatever CF the
	// preceding SUBQ/ADDQ left behind. Confirm against the generator that this
	// is the intended loop condition.
  1402	DECQ  DI
  1403	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
  1404	LEAQ  -32(CX)(R8*1), SI
  1405	LEAQ  -32(AX)(R8*1), R9
  1406
  1407emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
	// 32 bytes per iteration through running pointers SI (source) and
	// R9 (aligned destination); R8 is advanced in step.
  1408	MOVOU (SI), X4
  1409	MOVOU 16(SI), X5
  1410	MOVOA X4, (R9)
  1411	MOVOA X5, 16(R9)
  1412	ADDQ  $0x20, R9
  1413	ADDQ  $0x20, SI
  1414	ADDQ  $0x20, R8
  1415	DECQ  DI
  1416	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
  1417
  1418emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
	// 32 bytes per iteration, addressed as base + R8 - 32, repeated while
	// length >= R8.
  1419	MOVOU -32(CX)(R8*1), X4
  1420	MOVOU -16(CX)(R8*1), X5
  1421	MOVOA X4, -32(AX)(R8*1)
  1422	MOVOA X5, -16(AX)(R8*1)
  1423	ADDQ  $0x20, R8
  1424	CMPQ  BX, R8
  1425	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
	// Store the saved head and tail; they cover the bytes before the first
	// aligned address and after the last full aligned chunk.
  1426	MOVOU X0, (AX)
  1427	MOVOU X1, 16(AX)
  1428	MOVOU X2, -32(AX)(BX*1)
  1429	MOVOU X3, -16(AX)(BX*1)
  1430	MOVQ  DX, AX
  1431
  1432emit_literal_done_emit_remainder_encodeBlockAsm:
	// Return value = output cursor - dst base, i.e. the number of bytes written.
  1433	MOVQ dst_base+0(FP), CX
  1434	SUBQ CX, AX
  1435	MOVQ AX, ret+48(FP)
  1436	RET
  1437
  1438// func encodeBlockAsm4MB(dst []byte, src []byte) int
  1439// Requires: BMI, SSE2
  1440TEXT ·encodeBlockAsm4MB(SB), $65560-56
	// Frame layout used by the code below (65560 = 24 + 65536 bytes):
	//    0(SP)  8 bytes  output limit pointer
	//    8(SP)  4 bytes  src_len - 8
	//   12(SP)  4 bytes  index of the first source byte not yet emitted
	//   16(SP)  4 bytes  current repeat offset
	//   20(SP)  4 bytes  saved "next position" of the search loop
	//   24(SP)  65536 bytes, table of 4-byte source indices
	// Registers: AX = output cursor, DX = source base, CX = source index.
  1441	MOVQ dst_base+0(FP), AX
	// Clear the table: 0x200 iterations of 128 bytes = 65536 bytes.
  1442	MOVQ $0x00000200, CX
  1443	LEAQ 24(SP), DX
  1444	PXOR X0, X0
  1445
  1446zero_loop_encodeBlockAsm4MB:
  1447	MOVOU X0, (DX)
  1448	MOVOU X0, 16(DX)
  1449	MOVOU X0, 32(DX)
  1450	MOVOU X0, 48(DX)
  1451	MOVOU X0, 64(DX)
  1452	MOVOU X0, 80(DX)
  1453	MOVOU X0, 96(DX)
  1454	MOVOU X0, 112(DX)
  1455	ADDQ  $0x80, DX
  1456	DECQ  CX
  1457	JNZ   zero_loop_encodeBlockAsm4MB
	// Nothing has been emitted yet.
  1458	MOVL  $0x00000000, 12(SP)
	// 8(SP) = src_len - 8.
	// (SP)  = dst + (src_len - 9 - src_len/32); reaching it makes the
	// function return 0.
  1459	MOVQ  src_len+32(FP), CX
  1460	LEAQ  -9(CX), DX
  1461	LEAQ  -8(CX), BX
  1462	MOVL  BX, 8(SP)
  1463	SHRQ  $0x05, CX
  1464	SUBL  CX, DX
  1465	LEAQ  (AX)(DX*1), DX
  1466	MOVQ  DX, (SP)
	// Start scanning at index 1 with a repeat offset of 1.
  1467	MOVL  $0x00000001, CX
  1468	MOVL  CX, 16(SP)
  1469	MOVQ  src_base+24(FP), DX
  1471search_loop_encodeBlockAsm4MB:
	// One probe of the match search at source index CX.
	// Next position to try if nothing matches: CX + 4 + ((CX - 12(SP)) >> 6),
	// so the stride grows with the amount of not-yet-emitted input.
	// Stop searching once that position reaches 8(SP) (src_len - 8).
  1472	MOVL  CX, BX
  1473	SUBL  12(SP), BX
  1474	SHRL  $0x06, BX
  1475	LEAL  4(CX)(BX*1), BX
  1476	CMPL  BX, 8(SP)
  1477	JAE   emit_remainder_encodeBlockAsm4MB
	// SI = 8 source bytes at CX; the next position is parked in 20(SP).
  1478	MOVQ  (DX)(CX*1), SI
  1479	MOVL  BX, 20(SP)
	// Table index recipe: (value << 16) * 0x0000cf1bbcdcbf9b >> 50, which
	// yields a 14-bit index from six bytes.
	// R9 = index for the bytes at CX, R10 = index for the bytes at CX+1.
  1480	MOVQ  $0x0000cf1bbcdcbf9b, R8
  1481	MOVQ  SI, R9
  1482	MOVQ  SI, R10
  1483	SHRQ  $0x08, R10
  1484	SHLQ  $0x10, R9
  1485	IMULQ R8, R9
  1486	SHRQ  $0x32, R9
  1487	SHLQ  $0x10, R10
  1488	IMULQ R8, R10
  1489	SHRQ  $0x32, R10
	// BX / DI = previous table entries (candidates) for CX and CX+1;
	// the slots are then overwritten with CX and CX+1.
  1490	MOVL  24(SP)(R9*4), BX
  1491	MOVL  24(SP)(R10*4), DI
  1492	MOVL  CX, 24(SP)(R9*4)
  1493	LEAL  1(CX), R9
  1494	MOVL  R9, 24(SP)(R10*4)
	// R9 = table index for the bytes at CX+2 (consumed at no_repeat_found).
  1495	MOVQ  SI, R9
  1496	SHRQ  $0x10, R9
  1497	SHLQ  $0x10, R9
  1498	IMULQ R8, R9
  1499	SHRQ  $0x32, R9
	// Repeat check: compare the four bytes at CX+1 with the four bytes at
	// CX+1 - 16(SP), i.e. one repeat offset back.
  1500	MOVL  CX, R8
  1501	SUBL  16(SP), R8
  1502	MOVL  1(DX)(R8*1), R10
  1503	MOVQ  SI, R8
  1504	SHRQ  $0x08, R8
  1505	CMPL  R8, R10
  1506	JNE   no_repeat_found_encodeBlockAsm4MB
	// Repeat found. SI = CX+1 (start of the match), DI = 12(SP),
	// BX = SI - repeat offset. Skip backward extension when BX is zero.
  1507	LEAL  1(CX), SI
  1508	MOVL  12(SP), DI
  1509	MOVL  SI, BX
  1510	SUBL  16(SP), BX
  1511	JZ    repeat_extend_back_end_encodeBlockAsm4MB
  1512
  1513repeat_extend_back_loop_encodeBlockAsm4MB:
	// Grow the repeat match backwards. SI = match start, BX = the position it
	// is compared against, DI = lower bound for SI. Step both back by one
	// while SI > DI, the bytes just before each position are equal, and BX
	// has not reached zero.
  1514	CMPL SI, DI
  1515	JBE  repeat_extend_back_end_encodeBlockAsm4MB
  1516	MOVB -1(DX)(BX*1), R8
  1517	MOVB -1(DX)(SI*1), R9
  1518	CMPB R8, R9
  1519	JNE  repeat_extend_back_end_encodeBlockAsm4MB
  1520	LEAL -1(SI), SI
  1521	DECL BX
  1522	JNZ  repeat_extend_back_loop_encodeBlockAsm4MB
  1523
  1524repeat_extend_back_end_encodeBlockAsm4MB:
	// Output space check: AX + (SI - 12(SP)) + 4 must stay below the limit
	// stored in (SP); otherwise return 0.
  1525	MOVL SI, BX
  1526	SUBL 12(SP), BX
  1527	LEAQ 4(AX)(BX*1), BX
  1528	CMPQ BX, (SP)
  1529	JB   repeat_dst_size_check_encodeBlockAsm4MB
  1530	MOVQ $0x00000000, ret+48(FP)
  1531	RET
  1532
  1533repeat_dst_size_check_encodeBlockAsm4MB:
  1534	MOVL 12(SP), BX
  1535	CMPL BX, SI
  1536	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm4MB
  1537	MOVL SI, R8
  1538	MOVL SI, 12(SP)
  1539	LEAQ (DX)(BX*1), R9
  1540	SUBL BX, R8
  1541	LEAL -1(R8), BX
  1542	CMPL BX, $0x3c
  1543	JB   one_byte_repeat_emit_encodeBlockAsm4MB
  1544	CMPL BX, $0x00000100
  1545	JB   two_bytes_repeat_emit_encodeBlockAsm4MB
  1546	CMPL BX, $0x00010000
  1547	JB   three_bytes_repeat_emit_encodeBlockAsm4MB
  1548	MOVL BX, R10
  1549	SHRL $0x10, R10
  1550	MOVB $0xf8, (AX)
  1551	MOVW BX, 1(AX)
  1552	MOVB R10, 3(AX)
  1553	ADDQ $0x04, AX
  1554	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
  1555
  1556three_bytes_repeat_emit_encodeBlockAsm4MB:
  1557	MOVB $0xf4, (AX)
  1558	MOVW BX, 1(AX)
  1559	ADDQ $0x03, AX
  1560	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
  1561
  1562two_bytes_repeat_emit_encodeBlockAsm4MB:
  1563	MOVB $0xf0, (AX)
  1564	MOVB BL, 1(AX)
  1565	ADDQ $0x02, AX
  1566	CMPL BX, $0x40
  1567	JB   memmove_repeat_emit_encodeBlockAsm4MB
  1568	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
  1569
  1570one_byte_repeat_emit_encodeBlockAsm4MB:
  1571	SHLB $0x02, BL
  1572	MOVB BL, (AX)
  1573	ADDQ $0x01, AX
  1574
  1575memmove_repeat_emit_encodeBlockAsm4MB:
  1576	LEAQ (AX)(R8*1), BX
  1577
  1578	// genMemMoveShort
  1579	CMPQ R8, $0x08
  1580	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
  1581	CMPQ R8, $0x10
  1582	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
  1583	CMPQ R8, $0x20
  1584	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
  1585	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
  1586
  1587emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
  1588	MOVQ (R9), R10
  1589	MOVQ R10, (AX)
  1590	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
  1591
  1592emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
  1593	MOVQ (R9), R10
  1594	MOVQ -8(R9)(R8*1), R9
  1595	MOVQ R10, (AX)
  1596	MOVQ R9, -8(AX)(R8*1)
  1597	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
  1598
  1599emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
  1600	MOVOU (R9), X0
  1601	MOVOU -16(R9)(R8*1), X1
  1602	MOVOU X0, (AX)
  1603	MOVOU X1, -16(AX)(R8*1)
  1604	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm4MB
  1605
  1606emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
  1607	MOVOU (R9), X0
  1608	MOVOU 16(R9), X1
  1609	MOVOU -32(R9)(R8*1), X2
  1610	MOVOU -16(R9)(R8*1), X3
  1611	MOVOU X0, (AX)
  1612	MOVOU X1, 16(AX)
  1613	MOVOU X2, -32(AX)(R8*1)
  1614	MOVOU X3, -16(AX)(R8*1)
  1615
  1616memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
  1617	MOVQ BX, AX
  1618	JMP  emit_literal_done_repeat_emit_encodeBlockAsm4MB
  1619
  1620memmove_long_repeat_emit_encodeBlockAsm4MB:
  1621	LEAQ (AX)(R8*1), BX
  1622
  1623	// genMemMoveLong
  1624	MOVOU (R9), X0
  1625	MOVOU 16(R9), X1
  1626	MOVOU -32(R9)(R8*1), X2
  1627	MOVOU -16(R9)(R8*1), X3
  1628	MOVQ  R8, R11
  1629	SHRQ  $0x05, R11
  1630	MOVQ  AX, R10
  1631	ANDL  $0x0000001f, R10
  1632	MOVQ  $0x00000040, R12
  1633	SUBQ  R10, R12
  1634	DECQ  R11
  1635	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
  1636	LEAQ  -32(R9)(R12*1), R10
  1637	LEAQ  -32(AX)(R12*1), R13
  1638
  1639emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
  1640	MOVOU (R10), X4
  1641	MOVOU 16(R10), X5
  1642	MOVOA X4, (R13)
  1643	MOVOA X5, 16(R13)
  1644	ADDQ  $0x20, R13
  1645	ADDQ  $0x20, R10
  1646	ADDQ  $0x20, R12
  1647	DECQ  R11
  1648	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
  1649
  1650emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
  1651	MOVOU -32(R9)(R12*1), X4
  1652	MOVOU -16(R9)(R12*1), X5
  1653	MOVOA X4, -32(AX)(R12*1)
  1654	MOVOA X5, -16(AX)(R12*1)
  1655	ADDQ  $0x20, R12
  1656	CMPQ  R8, R12
  1657	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
  1658	MOVOU X0, (AX)
  1659	MOVOU X1, 16(AX)
  1660	MOVOU X2, -32(AX)(R8*1)
  1661	MOVOU X3, -16(AX)(R8*1)
  1662	MOVQ  BX, AX
  1663
  1664emit_literal_done_repeat_emit_encodeBlockAsm4MB:
	// Set up forward extension of the repeat match.
	// CX advances by 5: the search loop tested the four bytes at CX+1, so
	// CX+5 is the first byte not yet compared.
	//   R8 = bytes left in the source from CX
	//   R9 = pointer to source[CX]
	//   BX = pointer to source[CX - 16(SP)] (one repeat offset back)
  1665	ADDL $0x05, CX
  1666	MOVL CX, BX
  1667	SUBL 16(SP), BX
  1668	MOVQ src_len+32(FP), R8
  1669	SUBL CX, R8
  1670	LEAQ (DX)(CX*1), R9
  1671	LEAQ (DX)(BX*1), BX
  1672
  1673	// matchLen
	// R11 counts the bytes that are equal at R9 and BX.
  1674	XORL R11, R11
  1675
  1676matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB:
	// While at least 16 bytes remain, compare two 8-byte words per round.
	// XOR leaves a nonzero value exactly when the words differ.
  1677	CMPL R8, $0x10
  1678	JB   matchlen_match8_repeat_extend_encodeBlockAsm4MB
  1679	MOVQ (R9)(R11*1), R10
  1680	MOVQ 8(R9)(R11*1), R12
  1681	XORQ (BX)(R11*1), R10
  1682	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
  1683	XORQ 8(BX)(R11*1), R12
  1684	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm4MB
  1685	LEAL -16(R8), R8
  1686	LEAL 16(R11), R11
  1687	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB
  1688
  1689matchlen_bsf_16repeat_extend_encodeBlockAsm4MB:
	// Mismatch in the second word: index of the lowest set bit divided by 8
	// is the number of equal bytes in that word; add 8 for the first word.
  1690#ifdef GOAMD64_v3
  1691	TZCNTQ R12, R12
  1692
  1693#else
  1694	BSFQ R12, R12
  1695
  1696#endif
  1697	SARQ $0x03, R12
  1698	LEAL 8(R11)(R12*1), R11
  1699	JMP  repeat_extend_forward_end_encodeBlockAsm4MB
  1700
  1701matchlen_match8_repeat_extend_encodeBlockAsm4MB:
	// Fewer than 16 bytes left: try one 8-byte word, then continue with the
	// 4-byte step (there is no jump back to the 16-byte loop).
  1702	CMPL R8, $0x08
  1703	JB   matchlen_match4_repeat_extend_encodeBlockAsm4MB
  1704	MOVQ (R9)(R11*1), R10
  1705	XORQ (BX)(R11*1), R10
  1706	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
  1707	LEAL -8(R8), R8
  1708	LEAL 8(R11), R11
  1709	JMP  matchlen_match4_repeat_extend_encodeBlockAsm4MB
  1710
  1711matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB:
	// Mismatch within an 8-byte word: lowest set bit index / 8 = equal bytes.
  1712#ifdef GOAMD64_v3
  1713	TZCNTQ R10, R10
  1714
  1715#else
  1716	BSFQ R10, R10
  1717
  1718#endif
  1719	SARQ $0x03, R10
  1720	LEAL (R11)(R10*1), R11
  1721	JMP  repeat_extend_forward_end_encodeBlockAsm4MB
  1722
  1723matchlen_match4_repeat_extend_encodeBlockAsm4MB:
	// Four-byte step; on a mismatch the 2- and 1-byte steps below narrow it
	// down.
  1724	CMPL R8, $0x04
  1725	JB   matchlen_match2_repeat_extend_encodeBlockAsm4MB
  1726	MOVL (R9)(R11*1), R10
  1727	CMPL (BX)(R11*1), R10
  1728	JNE  matchlen_match2_repeat_extend_encodeBlockAsm4MB
  1729	LEAL -4(R8), R8
  1730	LEAL 4(R11), R11
  1731
  1732matchlen_match2_repeat_extend_encodeBlockAsm4MB:
	// Exactly one byte left goes straight to the 1-byte step, zero bytes left
	// ends the scan; otherwise compare two bytes.
  1733	CMPL R8, $0x01
  1734	JE   matchlen_match1_repeat_extend_encodeBlockAsm4MB
  1735	JB   repeat_extend_forward_end_encodeBlockAsm4MB
  1736	MOVW (R9)(R11*1), R10
  1737	CMPW (BX)(R11*1), R10
  1738	JNE  matchlen_match1_repeat_extend_encodeBlockAsm4MB
  1739	LEAL 2(R11), R11
  1740	SUBL $0x02, R8
  1741	JZ   repeat_extend_forward_end_encodeBlockAsm4MB
  1742
  1743matchlen_match1_repeat_extend_encodeBlockAsm4MB:
	// Final single-byte comparison.
  1744	MOVB (R9)(R11*1), R10
  1745	CMPB (BX)(R11*1), R10
  1746	JNE  repeat_extend_forward_end_encodeBlockAsm4MB
  1747	LEAL 1(R11), R11
  1749repeat_extend_forward_end_encodeBlockAsm4MB:
  1750	ADDL  R11, CX
  1751	MOVL  CX, BX
  1752	SUBL  SI, BX
  1753	MOVL  16(SP), SI
  1754	TESTL DI, DI
  1755	JZ    repeat_as_copy_encodeBlockAsm4MB
  1756
  1757	// emitRepeat
  1758	MOVL BX, DI
  1759	LEAL -4(BX), BX
  1760	CMPL DI, $0x08
  1761	JBE  repeat_two_match_repeat_encodeBlockAsm4MB
  1762	CMPL DI, $0x0c
  1763	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
  1764	CMPL SI, $0x00000800
  1765	JB   repeat_two_offset_match_repeat_encodeBlockAsm4MB
  1766
  1767cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
  1768	CMPL BX, $0x00000104
  1769	JB   repeat_three_match_repeat_encodeBlockAsm4MB
  1770	CMPL BX, $0x00010100
  1771	JB   repeat_four_match_repeat_encodeBlockAsm4MB
  1772	LEAL -65536(BX), BX
  1773	MOVL BX, SI
  1774	MOVW $0x001d, (AX)
  1775	MOVW BX, 2(AX)
  1776	SARL $0x10, SI
  1777	MOVB SI, 4(AX)
  1778	ADDQ $0x05, AX
  1779	JMP  repeat_end_emit_encodeBlockAsm4MB
  1780
  1781repeat_four_match_repeat_encodeBlockAsm4MB:
  1782	LEAL -256(BX), BX
  1783	MOVW $0x0019, (AX)
  1784	MOVW BX, 2(AX)
  1785	ADDQ $0x04, AX
  1786	JMP  repeat_end_emit_encodeBlockAsm4MB
  1787
  1788repeat_three_match_repeat_encodeBlockAsm4MB:
  1789	LEAL -4(BX), BX
  1790	MOVW $0x0015, (AX)
  1791	MOVB BL, 2(AX)
  1792	ADDQ $0x03, AX
  1793	JMP  repeat_end_emit_encodeBlockAsm4MB
  1794
  1795repeat_two_match_repeat_encodeBlockAsm4MB:
  1796	SHLL $0x02, BX
  1797	ORL  $0x01, BX
  1798	MOVW BX, (AX)
  1799	ADDQ $0x02, AX
  1800	JMP  repeat_end_emit_encodeBlockAsm4MB
  1801
  1802repeat_two_offset_match_repeat_encodeBlockAsm4MB:
  1803	XORQ DI, DI
  1804	LEAL 1(DI)(BX*4), BX
  1805	MOVB SI, 1(AX)
  1806	SARL $0x08, SI
  1807	SHLL $0x05, SI
  1808	ORL  SI, BX
  1809	MOVB BL, (AX)
  1810	ADDQ $0x02, AX
  1811	JMP  repeat_end_emit_encodeBlockAsm4MB
  1812
  1813repeat_as_copy_encodeBlockAsm4MB:
  1814	// emitCopy
  1815	CMPL SI, $0x00010000
  1816	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
  1817	CMPL BX, $0x40
  1818	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
  1819	MOVB $0xff, (AX)
  1820	MOVL SI, 1(AX)
  1821	LEAL -64(BX), BX
  1822	ADDQ $0x05, AX
  1823	CMPL BX, $0x04
  1824	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
  1825
  1826	// emitRepeat
  1827	MOVL BX, DI
  1828	LEAL -4(BX), BX
  1829	CMPL DI, $0x08
  1830	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1831	CMPL DI, $0x0c
  1832	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1833	CMPL SI, $0x00000800
  1834	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1835
  1836cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1837	CMPL BX, $0x00000104
  1838	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1839	CMPL BX, $0x00010100
  1840	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1841	LEAL -65536(BX), BX
  1842	MOVL BX, SI
  1843	MOVW $0x001d, (AX)
  1844	MOVW BX, 2(AX)
  1845	SARL $0x10, SI
  1846	MOVB SI, 4(AX)
  1847	ADDQ $0x05, AX
  1848	JMP  repeat_end_emit_encodeBlockAsm4MB
  1849
  1850repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1851	LEAL -256(BX), BX
  1852	MOVW $0x0019, (AX)
  1853	MOVW BX, 2(AX)
  1854	ADDQ $0x04, AX
  1855	JMP  repeat_end_emit_encodeBlockAsm4MB
  1856
  1857repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1858	LEAL -4(BX), BX
  1859	MOVW $0x0015, (AX)
  1860	MOVB BL, 2(AX)
  1861	ADDQ $0x03, AX
  1862	JMP  repeat_end_emit_encodeBlockAsm4MB
  1863
  1864repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1865	SHLL $0x02, BX
  1866	ORL  $0x01, BX
  1867	MOVW BX, (AX)
  1868	ADDQ $0x02, AX
  1869	JMP  repeat_end_emit_encodeBlockAsm4MB
  1870
  1871repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1872	XORQ DI, DI
  1873	LEAL 1(DI)(BX*4), BX
  1874	MOVB SI, 1(AX)
  1875	SARL $0x08, SI
  1876	SHLL $0x05, SI
  1877	ORL  SI, BX
  1878	MOVB BL, (AX)
  1879	ADDQ $0x02, AX
  1880	JMP  repeat_end_emit_encodeBlockAsm4MB
  1881
  1882four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
  1883	TESTL BX, BX
  1884	JZ    repeat_end_emit_encodeBlockAsm4MB
  1885	XORL  DI, DI
  1886	LEAL  -1(DI)(BX*4), BX
  1887	MOVB  BL, (AX)
  1888	MOVL  SI, 1(AX)
  1889	ADDQ  $0x05, AX
  1890	JMP   repeat_end_emit_encodeBlockAsm4MB
  1891
  1892two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
	// Copy emission for an offset SI below 0x10000 and a length BX.
	// Lengths up to 0x40 take the short path; longer ones with
	// SI >= 0x800 take the long_offset_short path.
  1893	CMPL BX, $0x40
  1894	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
  1895	CMPL SI, $0x00000800
  1896	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm4MB
	// Length > 0x40 and offset < 0x800: write a two-byte operation with
	// byte 0 = 0x11 | ((SI>>8)<<5) and byte 1 = low byte of SI, then take 8
	// off the length. SI is destroyed by the shifts.
	// NOTE(review): presumably this is a length-8 copy in the two-byte copy
	// encoding - confirm against the format description.
  1897	MOVL $0x00000001, DI
  1898	LEAL 16(DI), DI
  1899	MOVB SI, 1(AX)
  1900	SHRL $0x08, SI
  1901	SHLL $0x05, SI
  1902	ORL  SI, DI
  1903	MOVB DI, (AX)
  1904	ADDQ $0x02, AX
  1905	SUBL $0x08, BX
  1906
  1907	// emitRepeat
	// The remaining length, less 4, goes straight to the wide repeat forms.
  1908	LEAL -4(BX), BX
  1909	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
	// NOTE(review): the eight instructions below follow an unconditional JMP
	// and carry no label, so nothing in this function can branch to them; they
	// look like unreachable generator residue. They would also test SI after
	// it has been shifted above. Any clean-up belongs in the generator, since
	// this file is generated.
  1910	MOVL BX, DI
  1911	LEAL -4(BX), BX
  1912	CMPL DI, $0x08
  1913	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1914	CMPL DI, $0x0c
  1915	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1916	CMPL SI, $0x00000800
  1917	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1918
  1919cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1920	CMPL BX, $0x00000104
  1921	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1922	CMPL BX, $0x00010100
  1923	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1924	LEAL -65536(BX), BX
  1925	MOVL BX, SI
  1926	MOVW $0x001d, (AX)
  1927	MOVW BX, 2(AX)
  1928	SARL $0x10, SI
  1929	MOVB SI, 4(AX)
  1930	ADDQ $0x05, AX
  1931	JMP  repeat_end_emit_encodeBlockAsm4MB
  1932
  1933repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1934	LEAL -256(BX), BX
  1935	MOVW $0x0019, (AX)
  1936	MOVW BX, 2(AX)
  1937	ADDQ $0x04, AX
  1938	JMP  repeat_end_emit_encodeBlockAsm4MB
  1939
  1940repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1941	LEAL -4(BX), BX
  1942	MOVW $0x0015, (AX)
  1943	MOVB BL, 2(AX)
  1944	ADDQ $0x03, AX
  1945	JMP  repeat_end_emit_encodeBlockAsm4MB
  1946
  1947repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1948	SHLL $0x02, BX
  1949	ORL  $0x01, BX
  1950	MOVW BX, (AX)
  1951	ADDQ $0x02, AX
  1952	JMP  repeat_end_emit_encodeBlockAsm4MB
  1953
  1954repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1955	XORQ DI, DI
  1956	LEAL 1(DI)(BX*4), BX
  1957	MOVB SI, 1(AX)
  1958	SARL $0x08, SI
  1959	SHLL $0x05, SI
  1960	ORL  SI, BX
  1961	MOVB BL, (AX)
  1962	ADDQ $0x02, AX
  1963	JMP  repeat_end_emit_encodeBlockAsm4MB
  1964
  1965long_offset_short_repeat_as_copy_encodeBlockAsm4MB:
  1966	MOVB $0xee, (AX)
  1967	MOVW SI, 1(AX)
  1968	LEAL -60(BX), BX
  1969	ADDQ $0x03, AX
  1970
  1971	// emitRepeat
  1972	MOVL BX, DI
  1973	LEAL -4(BX), BX
  1974	CMPL DI, $0x08
  1975	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1976	CMPL DI, $0x0c
  1977	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1978	CMPL SI, $0x00000800
  1979	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1980
  1981cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  1982	CMPL BX, $0x00000104
  1983	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1984	CMPL BX, $0x00010100
  1985	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1986	LEAL -65536(BX), BX
  1987	MOVL BX, SI
  1988	MOVW $0x001d, (AX)
  1989	MOVW BX, 2(AX)
  1990	SARL $0x10, SI
  1991	MOVB SI, 4(AX)
  1992	ADDQ $0x05, AX
  1993	JMP  repeat_end_emit_encodeBlockAsm4MB
  1994
  1995repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  1996	LEAL -256(BX), BX
  1997	MOVW $0x0019, (AX)
  1998	MOVW BX, 2(AX)
  1999	ADDQ $0x04, AX
  2000	JMP  repeat_end_emit_encodeBlockAsm4MB
  2001
  2002repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  2003	LEAL -4(BX), BX
  2004	MOVW $0x0015, (AX)
  2005	MOVB BL, 2(AX)
  2006	ADDQ $0x03, AX
  2007	JMP  repeat_end_emit_encodeBlockAsm4MB
  2008
  2009repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  2010	SHLL $0x02, BX
  2011	ORL  $0x01, BX
  2012	MOVW BX, (AX)
  2013	ADDQ $0x02, AX
  2014	JMP  repeat_end_emit_encodeBlockAsm4MB
  2015
  2016repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  2017	XORQ DI, DI
  2018	LEAL 1(DI)(BX*4), BX
  2019	MOVB SI, 1(AX)
  2020	SARL $0x08, SI
  2021	SHLL $0x05, SI
  2022	ORL  SI, BX
  2023	MOVB BL, (AX)
  2024	ADDQ $0x02, AX
  2025	JMP  repeat_end_emit_encodeBlockAsm4MB
  2026
  2027two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
  2028	MOVL BX, DI
  2029	SHLL $0x02, DI
  2030	CMPL BX, $0x0c
  2031	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
  2032	CMPL SI, $0x00000800
  2033	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
  2034	LEAL -15(DI), DI
  2035	MOVB SI, 1(AX)
  2036	SHRL $0x08, SI
  2037	SHLL $0x05, SI
  2038	ORL  SI, DI
  2039	MOVB DI, (AX)
  2040	ADDQ $0x02, AX
  2041	JMP  repeat_end_emit_encodeBlockAsm4MB
  2042
  2043emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
  2044	LEAL -2(DI), DI
  2045	MOVB DI, (AX)
  2046	MOVW SI, 1(AX)
  2047	ADDQ $0x03, AX
  2048
  2049repeat_end_emit_encodeBlockAsm4MB:
	// A repeat/copy has been written: everything before CX is now emitted.
  2050	MOVL CX, 12(SP)
  2051	JMP  search_loop_encodeBlockAsm4MB
  2052
  2053no_repeat_found_encodeBlockAsm4MB:
	// State from the search loop: SI = 8 source bytes at CX, BX / DI = old
	// table entries for CX and CX+1, R9 = table index for the bytes at CX+2.
	// Candidate 1: the four bytes at the old entry for CX equal those at CX.
  2054	CMPL (DX)(BX*1), SI
  2055	JEQ  candidate_match_encodeBlockAsm4MB
	// Candidate 2: shift SI to the bytes at CX+1 and test the old entry for
	// CX+1. BX is reloaded with the table entry for CX+2 before that slot is
	// overwritten; R8 = CX+2.
  2056	SHRQ $0x08, SI
  2057	MOVL 24(SP)(R9*4), BX
  2058	LEAL 2(CX), R8
  2059	CMPL (DX)(DI*1), SI
  2060	JEQ  candidate2_match_encodeBlockAsm4MB
	// Candidate 3: record CX+2 in its slot, shift SI to the bytes at CX+2 and
	// test the entry read just above.
  2061	MOVL R8, 24(SP)(R9*4)
  2062	SHRQ $0x08, SI
  2063	CMPL (DX)(BX*1), SI
  2064	JEQ  candidate3_match_encodeBlockAsm4MB
	// No candidate matched: continue at the position saved in 20(SP).
  2065	MOVL 20(SP), CX
  2066	JMP  search_loop_encodeBlockAsm4MB
  2067
  2068candidate3_match_encodeBlockAsm4MB:
	// Match starts at CX+2; BX already holds its candidate.
  2069	ADDL $0x02, CX
  2070	JMP  candidate_match_encodeBlockAsm4MB
  2071
  2072candidate2_match_encodeBlockAsm4MB:
	// Match starts at CX+1 with candidate DI. The CX+2 slot is still updated
	// here because this path left before the store on the candidate-3 path.
  2073	MOVL R8, 24(SP)(R9*4)
  2074	INCL CX
  2075	MOVL DI, BX
  2076
  2077candidate_match_encodeBlockAsm4MB:
	// CX = start of the match, BX = candidate index, SI = 12(SP) (first source
	// byte not yet emitted). A candidate at index 0 cannot be extended back.
  2078	MOVL  12(SP), SI
  2079	TESTL BX, BX
  2080	JZ    match_extend_back_end_encodeBlockAsm4MB
  2081
  2082match_extend_back_loop_encodeBlockAsm4MB:
	// Step CX and BX back by one while CX > SI, the bytes just before both
	// positions are equal, and BX has not reached zero.
  2083	CMPL CX, SI
  2084	JBE  match_extend_back_end_encodeBlockAsm4MB
  2085	MOVB -1(DX)(BX*1), DI
  2086	MOVB -1(DX)(CX*1), R8
  2087	CMPB DI, R8
  2088	JNE  match_extend_back_end_encodeBlockAsm4MB
  2089	LEAL -1(CX), CX
  2090	DECL BX
  2091	JZ   match_extend_back_end_encodeBlockAsm4MB
  2092	JMP  match_extend_back_loop_encodeBlockAsm4MB
  2093
  2094match_extend_back_end_encodeBlockAsm4MB:
	// Output space check: AX + (CX - 12(SP)) + 4 must stay below the limit
	// stored in (SP); otherwise return 0.
  2095	MOVL CX, SI
  2096	SUBL 12(SP), SI
  2097	LEAQ 4(AX)(SI*1), SI
  2098	CMPQ SI, (SP)
  2099	JB   match_dst_size_check_encodeBlockAsm4MB
  2100	MOVQ $0x00000000, ret+48(FP)
  2101	RET
  2102
  2103match_dst_size_check_encodeBlockAsm4MB:
  2104	MOVL CX, SI
  2105	MOVL 12(SP), DI
  2106	CMPL DI, SI
  2107	JEQ  emit_literal_done_match_emit_encodeBlockAsm4MB
  2108	MOVL SI, R8
  2109	MOVL SI, 12(SP)
  2110	LEAQ (DX)(DI*1), SI
  2111	SUBL DI, R8
  2112	LEAL -1(R8), DI
  2113	CMPL DI, $0x3c
  2114	JB   one_byte_match_emit_encodeBlockAsm4MB
  2115	CMPL DI, $0x00000100
  2116	JB   two_bytes_match_emit_encodeBlockAsm4MB
  2117	CMPL DI, $0x00010000
  2118	JB   three_bytes_match_emit_encodeBlockAsm4MB
  2119	MOVL DI, R9
  2120	SHRL $0x10, R9
  2121	MOVB $0xf8, (AX)
  2122	MOVW DI, 1(AX)
  2123	MOVB R9, 3(AX)
  2124	ADDQ $0x04, AX
  2125	JMP  memmove_long_match_emit_encodeBlockAsm4MB
  2126
  2127three_bytes_match_emit_encodeBlockAsm4MB:
  2128	MOVB $0xf4, (AX)
  2129	MOVW DI, 1(AX)
  2130	ADDQ $0x03, AX
  2131	JMP  memmove_long_match_emit_encodeBlockAsm4MB
  2132
  2133two_bytes_match_emit_encodeBlockAsm4MB:
  2134	MOVB $0xf0, (AX)
  2135	MOVB DI, 1(AX)
  2136	ADDQ $0x02, AX
  2137	CMPL DI, $0x40
  2138	JB   memmove_match_emit_encodeBlockAsm4MB
  2139	JMP  memmove_long_match_emit_encodeBlockAsm4MB
  2140
  2141one_byte_match_emit_encodeBlockAsm4MB:
  2142	SHLB $0x02, DI
  2143	MOVB DI, (AX)
  2144	ADDQ $0x01, AX
  2145
  2146memmove_match_emit_encodeBlockAsm4MB:
  2147	LEAQ (AX)(R8*1), DI
  2148
  2149	// genMemMoveShort
  2150	CMPQ R8, $0x08
  2151	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
  2152	CMPQ R8, $0x10
  2153	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
  2154	CMPQ R8, $0x20
  2155	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
  2156	JMP  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
  2157
  2158emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
  2159	MOVQ (SI), R9
  2160	MOVQ R9, (AX)
  2161	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
  2162
  2163emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
  2164	MOVQ (SI), R9
  2165	MOVQ -8(SI)(R8*1), SI
  2166	MOVQ R9, (AX)
  2167	MOVQ SI, -8(AX)(R8*1)
  2168	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
  2169
  2170emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
  2171	MOVOU (SI), X0
  2172	MOVOU -16(SI)(R8*1), X1
  2173	MOVOU X0, (AX)
  2174	MOVOU X1, -16(AX)(R8*1)
  2175	JMP   memmove_end_copy_match_emit_encodeBlockAsm4MB
  2176
  2177emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
  2178	MOVOU (SI), X0
  2179	MOVOU 16(SI), X1
  2180	MOVOU -32(SI)(R8*1), X2
  2181	MOVOU -16(SI)(R8*1), X3
  2182	MOVOU X0, (AX)
  2183	MOVOU X1, 16(AX)
  2184	MOVOU X2, -32(AX)(R8*1)
  2185	MOVOU X3, -16(AX)(R8*1)
  2186
  2187memmove_end_copy_match_emit_encodeBlockAsm4MB:
  2188	MOVQ DI, AX
  2189	JMP  emit_literal_done_match_emit_encodeBlockAsm4MB
  2190
  2191memmove_long_match_emit_encodeBlockAsm4MB:
  2192	LEAQ (AX)(R8*1), DI
  2193
  2194	// genMemMoveLong
  2195	MOVOU (SI), X0
  2196	MOVOU 16(SI), X1
  2197	MOVOU -32(SI)(R8*1), X2
  2198	MOVOU -16(SI)(R8*1), X3
  2199	MOVQ  R8, R10
  2200	SHRQ  $0x05, R10
  2201	MOVQ  AX, R9
  2202	ANDL  $0x0000001f, R9
  2203	MOVQ  $0x00000040, R11
  2204	SUBQ  R9, R11
  2205	DECQ  R10
  2206	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
  2207	LEAQ  -32(SI)(R11*1), R9
  2208	LEAQ  -32(AX)(R11*1), R12
  2209
  2210emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
  2211	MOVOU (R9), X4
  2212	MOVOU 16(R9), X5
  2213	MOVOA X4, (R12)
  2214	MOVOA X5, 16(R12)
  2215	ADDQ  $0x20, R12
  2216	ADDQ  $0x20, R9
  2217	ADDQ  $0x20, R11
  2218	DECQ  R10
  2219	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
  2220
  2221emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
  2222	MOVOU -32(SI)(R11*1), X4
  2223	MOVOU -16(SI)(R11*1), X5
  2224	MOVOA X4, -32(AX)(R11*1)
  2225	MOVOA X5, -16(AX)(R11*1)
  2226	ADDQ  $0x20, R11
  2227	CMPQ  R8, R11
  2228	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
  2229	MOVOU X0, (AX)
  2230	MOVOU X1, 16(AX)
  2231	MOVOU X2, -32(AX)(R8*1)
  2232	MOVOU X3, -16(AX)(R8*1)
  2233	MOVQ  DI, AX
  2234
  2235emit_literal_done_match_emit_encodeBlockAsm4MB:
  2236match_nolit_loop_encodeBlockAsm4MB:
  2237	MOVL CX, SI
  2238	SUBL BX, SI
  2239	MOVL SI, 16(SP)
  2240	ADDL $0x04, CX
  2241	ADDL $0x04, BX
  2242	MOVQ src_len+32(FP), SI
  2243	SUBL CX, SI
  2244	LEAQ (DX)(CX*1), DI
  2245	LEAQ (DX)(BX*1), BX
  2246
  2247	// matchLen
  2248	XORL R9, R9
  2249
  2250matchlen_loopback_16_match_nolit_encodeBlockAsm4MB:
  2251	CMPL SI, $0x10
  2252	JB   matchlen_match8_match_nolit_encodeBlockAsm4MB
  2253	MOVQ (DI)(R9*1), R8
  2254	MOVQ 8(DI)(R9*1), R10
  2255	XORQ (BX)(R9*1), R8
  2256	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
  2257	XORQ 8(BX)(R9*1), R10
  2258	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm4MB
  2259	LEAL -16(SI), SI
  2260	LEAL 16(R9), R9
  2261	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm4MB
  2262
  2263matchlen_bsf_16match_nolit_encodeBlockAsm4MB:
  2264#ifdef GOAMD64_v3
  2265	TZCNTQ R10, R10
  2266
  2267#else
  2268	BSFQ R10, R10
  2269
  2270#endif
  2271	SARQ $0x03, R10
  2272	LEAL 8(R9)(R10*1), R9
  2273	JMP  match_nolit_end_encodeBlockAsm4MB
  2274
  2275matchlen_match8_match_nolit_encodeBlockAsm4MB:
  2276	CMPL SI, $0x08
  2277	JB   matchlen_match4_match_nolit_encodeBlockAsm4MB
  2278	MOVQ (DI)(R9*1), R8
  2279	XORQ (BX)(R9*1), R8
  2280	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
  2281	LEAL -8(SI), SI
  2282	LEAL 8(R9), R9
  2283	JMP  matchlen_match4_match_nolit_encodeBlockAsm4MB
  2284
  2285matchlen_bsf_8_match_nolit_encodeBlockAsm4MB:
  2286#ifdef GOAMD64_v3
  2287	TZCNTQ R8, R8
  2288
  2289#else
  2290	BSFQ R8, R8
  2291
  2292#endif
  2293	SARQ $0x03, R8
  2294	LEAL (R9)(R8*1), R9
  2295	JMP  match_nolit_end_encodeBlockAsm4MB
  2296
  2297matchlen_match4_match_nolit_encodeBlockAsm4MB:
  2298	CMPL SI, $0x04
  2299	JB   matchlen_match2_match_nolit_encodeBlockAsm4MB
  2300	MOVL (DI)(R9*1), R8
  2301	CMPL (BX)(R9*1), R8
  2302	JNE  matchlen_match2_match_nolit_encodeBlockAsm4MB
  2303	LEAL -4(SI), SI
  2304	LEAL 4(R9), R9
  2305
  2306matchlen_match2_match_nolit_encodeBlockAsm4MB:
  2307	CMPL SI, $0x01
  2308	JE   matchlen_match1_match_nolit_encodeBlockAsm4MB
  2309	JB   match_nolit_end_encodeBlockAsm4MB
  2310	MOVW (DI)(R9*1), R8
  2311	CMPW (BX)(R9*1), R8
  2312	JNE  matchlen_match1_match_nolit_encodeBlockAsm4MB
  2313	LEAL 2(R9), R9
  2314	SUBL $0x02, SI
  2315	JZ   match_nolit_end_encodeBlockAsm4MB
  2316
  2317matchlen_match1_match_nolit_encodeBlockAsm4MB:
  2318	MOVB (DI)(R9*1), R8
  2319	CMPB (BX)(R9*1), R8
  2320	JNE  match_nolit_end_encodeBlockAsm4MB
  2321	LEAL 1(R9), R9
  2322
  2323match_nolit_end_encodeBlockAsm4MB:
  2324	ADDL R9, CX
  2325	MOVL 16(SP), BX
  2326	ADDL $0x04, R9
  2327	MOVL CX, 12(SP)
  2328
  2329	// emitCopy
  2330	CMPL BX, $0x00010000
  2331	JB   two_byte_offset_match_nolit_encodeBlockAsm4MB
  2332	CMPL R9, $0x40
  2333	JBE  four_bytes_remain_match_nolit_encodeBlockAsm4MB
  2334	MOVB $0xff, (AX)
  2335	MOVL BX, 1(AX)
  2336	LEAL -64(R9), R9
  2337	ADDQ $0x05, AX
  2338	CMPL R9, $0x04
  2339	JB   four_bytes_remain_match_nolit_encodeBlockAsm4MB
  2340
  2341	// emitRepeat
  2342	MOVL R9, SI
  2343	LEAL -4(R9), R9
  2344	CMPL SI, $0x08
  2345	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
  2346	CMPL SI, $0x0c
  2347	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
  2348	CMPL BX, $0x00000800
  2349	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
  2350
  2351cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
  2352	CMPL R9, $0x00000104
  2353	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
  2354	CMPL R9, $0x00010100
  2355	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
  2356	LEAL -65536(R9), R9
  2357	MOVL R9, BX
  2358	MOVW $0x001d, (AX)
  2359	MOVW R9, 2(AX)
  2360	SARL $0x10, BX
  2361	MOVB BL, 4(AX)
  2362	ADDQ $0x05, AX
  2363	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2364
  2365repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
  2366	LEAL -256(R9), R9
  2367	MOVW $0x0019, (AX)
  2368	MOVW R9, 2(AX)
  2369	ADDQ $0x04, AX
  2370	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2371
  2372repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
  2373	LEAL -4(R9), R9
  2374	MOVW $0x0015, (AX)
  2375	MOVB R9, 2(AX)
  2376	ADDQ $0x03, AX
  2377	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2378
  2379repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
  2380	SHLL $0x02, R9
  2381	ORL  $0x01, R9
  2382	MOVW R9, (AX)
  2383	ADDQ $0x02, AX
  2384	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2385
  2386repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
  2387	XORQ SI, SI
  2388	LEAL 1(SI)(R9*4), R9
  2389	MOVB BL, 1(AX)
  2390	SARL $0x08, BX
  2391	SHLL $0x05, BX
  2392	ORL  BX, R9
  2393	MOVB R9, (AX)
  2394	ADDQ $0x02, AX
  2395	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2396
  2397four_bytes_remain_match_nolit_encodeBlockAsm4MB:
  2398	TESTL R9, R9
  2399	JZ    match_nolit_emitcopy_end_encodeBlockAsm4MB
  2400	XORL  SI, SI
  2401	LEAL  -1(SI)(R9*4), R9
  2402	MOVB  R9, (AX)
  2403	MOVL  BX, 1(AX)
  2404	ADDQ  $0x05, AX
  2405	JMP   match_nolit_emitcopy_end_encodeBlockAsm4MB
  2406
  2407two_byte_offset_match_nolit_encodeBlockAsm4MB:
  2408	CMPL R9, $0x40
  2409	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm4MB
  2410	CMPL BX, $0x00000800
  2411	JAE  long_offset_short_match_nolit_encodeBlockAsm4MB
  2412	MOVL $0x00000001, SI
  2413	LEAL 16(SI), SI
  2414	MOVB BL, 1(AX)
  2415	SHRL $0x08, BX
  2416	SHLL $0x05, BX
  2417	ORL  BX, SI
  2418	MOVB SI, (AX)
  2419	ADDQ $0x02, AX
  2420	SUBL $0x08, R9
  2421
  2422	// emitRepeat
  2423	LEAL -4(R9), R9
  2424	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2425	MOVL R9, SI
  2426	LEAL -4(R9), R9
  2427	CMPL SI, $0x08
  2428	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2429	CMPL SI, $0x0c
  2430	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2431	CMPL BX, $0x00000800
  2432	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2433
  2434cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2435	CMPL R9, $0x00000104
  2436	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2437	CMPL R9, $0x00010100
  2438	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2439	LEAL -65536(R9), R9
  2440	MOVL R9, BX
  2441	MOVW $0x001d, (AX)
  2442	MOVW R9, 2(AX)
  2443	SARL $0x10, BX
  2444	MOVB BL, 4(AX)
  2445	ADDQ $0x05, AX
  2446	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2447
  2448repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2449	LEAL -256(R9), R9
  2450	MOVW $0x0019, (AX)
  2451	MOVW R9, 2(AX)
  2452	ADDQ $0x04, AX
  2453	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2454
  2455repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2456	LEAL -4(R9), R9
  2457	MOVW $0x0015, (AX)
  2458	MOVB R9, 2(AX)
  2459	ADDQ $0x03, AX
  2460	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2461
  2462repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2463	SHLL $0x02, R9
  2464	ORL  $0x01, R9
  2465	MOVW R9, (AX)
  2466	ADDQ $0x02, AX
  2467	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2468
  2469repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2470	XORQ SI, SI
  2471	LEAL 1(SI)(R9*4), R9
  2472	MOVB BL, 1(AX)
  2473	SARL $0x08, BX
  2474	SHLL $0x05, BX
  2475	ORL  BX, R9
  2476	MOVB R9, (AX)
  2477	ADDQ $0x02, AX
  2478	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2479
  2480long_offset_short_match_nolit_encodeBlockAsm4MB:
  2481	MOVB $0xee, (AX)
  2482	MOVW BX, 1(AX)
  2483	LEAL -60(R9), R9
  2484	ADDQ $0x03, AX
  2485
  2486	// emitRepeat
  2487	MOVL R9, SI
  2488	LEAL -4(R9), R9
  2489	CMPL SI, $0x08
  2490	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2491	CMPL SI, $0x0c
  2492	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2493	CMPL BX, $0x00000800
  2494	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2495
  2496cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2497	CMPL R9, $0x00000104
  2498	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2499	CMPL R9, $0x00010100
  2500	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2501	LEAL -65536(R9), R9
  2502	MOVL R9, BX
  2503	MOVW $0x001d, (AX)
  2504	MOVW R9, 2(AX)
  2505	SARL $0x10, BX
  2506	MOVB BL, 4(AX)
  2507	ADDQ $0x05, AX
  2508	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2509
  2510repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2511	LEAL -256(R9), R9
  2512	MOVW $0x0019, (AX)
  2513	MOVW R9, 2(AX)
  2514	ADDQ $0x04, AX
  2515	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2516
  2517repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2518	LEAL -4(R9), R9
  2519	MOVW $0x0015, (AX)
  2520	MOVB R9, 2(AX)
  2521	ADDQ $0x03, AX
  2522	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2523
  2524repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2525	SHLL $0x02, R9
  2526	ORL  $0x01, R9
  2527	MOVW R9, (AX)
  2528	ADDQ $0x02, AX
  2529	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2530
  2531repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2532	XORQ SI, SI
  2533	LEAL 1(SI)(R9*4), R9
  2534	MOVB BL, 1(AX)
  2535	SARL $0x08, BX
  2536	SHLL $0x05, BX
  2537	ORL  BX, R9
  2538	MOVB R9, (AX)
  2539	ADDQ $0x02, AX
  2540	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2541
  2542two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
  2543	MOVL R9, SI
  2544	SHLL $0x02, SI
  2545	CMPL R9, $0x0c
  2546	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
  2547	CMPL BX, $0x00000800
  2548	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
  2549	LEAL -15(SI), SI
  2550	MOVB BL, 1(AX)
  2551	SHRL $0x08, BX
  2552	SHLL $0x05, BX
  2553	ORL  BX, SI
  2554	MOVB SI, (AX)
  2555	ADDQ $0x02, AX
  2556	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
  2557
  2558emit_copy_three_match_nolit_encodeBlockAsm4MB:
  2559	LEAL -2(SI), SI
  2560	MOVB SI, (AX)
  2561	MOVW BX, 1(AX)
  2562	ADDQ $0x03, AX
  2563
  2564match_nolit_emitcopy_end_encodeBlockAsm4MB:
  2565	CMPL CX, 8(SP)
  2566	JAE  emit_remainder_encodeBlockAsm4MB
  2567	MOVQ -2(DX)(CX*1), SI
  2568	CMPQ AX, (SP)
  2569	JB   match_nolit_dst_ok_encodeBlockAsm4MB
  2570	MOVQ $0x00000000, ret+48(FP)
  2571	RET
  2572
  2573match_nolit_dst_ok_encodeBlockAsm4MB:
  2574	MOVQ  $0x0000cf1bbcdcbf9b, R8
  2575	MOVQ  SI, DI
  2576	SHRQ  $0x10, SI
  2577	MOVQ  SI, BX
  2578	SHLQ  $0x10, DI
  2579	IMULQ R8, DI
  2580	SHRQ  $0x32, DI
  2581	SHLQ  $0x10, BX
  2582	IMULQ R8, BX
  2583	SHRQ  $0x32, BX
  2584	LEAL  -2(CX), R8
  2585	LEAQ  24(SP)(BX*4), R9
  2586	MOVL  (R9), BX
  2587	MOVL  R8, 24(SP)(DI*4)
  2588	MOVL  CX, (R9)
  2589	CMPL  (DX)(BX*1), SI
  2590	JEQ   match_nolit_loop_encodeBlockAsm4MB
  2591	INCL  CX
  2592	JMP   search_loop_encodeBlockAsm4MB
  2593
  2594emit_remainder_encodeBlockAsm4MB:
  2595	MOVQ src_len+32(FP), CX
  2596	SUBL 12(SP), CX
  2597	LEAQ 4(AX)(CX*1), CX
  2598	CMPQ CX, (SP)
  2599	JB   emit_remainder_ok_encodeBlockAsm4MB
  2600	MOVQ $0x00000000, ret+48(FP)
  2601	RET
  2602
  2603emit_remainder_ok_encodeBlockAsm4MB:
  2604	MOVQ src_len+32(FP), CX
  2605	MOVL 12(SP), BX
  2606	CMPL BX, CX
  2607	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm4MB
  2608	MOVL CX, SI
  2609	MOVL CX, 12(SP)
  2610	LEAQ (DX)(BX*1), CX
  2611	SUBL BX, SI
  2612	LEAL -1(SI), DX
  2613	CMPL DX, $0x3c
  2614	JB   one_byte_emit_remainder_encodeBlockAsm4MB
  2615	CMPL DX, $0x00000100
  2616	JB   two_bytes_emit_remainder_encodeBlockAsm4MB
  2617	CMPL DX, $0x00010000
  2618	JB   three_bytes_emit_remainder_encodeBlockAsm4MB
  2619	MOVL DX, BX
  2620	SHRL $0x10, BX
  2621	MOVB $0xf8, (AX)
  2622	MOVW DX, 1(AX)
  2623	MOVB BL, 3(AX)
  2624	ADDQ $0x04, AX
  2625	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
  2626
  2627three_bytes_emit_remainder_encodeBlockAsm4MB:
  2628	MOVB $0xf4, (AX)
  2629	MOVW DX, 1(AX)
  2630	ADDQ $0x03, AX
  2631	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
  2632
  2633two_bytes_emit_remainder_encodeBlockAsm4MB:
  2634	MOVB $0xf0, (AX)
  2635	MOVB DL, 1(AX)
  2636	ADDQ $0x02, AX
  2637	CMPL DX, $0x40
  2638	JB   memmove_emit_remainder_encodeBlockAsm4MB
  2639	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
  2640
  2641one_byte_emit_remainder_encodeBlockAsm4MB:
  2642	SHLB $0x02, DL
  2643	MOVB DL, (AX)
  2644	ADDQ $0x01, AX
  2645
  2646memmove_emit_remainder_encodeBlockAsm4MB:
  2647	LEAQ (AX)(SI*1), DX
  2648	MOVL SI, BX
  2649
  2650	// genMemMoveShort
  2651	CMPQ BX, $0x03
  2652	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2
  2653	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3
  2654	CMPQ BX, $0x08
  2655	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7
  2656	CMPQ BX, $0x10
  2657	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
  2658	CMPQ BX, $0x20
  2659	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
  2660	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
  2661
  2662emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2:
  2663	MOVB (CX), SI
  2664	MOVB -1(CX)(BX*1), CL
  2665	MOVB SI, (AX)
  2666	MOVB CL, -1(AX)(BX*1)
  2667	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2668
  2669emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3:
  2670	MOVW (CX), SI
  2671	MOVB 2(CX), CL
  2672	MOVW SI, (AX)
  2673	MOVB CL, 2(AX)
  2674	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2675
  2676emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7:
  2677	MOVL (CX), SI
  2678	MOVL -4(CX)(BX*1), CX
  2679	MOVL SI, (AX)
  2680	MOVL CX, -4(AX)(BX*1)
  2681	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2682
  2683emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
  2684	MOVQ (CX), SI
  2685	MOVQ -8(CX)(BX*1), CX
  2686	MOVQ SI, (AX)
  2687	MOVQ CX, -8(AX)(BX*1)
  2688	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2689
  2690emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
  2691	MOVOU (CX), X0
  2692	MOVOU -16(CX)(BX*1), X1
  2693	MOVOU X0, (AX)
  2694	MOVOU X1, -16(AX)(BX*1)
  2695	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2696
  2697emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
  2698	MOVOU (CX), X0
  2699	MOVOU 16(CX), X1
  2700	MOVOU -32(CX)(BX*1), X2
  2701	MOVOU -16(CX)(BX*1), X3
  2702	MOVOU X0, (AX)
  2703	MOVOU X1, 16(AX)
  2704	MOVOU X2, -32(AX)(BX*1)
  2705	MOVOU X3, -16(AX)(BX*1)
  2706
  2707memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
  2708	MOVQ DX, AX
  2709	JMP  emit_literal_done_emit_remainder_encodeBlockAsm4MB
  2710
  2711memmove_long_emit_remainder_encodeBlockAsm4MB:
  2712	LEAQ (AX)(SI*1), DX
  2713	MOVL SI, BX
  2714
  2715	// genMemMoveLong
  2716	MOVOU (CX), X0
  2717	MOVOU 16(CX), X1
  2718	MOVOU -32(CX)(BX*1), X2
  2719	MOVOU -16(CX)(BX*1), X3
  2720	MOVQ  BX, DI
  2721	SHRQ  $0x05, DI
  2722	MOVQ  AX, SI
  2723	ANDL  $0x0000001f, SI
  2724	MOVQ  $0x00000040, R8
  2725	SUBQ  SI, R8
  2726	DECQ  DI
  2727	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
  2728	LEAQ  -32(CX)(R8*1), SI
  2729	LEAQ  -32(AX)(R8*1), R9
  2730
  2731emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
  2732	MOVOU (SI), X4
  2733	MOVOU 16(SI), X5
  2734	MOVOA X4, (R9)
  2735	MOVOA X5, 16(R9)
  2736	ADDQ  $0x20, R9
  2737	ADDQ  $0x20, SI
  2738	ADDQ  $0x20, R8
  2739	DECQ  DI
  2740	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
  2741
  2742emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
  2743	MOVOU -32(CX)(R8*1), X4
  2744	MOVOU -16(CX)(R8*1), X5
  2745	MOVOA X4, -32(AX)(R8*1)
  2746	MOVOA X5, -16(AX)(R8*1)
  2747	ADDQ  $0x20, R8
  2748	CMPQ  BX, R8
  2749	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
  2750	MOVOU X0, (AX)
  2751	MOVOU X1, 16(AX)
  2752	MOVOU X2, -32(AX)(BX*1)
  2753	MOVOU X3, -16(AX)(BX*1)
  2754	MOVQ  DX, AX
  2755
  2756emit_literal_done_emit_remainder_encodeBlockAsm4MB:
  2757	MOVQ dst_base+0(FP), CX
  2758	SUBQ CX, AX
  2759	MOVQ AX, ret+48(FP)
  2760	RET
  2761
  2762// func encodeBlockAsm12B(dst []byte, src []byte) int
  2763// Requires: BMI, SSE2
  2764TEXT ·encodeBlockAsm12B(SB), $16408-56
  2765	MOVQ dst_base+0(FP), AX
  2766	MOVQ $0x00000080, CX
  2767	LEAQ 24(SP), DX
  2768	PXOR X0, X0
  2769
  2770zero_loop_encodeBlockAsm12B:
  2771	MOVOU X0, (DX)
  2772	MOVOU X0, 16(DX)
  2773	MOVOU X0, 32(DX)
  2774	MOVOU X0, 48(DX)
  2775	MOVOU X0, 64(DX)
  2776	MOVOU X0, 80(DX)
  2777	MOVOU X0, 96(DX)
  2778	MOVOU X0, 112(DX)
  2779	ADDQ  $0x80, DX
  2780	DECQ  CX
  2781	JNZ   zero_loop_encodeBlockAsm12B
  2782	MOVL  $0x00000000, 12(SP)
  2783	MOVQ  src_len+32(FP), CX
  2784	LEAQ  -9(CX), DX
  2785	LEAQ  -8(CX), BX
  2786	MOVL  BX, 8(SP)
  2787	SHRQ  $0x05, CX
  2788	SUBL  CX, DX
  2789	LEAQ  (AX)(DX*1), DX
  2790	MOVQ  DX, (SP)
  2791	MOVL  $0x00000001, CX
  2792	MOVL  CX, 16(SP)
  2793	MOVQ  src_base+24(FP), DX
  2794
  2795search_loop_encodeBlockAsm12B:
  2796	MOVL  CX, BX
  2797	SUBL  12(SP), BX
  2798	SHRL  $0x05, BX
  2799	LEAL  4(CX)(BX*1), BX
  2800	CMPL  BX, 8(SP)
  2801	JAE   emit_remainder_encodeBlockAsm12B
  2802	MOVQ  (DX)(CX*1), SI
  2803	MOVL  BX, 20(SP)
  2804	MOVQ  $0x000000cf1bbcdcbb, R8
  2805	MOVQ  SI, R9
  2806	MOVQ  SI, R10
  2807	SHRQ  $0x08, R10
  2808	SHLQ  $0x18, R9
  2809	IMULQ R8, R9
  2810	SHRQ  $0x34, R9
  2811	SHLQ  $0x18, R10
  2812	IMULQ R8, R10
  2813	SHRQ  $0x34, R10
  2814	MOVL  24(SP)(R9*4), BX
  2815	MOVL  24(SP)(R10*4), DI
  2816	MOVL  CX, 24(SP)(R9*4)
  2817	LEAL  1(CX), R9
  2818	MOVL  R9, 24(SP)(R10*4)
  2819	MOVQ  SI, R9
  2820	SHRQ  $0x10, R9
  2821	SHLQ  $0x18, R9
  2822	IMULQ R8, R9
  2823	SHRQ  $0x34, R9
  2824	MOVL  CX, R8
  2825	SUBL  16(SP), R8
  2826	MOVL  1(DX)(R8*1), R10
  2827	MOVQ  SI, R8
  2828	SHRQ  $0x08, R8
  2829	CMPL  R8, R10
  2830	JNE   no_repeat_found_encodeBlockAsm12B
  2831	LEAL  1(CX), SI
  2832	MOVL  12(SP), DI
  2833	MOVL  SI, BX
  2834	SUBL  16(SP), BX
  2835	JZ    repeat_extend_back_end_encodeBlockAsm12B
  2836
  2837repeat_extend_back_loop_encodeBlockAsm12B:
  2838	CMPL SI, DI
  2839	JBE  repeat_extend_back_end_encodeBlockAsm12B
  2840	MOVB -1(DX)(BX*1), R8
  2841	MOVB -1(DX)(SI*1), R9
  2842	CMPB R8, R9
  2843	JNE  repeat_extend_back_end_encodeBlockAsm12B
  2844	LEAL -1(SI), SI
  2845	DECL BX
  2846	JNZ  repeat_extend_back_loop_encodeBlockAsm12B
  2847
  2848repeat_extend_back_end_encodeBlockAsm12B:
  2849	MOVL SI, BX
  2850	SUBL 12(SP), BX
  2851	LEAQ 3(AX)(BX*1), BX
  2852	CMPQ BX, (SP)
  2853	JB   repeat_dst_size_check_encodeBlockAsm12B
  2854	MOVQ $0x00000000, ret+48(FP)
  2855	RET
  2856
  2857repeat_dst_size_check_encodeBlockAsm12B:
  2858	MOVL 12(SP), BX
  2859	CMPL BX, SI
  2860	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm12B
  2861	MOVL SI, R8
  2862	MOVL SI, 12(SP)
  2863	LEAQ (DX)(BX*1), R9
  2864	SUBL BX, R8
  2865	LEAL -1(R8), BX
  2866	CMPL BX, $0x3c
  2867	JB   one_byte_repeat_emit_encodeBlockAsm12B
  2868	CMPL BX, $0x00000100
  2869	JB   two_bytes_repeat_emit_encodeBlockAsm12B
  2870	JB   three_bytes_repeat_emit_encodeBlockAsm12B
  2871
  2872three_bytes_repeat_emit_encodeBlockAsm12B:
  2873	MOVB $0xf4, (AX)
  2874	MOVW BX, 1(AX)
  2875	ADDQ $0x03, AX
  2876	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
  2877
  2878two_bytes_repeat_emit_encodeBlockAsm12B:
  2879	MOVB $0xf0, (AX)
  2880	MOVB BL, 1(AX)
  2881	ADDQ $0x02, AX
  2882	CMPL BX, $0x40
  2883	JB   memmove_repeat_emit_encodeBlockAsm12B
  2884	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
  2885
  2886one_byte_repeat_emit_encodeBlockAsm12B:
  2887	SHLB $0x02, BL
  2888	MOVB BL, (AX)
  2889	ADDQ $0x01, AX
  2890
  2891memmove_repeat_emit_encodeBlockAsm12B:
  2892	LEAQ (AX)(R8*1), BX
  2893
  2894	// genMemMoveShort
  2895	CMPQ R8, $0x08
  2896	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
  2897	CMPQ R8, $0x10
  2898	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
  2899	CMPQ R8, $0x20
  2900	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
  2901	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
  2902
  2903emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
  2904	MOVQ (R9), R10
  2905	MOVQ R10, (AX)
  2906	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
  2907
  2908emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
  2909	MOVQ (R9), R10
  2910	MOVQ -8(R9)(R8*1), R9
  2911	MOVQ R10, (AX)
  2912	MOVQ R9, -8(AX)(R8*1)
  2913	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
  2914
  2915emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
  2916	MOVOU (R9), X0
  2917	MOVOU -16(R9)(R8*1), X1
  2918	MOVOU X0, (AX)
  2919	MOVOU X1, -16(AX)(R8*1)
  2920	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm12B
  2921
  2922emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
  2923	MOVOU (R9), X0
  2924	MOVOU 16(R9), X1
  2925	MOVOU -32(R9)(R8*1), X2
  2926	MOVOU -16(R9)(R8*1), X3
  2927	MOVOU X0, (AX)
  2928	MOVOU X1, 16(AX)
  2929	MOVOU X2, -32(AX)(R8*1)
  2930	MOVOU X3, -16(AX)(R8*1)
  2931
  2932memmove_end_copy_repeat_emit_encodeBlockAsm12B:
  2933	MOVQ BX, AX
  2934	JMP  emit_literal_done_repeat_emit_encodeBlockAsm12B
  2935
  2936memmove_long_repeat_emit_encodeBlockAsm12B:
  2937	LEAQ (AX)(R8*1), BX
  2938
  2939	// genMemMoveLong
  2940	MOVOU (R9), X0
  2941	MOVOU 16(R9), X1
  2942	MOVOU -32(R9)(R8*1), X2
  2943	MOVOU -16(R9)(R8*1), X3
  2944	MOVQ  R8, R11
  2945	SHRQ  $0x05, R11
  2946	MOVQ  AX, R10
  2947	ANDL  $0x0000001f, R10
  2948	MOVQ  $0x00000040, R12
  2949	SUBQ  R10, R12
  2950	DECQ  R11
  2951	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
  2952	LEAQ  -32(R9)(R12*1), R10
  2953	LEAQ  -32(AX)(R12*1), R13
  2954
  2955emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
  2956	MOVOU (R10), X4
  2957	MOVOU 16(R10), X5
  2958	MOVOA X4, (R13)
  2959	MOVOA X5, 16(R13)
  2960	ADDQ  $0x20, R13
  2961	ADDQ  $0x20, R10
  2962	ADDQ  $0x20, R12
  2963	DECQ  R11
  2964	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
  2965
  2966emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
  2967	MOVOU -32(R9)(R12*1), X4
  2968	MOVOU -16(R9)(R12*1), X5
  2969	MOVOA X4, -32(AX)(R12*1)
  2970	MOVOA X5, -16(AX)(R12*1)
  2971	ADDQ  $0x20, R12
  2972	CMPQ  R8, R12
  2973	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
  2974	MOVOU X0, (AX)
  2975	MOVOU X1, 16(AX)
  2976	MOVOU X2, -32(AX)(R8*1)
  2977	MOVOU X3, -16(AX)(R8*1)
  2978	MOVQ  BX, AX
  2979
  2980emit_literal_done_repeat_emit_encodeBlockAsm12B:
  2981	ADDL $0x05, CX
  2982	MOVL CX, BX
  2983	SUBL 16(SP), BX
  2984	MOVQ src_len+32(FP), R8
  2985	SUBL CX, R8
  2986	LEAQ (DX)(CX*1), R9
  2987	LEAQ (DX)(BX*1), BX
  2988
  2989	// matchLen
  2990	XORL R11, R11
  2991
  2992matchlen_loopback_16_repeat_extend_encodeBlockAsm12B:
  2993	CMPL R8, $0x10
  2994	JB   matchlen_match8_repeat_extend_encodeBlockAsm12B
  2995	MOVQ (R9)(R11*1), R10
  2996	MOVQ 8(R9)(R11*1), R12
  2997	XORQ (BX)(R11*1), R10
  2998	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
  2999	XORQ 8(BX)(R11*1), R12
  3000	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm12B
  3001	LEAL -16(R8), R8
  3002	LEAL 16(R11), R11
  3003	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm12B
  3004
  3005matchlen_bsf_16repeat_extend_encodeBlockAsm12B:
  3006#ifdef GOAMD64_v3
  3007	TZCNTQ R12, R12
  3008
  3009#else
  3010	BSFQ R12, R12
  3011
  3012#endif
  3013	SARQ $0x03, R12
  3014	LEAL 8(R11)(R12*1), R11
  3015	JMP  repeat_extend_forward_end_encodeBlockAsm12B
  3016
  3017matchlen_match8_repeat_extend_encodeBlockAsm12B:
  3018	CMPL R8, $0x08
  3019	JB   matchlen_match4_repeat_extend_encodeBlockAsm12B
  3020	MOVQ (R9)(R11*1), R10
  3021	XORQ (BX)(R11*1), R10
  3022	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
  3023	LEAL -8(R8), R8
  3024	LEAL 8(R11), R11
  3025	JMP  matchlen_match4_repeat_extend_encodeBlockAsm12B
  3026
  3027matchlen_bsf_8_repeat_extend_encodeBlockAsm12B:
  3028#ifdef GOAMD64_v3
  3029	TZCNTQ R10, R10
  3030
  3031#else
  3032	BSFQ R10, R10
  3033
  3034#endif
  3035	SARQ $0x03, R10
  3036	LEAL (R11)(R10*1), R11
  3037	JMP  repeat_extend_forward_end_encodeBlockAsm12B
  3038
  3039matchlen_match4_repeat_extend_encodeBlockAsm12B:
  3040	CMPL R8, $0x04
  3041	JB   matchlen_match2_repeat_extend_encodeBlockAsm12B
  3042	MOVL (R9)(R11*1), R10
  3043	CMPL (BX)(R11*1), R10
  3044	JNE  matchlen_match2_repeat_extend_encodeBlockAsm12B
  3045	LEAL -4(R8), R8
  3046	LEAL 4(R11), R11
  3047
  3048matchlen_match2_repeat_extend_encodeBlockAsm12B:
  3049	CMPL R8, $0x01
  3050	JE   matchlen_match1_repeat_extend_encodeBlockAsm12B
  3051	JB   repeat_extend_forward_end_encodeBlockAsm12B
  3052	MOVW (R9)(R11*1), R10
  3053	CMPW (BX)(R11*1), R10
  3054	JNE  matchlen_match1_repeat_extend_encodeBlockAsm12B
  3055	LEAL 2(R11), R11
  3056	SUBL $0x02, R8
  3057	JZ   repeat_extend_forward_end_encodeBlockAsm12B
  3058
  3059matchlen_match1_repeat_extend_encodeBlockAsm12B:
  3060	MOVB (R9)(R11*1), R10
  3061	CMPB (BX)(R11*1), R10
  3062	JNE  repeat_extend_forward_end_encodeBlockAsm12B
  3063	LEAL 1(R11), R11
  3064
  3065repeat_extend_forward_end_encodeBlockAsm12B:
  3066	ADDL  R11, CX
  3067	MOVL  CX, BX
  3068	SUBL  SI, BX
  3069	MOVL  16(SP), SI
  3070	TESTL DI, DI
  3071	JZ    repeat_as_copy_encodeBlockAsm12B
  3072
  3073	// emitRepeat
  3074	MOVL BX, DI
  3075	LEAL -4(BX), BX
  3076	CMPL DI, $0x08
  3077	JBE  repeat_two_match_repeat_encodeBlockAsm12B
  3078	CMPL DI, $0x0c
  3079	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
  3080	CMPL SI, $0x00000800
  3081	JB   repeat_two_offset_match_repeat_encodeBlockAsm12B
  3082
  3083cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
  3084	CMPL BX, $0x00000104
  3085	JB   repeat_three_match_repeat_encodeBlockAsm12B
  3086	LEAL -256(BX), BX
  3087	MOVW $0x0019, (AX)
  3088	MOVW BX, 2(AX)
  3089	ADDQ $0x04, AX
  3090	JMP  repeat_end_emit_encodeBlockAsm12B
  3091
  3092repeat_three_match_repeat_encodeBlockAsm12B:
  3093	LEAL -4(BX), BX
  3094	MOVW $0x0015, (AX)
  3095	MOVB BL, 2(AX)
  3096	ADDQ $0x03, AX
  3097	JMP  repeat_end_emit_encodeBlockAsm12B
  3098
  3099repeat_two_match_repeat_encodeBlockAsm12B:
  3100	SHLL $0x02, BX
  3101	ORL  $0x01, BX
  3102	MOVW BX, (AX)
  3103	ADDQ $0x02, AX
  3104	JMP  repeat_end_emit_encodeBlockAsm12B
  3105
  3106repeat_two_offset_match_repeat_encodeBlockAsm12B:
  3107	XORQ DI, DI
  3108	LEAL 1(DI)(BX*4), BX
  3109	MOVB SI, 1(AX)
  3110	SARL $0x08, SI
  3111	SHLL $0x05, SI
  3112	ORL  SI, BX
  3113	MOVB BL, (AX)
  3114	ADDQ $0x02, AX
  3115	JMP  repeat_end_emit_encodeBlockAsm12B
  3116
  3117repeat_as_copy_encodeBlockAsm12B:
  3118	// emitCopy
  3119	CMPL BX, $0x40
  3120	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
  3121	CMPL SI, $0x00000800
  3122	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm12B
  3123	MOVL $0x00000001, DI
  3124	LEAL 16(DI), DI
  3125	MOVB SI, 1(AX)
  3126	SHRL $0x08, SI
  3127	SHLL $0x05, SI
  3128	ORL  SI, DI
  3129	MOVB DI, (AX)
  3130	ADDQ $0x02, AX
  3131	SUBL $0x08, BX
  3132
  3133	// emitRepeat
  3134	LEAL -4(BX), BX
  3135	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  3136	MOVL BX, DI
  3137	LEAL -4(BX), BX
  3138	CMPL DI, $0x08
  3139	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  3140	CMPL DI, $0x0c
  3141	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  3142	CMPL SI, $0x00000800
  3143	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  3144
  3145cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
  3146	CMPL BX, $0x00000104
  3147	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  3148	LEAL -256(BX), BX
  3149	MOVW $0x0019, (AX)
  3150	MOVW BX, 2(AX)
  3151	ADDQ $0x04, AX
  3152	JMP  repeat_end_emit_encodeBlockAsm12B
  3153
  3154repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
  3155	LEAL -4(BX), BX
  3156	MOVW $0x0015, (AX)
  3157	MOVB BL, 2(AX)
  3158	ADDQ $0x03, AX
  3159	JMP  repeat_end_emit_encodeBlockAsm12B
  3160
  3161repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
  3162	SHLL $0x02, BX
  3163	ORL  $0x01, BX
  3164	MOVW BX, (AX)
  3165	ADDQ $0x02, AX
  3166	JMP  repeat_end_emit_encodeBlockAsm12B
  3167
  3168repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
  3169	XORQ DI, DI
  3170	LEAL 1(DI)(BX*4), BX
  3171	MOVB SI, 1(AX)
  3172	SARL $0x08, SI
  3173	SHLL $0x05, SI
  3174	ORL  SI, BX
  3175	MOVB BL, (AX)
  3176	ADDQ $0x02, AX
  3177	JMP  repeat_end_emit_encodeBlockAsm12B
  3178
  3179long_offset_short_repeat_as_copy_encodeBlockAsm12B:
  3180	MOVB $0xee, (AX)
  3181	MOVW SI, 1(AX)
  3182	LEAL -60(BX), BX
  3183	ADDQ $0x03, AX
  3184
  3185	// emitRepeat
  3186	MOVL BX, DI
  3187	LEAL -4(BX), BX
  3188	CMPL DI, $0x08
  3189	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  3190	CMPL DI, $0x0c
  3191	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  3192	CMPL SI, $0x00000800
  3193	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  3194
  3195cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  3196	CMPL BX, $0x00000104
  3197	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  3198	LEAL -256(BX), BX
  3199	MOVW $0x0019, (AX)
  3200	MOVW BX, 2(AX)
  3201	ADDQ $0x04, AX
  3202	JMP  repeat_end_emit_encodeBlockAsm12B
  3203
  3204repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  3205	LEAL -4(BX), BX
  3206	MOVW $0x0015, (AX)
  3207	MOVB BL, 2(AX)
  3208	ADDQ $0x03, AX
  3209	JMP  repeat_end_emit_encodeBlockAsm12B
  3210
  3211repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  3212	SHLL $0x02, BX
  3213	ORL  $0x01, BX
  3214	MOVW BX, (AX)
  3215	ADDQ $0x02, AX
  3216	JMP  repeat_end_emit_encodeBlockAsm12B
  3217
  3218repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  3219	XORQ DI, DI
  3220	LEAL 1(DI)(BX*4), BX
  3221	MOVB SI, 1(AX)
  3222	SARL $0x08, SI
  3223	SHLL $0x05, SI
  3224	ORL  SI, BX
  3225	MOVB BL, (AX)
  3226	ADDQ $0x02, AX
  3227	JMP  repeat_end_emit_encodeBlockAsm12B
  3228
  3229two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
  3230	MOVL BX, DI
  3231	SHLL $0x02, DI
  3232	CMPL BX, $0x0c
  3233	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
  3234	CMPL SI, $0x00000800
  3235	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
  3236	LEAL -15(DI), DI
  3237	MOVB SI, 1(AX)
  3238	SHRL $0x08, SI
  3239	SHLL $0x05, SI
  3240	ORL  SI, DI
  3241	MOVB DI, (AX)
  3242	ADDQ $0x02, AX
  3243	JMP  repeat_end_emit_encodeBlockAsm12B
  3244
  3245emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
  3246	LEAL -2(DI), DI
  3247	MOVB DI, (AX)
  3248	MOVW SI, 1(AX)
  3249	ADDQ $0x03, AX
  3250
  3251repeat_end_emit_encodeBlockAsm12B:
  3252	MOVL CX, 12(SP)
  3253	JMP  search_loop_encodeBlockAsm12B
  3254
  3255no_repeat_found_encodeBlockAsm12B:
  3256	CMPL (DX)(BX*1), SI
  3257	JEQ  candidate_match_encodeBlockAsm12B
  3258	SHRQ $0x08, SI
  3259	MOVL 24(SP)(R9*4), BX
  3260	LEAL 2(CX), R8
  3261	CMPL (DX)(DI*1), SI
  3262	JEQ  candidate2_match_encodeBlockAsm12B
  3263	MOVL R8, 24(SP)(R9*4)
  3264	SHRQ $0x08, SI
  3265	CMPL (DX)(BX*1), SI
  3266	JEQ  candidate3_match_encodeBlockAsm12B
  3267	MOVL 20(SP), CX
  3268	JMP  search_loop_encodeBlockAsm12B
  3269
  3270candidate3_match_encodeBlockAsm12B:
  3271	ADDL $0x02, CX
  3272	JMP  candidate_match_encodeBlockAsm12B
  3273
  3274candidate2_match_encodeBlockAsm12B:
  3275	MOVL R8, 24(SP)(R9*4)
  3276	INCL CX
  3277	MOVL DI, BX
  3278
  3279candidate_match_encodeBlockAsm12B:
  3280	MOVL  12(SP), SI
  3281	TESTL BX, BX
  3282	JZ    match_extend_back_end_encodeBlockAsm12B
  3283
  3284match_extend_back_loop_encodeBlockAsm12B:
  3285	CMPL CX, SI
  3286	JBE  match_extend_back_end_encodeBlockAsm12B
  3287	MOVB -1(DX)(BX*1), DI
  3288	MOVB -1(DX)(CX*1), R8
  3289	CMPB DI, R8
  3290	JNE  match_extend_back_end_encodeBlockAsm12B
  3291	LEAL -1(CX), CX
  3292	DECL BX
  3293	JZ   match_extend_back_end_encodeBlockAsm12B
  3294	JMP  match_extend_back_loop_encodeBlockAsm12B
  3295
  3296match_extend_back_end_encodeBlockAsm12B:
  3297	MOVL CX, SI
  3298	SUBL 12(SP), SI
  3299	LEAQ 3(AX)(SI*1), SI
  3300	CMPQ SI, (SP)
  3301	JB   match_dst_size_check_encodeBlockAsm12B
  3302	MOVQ $0x00000000, ret+48(FP)
  3303	RET
  3304
  3305match_dst_size_check_encodeBlockAsm12B:
  3306	MOVL CX, SI
  3307	MOVL 12(SP), DI
  3308	CMPL DI, SI
  3309	JEQ  emit_literal_done_match_emit_encodeBlockAsm12B
  3310	MOVL SI, R8
  3311	MOVL SI, 12(SP)
  3312	LEAQ (DX)(DI*1), SI
  3313	SUBL DI, R8
  3314	LEAL -1(R8), DI
  3315	CMPL DI, $0x3c
  3316	JB   one_byte_match_emit_encodeBlockAsm12B
  3317	CMPL DI, $0x00000100
  3318	JB   two_bytes_match_emit_encodeBlockAsm12B
  3319	JB   three_bytes_match_emit_encodeBlockAsm12B
  3320
  3321three_bytes_match_emit_encodeBlockAsm12B:
  3322	MOVB $0xf4, (AX)
  3323	MOVW DI, 1(AX)
  3324	ADDQ $0x03, AX
  3325	JMP  memmove_long_match_emit_encodeBlockAsm12B
  3326
  3327two_bytes_match_emit_encodeBlockAsm12B:
  3328	MOVB $0xf0, (AX)
  3329	MOVB DI, 1(AX)
  3330	ADDQ $0x02, AX
  3331	CMPL DI, $0x40
  3332	JB   memmove_match_emit_encodeBlockAsm12B
  3333	JMP  memmove_long_match_emit_encodeBlockAsm12B
  3334
  3335one_byte_match_emit_encodeBlockAsm12B:
  3336	SHLB $0x02, DI
  3337	MOVB DI, (AX)
  3338	ADDQ $0x01, AX
  3339
  3340memmove_match_emit_encodeBlockAsm12B:
  3341	LEAQ (AX)(R8*1), DI
  3342
  3343	// genMemMoveShort
  3344	CMPQ R8, $0x08
  3345	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
  3346	CMPQ R8, $0x10
  3347	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
  3348	CMPQ R8, $0x20
  3349	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
  3350	JMP  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
  3351
  3352emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
  3353	MOVQ (SI), R9
  3354	MOVQ R9, (AX)
  3355	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
  3356
  3357emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
  3358	MOVQ (SI), R9
  3359	MOVQ -8(SI)(R8*1), SI
  3360	MOVQ R9, (AX)
  3361	MOVQ SI, -8(AX)(R8*1)
  3362	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
  3363
  3364emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
  3365	MOVOU (SI), X0
  3366	MOVOU -16(SI)(R8*1), X1
  3367	MOVOU X0, (AX)
  3368	MOVOU X1, -16(AX)(R8*1)
  3369	JMP   memmove_end_copy_match_emit_encodeBlockAsm12B
  3370
  3371emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
  3372	MOVOU (SI), X0
  3373	MOVOU 16(SI), X1
  3374	MOVOU -32(SI)(R8*1), X2
  3375	MOVOU -16(SI)(R8*1), X3
  3376	MOVOU X0, (AX)
  3377	MOVOU X1, 16(AX)
  3378	MOVOU X2, -32(AX)(R8*1)
  3379	MOVOU X3, -16(AX)(R8*1)
  3380
  3381memmove_end_copy_match_emit_encodeBlockAsm12B:
  3382	MOVQ DI, AX
  3383	JMP  emit_literal_done_match_emit_encodeBlockAsm12B
  3384
  3385memmove_long_match_emit_encodeBlockAsm12B:
  3386	LEAQ (AX)(R8*1), DI
  3387
  3388	// genMemMoveLong
  3389	MOVOU (SI), X0
  3390	MOVOU 16(SI), X1
  3391	MOVOU -32(SI)(R8*1), X2
  3392	MOVOU -16(SI)(R8*1), X3
  3393	MOVQ  R8, R10
  3394	SHRQ  $0x05, R10
  3395	MOVQ  AX, R9
  3396	ANDL  $0x0000001f, R9
  3397	MOVQ  $0x00000040, R11
  3398	SUBQ  R9, R11
  3399	DECQ  R10
  3400	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
  3401	LEAQ  -32(SI)(R11*1), R9
  3402	LEAQ  -32(AX)(R11*1), R12
  3403
  3404emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
  3405	MOVOU (R9), X4
  3406	MOVOU 16(R9), X5
  3407	MOVOA X4, (R12)
  3408	MOVOA X5, 16(R12)
  3409	ADDQ  $0x20, R12
  3410	ADDQ  $0x20, R9
  3411	ADDQ  $0x20, R11
  3412	DECQ  R10
  3413	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
  3414
  3415emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
  3416	MOVOU -32(SI)(R11*1), X4
  3417	MOVOU -16(SI)(R11*1), X5
  3418	MOVOA X4, -32(AX)(R11*1)
  3419	MOVOA X5, -16(AX)(R11*1)
  3420	ADDQ  $0x20, R11
  3421	CMPQ  R8, R11
  3422	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
  3423	MOVOU X0, (AX)
  3424	MOVOU X1, 16(AX)
  3425	MOVOU X2, -32(AX)(R8*1)
  3426	MOVOU X3, -16(AX)(R8*1)
  3427	MOVQ  DI, AX
  3428
  3429emit_literal_done_match_emit_encodeBlockAsm12B:
  3430match_nolit_loop_encodeBlockAsm12B:
  3431	MOVL CX, SI
  3432	SUBL BX, SI
  3433	MOVL SI, 16(SP)
  3434	ADDL $0x04, CX
  3435	ADDL $0x04, BX
  3436	MOVQ src_len+32(FP), SI
  3437	SUBL CX, SI
  3438	LEAQ (DX)(CX*1), DI
  3439	LEAQ (DX)(BX*1), BX
  3440
  3441	// matchLen
  3442	XORL R9, R9
  3443
  3444matchlen_loopback_16_match_nolit_encodeBlockAsm12B:
  3445	CMPL SI, $0x10
  3446	JB   matchlen_match8_match_nolit_encodeBlockAsm12B
  3447	MOVQ (DI)(R9*1), R8
  3448	MOVQ 8(DI)(R9*1), R10
  3449	XORQ (BX)(R9*1), R8
  3450	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm12B
  3451	XORQ 8(BX)(R9*1), R10
  3452	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm12B
  3453	LEAL -16(SI), SI
  3454	LEAL 16(R9), R9
  3455	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm12B
  3456
  3457matchlen_bsf_16match_nolit_encodeBlockAsm12B:
  3458#ifdef GOAMD64_v3
  3459	TZCNTQ R10, R10
  3460
  3461#else
  3462	BSFQ R10, R10
  3463
  3464#endif
  3465	SARQ $0x03, R10
  3466	LEAL 8(R9)(R10*1), R9
  3467	JMP  match_nolit_end_encodeBlockAsm12B
  3468
  3469matchlen_match8_match_nolit_encodeBlockAsm12B:
  3470	CMPL SI, $0x08
  3471	JB   matchlen_match4_match_nolit_encodeBlockAsm12B
  3472	MOVQ (DI)(R9*1), R8
  3473	XORQ (BX)(R9*1), R8
  3474	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm12B
  3475	LEAL -8(SI), SI
  3476	LEAL 8(R9), R9
  3477	JMP  matchlen_match4_match_nolit_encodeBlockAsm12B
  3478
  3479matchlen_bsf_8_match_nolit_encodeBlockAsm12B:
  3480#ifdef GOAMD64_v3
  3481	TZCNTQ R8, R8
  3482
  3483#else
  3484	BSFQ R8, R8
  3485
  3486#endif
  3487	SARQ $0x03, R8
  3488	LEAL (R9)(R8*1), R9
  3489	JMP  match_nolit_end_encodeBlockAsm12B
  3490
  3491matchlen_match4_match_nolit_encodeBlockAsm12B:
  3492	CMPL SI, $0x04
  3493	JB   matchlen_match2_match_nolit_encodeBlockAsm12B
  3494	MOVL (DI)(R9*1), R8
  3495	CMPL (BX)(R9*1), R8
  3496	JNE  matchlen_match2_match_nolit_encodeBlockAsm12B
  3497	LEAL -4(SI), SI
  3498	LEAL 4(R9), R9
  3499
  3500matchlen_match2_match_nolit_encodeBlockAsm12B:
  3501	CMPL SI, $0x01
  3502	JE   matchlen_match1_match_nolit_encodeBlockAsm12B
  3503	JB   match_nolit_end_encodeBlockAsm12B
  3504	MOVW (DI)(R9*1), R8
  3505	CMPW (BX)(R9*1), R8
  3506	JNE  matchlen_match1_match_nolit_encodeBlockAsm12B
  3507	LEAL 2(R9), R9
  3508	SUBL $0x02, SI
  3509	JZ   match_nolit_end_encodeBlockAsm12B
  3510
  3511matchlen_match1_match_nolit_encodeBlockAsm12B:
  3512	MOVB (DI)(R9*1), R8
  3513	CMPB (BX)(R9*1), R8
  3514	JNE  match_nolit_end_encodeBlockAsm12B
  3515	LEAL 1(R9), R9
  3516
  3517match_nolit_end_encodeBlockAsm12B:
  3518	ADDL R9, CX
  3519	MOVL 16(SP), BX
  3520	ADDL $0x04, R9
  3521	MOVL CX, 12(SP)
  3522
  3523	// emitCopy
  3524	CMPL R9, $0x40
  3525	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm12B
  3526	CMPL BX, $0x00000800
  3527	JAE  long_offset_short_match_nolit_encodeBlockAsm12B
  3528	MOVL $0x00000001, SI
  3529	LEAL 16(SI), SI
  3530	MOVB BL, 1(AX)
  3531	SHRL $0x08, BX
  3532	SHLL $0x05, BX
  3533	ORL  BX, SI
  3534	MOVB SI, (AX)
  3535	ADDQ $0x02, AX
  3536	SUBL $0x08, R9
  3537
  3538	// emitRepeat
  3539	LEAL -4(R9), R9
  3540	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3541	MOVL R9, SI
  3542	LEAL -4(R9), R9
  3543	CMPL SI, $0x08
  3544	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3545	CMPL SI, $0x0c
  3546	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3547	CMPL BX, $0x00000800
  3548	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3549
  3550cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
  3551	CMPL R9, $0x00000104
  3552	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3553	LEAL -256(R9), R9
  3554	MOVW $0x0019, (AX)
  3555	MOVW R9, 2(AX)
  3556	ADDQ $0x04, AX
  3557	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
  3558
  3559repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
  3560	LEAL -4(R9), R9
  3561	MOVW $0x0015, (AX)
  3562	MOVB R9, 2(AX)
  3563	ADDQ $0x03, AX
  3564	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
  3565
  3566repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
  3567	SHLL $0x02, R9
  3568	ORL  $0x01, R9
  3569	MOVW R9, (AX)
  3570	ADDQ $0x02, AX
  3571	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
  3572
  3573repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
  3574	XORQ SI, SI
  3575	LEAL 1(SI)(R9*4), R9
  3576	MOVB BL, 1(AX)
  3577	SARL $0x08, BX
  3578	SHLL $0x05, BX
  3579	ORL  BX, R9
  3580	MOVB R9, (AX)
  3581	ADDQ $0x02, AX
  3582	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
  3583
  3584long_offset_short_match_nolit_encodeBlockAsm12B:
  3585	MOVB $0xee, (AX)
  3586	MOVW BX, 1(AX)
  3587	LEAL -60(R9), R9
  3588	ADDQ $0x03, AX
  3589
  3590	// emitRepeat
  3591	MOVL R9, SI
  3592	LEAL -4(R9), R9
  3593	CMPL SI, $0x08
  3594	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
  3595	CMPL SI, $0x0c
  3596	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
  3597	CMPL BX, $0x00000800
  3598	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
  3599
  3600cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
  3601	CMPL R9, $0x00000104
  3602	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
  3603	LEAL -256(R9), R9
  3604	MOVW $0x0019, (AX)
  3605	MOVW R9, 2(AX)
  3606	ADDQ $0x04, AX
  3607	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
  3608
  3609repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
  3610	LEAL -4(R9), R9
  3611	MOVW $0x0015, (AX)
  3612	MOVB R9, 2(AX)
  3613	ADDQ $0x03, AX
  3614	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
  3615
  3616repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
  3617	SHLL $0x02, R9
  3618	ORL  $0x01, R9
  3619	MOVW R9, (AX)
  3620	ADDQ $0x02, AX
  3621	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
  3622
  3623repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
  3624	XORQ SI, SI
  3625	LEAL 1(SI)(R9*4), R9
  3626	MOVB BL, 1(AX)
  3627	SARL $0x08, BX
  3628	SHLL $0x05, BX
  3629	ORL  BX, R9
  3630	MOVB R9, (AX)
  3631	ADDQ $0x02, AX
  3632	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
  3633
  3634two_byte_offset_short_match_nolit_encodeBlockAsm12B:
  3635	MOVL R9, SI
  3636	SHLL $0x02, SI
  3637	CMPL R9, $0x0c
  3638	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
  3639	CMPL BX, $0x00000800
  3640	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
  3641	LEAL -15(SI), SI
  3642	MOVB BL, 1(AX)
  3643	SHRL $0x08, BX
  3644	SHLL $0x05, BX
  3645	ORL  BX, SI
  3646	MOVB SI, (AX)
  3647	ADDQ $0x02, AX
  3648	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
  3649
  3650emit_copy_three_match_nolit_encodeBlockAsm12B:
  3651	LEAL -2(SI), SI
  3652	MOVB SI, (AX)
  3653	MOVW BX, 1(AX)
  3654	ADDQ $0x03, AX
  3655
  3656match_nolit_emitcopy_end_encodeBlockAsm12B:
  3657	CMPL CX, 8(SP)
  3658	JAE  emit_remainder_encodeBlockAsm12B
  3659	MOVQ -2(DX)(CX*1), SI
  3660	CMPQ AX, (SP)
  3661	JB   match_nolit_dst_ok_encodeBlockAsm12B
  3662	MOVQ $0x00000000, ret+48(FP)
  3663	RET
  3664
  3665match_nolit_dst_ok_encodeBlockAsm12B:
  3666	MOVQ  $0x000000cf1bbcdcbb, R8
  3667	MOVQ  SI, DI
  3668	SHRQ  $0x10, SI
  3669	MOVQ  SI, BX
  3670	SHLQ  $0x18, DI
  3671	IMULQ R8, DI
  3672	SHRQ  $0x34, DI
  3673	SHLQ  $0x18, BX
  3674	IMULQ R8, BX
  3675	SHRQ  $0x34, BX
  3676	LEAL  -2(CX), R8
  3677	LEAQ  24(SP)(BX*4), R9
  3678	MOVL  (R9), BX
  3679	MOVL  R8, 24(SP)(DI*4)
  3680	MOVL  CX, (R9)
  3681	CMPL  (DX)(BX*1), SI
  3682	JEQ   match_nolit_loop_encodeBlockAsm12B
  3683	INCL  CX
  3684	JMP   search_loop_encodeBlockAsm12B
  3685
  3686emit_remainder_encodeBlockAsm12B:
  3687	MOVQ src_len+32(FP), CX
  3688	SUBL 12(SP), CX
  3689	LEAQ 3(AX)(CX*1), CX
  3690	CMPQ CX, (SP)
  3691	JB   emit_remainder_ok_encodeBlockAsm12B
  3692	MOVQ $0x00000000, ret+48(FP)
  3693	RET
  3694
  3695emit_remainder_ok_encodeBlockAsm12B:
  3696	MOVQ src_len+32(FP), CX
  3697	MOVL 12(SP), BX
  3698	CMPL BX, CX
  3699	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm12B
  3700	MOVL CX, SI
  3701	MOVL CX, 12(SP)
  3702	LEAQ (DX)(BX*1), CX
  3703	SUBL BX, SI
  3704	LEAL -1(SI), DX
  3705	CMPL DX, $0x3c
  3706	JB   one_byte_emit_remainder_encodeBlockAsm12B
  3707	CMPL DX, $0x00000100
  3708	JB   two_bytes_emit_remainder_encodeBlockAsm12B
  3709	JB   three_bytes_emit_remainder_encodeBlockAsm12B
  3710
  3711three_bytes_emit_remainder_encodeBlockAsm12B:
  3712	MOVB $0xf4, (AX)
  3713	MOVW DX, 1(AX)
  3714	ADDQ $0x03, AX
  3715	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
  3716
  3717two_bytes_emit_remainder_encodeBlockAsm12B:
  3718	MOVB $0xf0, (AX)
  3719	MOVB DL, 1(AX)
  3720	ADDQ $0x02, AX
  3721	CMPL DX, $0x40
  3722	JB   memmove_emit_remainder_encodeBlockAsm12B
  3723	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
  3724
  3725one_byte_emit_remainder_encodeBlockAsm12B:
  3726	SHLB $0x02, DL
  3727	MOVB DL, (AX)
  3728	ADDQ $0x01, AX
  3729
  3730memmove_emit_remainder_encodeBlockAsm12B:
  3731	LEAQ (AX)(SI*1), DX
  3732	MOVL SI, BX
  3733
  3734	// genMemMoveShort
  3735	CMPQ BX, $0x03
  3736	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
  3737	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
  3738	CMPQ BX, $0x08
  3739	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7
  3740	CMPQ BX, $0x10
  3741	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16
  3742	CMPQ BX, $0x20
  3743	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
  3744	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
  3745
  3746emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
  3747	MOVB (CX), SI
  3748	MOVB -1(CX)(BX*1), CL
  3749	MOVB SI, (AX)
  3750	MOVB CL, -1(AX)(BX*1)
  3751	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3752
  3753emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
  3754	MOVW (CX), SI
  3755	MOVB 2(CX), CL
  3756	MOVW SI, (AX)
  3757	MOVB CL, 2(AX)
  3758	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3759
  3760emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7:
  3761	MOVL (CX), SI
  3762	MOVL -4(CX)(BX*1), CX
  3763	MOVL SI, (AX)
  3764	MOVL CX, -4(AX)(BX*1)
  3765	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3766
  3767emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
  3768	MOVQ (CX), SI
  3769	MOVQ -8(CX)(BX*1), CX
  3770	MOVQ SI, (AX)
  3771	MOVQ CX, -8(AX)(BX*1)
  3772	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3773
  3774emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
  3775	MOVOU (CX), X0
  3776	MOVOU -16(CX)(BX*1), X1
  3777	MOVOU X0, (AX)
  3778	MOVOU X1, -16(AX)(BX*1)
  3779	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3780
  3781emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
  3782	MOVOU (CX), X0
  3783	MOVOU 16(CX), X1
  3784	MOVOU -32(CX)(BX*1), X2
  3785	MOVOU -16(CX)(BX*1), X3
  3786	MOVOU X0, (AX)
  3787	MOVOU X1, 16(AX)
  3788	MOVOU X2, -32(AX)(BX*1)
  3789	MOVOU X3, -16(AX)(BX*1)
  3790
  3791memmove_end_copy_emit_remainder_encodeBlockAsm12B:
  3792	MOVQ DX, AX
  3793	JMP  emit_literal_done_emit_remainder_encodeBlockAsm12B
  3794
  3795memmove_long_emit_remainder_encodeBlockAsm12B:
  3796	LEAQ (AX)(SI*1), DX
  3797	MOVL SI, BX
  3798
  3799	// genMemMoveLong
  3800	MOVOU (CX), X0
  3801	MOVOU 16(CX), X1
  3802	MOVOU -32(CX)(BX*1), X2
  3803	MOVOU -16(CX)(BX*1), X3
  3804	MOVQ  BX, DI
  3805	SHRQ  $0x05, DI
  3806	MOVQ  AX, SI
  3807	ANDL  $0x0000001f, SI
  3808	MOVQ  $0x00000040, R8
  3809	SUBQ  SI, R8
  3810	DECQ  DI
  3811	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
  3812	LEAQ  -32(CX)(R8*1), SI
  3813	LEAQ  -32(AX)(R8*1), R9
  3814
  3815emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
  3816	MOVOU (SI), X4
  3817	MOVOU 16(SI), X5
  3818	MOVOA X4, (R9)
  3819	MOVOA X5, 16(R9)
  3820	ADDQ  $0x20, R9
  3821	ADDQ  $0x20, SI
  3822	ADDQ  $0x20, R8
  3823	DECQ  DI
  3824	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
  3825
  3826emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
  3827	MOVOU -32(CX)(R8*1), X4
  3828	MOVOU -16(CX)(R8*1), X5
  3829	MOVOA X4, -32(AX)(R8*1)
  3830	MOVOA X5, -16(AX)(R8*1)
  3831	ADDQ  $0x20, R8
  3832	CMPQ  BX, R8
  3833	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
  3834	MOVOU X0, (AX)
  3835	MOVOU X1, 16(AX)
  3836	MOVOU X2, -32(AX)(BX*1)
  3837	MOVOU X3, -16(AX)(BX*1)
  3838	MOVQ  DX, AX
  3839
  3840emit_literal_done_emit_remainder_encodeBlockAsm12B:
  3841	MOVQ dst_base+0(FP), CX
  3842	SUBQ CX, AX
  3843	MOVQ AX, ret+48(FP)
  3844	RET
  3845
  3846// func encodeBlockAsm10B(dst []byte, src []byte) int
  3847// Requires: BMI, SSE2
  3848TEXT ·encodeBlockAsm10B(SB), $4120-56
  3849	MOVQ dst_base+0(FP), AX
  3850	MOVQ $0x00000020, CX
  3851	LEAQ 24(SP), DX
  3852	PXOR X0, X0
  3853
  3854zero_loop_encodeBlockAsm10B:
  3855	MOVOU X0, (DX)
  3856	MOVOU X0, 16(DX)
  3857	MOVOU X0, 32(DX)
  3858	MOVOU X0, 48(DX)
  3859	MOVOU X0, 64(DX)
  3860	MOVOU X0, 80(DX)
  3861	MOVOU X0, 96(DX)
  3862	MOVOU X0, 112(DX)
  3863	ADDQ  $0x80, DX
  3864	DECQ  CX
  3865	JNZ   zero_loop_encodeBlockAsm10B
  3866	MOVL  $0x00000000, 12(SP)
  3867	MOVQ  src_len+32(FP), CX
  3868	LEAQ  -9(CX), DX
  3869	LEAQ  -8(CX), BX
  3870	MOVL  BX, 8(SP)
  3871	SHRQ  $0x05, CX
  3872	SUBL  CX, DX
  3873	LEAQ  (AX)(DX*1), DX
  3874	MOVQ  DX, (SP)
  3875	MOVL  $0x00000001, CX
  3876	MOVL  CX, 16(SP)
  3877	MOVQ  src_base+24(FP), DX
  3878
  3879search_loop_encodeBlockAsm10B:
  3880	MOVL  CX, BX
  3881	SUBL  12(SP), BX
  3882	SHRL  $0x05, BX
  3883	LEAL  4(CX)(BX*1), BX
  3884	CMPL  BX, 8(SP)
  3885	JAE   emit_remainder_encodeBlockAsm10B
  3886	MOVQ  (DX)(CX*1), SI
  3887	MOVL  BX, 20(SP)
  3888	MOVQ  $0x9e3779b1, R8
  3889	MOVQ  SI, R9
  3890	MOVQ  SI, R10
  3891	SHRQ  $0x08, R10
  3892	SHLQ  $0x20, R9
  3893	IMULQ R8, R9
  3894	SHRQ  $0x36, R9
  3895	SHLQ  $0x20, R10
  3896	IMULQ R8, R10
  3897	SHRQ  $0x36, R10
  3898	MOVL  24(SP)(R9*4), BX
  3899	MOVL  24(SP)(R10*4), DI
  3900	MOVL  CX, 24(SP)(R9*4)
  3901	LEAL  1(CX), R9
  3902	MOVL  R9, 24(SP)(R10*4)
  3903	MOVQ  SI, R9
  3904	SHRQ  $0x10, R9
  3905	SHLQ  $0x20, R9
  3906	IMULQ R8, R9
  3907	SHRQ  $0x36, R9
  3908	MOVL  CX, R8
  3909	SUBL  16(SP), R8
  3910	MOVL  1(DX)(R8*1), R10
  3911	MOVQ  SI, R8
  3912	SHRQ  $0x08, R8
  3913	CMPL  R8, R10
  3914	JNE   no_repeat_found_encodeBlockAsm10B
  3915	LEAL  1(CX), SI
  3916	MOVL  12(SP), DI
  3917	MOVL  SI, BX
  3918	SUBL  16(SP), BX
  3919	JZ    repeat_extend_back_end_encodeBlockAsm10B
  3920
  3921repeat_extend_back_loop_encodeBlockAsm10B:
  3922	CMPL SI, DI
  3923	JBE  repeat_extend_back_end_encodeBlockAsm10B
  3924	MOVB -1(DX)(BX*1), R8
  3925	MOVB -1(DX)(SI*1), R9
  3926	CMPB R8, R9
  3927	JNE  repeat_extend_back_end_encodeBlockAsm10B
  3928	LEAL -1(SI), SI
  3929	DECL BX
  3930	JNZ  repeat_extend_back_loop_encodeBlockAsm10B
  3931
  3932repeat_extend_back_end_encodeBlockAsm10B:
  3933	MOVL SI, BX
  3934	SUBL 12(SP), BX
  3935	LEAQ 3(AX)(BX*1), BX
  3936	CMPQ BX, (SP)
  3937	JB   repeat_dst_size_check_encodeBlockAsm10B
  3938	MOVQ $0x00000000, ret+48(FP)
  3939	RET
  3940
  3941repeat_dst_size_check_encodeBlockAsm10B:
  3942	MOVL 12(SP), BX
  3943	CMPL BX, SI
  3944	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm10B
  3945	MOVL SI, R8
  3946	MOVL SI, 12(SP)
  3947	LEAQ (DX)(BX*1), R9
  3948	SUBL BX, R8
  3949	LEAL -1(R8), BX
  3950	CMPL BX, $0x3c
  3951	JB   one_byte_repeat_emit_encodeBlockAsm10B
  3952	CMPL BX, $0x00000100
  3953	JB   two_bytes_repeat_emit_encodeBlockAsm10B
  3954	JB   three_bytes_repeat_emit_encodeBlockAsm10B
  3955
  3956three_bytes_repeat_emit_encodeBlockAsm10B:
  3957	MOVB $0xf4, (AX)
  3958	MOVW BX, 1(AX)
  3959	ADDQ $0x03, AX
  3960	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
  3961
  3962two_bytes_repeat_emit_encodeBlockAsm10B:
  3963	MOVB $0xf0, (AX)
  3964	MOVB BL, 1(AX)
  3965	ADDQ $0x02, AX
  3966	CMPL BX, $0x40
  3967	JB   memmove_repeat_emit_encodeBlockAsm10B
  3968	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
  3969
  3970one_byte_repeat_emit_encodeBlockAsm10B:
  3971	SHLB $0x02, BL
  3972	MOVB BL, (AX)
  3973	ADDQ $0x01, AX
  3974
  3975memmove_repeat_emit_encodeBlockAsm10B:
  3976	LEAQ (AX)(R8*1), BX
  3977
  3978	// genMemMoveShort
  3979	CMPQ R8, $0x08
  3980	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
  3981	CMPQ R8, $0x10
  3982	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
  3983	CMPQ R8, $0x20
  3984	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
  3985	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
  3986
  3987emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
  3988	MOVQ (R9), R10
  3989	MOVQ R10, (AX)
  3990	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
  3991
  3992emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
  3993	MOVQ (R9), R10
  3994	MOVQ -8(R9)(R8*1), R9
  3995	MOVQ R10, (AX)
  3996	MOVQ R9, -8(AX)(R8*1)
  3997	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
  3998
  3999emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
  4000	MOVOU (R9), X0
  4001	MOVOU -16(R9)(R8*1), X1
  4002	MOVOU X0, (AX)
  4003	MOVOU X1, -16(AX)(R8*1)
  4004	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm10B
  4005
  4006emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
  4007	MOVOU (R9), X0
  4008	MOVOU 16(R9), X1
  4009	MOVOU -32(R9)(R8*1), X2
  4010	MOVOU -16(R9)(R8*1), X3
  4011	MOVOU X0, (AX)
  4012	MOVOU X1, 16(AX)
  4013	MOVOU X2, -32(AX)(R8*1)
  4014	MOVOU X3, -16(AX)(R8*1)
  4015
  4016memmove_end_copy_repeat_emit_encodeBlockAsm10B:
  4017	MOVQ BX, AX
  4018	JMP  emit_literal_done_repeat_emit_encodeBlockAsm10B
  4019
  4020memmove_long_repeat_emit_encodeBlockAsm10B:
  4021	LEAQ (AX)(R8*1), BX
  4022
  4023	// genMemMoveLong
  4024	MOVOU (R9), X0
  4025	MOVOU 16(R9), X1
  4026	MOVOU -32(R9)(R8*1), X2
  4027	MOVOU -16(R9)(R8*1), X3
  4028	MOVQ  R8, R11
  4029	SHRQ  $0x05, R11
  4030	MOVQ  AX, R10
  4031	ANDL  $0x0000001f, R10
  4032	MOVQ  $0x00000040, R12
  4033	SUBQ  R10, R12
  4034	DECQ  R11
  4035	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
  4036	LEAQ  -32(R9)(R12*1), R10
  4037	LEAQ  -32(AX)(R12*1), R13
  4038
  4039emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
  4040	MOVOU (R10), X4
  4041	MOVOU 16(R10), X5
  4042	MOVOA X4, (R13)
  4043	MOVOA X5, 16(R13)
  4044	ADDQ  $0x20, R13
  4045	ADDQ  $0x20, R10
  4046	ADDQ  $0x20, R12
  4047	DECQ  R11
  4048	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
  4049
  4050emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
  4051	MOVOU -32(R9)(R12*1), X4
  4052	MOVOU -16(R9)(R12*1), X5
  4053	MOVOA X4, -32(AX)(R12*1)
  4054	MOVOA X5, -16(AX)(R12*1)
  4055	ADDQ  $0x20, R12
  4056	CMPQ  R8, R12
  4057	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
  4058	MOVOU X0, (AX)
  4059	MOVOU X1, 16(AX)
  4060	MOVOU X2, -32(AX)(R8*1)
  4061	MOVOU X3, -16(AX)(R8*1)
  4062	MOVQ  BX, AX
  4063
  4064emit_literal_done_repeat_emit_encodeBlockAsm10B:
  4065	ADDL $0x05, CX
  4066	MOVL CX, BX
  4067	SUBL 16(SP), BX
  4068	MOVQ src_len+32(FP), R8
  4069	SUBL CX, R8
  4070	LEAQ (DX)(CX*1), R9
  4071	LEAQ (DX)(BX*1), BX
  4072
  4073	// matchLen
  4074	XORL R11, R11
  4075
  4076matchlen_loopback_16_repeat_extend_encodeBlockAsm10B:
  4077	CMPL R8, $0x10
  4078	JB   matchlen_match8_repeat_extend_encodeBlockAsm10B
  4079	MOVQ (R9)(R11*1), R10
  4080	MOVQ 8(R9)(R11*1), R12
  4081	XORQ (BX)(R11*1), R10
  4082	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
  4083	XORQ 8(BX)(R11*1), R12
  4084	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm10B
  4085	LEAL -16(R8), R8
  4086	LEAL 16(R11), R11
  4087	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm10B
  4088
  4089matchlen_bsf_16repeat_extend_encodeBlockAsm10B:
  4090#ifdef GOAMD64_v3
  4091	TZCNTQ R12, R12
  4092
  4093#else
  4094	BSFQ R12, R12
  4095
  4096#endif
  4097	SARQ $0x03, R12
  4098	LEAL 8(R11)(R12*1), R11
  4099	JMP  repeat_extend_forward_end_encodeBlockAsm10B
  4100
  4101matchlen_match8_repeat_extend_encodeBlockAsm10B:
  4102	CMPL R8, $0x08
  4103	JB   matchlen_match4_repeat_extend_encodeBlockAsm10B
  4104	MOVQ (R9)(R11*1), R10
  4105	XORQ (BX)(R11*1), R10
  4106	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
  4107	LEAL -8(R8), R8
  4108	LEAL 8(R11), R11
  4109	JMP  matchlen_match4_repeat_extend_encodeBlockAsm10B
  4110
  4111matchlen_bsf_8_repeat_extend_encodeBlockAsm10B:
  4112#ifdef GOAMD64_v3
  4113	TZCNTQ R10, R10
  4114
  4115#else
  4116	BSFQ R10, R10
  4117
  4118#endif
  4119	SARQ $0x03, R10
  4120	LEAL (R11)(R10*1), R11
  4121	JMP  repeat_extend_forward_end_encodeBlockAsm10B
  4122
  4123matchlen_match4_repeat_extend_encodeBlockAsm10B:
  4124	CMPL R8, $0x04
  4125	JB   matchlen_match2_repeat_extend_encodeBlockAsm10B
  4126	MOVL (R9)(R11*1), R10
  4127	CMPL (BX)(R11*1), R10
  4128	JNE  matchlen_match2_repeat_extend_encodeBlockAsm10B
  4129	LEAL -4(R8), R8
  4130	LEAL 4(R11), R11
  4131
  4132matchlen_match2_repeat_extend_encodeBlockAsm10B:
  4133	CMPL R8, $0x01
  4134	JE   matchlen_match1_repeat_extend_encodeBlockAsm10B
  4135	JB   repeat_extend_forward_end_encodeBlockAsm10B
  4136	MOVW (R9)(R11*1), R10
  4137	CMPW (BX)(R11*1), R10
  4138	JNE  matchlen_match1_repeat_extend_encodeBlockAsm10B
  4139	LEAL 2(R11), R11
  4140	SUBL $0x02, R8
  4141	JZ   repeat_extend_forward_end_encodeBlockAsm10B
  4142
  4143matchlen_match1_repeat_extend_encodeBlockAsm10B:
  4144	MOVB (R9)(R11*1), R10
  4145	CMPB (BX)(R11*1), R10
  4146	JNE  repeat_extend_forward_end_encodeBlockAsm10B
  4147	LEAL 1(R11), R11
  4148
  4149repeat_extend_forward_end_encodeBlockAsm10B:
  4150	ADDL  R11, CX
  4151	MOVL  CX, BX
  4152	SUBL  SI, BX
  4153	MOVL  16(SP), SI
  4154	TESTL DI, DI
  4155	JZ    repeat_as_copy_encodeBlockAsm10B
  4156
  4157	// emitRepeat
  4158	MOVL BX, DI
  4159	LEAL -4(BX), BX
  4160	CMPL DI, $0x08
  4161	JBE  repeat_two_match_repeat_encodeBlockAsm10B
  4162	CMPL DI, $0x0c
  4163	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
  4164	CMPL SI, $0x00000800
  4165	JB   repeat_two_offset_match_repeat_encodeBlockAsm10B
  4166
  4167cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
  4168	CMPL BX, $0x00000104
  4169	JB   repeat_three_match_repeat_encodeBlockAsm10B
  4170	LEAL -256(BX), BX
  4171	MOVW $0x0019, (AX)
  4172	MOVW BX, 2(AX)
  4173	ADDQ $0x04, AX
  4174	JMP  repeat_end_emit_encodeBlockAsm10B
  4175
  4176repeat_three_match_repeat_encodeBlockAsm10B:
  4177	LEAL -4(BX), BX
  4178	MOVW $0x0015, (AX)
  4179	MOVB BL, 2(AX)
  4180	ADDQ $0x03, AX
  4181	JMP  repeat_end_emit_encodeBlockAsm10B
  4182
  4183repeat_two_match_repeat_encodeBlockAsm10B:
  4184	SHLL $0x02, BX
  4185	ORL  $0x01, BX
  4186	MOVW BX, (AX)
  4187	ADDQ $0x02, AX
  4188	JMP  repeat_end_emit_encodeBlockAsm10B
  4189
  4190repeat_two_offset_match_repeat_encodeBlockAsm10B:
  4191	XORQ DI, DI
  4192	LEAL 1(DI)(BX*4), BX
  4193	MOVB SI, 1(AX)
  4194	SARL $0x08, SI
  4195	SHLL $0x05, SI
  4196	ORL  SI, BX
  4197	MOVB BL, (AX)
  4198	ADDQ $0x02, AX
  4199	JMP  repeat_end_emit_encodeBlockAsm10B
  4200
  4201repeat_as_copy_encodeBlockAsm10B:
  4202	// emitCopy
  4203	CMPL BX, $0x40
  4204	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
  4205	CMPL SI, $0x00000800
  4206	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm10B
  4207	MOVL $0x00000001, DI
  4208	LEAL 16(DI), DI
  4209	MOVB SI, 1(AX)
  4210	SHRL $0x08, SI
  4211	SHLL $0x05, SI
  4212	ORL  SI, DI
  4213	MOVB DI, (AX)
  4214	ADDQ $0x02, AX
  4215	SUBL $0x08, BX
  4216
  4217	// emitRepeat
  4218	LEAL -4(BX), BX
  4219	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  4220	MOVL BX, DI
  4221	LEAL -4(BX), BX
  4222	CMPL DI, $0x08
  4223	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  4224	CMPL DI, $0x0c
  4225	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  4226	CMPL SI, $0x00000800
  4227	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  4228
  4229cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
  4230	CMPL BX, $0x00000104
  4231	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  4232	LEAL -256(BX), BX
  4233	MOVW $0x0019, (AX)
  4234	MOVW BX, 2(AX)
  4235	ADDQ $0x04, AX
  4236	JMP  repeat_end_emit_encodeBlockAsm10B
  4237
  4238repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
  4239	LEAL -4(BX), BX
  4240	MOVW $0x0015, (AX)
  4241	MOVB BL, 2(AX)
  4242	ADDQ $0x03, AX
  4243	JMP  repeat_end_emit_encodeBlockAsm10B
  4244
  4245repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
  4246	SHLL $0x02, BX
  4247	ORL  $0x01, BX
  4248	MOVW BX, (AX)
  4249	ADDQ $0x02, AX
  4250	JMP  repeat_end_emit_encodeBlockAsm10B
  4251
  4252repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
  4253	XORQ DI, DI
  4254	LEAL 1(DI)(BX*4), BX
  4255	MOVB SI, 1(AX)
  4256	SARL $0x08, SI
  4257	SHLL $0x05, SI
  4258	ORL  SI, BX
  4259	MOVB BL, (AX)
  4260	ADDQ $0x02, AX
  4261	JMP  repeat_end_emit_encodeBlockAsm10B
  4262
  4263long_offset_short_repeat_as_copy_encodeBlockAsm10B:
  4264	MOVB $0xee, (AX)
  4265	MOVW SI, 1(AX)
  4266	LEAL -60(BX), BX
  4267	ADDQ $0x03, AX
  4268
  4269	// emitRepeat
  4270	MOVL BX, DI
  4271	LEAL -4(BX), BX
  4272	CMPL DI, $0x08
  4273	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  4274	CMPL DI, $0x0c
  4275	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  4276	CMPL SI, $0x00000800
  4277	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  4278
  4279cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  4280	CMPL BX, $0x00000104
  4281	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  4282	LEAL -256(BX), BX
  4283	MOVW $0x0019, (AX)
  4284	MOVW BX, 2(AX)
  4285	ADDQ $0x04, AX
  4286	JMP  repeat_end_emit_encodeBlockAsm10B
  4287
  4288repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  4289	LEAL -4(BX), BX
  4290	MOVW $0x0015, (AX)
  4291	MOVB BL, 2(AX)
  4292	ADDQ $0x03, AX
  4293	JMP  repeat_end_emit_encodeBlockAsm10B
  4294
  4295repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  4296	SHLL $0x02, BX
  4297	ORL  $0x01, BX
  4298	MOVW BX, (AX)
  4299	ADDQ $0x02, AX
  4300	JMP  repeat_end_emit_encodeBlockAsm10B
  4301
  4302repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  4303	XORQ DI, DI
  4304	LEAL 1(DI)(BX*4), BX
  4305	MOVB SI, 1(AX)
  4306	SARL $0x08, SI
  4307	SHLL $0x05, SI
  4308	ORL  SI, BX
  4309	MOVB BL, (AX)
  4310	ADDQ $0x02, AX
  4311	JMP  repeat_end_emit_encodeBlockAsm10B
  4312
  4313two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
  4314	MOVL BX, DI
  4315	SHLL $0x02, DI
  4316	CMPL BX, $0x0c
  4317	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
  4318	CMPL SI, $0x00000800
  4319	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
  4320	LEAL -15(DI), DI
  4321	MOVB SI, 1(AX)
  4322	SHRL $0x08, SI
  4323	SHLL $0x05, SI
  4324	ORL  SI, DI
  4325	MOVB DI, (AX)
  4326	ADDQ $0x02, AX
  4327	JMP  repeat_end_emit_encodeBlockAsm10B
  4328
  4329emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
  4330	LEAL -2(DI), DI
  4331	MOVB DI, (AX)
  4332	MOVW SI, 1(AX)
  4333	ADDQ $0x03, AX
  4334
  4335repeat_end_emit_encodeBlockAsm10B:
  4336	MOVL CX, 12(SP)
  4337	JMP  search_loop_encodeBlockAsm10B
  4338
  4339no_repeat_found_encodeBlockAsm10B:
  4340	CMPL (DX)(BX*1), SI
  4341	JEQ  candidate_match_encodeBlockAsm10B
  4342	SHRQ $0x08, SI
  4343	MOVL 24(SP)(R9*4), BX
  4344	LEAL 2(CX), R8
  4345	CMPL (DX)(DI*1), SI
  4346	JEQ  candidate2_match_encodeBlockAsm10B
  4347	MOVL R8, 24(SP)(R9*4)
  4348	SHRQ $0x08, SI
  4349	CMPL (DX)(BX*1), SI
  4350	JEQ  candidate3_match_encodeBlockAsm10B
  4351	MOVL 20(SP), CX
  4352	JMP  search_loop_encodeBlockAsm10B
  4353
  4354candidate3_match_encodeBlockAsm10B:
  4355	ADDL $0x02, CX
  4356	JMP  candidate_match_encodeBlockAsm10B
  4357
  4358candidate2_match_encodeBlockAsm10B:
  4359	MOVL R8, 24(SP)(R9*4)
  4360	INCL CX
  4361	MOVL DI, BX
  4362
  4363candidate_match_encodeBlockAsm10B:
  4364	MOVL  12(SP), SI
  4365	TESTL BX, BX
  4366	JZ    match_extend_back_end_encodeBlockAsm10B
  4367
  4368match_extend_back_loop_encodeBlockAsm10B:
  4369	CMPL CX, SI
  4370	JBE  match_extend_back_end_encodeBlockAsm10B
  4371	MOVB -1(DX)(BX*1), DI
  4372	MOVB -1(DX)(CX*1), R8
  4373	CMPB DI, R8
  4374	JNE  match_extend_back_end_encodeBlockAsm10B
  4375	LEAL -1(CX), CX
  4376	DECL BX
  4377	JZ   match_extend_back_end_encodeBlockAsm10B
  4378	JMP  match_extend_back_loop_encodeBlockAsm10B
  4379
  4380match_extend_back_end_encodeBlockAsm10B:
  4381	MOVL CX, SI
  4382	SUBL 12(SP), SI
  4383	LEAQ 3(AX)(SI*1), SI
  4384	CMPQ SI, (SP)
  4385	JB   match_dst_size_check_encodeBlockAsm10B
  4386	MOVQ $0x00000000, ret+48(FP)
  4387	RET
  4388
  4389match_dst_size_check_encodeBlockAsm10B:
  4390	MOVL CX, SI
  4391	MOVL 12(SP), DI
  4392	CMPL DI, SI
  4393	JEQ  emit_literal_done_match_emit_encodeBlockAsm10B
  4394	MOVL SI, R8
  4395	MOVL SI, 12(SP)
  4396	LEAQ (DX)(DI*1), SI
  4397	SUBL DI, R8
  4398	LEAL -1(R8), DI
  4399	CMPL DI, $0x3c
  4400	JB   one_byte_match_emit_encodeBlockAsm10B
  4401	CMPL DI, $0x00000100
  4402	JB   two_bytes_match_emit_encodeBlockAsm10B
  4403	JB   three_bytes_match_emit_encodeBlockAsm10B
  4404
  4405three_bytes_match_emit_encodeBlockAsm10B:
  4406	MOVB $0xf4, (AX)
  4407	MOVW DI, 1(AX)
  4408	ADDQ $0x03, AX
  4409	JMP  memmove_long_match_emit_encodeBlockAsm10B
  4410
  4411two_bytes_match_emit_encodeBlockAsm10B:
  4412	MOVB $0xf0, (AX)
  4413	MOVB DI, 1(AX)
  4414	ADDQ $0x02, AX
  4415	CMPL DI, $0x40
  4416	JB   memmove_match_emit_encodeBlockAsm10B
  4417	JMP  memmove_long_match_emit_encodeBlockAsm10B
  4418
  4419one_byte_match_emit_encodeBlockAsm10B:
  4420	SHLB $0x02, DI
  4421	MOVB DI, (AX)
  4422	ADDQ $0x01, AX
  4423
  4424memmove_match_emit_encodeBlockAsm10B:
  4425	LEAQ (AX)(R8*1), DI
  4426
  4427	// genMemMoveShort
  4428	CMPQ R8, $0x08
  4429	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
  4430	CMPQ R8, $0x10
  4431	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
  4432	CMPQ R8, $0x20
  4433	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
  4434	JMP  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
  4435
  4436emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
  4437	MOVQ (SI), R9
  4438	MOVQ R9, (AX)
  4439	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
  4440
  4441emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
  4442	MOVQ (SI), R9
  4443	MOVQ -8(SI)(R8*1), SI
  4444	MOVQ R9, (AX)
  4445	MOVQ SI, -8(AX)(R8*1)
  4446	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
  4447
  4448emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
  4449	MOVOU (SI), X0
  4450	MOVOU -16(SI)(R8*1), X1
  4451	MOVOU X0, (AX)
  4452	MOVOU X1, -16(AX)(R8*1)
  4453	JMP   memmove_end_copy_match_emit_encodeBlockAsm10B
  4454
  4455emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
  4456	MOVOU (SI), X0
  4457	MOVOU 16(SI), X1
  4458	MOVOU -32(SI)(R8*1), X2
  4459	MOVOU -16(SI)(R8*1), X3
  4460	MOVOU X0, (AX)
  4461	MOVOU X1, 16(AX)
  4462	MOVOU X2, -32(AX)(R8*1)
  4463	MOVOU X3, -16(AX)(R8*1)
  4464
  4465memmove_end_copy_match_emit_encodeBlockAsm10B:
  4466	MOVQ DI, AX
  4467	JMP  emit_literal_done_match_emit_encodeBlockAsm10B
  4468
  4469memmove_long_match_emit_encodeBlockAsm10B:
  4470	LEAQ (AX)(R8*1), DI
  4471
  4472	// genMemMoveLong
  4473	MOVOU (SI), X0
  4474	MOVOU 16(SI), X1
  4475	MOVOU -32(SI)(R8*1), X2
  4476	MOVOU -16(SI)(R8*1), X3
  4477	MOVQ  R8, R10
  4478	SHRQ  $0x05, R10
  4479	MOVQ  AX, R9
  4480	ANDL  $0x0000001f, R9
  4481	MOVQ  $0x00000040, R11
  4482	SUBQ  R9, R11
  4483	DECQ  R10
  4484	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
  4485	LEAQ  -32(SI)(R11*1), R9
  4486	LEAQ  -32(AX)(R11*1), R12
  4487
  4488emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
  4489	MOVOU (R9), X4
  4490	MOVOU 16(R9), X5
  4491	MOVOA X4, (R12)
  4492	MOVOA X5, 16(R12)
  4493	ADDQ  $0x20, R12
  4494	ADDQ  $0x20, R9
  4495	ADDQ  $0x20, R11
  4496	DECQ  R10
  4497	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
  4498
  4499emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
  4500	MOVOU -32(SI)(R11*1), X4
  4501	MOVOU -16(SI)(R11*1), X5
  4502	MOVOA X4, -32(AX)(R11*1)
  4503	MOVOA X5, -16(AX)(R11*1)
  4504	ADDQ  $0x20, R11
  4505	CMPQ  R8, R11
  4506	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
  4507	MOVOU X0, (AX)
  4508	MOVOU X1, 16(AX)
  4509	MOVOU X2, -32(AX)(R8*1)
  4510	MOVOU X3, -16(AX)(R8*1)
  4511	MOVQ  DI, AX
  4512
  4513emit_literal_done_match_emit_encodeBlockAsm10B:
  4514match_nolit_loop_encodeBlockAsm10B:
  4515	MOVL CX, SI
  4516	SUBL BX, SI
  4517	MOVL SI, 16(SP)
  4518	ADDL $0x04, CX
  4519	ADDL $0x04, BX
  4520	MOVQ src_len+32(FP), SI
  4521	SUBL CX, SI
  4522	LEAQ (DX)(CX*1), DI
  4523	LEAQ (DX)(BX*1), BX
  4524
  4525	// matchLen
  4526	XORL R9, R9
  4527
  4528matchlen_loopback_16_match_nolit_encodeBlockAsm10B:
  4529	CMPL SI, $0x10
  4530	JB   matchlen_match8_match_nolit_encodeBlockAsm10B
  4531	MOVQ (DI)(R9*1), R8
  4532	MOVQ 8(DI)(R9*1), R10
  4533	XORQ (BX)(R9*1), R8
  4534	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm10B
  4535	XORQ 8(BX)(R9*1), R10
  4536	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm10B
  4537	LEAL -16(SI), SI
  4538	LEAL 16(R9), R9
  4539	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm10B
  4540
  4541matchlen_bsf_16match_nolit_encodeBlockAsm10B:
  4542#ifdef GOAMD64_v3
  4543	TZCNTQ R10, R10
  4544
  4545#else
  4546	BSFQ R10, R10
  4547
  4548#endif
  4549	SARQ $0x03, R10
  4550	LEAL 8(R9)(R10*1), R9
  4551	JMP  match_nolit_end_encodeBlockAsm10B
  4552
  4553matchlen_match8_match_nolit_encodeBlockAsm10B:
  4554	CMPL SI, $0x08
  4555	JB   matchlen_match4_match_nolit_encodeBlockAsm10B
  4556	MOVQ (DI)(R9*1), R8
  4557	XORQ (BX)(R9*1), R8
  4558	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm10B
  4559	LEAL -8(SI), SI
  4560	LEAL 8(R9), R9
  4561	JMP  matchlen_match4_match_nolit_encodeBlockAsm10B
  4562
  4563matchlen_bsf_8_match_nolit_encodeBlockAsm10B:
  4564#ifdef GOAMD64_v3
  4565	TZCNTQ R8, R8
  4566
  4567#else
  4568	BSFQ R8, R8
  4569
  4570#endif
  4571	SARQ $0x03, R8
  4572	LEAL (R9)(R8*1), R9
  4573	JMP  match_nolit_end_encodeBlockAsm10B
  4574
  4575matchlen_match4_match_nolit_encodeBlockAsm10B:
  4576	CMPL SI, $0x04
  4577	JB   matchlen_match2_match_nolit_encodeBlockAsm10B
  4578	MOVL (DI)(R9*1), R8
  4579	CMPL (BX)(R9*1), R8
  4580	JNE  matchlen_match2_match_nolit_encodeBlockAsm10B
  4581	LEAL -4(SI), SI
  4582	LEAL 4(R9), R9
  4583
  4584matchlen_match2_match_nolit_encodeBlockAsm10B:
  4585	CMPL SI, $0x01
  4586	JE   matchlen_match1_match_nolit_encodeBlockAsm10B
  4587	JB   match_nolit_end_encodeBlockAsm10B
  4588	MOVW (DI)(R9*1), R8
  4589	CMPW (BX)(R9*1), R8
  4590	JNE  matchlen_match1_match_nolit_encodeBlockAsm10B
  4591	LEAL 2(R9), R9
  4592	SUBL $0x02, SI
  4593	JZ   match_nolit_end_encodeBlockAsm10B
  4594
  4595matchlen_match1_match_nolit_encodeBlockAsm10B:
  4596	MOVB (DI)(R9*1), R8
  4597	CMPB (BX)(R9*1), R8
  4598	JNE  match_nolit_end_encodeBlockAsm10B
  4599	LEAL 1(R9), R9
  4600
  4601match_nolit_end_encodeBlockAsm10B:
  4602	ADDL R9, CX
  4603	MOVL 16(SP), BX
  4604	ADDL $0x04, R9
  4605	MOVL CX, 12(SP)
  4606
  4607	// emitCopy
  4608	CMPL R9, $0x40
  4609	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm10B
  4610	CMPL BX, $0x00000800
  4611	JAE  long_offset_short_match_nolit_encodeBlockAsm10B
  4612	MOVL $0x00000001, SI
  4613	LEAL 16(SI), SI
  4614	MOVB BL, 1(AX)
  4615	SHRL $0x08, BX
  4616	SHLL $0x05, BX
  4617	ORL  BX, SI
  4618	MOVB SI, (AX)
  4619	ADDQ $0x02, AX
  4620	SUBL $0x08, R9
  4621
  4622	// emitRepeat
  4623	LEAL -4(R9), R9
  4624	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4625	MOVL R9, SI
  4626	LEAL -4(R9), R9
  4627	CMPL SI, $0x08
  4628	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4629	CMPL SI, $0x0c
  4630	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4631	CMPL BX, $0x00000800
  4632	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4633
  4634cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
  4635	CMPL R9, $0x00000104
  4636	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4637	LEAL -256(R9), R9
  4638	MOVW $0x0019, (AX)
  4639	MOVW R9, 2(AX)
  4640	ADDQ $0x04, AX
  4641	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
  4642
  4643repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
  4644	LEAL -4(R9), R9
  4645	MOVW $0x0015, (AX)
  4646	MOVB R9, 2(AX)
  4647	ADDQ $0x03, AX
  4648	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
  4649
  4650repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
  4651	SHLL $0x02, R9
  4652	ORL  $0x01, R9
  4653	MOVW R9, (AX)
  4654	ADDQ $0x02, AX
  4655	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
  4656
  4657repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
  4658	XORQ SI, SI
  4659	LEAL 1(SI)(R9*4), R9
  4660	MOVB BL, 1(AX)
  4661	SARL $0x08, BX
  4662	SHLL $0x05, BX
  4663	ORL  BX, R9
  4664	MOVB R9, (AX)
  4665	ADDQ $0x02, AX
  4666	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
  4667
  4668long_offset_short_match_nolit_encodeBlockAsm10B:
  4669	MOVB $0xee, (AX)
  4670	MOVW BX, 1(AX)
  4671	LEAL -60(R9), R9
  4672	ADDQ $0x03, AX
  4673
  4674	// emitRepeat
  4675	MOVL R9, SI
  4676	LEAL -4(R9), R9
  4677	CMPL SI, $0x08
  4678	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
  4679	CMPL SI, $0x0c
  4680	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
  4681	CMPL BX, $0x00000800
  4682	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
  4683
  4684cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
  4685	CMPL R9, $0x00000104
  4686	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
  4687	LEAL -256(R9), R9
  4688	MOVW $0x0019, (AX)
  4689	MOVW R9, 2(AX)
  4690	ADDQ $0x04, AX
  4691	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
  4692
  4693repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
  4694	LEAL -4(R9), R9
  4695	MOVW $0x0015, (AX)
  4696	MOVB R9, 2(AX)
  4697	ADDQ $0x03, AX
  4698	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
  4699
  4700repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
  4701	SHLL $0x02, R9
  4702	ORL  $0x01, R9
  4703	MOVW R9, (AX)
  4704	ADDQ $0x02, AX
  4705	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
  4706
  4707repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
  4708	XORQ SI, SI
  4709	LEAL 1(SI)(R9*4), R9
  4710	MOVB BL, 1(AX)
  4711	SARL $0x08, BX
  4712	SHLL $0x05, BX
  4713	ORL  BX, R9
  4714	MOVB R9, (AX)
  4715	ADDQ $0x02, AX
  4716	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
  4717
  4718two_byte_offset_short_match_nolit_encodeBlockAsm10B:
  4719	MOVL R9, SI
  4720	SHLL $0x02, SI
  4721	CMPL R9, $0x0c
  4722	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
  4723	CMPL BX, $0x00000800
  4724	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
  4725	LEAL -15(SI), SI
  4726	MOVB BL, 1(AX)
  4727	SHRL $0x08, BX
  4728	SHLL $0x05, BX
  4729	ORL  BX, SI
  4730	MOVB SI, (AX)
  4731	ADDQ $0x02, AX
  4732	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
  4733
  4734emit_copy_three_match_nolit_encodeBlockAsm10B:
  4735	LEAL -2(SI), SI
  4736	MOVB SI, (AX)
  4737	MOVW BX, 1(AX)
  4738	ADDQ $0x03, AX
  4739
  4740match_nolit_emitcopy_end_encodeBlockAsm10B:
  4741	CMPL CX, 8(SP)
  4742	JAE  emit_remainder_encodeBlockAsm10B
  4743	MOVQ -2(DX)(CX*1), SI
  4744	CMPQ AX, (SP)
  4745	JB   match_nolit_dst_ok_encodeBlockAsm10B
  4746	MOVQ $0x00000000, ret+48(FP)
  4747	RET
  4748
  4749match_nolit_dst_ok_encodeBlockAsm10B:
  4750	MOVQ  $0x9e3779b1, R8
  4751	MOVQ  SI, DI
  4752	SHRQ  $0x10, SI
  4753	MOVQ  SI, BX
  4754	SHLQ  $0x20, DI
  4755	IMULQ R8, DI
  4756	SHRQ  $0x36, DI
  4757	SHLQ  $0x20, BX
  4758	IMULQ R8, BX
  4759	SHRQ  $0x36, BX
  4760	LEAL  -2(CX), R8
  4761	LEAQ  24(SP)(BX*4), R9
  4762	MOVL  (R9), BX
  4763	MOVL  R8, 24(SP)(DI*4)
  4764	MOVL  CX, (R9)
  4765	CMPL  (DX)(BX*1), SI
  4766	JEQ   match_nolit_loop_encodeBlockAsm10B
  4767	INCL  CX
  4768	JMP   search_loop_encodeBlockAsm10B
  4769
  4770emit_remainder_encodeBlockAsm10B:
  4771	MOVQ src_len+32(FP), CX
  4772	SUBL 12(SP), CX
  4773	LEAQ 3(AX)(CX*1), CX
  4774	CMPQ CX, (SP)
  4775	JB   emit_remainder_ok_encodeBlockAsm10B
  4776	MOVQ $0x00000000, ret+48(FP)
  4777	RET
  4778
  4779emit_remainder_ok_encodeBlockAsm10B:
  4780	MOVQ src_len+32(FP), CX
  4781	MOVL 12(SP), BX
  4782	CMPL BX, CX
  4783	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm10B
  4784	MOVL CX, SI
  4785	MOVL CX, 12(SP)
  4786	LEAQ (DX)(BX*1), CX
  4787	SUBL BX, SI
  4788	LEAL -1(SI), DX
  4789	CMPL DX, $0x3c
  4790	JB   one_byte_emit_remainder_encodeBlockAsm10B
  4791	CMPL DX, $0x00000100
  4792	JB   two_bytes_emit_remainder_encodeBlockAsm10B
  4793	JB   three_bytes_emit_remainder_encodeBlockAsm10B
  4794
  4795three_bytes_emit_remainder_encodeBlockAsm10B:
  4796	MOVB $0xf4, (AX)
  4797	MOVW DX, 1(AX)
  4798	ADDQ $0x03, AX
  4799	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
  4800
  4801two_bytes_emit_remainder_encodeBlockAsm10B:
  4802	MOVB $0xf0, (AX)
  4803	MOVB DL, 1(AX)
  4804	ADDQ $0x02, AX
  4805	CMPL DX, $0x40
  4806	JB   memmove_emit_remainder_encodeBlockAsm10B
  4807	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
  4808
  4809one_byte_emit_remainder_encodeBlockAsm10B:
  4810	SHLB $0x02, DL
  4811	MOVB DL, (AX)
  4812	ADDQ $0x01, AX
  4813
  4814memmove_emit_remainder_encodeBlockAsm10B:
  4815	LEAQ (AX)(SI*1), DX
  4816	MOVL SI, BX
  4817
  4818	// genMemMoveShort
  4819	CMPQ BX, $0x03
  4820	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
  4821	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
  4822	CMPQ BX, $0x08
  4823	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7
  4824	CMPQ BX, $0x10
  4825	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
  4826	CMPQ BX, $0x20
  4827	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
  4828	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
  4829
  4830emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
  4831	MOVB (CX), SI
  4832	MOVB -1(CX)(BX*1), CL
  4833	MOVB SI, (AX)
  4834	MOVB CL, -1(AX)(BX*1)
  4835	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
  4836
  4837emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
  4838	MOVW (CX), SI
  4839	MOVB 2(CX), CL
  4840	MOVW SI, (AX)
  4841	MOVB CL, 2(AX)
  4842	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
  4843
  4844emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
  4845	MOVL (CX), SI
  4846	MOVL -4(CX)(BX*1), CX
  4847	MOVL SI, (AX)
  4848	MOVL CX, -4(AX)(BX*1)
  4849	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
  4850
  4851emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
  4852	MOVQ (CX), SI
  4853	MOVQ -8(CX)(BX*1), CX
  4854	MOVQ SI, (AX)
  4855	MOVQ CX, -8(AX)(BX*1)
  4856	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
  4857
  4858emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
  4859	MOVOU (CX), X0
  4860	MOVOU -16(CX)(BX*1), X1
  4861	MOVOU X0, (AX)
  4862	MOVOU X1, -16(AX)(BX*1)
  4863	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm10B
  4864
  4865emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
  4866	MOVOU (CX), X0
  4867	MOVOU 16(CX), X1
  4868	MOVOU -32(CX)(BX*1), X2
  4869	MOVOU -16(CX)(BX*1), X3
  4870	MOVOU X0, (AX)
  4871	MOVOU X1, 16(AX)
  4872	MOVOU X2, -32(AX)(BX*1)
  4873	MOVOU X3, -16(AX)(BX*1)
  4874
  4875memmove_end_copy_emit_remainder_encodeBlockAsm10B:
  4876	MOVQ DX, AX
  4877	JMP  emit_literal_done_emit_remainder_encodeBlockAsm10B
  4878
  4879memmove_long_emit_remainder_encodeBlockAsm10B:
  4880	LEAQ (AX)(SI*1), DX
  4881	MOVL SI, BX
  4882
  4883	// genMemMoveLong
  4884	MOVOU (CX), X0
  4885	MOVOU 16(CX), X1
  4886	MOVOU -32(CX)(BX*1), X2
  4887	MOVOU -16(CX)(BX*1), X3
  4888	MOVQ  BX, DI
  4889	SHRQ  $0x05, DI
  4890	MOVQ  AX, SI
  4891	ANDL  $0x0000001f, SI
  4892	MOVQ  $0x00000040, R8
  4893	SUBQ  SI, R8
  4894	DECQ  DI
  4895	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
  4896	LEAQ  -32(CX)(R8*1), SI
  4897	LEAQ  -32(AX)(R8*1), R9
  4898
  4899emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
  4900	MOVOU (SI), X4
  4901	MOVOU 16(SI), X5
  4902	MOVOA X4, (R9)
  4903	MOVOA X5, 16(R9)
  4904	ADDQ  $0x20, R9
  4905	ADDQ  $0x20, SI
  4906	ADDQ  $0x20, R8
  4907	DECQ  DI
  4908	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
  4909
  4910emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
  4911	MOVOU -32(CX)(R8*1), X4
  4912	MOVOU -16(CX)(R8*1), X5
  4913	MOVOA X4, -32(AX)(R8*1)
  4914	MOVOA X5, -16(AX)(R8*1)
  4915	ADDQ  $0x20, R8
  4916	CMPQ  BX, R8
  4917	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
  4918	MOVOU X0, (AX)
  4919	MOVOU X1, 16(AX)
  4920	MOVOU X2, -32(AX)(BX*1)
  4921	MOVOU X3, -16(AX)(BX*1)
  4922	MOVQ  DX, AX
  4923
  4924emit_literal_done_emit_remainder_encodeBlockAsm10B:
  4925	MOVQ dst_base+0(FP), CX
  4926	SUBQ CX, AX
  4927	MOVQ AX, ret+48(FP)
  4928	RET
  4929
  4930// func encodeBlockAsm8B(dst []byte, src []byte) int
  4931// Requires: BMI, SSE2
  4932TEXT ·encodeBlockAsm8B(SB), $1048-56
  4933	MOVQ dst_base+0(FP), AX
  4934	MOVQ $0x00000008, CX
  4935	LEAQ 24(SP), DX
  4936	PXOR X0, X0
  4937
  4938zero_loop_encodeBlockAsm8B:
  4939	MOVOU X0, (DX)
  4940	MOVOU X0, 16(DX)
  4941	MOVOU X0, 32(DX)
  4942	MOVOU X0, 48(DX)
  4943	MOVOU X0, 64(DX)
  4944	MOVOU X0, 80(DX)
  4945	MOVOU X0, 96(DX)
  4946	MOVOU X0, 112(DX)
  4947	ADDQ  $0x80, DX
  4948	DECQ  CX
  4949	JNZ   zero_loop_encodeBlockAsm8B
  4950	MOVL  $0x00000000, 12(SP)
  4951	MOVQ  src_len+32(FP), CX
  4952	LEAQ  -9(CX), DX
  4953	LEAQ  -8(CX), BX
  4954	MOVL  BX, 8(SP)
  4955	SHRQ  $0x05, CX
  4956	SUBL  CX, DX
  4957	LEAQ  (AX)(DX*1), DX
  4958	MOVQ  DX, (SP)
  4959	MOVL  $0x00000001, CX
  4960	MOVL  CX, 16(SP)
  4961	MOVQ  src_base+24(FP), DX
  4962
  4963search_loop_encodeBlockAsm8B:
  4964	MOVL  CX, BX
  4965	SUBL  12(SP), BX
  4966	SHRL  $0x04, BX
  4967	LEAL  4(CX)(BX*1), BX
  4968	CMPL  BX, 8(SP)
  4969	JAE   emit_remainder_encodeBlockAsm8B
  4970	MOVQ  (DX)(CX*1), SI
  4971	MOVL  BX, 20(SP)
  4972	MOVQ  $0x9e3779b1, R8
  4973	MOVQ  SI, R9
  4974	MOVQ  SI, R10
  4975	SHRQ  $0x08, R10
  4976	SHLQ  $0x20, R9
  4977	IMULQ R8, R9
  4978	SHRQ  $0x38, R9
  4979	SHLQ  $0x20, R10
  4980	IMULQ R8, R10
  4981	SHRQ  $0x38, R10
  4982	MOVL  24(SP)(R9*4), BX
  4983	MOVL  24(SP)(R10*4), DI
  4984	MOVL  CX, 24(SP)(R9*4)
  4985	LEAL  1(CX), R9
  4986	MOVL  R9, 24(SP)(R10*4)
  4987	MOVQ  SI, R9
  4988	SHRQ  $0x10, R9
  4989	SHLQ  $0x20, R9
  4990	IMULQ R8, R9
  4991	SHRQ  $0x38, R9
  4992	MOVL  CX, R8
  4993	SUBL  16(SP), R8
  4994	MOVL  1(DX)(R8*1), R10
  4995	MOVQ  SI, R8
  4996	SHRQ  $0x08, R8
  4997	CMPL  R8, R10
  4998	JNE   no_repeat_found_encodeBlockAsm8B
  4999	LEAL  1(CX), SI
  5000	MOVL  12(SP), DI
  5001	MOVL  SI, BX
  5002	SUBL  16(SP), BX
  5003	JZ    repeat_extend_back_end_encodeBlockAsm8B
  5004
  5005repeat_extend_back_loop_encodeBlockAsm8B:
  5006	CMPL SI, DI
  5007	JBE  repeat_extend_back_end_encodeBlockAsm8B
  5008	MOVB -1(DX)(BX*1), R8
  5009	MOVB -1(DX)(SI*1), R9
  5010	CMPB R8, R9
  5011	JNE  repeat_extend_back_end_encodeBlockAsm8B
  5012	LEAL -1(SI), SI
  5013	DECL BX
  5014	JNZ  repeat_extend_back_loop_encodeBlockAsm8B
  5015
  5016repeat_extend_back_end_encodeBlockAsm8B:
  5017	MOVL SI, BX
  5018	SUBL 12(SP), BX
  5019	LEAQ 3(AX)(BX*1), BX
  5020	CMPQ BX, (SP)
  5021	JB   repeat_dst_size_check_encodeBlockAsm8B
  5022	MOVQ $0x00000000, ret+48(FP)
  5023	RET
  5024
  5025repeat_dst_size_check_encodeBlockAsm8B:
  5026	MOVL 12(SP), BX
  5027	CMPL BX, SI
  5028	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm8B
  5029	MOVL SI, R8
  5030	MOVL SI, 12(SP)
  5031	LEAQ (DX)(BX*1), R9
  5032	SUBL BX, R8
  5033	LEAL -1(R8), BX
  5034	CMPL BX, $0x3c
  5035	JB   one_byte_repeat_emit_encodeBlockAsm8B
  5036	CMPL BX, $0x00000100
  5037	JB   two_bytes_repeat_emit_encodeBlockAsm8B
  5038	JB   three_bytes_repeat_emit_encodeBlockAsm8B
  5039
  5040three_bytes_repeat_emit_encodeBlockAsm8B:
  5041	MOVB $0xf4, (AX)
  5042	MOVW BX, 1(AX)
  5043	ADDQ $0x03, AX
  5044	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
  5045
  5046two_bytes_repeat_emit_encodeBlockAsm8B:
  5047	MOVB $0xf0, (AX)
  5048	MOVB BL, 1(AX)
  5049	ADDQ $0x02, AX
  5050	CMPL BX, $0x40
  5051	JB   memmove_repeat_emit_encodeBlockAsm8B
  5052	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
  5053
  5054one_byte_repeat_emit_encodeBlockAsm8B:
  5055	SHLB $0x02, BL
  5056	MOVB BL, (AX)
  5057	ADDQ $0x01, AX
  5058
  5059memmove_repeat_emit_encodeBlockAsm8B:
  5060	LEAQ (AX)(R8*1), BX
  5061
  5062	// genMemMoveShort
  5063	CMPQ R8, $0x08
  5064	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
  5065	CMPQ R8, $0x10
  5066	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
  5067	CMPQ R8, $0x20
  5068	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
  5069	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
  5070
  5071emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
  5072	MOVQ (R9), R10
  5073	MOVQ R10, (AX)
  5074	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
  5075
  5076emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
  5077	MOVQ (R9), R10
  5078	MOVQ -8(R9)(R8*1), R9
  5079	MOVQ R10, (AX)
  5080	MOVQ R9, -8(AX)(R8*1)
  5081	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
  5082
  5083emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
  5084	MOVOU (R9), X0
  5085	MOVOU -16(R9)(R8*1), X1
  5086	MOVOU X0, (AX)
  5087	MOVOU X1, -16(AX)(R8*1)
  5088	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm8B
  5089
  5090emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
  5091	MOVOU (R9), X0
  5092	MOVOU 16(R9), X1
  5093	MOVOU -32(R9)(R8*1), X2
  5094	MOVOU -16(R9)(R8*1), X3
  5095	MOVOU X0, (AX)
  5096	MOVOU X1, 16(AX)
  5097	MOVOU X2, -32(AX)(R8*1)
  5098	MOVOU X3, -16(AX)(R8*1)
  5099
  5100memmove_end_copy_repeat_emit_encodeBlockAsm8B:
  5101	MOVQ BX, AX
  5102	JMP  emit_literal_done_repeat_emit_encodeBlockAsm8B
  5103
  5104memmove_long_repeat_emit_encodeBlockAsm8B:
  5105	LEAQ (AX)(R8*1), BX
  5106
  5107	// genMemMoveLong
  5108	MOVOU (R9), X0
  5109	MOVOU 16(R9), X1
  5110	MOVOU -32(R9)(R8*1), X2
  5111	MOVOU -16(R9)(R8*1), X3
  5112	MOVQ  R8, R11
  5113	SHRQ  $0x05, R11
  5114	MOVQ  AX, R10
  5115	ANDL  $0x0000001f, R10
  5116	MOVQ  $0x00000040, R12
  5117	SUBQ  R10, R12
  5118	DECQ  R11
  5119	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
  5120	LEAQ  -32(R9)(R12*1), R10
  5121	LEAQ  -32(AX)(R12*1), R13
  5122
  5123emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
  5124	MOVOU (R10), X4
  5125	MOVOU 16(R10), X5
  5126	MOVOA X4, (R13)
  5127	MOVOA X5, 16(R13)
  5128	ADDQ  $0x20, R13
  5129	ADDQ  $0x20, R10
  5130	ADDQ  $0x20, R12
  5131	DECQ  R11
  5132	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
  5133
  5134emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
  5135	MOVOU -32(R9)(R12*1), X4
  5136	MOVOU -16(R9)(R12*1), X5
  5137	MOVOA X4, -32(AX)(R12*1)
  5138	MOVOA X5, -16(AX)(R12*1)
  5139	ADDQ  $0x20, R12
  5140	CMPQ  R8, R12
  5141	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
  5142	MOVOU X0, (AX)
  5143	MOVOU X1, 16(AX)
  5144	MOVOU X2, -32(AX)(R8*1)
  5145	MOVOU X3, -16(AX)(R8*1)
  5146	MOVQ  BX, AX
  5147
  5148emit_literal_done_repeat_emit_encodeBlockAsm8B:
  5149	ADDL $0x05, CX
  5150	MOVL CX, BX
  5151	SUBL 16(SP), BX
  5152	MOVQ src_len+32(FP), R8
  5153	SUBL CX, R8
  5154	LEAQ (DX)(CX*1), R9
  5155	LEAQ (DX)(BX*1), BX
  5156
  5157	// matchLen
  5158	XORL R11, R11
  5159
  5160matchlen_loopback_16_repeat_extend_encodeBlockAsm8B:
  5161	CMPL R8, $0x10
  5162	JB   matchlen_match8_repeat_extend_encodeBlockAsm8B
  5163	MOVQ (R9)(R11*1), R10
  5164	MOVQ 8(R9)(R11*1), R12
  5165	XORQ (BX)(R11*1), R10
  5166	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
  5167	XORQ 8(BX)(R11*1), R12
  5168	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm8B
  5169	LEAL -16(R8), R8
  5170	LEAL 16(R11), R11
  5171	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm8B
  5172
  5173matchlen_bsf_16repeat_extend_encodeBlockAsm8B:
  5174#ifdef GOAMD64_v3
  5175	TZCNTQ R12, R12
  5176
  5177#else
  5178	BSFQ R12, R12
  5179
  5180#endif
  5181	SARQ $0x03, R12
  5182	LEAL 8(R11)(R12*1), R11
  5183	JMP  repeat_extend_forward_end_encodeBlockAsm8B
  5184
  5185matchlen_match8_repeat_extend_encodeBlockAsm8B:
  5186	CMPL R8, $0x08
  5187	JB   matchlen_match4_repeat_extend_encodeBlockAsm8B
  5188	MOVQ (R9)(R11*1), R10
  5189	XORQ (BX)(R11*1), R10
  5190	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
  5191	LEAL -8(R8), R8
  5192	LEAL 8(R11), R11
  5193	JMP  matchlen_match4_repeat_extend_encodeBlockAsm8B
  5194
  5195matchlen_bsf_8_repeat_extend_encodeBlockAsm8B:
  5196#ifdef GOAMD64_v3
  5197	TZCNTQ R10, R10
  5198
  5199#else
  5200	BSFQ R10, R10
  5201
  5202#endif
  5203	SARQ $0x03, R10
  5204	LEAL (R11)(R10*1), R11
  5205	JMP  repeat_extend_forward_end_encodeBlockAsm8B
  5206
  5207matchlen_match4_repeat_extend_encodeBlockAsm8B:
  5208	CMPL R8, $0x04
  5209	JB   matchlen_match2_repeat_extend_encodeBlockAsm8B
  5210	MOVL (R9)(R11*1), R10
  5211	CMPL (BX)(R11*1), R10
  5212	JNE  matchlen_match2_repeat_extend_encodeBlockAsm8B
  5213	LEAL -4(R8), R8
  5214	LEAL 4(R11), R11
  5215
  5216matchlen_match2_repeat_extend_encodeBlockAsm8B:
  5217	CMPL R8, $0x01
  5218	JE   matchlen_match1_repeat_extend_encodeBlockAsm8B
  5219	JB   repeat_extend_forward_end_encodeBlockAsm8B
  5220	MOVW (R9)(R11*1), R10
  5221	CMPW (BX)(R11*1), R10
  5222	JNE  matchlen_match1_repeat_extend_encodeBlockAsm8B
  5223	LEAL 2(R11), R11
  5224	SUBL $0x02, R8
  5225	JZ   repeat_extend_forward_end_encodeBlockAsm8B
  5226
  5227matchlen_match1_repeat_extend_encodeBlockAsm8B:
  5228	MOVB (R9)(R11*1), R10
  5229	CMPB (BX)(R11*1), R10
  5230	JNE  repeat_extend_forward_end_encodeBlockAsm8B
  5231	LEAL 1(R11), R11
  5232
  5233repeat_extend_forward_end_encodeBlockAsm8B:
  5234	ADDL  R11, CX
  5235	MOVL  CX, BX
  5236	SUBL  SI, BX
  5237	MOVL  16(SP), SI
  5238	TESTL DI, DI
  5239	JZ    repeat_as_copy_encodeBlockAsm8B
  5240
  5241	// emitRepeat
  5242	MOVL BX, SI
  5243	LEAL -4(BX), BX
  5244	CMPL SI, $0x08
  5245	JBE  repeat_two_match_repeat_encodeBlockAsm8B
  5246	CMPL SI, $0x0c
  5247	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
  5248
  5249cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
  5250	CMPL BX, $0x00000104
  5251	JB   repeat_three_match_repeat_encodeBlockAsm8B
  5252	LEAL -256(BX), BX
  5253	MOVW $0x0019, (AX)
  5254	MOVW BX, 2(AX)
  5255	ADDQ $0x04, AX
  5256	JMP  repeat_end_emit_encodeBlockAsm8B
  5257
  5258repeat_three_match_repeat_encodeBlockAsm8B:
  5259	LEAL -4(BX), BX
  5260	MOVW $0x0015, (AX)
  5261	MOVB BL, 2(AX)
  5262	ADDQ $0x03, AX
  5263	JMP  repeat_end_emit_encodeBlockAsm8B
  5264
  5265repeat_two_match_repeat_encodeBlockAsm8B:
  5266	SHLL $0x02, BX
  5267	ORL  $0x01, BX
  5268	MOVW BX, (AX)
  5269	ADDQ $0x02, AX
  5270	JMP  repeat_end_emit_encodeBlockAsm8B
  5271	XORQ DI, DI
  5272	LEAL 1(DI)(BX*4), BX
  5273	MOVB SI, 1(AX)
  5274	SARL $0x08, SI
  5275	SHLL $0x05, SI
  5276	ORL  SI, BX
  5277	MOVB BL, (AX)
  5278	ADDQ $0x02, AX
  5279	JMP  repeat_end_emit_encodeBlockAsm8B
  5280
  5281repeat_as_copy_encodeBlockAsm8B:
  5282	// emitCopy
  5283	CMPL BX, $0x40
  5284	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
  5285	CMPL SI, $0x00000800
  5286	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm8B
  5287	MOVL $0x00000001, DI
  5288	LEAL 16(DI), DI
  5289	MOVB SI, 1(AX)
  5290	SHRL $0x08, SI
  5291	SHLL $0x05, SI
  5292	ORL  SI, DI
  5293	MOVB DI, (AX)
  5294	ADDQ $0x02, AX
  5295	SUBL $0x08, BX
  5296
  5297	// emitRepeat
  5298	LEAL -4(BX), BX
  5299	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
  5300	MOVL BX, SI
  5301	LEAL -4(BX), BX
  5302	CMPL SI, $0x08
  5303	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
  5304	CMPL SI, $0x0c
  5305	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
  5306
  5307cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
  5308	CMPL BX, $0x00000104
  5309	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
  5310	LEAL -256(BX), BX
  5311	MOVW $0x0019, (AX)
  5312	MOVW BX, 2(AX)
  5313	ADDQ $0x04, AX
  5314	JMP  repeat_end_emit_encodeBlockAsm8B
  5315
  5316repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
  5317	LEAL -4(BX), BX
  5318	MOVW $0x0015, (AX)
  5319	MOVB BL, 2(AX)
  5320	ADDQ $0x03, AX
  5321	JMP  repeat_end_emit_encodeBlockAsm8B
  5322
  5323repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
  5324	SHLL $0x02, BX
  5325	ORL  $0x01, BX
  5326	MOVW BX, (AX)
  5327	ADDQ $0x02, AX
  5328	JMP  repeat_end_emit_encodeBlockAsm8B
  5329	XORQ DI, DI
  5330	LEAL 1(DI)(BX*4), BX
  5331	MOVB SI, 1(AX)
  5332	SARL $0x08, SI
  5333	SHLL $0x05, SI
  5334	ORL  SI, BX
  5335	MOVB BL, (AX)
  5336	ADDQ $0x02, AX
  5337	JMP  repeat_end_emit_encodeBlockAsm8B
  5338
  5339long_offset_short_repeat_as_copy_encodeBlockAsm8B:
  5340	MOVB $0xee, (AX)
  5341	MOVW SI, 1(AX)
  5342	LEAL -60(BX), BX
  5343	ADDQ $0x03, AX
  5344
  5345	// emitRepeat
  5346	MOVL BX, SI
  5347	LEAL -4(BX), BX
  5348	CMPL SI, $0x08
  5349	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  5350	CMPL SI, $0x0c
  5351	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  5352
  5353cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  5354	CMPL BX, $0x00000104
  5355	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  5356	LEAL -256(BX), BX
  5357	MOVW $0x0019, (AX)
  5358	MOVW BX, 2(AX)
  5359	ADDQ $0x04, AX
  5360	JMP  repeat_end_emit_encodeBlockAsm8B
  5361
  5362repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  5363	LEAL -4(BX), BX
  5364	MOVW $0x0015, (AX)
  5365	MOVB BL, 2(AX)
  5366	ADDQ $0x03, AX
  5367	JMP  repeat_end_emit_encodeBlockAsm8B
  5368
  5369repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  5370	SHLL $0x02, BX
  5371	ORL  $0x01, BX
  5372	MOVW BX, (AX)
  5373	ADDQ $0x02, AX
  5374	JMP  repeat_end_emit_encodeBlockAsm8B
  5375	XORQ DI, DI
  5376	LEAL 1(DI)(BX*4), BX
  5377	MOVB SI, 1(AX)
  5378	SARL $0x08, SI
  5379	SHLL $0x05, SI
  5380	ORL  SI, BX
  5381	MOVB BL, (AX)
  5382	ADDQ $0x02, AX
  5383	JMP  repeat_end_emit_encodeBlockAsm8B
  5384
  5385two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
  5386	MOVL BX, DI
  5387	SHLL $0x02, DI
  5388	CMPL BX, $0x0c
  5389	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm8B
  5390	LEAL -15(DI), DI
  5391	MOVB SI, 1(AX)
  5392	SHRL $0x08, SI
  5393	SHLL $0x05, SI
  5394	ORL  SI, DI
  5395	MOVB DI, (AX)
  5396	ADDQ $0x02, AX
  5397	JMP  repeat_end_emit_encodeBlockAsm8B
  5398
  5399emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
  5400	LEAL -2(DI), DI
  5401	MOVB DI, (AX)
  5402	MOVW SI, 1(AX)
  5403	ADDQ $0x03, AX
  5404
  5405repeat_end_emit_encodeBlockAsm8B:
  5406	MOVL CX, 12(SP)
  5407	JMP  search_loop_encodeBlockAsm8B
  5408
  5409no_repeat_found_encodeBlockAsm8B:
  5410	CMPL (DX)(BX*1), SI
  5411	JEQ  candidate_match_encodeBlockAsm8B
  5412	SHRQ $0x08, SI
  5413	MOVL 24(SP)(R9*4), BX
  5414	LEAL 2(CX), R8
  5415	CMPL (DX)(DI*1), SI
  5416	JEQ  candidate2_match_encodeBlockAsm8B
  5417	MOVL R8, 24(SP)(R9*4)
  5418	SHRQ $0x08, SI
  5419	CMPL (DX)(BX*1), SI
  5420	JEQ  candidate3_match_encodeBlockAsm8B
  5421	MOVL 20(SP), CX
  5422	JMP  search_loop_encodeBlockAsm8B
  5423
  5424candidate3_match_encodeBlockAsm8B:
  5425	ADDL $0x02, CX
  5426	JMP  candidate_match_encodeBlockAsm8B
  5427
  5428candidate2_match_encodeBlockAsm8B:
  5429	MOVL R8, 24(SP)(R9*4)
  5430	INCL CX
  5431	MOVL DI, BX
  5432
  5433candidate_match_encodeBlockAsm8B:
  5434	MOVL  12(SP), SI
  5435	TESTL BX, BX
  5436	JZ    match_extend_back_end_encodeBlockAsm8B
  5437
  5438match_extend_back_loop_encodeBlockAsm8B:
  5439	CMPL CX, SI
  5440	JBE  match_extend_back_end_encodeBlockAsm8B
  5441	MOVB -1(DX)(BX*1), DI
  5442	MOVB -1(DX)(CX*1), R8
  5443	CMPB DI, R8
  5444	JNE  match_extend_back_end_encodeBlockAsm8B
  5445	LEAL -1(CX), CX
  5446	DECL BX
  5447	JZ   match_extend_back_end_encodeBlockAsm8B
  5448	JMP  match_extend_back_loop_encodeBlockAsm8B
  5449
  5450match_extend_back_end_encodeBlockAsm8B:
  5451	MOVL CX, SI
  5452	SUBL 12(SP), SI
  5453	LEAQ 3(AX)(SI*1), SI
  5454	CMPQ SI, (SP)
  5455	JB   match_dst_size_check_encodeBlockAsm8B
  5456	MOVQ $0x00000000, ret+48(FP)
  5457	RET
  5458
  5459match_dst_size_check_encodeBlockAsm8B:
  5460	MOVL CX, SI
  5461	MOVL 12(SP), DI
  5462	CMPL DI, SI
  5463	JEQ  emit_literal_done_match_emit_encodeBlockAsm8B
  5464	MOVL SI, R8
  5465	MOVL SI, 12(SP)
  5466	LEAQ (DX)(DI*1), SI
  5467	SUBL DI, R8
  5468	LEAL -1(R8), DI
  5469	CMPL DI, $0x3c
  5470	JB   one_byte_match_emit_encodeBlockAsm8B
  5471	CMPL DI, $0x00000100
  5472	JB   two_bytes_match_emit_encodeBlockAsm8B
  5473	JB   three_bytes_match_emit_encodeBlockAsm8B
  5474
  5475three_bytes_match_emit_encodeBlockAsm8B:
  5476	MOVB $0xf4, (AX)
  5477	MOVW DI, 1(AX)
  5478	ADDQ $0x03, AX
  5479	JMP  memmove_long_match_emit_encodeBlockAsm8B
  5480
  5481two_bytes_match_emit_encodeBlockAsm8B:
  5482	MOVB $0xf0, (AX)
  5483	MOVB DI, 1(AX)
  5484	ADDQ $0x02, AX
  5485	CMPL DI, $0x40
  5486	JB   memmove_match_emit_encodeBlockAsm8B
  5487	JMP  memmove_long_match_emit_encodeBlockAsm8B
  5488
  5489one_byte_match_emit_encodeBlockAsm8B:
  5490	SHLB $0x02, DI
  5491	MOVB DI, (AX)
  5492	ADDQ $0x01, AX
  5493
  5494memmove_match_emit_encodeBlockAsm8B:
  5495	LEAQ (AX)(R8*1), DI
  5496
  5497	// genMemMoveShort
  5498	CMPQ R8, $0x08
  5499	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
  5500	CMPQ R8, $0x10
  5501	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
  5502	CMPQ R8, $0x20
  5503	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
  5504	JMP  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
  5505
  5506emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
  5507	MOVQ (SI), R9
  5508	MOVQ R9, (AX)
  5509	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
  5510
  5511emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
  5512	MOVQ (SI), R9
  5513	MOVQ -8(SI)(R8*1), SI
  5514	MOVQ R9, (AX)
  5515	MOVQ SI, -8(AX)(R8*1)
  5516	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
  5517
  5518emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
  5519	MOVOU (SI), X0
  5520	MOVOU -16(SI)(R8*1), X1
  5521	MOVOU X0, (AX)
  5522	MOVOU X1, -16(AX)(R8*1)
  5523	JMP   memmove_end_copy_match_emit_encodeBlockAsm8B
  5524
  5525emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
  5526	MOVOU (SI), X0
  5527	MOVOU 16(SI), X1
  5528	MOVOU -32(SI)(R8*1), X2
  5529	MOVOU -16(SI)(R8*1), X3
  5530	MOVOU X0, (AX)
  5531	MOVOU X1, 16(AX)
  5532	MOVOU X2, -32(AX)(R8*1)
  5533	MOVOU X3, -16(AX)(R8*1)
  5534
  5535memmove_end_copy_match_emit_encodeBlockAsm8B:
  5536	MOVQ DI, AX
  5537	JMP  emit_literal_done_match_emit_encodeBlockAsm8B
  5538
  5539memmove_long_match_emit_encodeBlockAsm8B:
  5540	LEAQ (AX)(R8*1), DI
  5541
  5542	// genMemMoveLong
  5543	MOVOU (SI), X0
  5544	MOVOU 16(SI), X1
  5545	MOVOU -32(SI)(R8*1), X2
  5546	MOVOU -16(SI)(R8*1), X3
  5547	MOVQ  R8, R10
  5548	SHRQ  $0x05, R10
  5549	MOVQ  AX, R9
  5550	ANDL  $0x0000001f, R9
  5551	MOVQ  $0x00000040, R11
  5552	SUBQ  R9, R11
  5553	DECQ  R10
  5554	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
  5555	LEAQ  -32(SI)(R11*1), R9
  5556	LEAQ  -32(AX)(R11*1), R12
  5557
  5558emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
  5559	MOVOU (R9), X4
  5560	MOVOU 16(R9), X5
  5561	MOVOA X4, (R12)
  5562	MOVOA X5, 16(R12)
  5563	ADDQ  $0x20, R12
  5564	ADDQ  $0x20, R9
  5565	ADDQ  $0x20, R11
  5566	DECQ  R10
  5567	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
  5568
  5569emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
  5570	MOVOU -32(SI)(R11*1), X4
  5571	MOVOU -16(SI)(R11*1), X5
  5572	MOVOA X4, -32(AX)(R11*1)
  5573	MOVOA X5, -16(AX)(R11*1)
  5574	ADDQ  $0x20, R11
  5575	CMPQ  R8, R11
  5576	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
  5577	MOVOU X0, (AX)
  5578	MOVOU X1, 16(AX)
  5579	MOVOU X2, -32(AX)(R8*1)
  5580	MOVOU X3, -16(AX)(R8*1)
  5581	MOVQ  DI, AX
  5582
  5583emit_literal_done_match_emit_encodeBlockAsm8B:
  5584match_nolit_loop_encodeBlockAsm8B:
  5585	MOVL CX, SI
  5586	SUBL BX, SI
  5587	MOVL SI, 16(SP)
  5588	ADDL $0x04, CX
  5589	ADDL $0x04, BX
  5590	MOVQ src_len+32(FP), SI
  5591	SUBL CX, SI
  5592	LEAQ (DX)(CX*1), DI
  5593	LEAQ (DX)(BX*1), BX
  5594
  5595	// matchLen
  5596	XORL R9, R9
  5597
  5598matchlen_loopback_16_match_nolit_encodeBlockAsm8B:
  5599	CMPL SI, $0x10
  5600	JB   matchlen_match8_match_nolit_encodeBlockAsm8B
  5601	MOVQ (DI)(R9*1), R8
  5602	MOVQ 8(DI)(R9*1), R10
  5603	XORQ (BX)(R9*1), R8
  5604	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm8B
  5605	XORQ 8(BX)(R9*1), R10
  5606	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm8B
  5607	LEAL -16(SI), SI
  5608	LEAL 16(R9), R9
  5609	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm8B
  5610
  5611matchlen_bsf_16match_nolit_encodeBlockAsm8B:
  5612#ifdef GOAMD64_v3
  5613	TZCNTQ R10, R10
  5614
  5615#else
  5616	BSFQ R10, R10
  5617
  5618#endif
  5619	SARQ $0x03, R10
  5620	LEAL 8(R9)(R10*1), R9
  5621	JMP  match_nolit_end_encodeBlockAsm8B
  5622
  5623matchlen_match8_match_nolit_encodeBlockAsm8B:
  5624	CMPL SI, $0x08
  5625	JB   matchlen_match4_match_nolit_encodeBlockAsm8B
  5626	MOVQ (DI)(R9*1), R8
  5627	XORQ (BX)(R9*1), R8
  5628	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm8B
  5629	LEAL -8(SI), SI
  5630	LEAL 8(R9), R9
  5631	JMP  matchlen_match4_match_nolit_encodeBlockAsm8B
  5632
  5633matchlen_bsf_8_match_nolit_encodeBlockAsm8B:
  5634#ifdef GOAMD64_v3
  5635	TZCNTQ R8, R8
  5636
  5637#else
  5638	BSFQ R8, R8
  5639
  5640#endif
  5641	SARQ $0x03, R8
  5642	LEAL (R9)(R8*1), R9
  5643	JMP  match_nolit_end_encodeBlockAsm8B
  5644
  5645matchlen_match4_match_nolit_encodeBlockAsm8B:
  5646	CMPL SI, $0x04
  5647	JB   matchlen_match2_match_nolit_encodeBlockAsm8B
  5648	MOVL (DI)(R9*1), R8
  5649	CMPL (BX)(R9*1), R8
  5650	JNE  matchlen_match2_match_nolit_encodeBlockAsm8B
  5651	LEAL -4(SI), SI
  5652	LEAL 4(R9), R9
  5653
  5654matchlen_match2_match_nolit_encodeBlockAsm8B:
  5655	CMPL SI, $0x01
  5656	JE   matchlen_match1_match_nolit_encodeBlockAsm8B
  5657	JB   match_nolit_end_encodeBlockAsm8B
  5658	MOVW (DI)(R9*1), R8
  5659	CMPW (BX)(R9*1), R8
  5660	JNE  matchlen_match1_match_nolit_encodeBlockAsm8B
  5661	LEAL 2(R9), R9
  5662	SUBL $0x02, SI
  5663	JZ   match_nolit_end_encodeBlockAsm8B
  5664
  5665matchlen_match1_match_nolit_encodeBlockAsm8B:
  5666	MOVB (DI)(R9*1), R8
  5667	CMPB (BX)(R9*1), R8
  5668	JNE  match_nolit_end_encodeBlockAsm8B
  5669	LEAL 1(R9), R9
  5670
  5671match_nolit_end_encodeBlockAsm8B:
  5672	ADDL R9, CX
  5673	MOVL 16(SP), BX
  5674	ADDL $0x04, R9
  5675	MOVL CX, 12(SP)
  5676
  5677	// emitCopy
  5678	CMPL R9, $0x40
  5679	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm8B
  5680	CMPL BX, $0x00000800
  5681	JAE  long_offset_short_match_nolit_encodeBlockAsm8B
  5682	MOVL $0x00000001, SI
  5683	LEAL 16(SI), SI
  5684	MOVB BL, 1(AX)
  5685	SHRL $0x08, BX
  5686	SHLL $0x05, BX
  5687	ORL  BX, SI
  5688	MOVB SI, (AX)
  5689	ADDQ $0x02, AX
  5690	SUBL $0x08, R9
  5691
  5692	// emitRepeat
  5693	LEAL -4(R9), R9
  5694	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
  5695	MOVL R9, BX
  5696	LEAL -4(R9), R9
  5697	CMPL BX, $0x08
  5698	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
  5699	CMPL BX, $0x0c
  5700	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
  5701
  5702cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
  5703	CMPL R9, $0x00000104
  5704	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
  5705	LEAL -256(R9), R9
  5706	MOVW $0x0019, (AX)
  5707	MOVW R9, 2(AX)
  5708	ADDQ $0x04, AX
  5709	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
  5710
  5711repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
  5712	LEAL -4(R9), R9
  5713	MOVW $0x0015, (AX)
  5714	MOVB R9, 2(AX)
  5715	ADDQ $0x03, AX
  5716	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
  5717
  5718repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
  5719	SHLL $0x02, R9
  5720	ORL  $0x01, R9
  5721	MOVW R9, (AX)
  5722	ADDQ $0x02, AX
  5723	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
  5724	XORQ SI, SI
  5725	LEAL 1(SI)(R9*4), R9
  5726	MOVB BL, 1(AX)
  5727	SARL $0x08, BX
  5728	SHLL $0x05, BX
  5729	ORL  BX, R9
  5730	MOVB R9, (AX)
  5731	ADDQ $0x02, AX
  5732	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
  5733
  5734long_offset_short_match_nolit_encodeBlockAsm8B:
  5735	MOVB $0xee, (AX)
  5736	MOVW BX, 1(AX)
  5737	LEAL -60(R9), R9
  5738	ADDQ $0x03, AX
  5739
  5740	// emitRepeat
  5741	MOVL R9, BX
  5742	LEAL -4(R9), R9
  5743	CMPL BX, $0x08
  5744	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
  5745	CMPL BX, $0x0c
  5746	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
  5747
  5748cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
  5749	CMPL R9, $0x00000104
  5750	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
  5751	LEAL -256(R9), R9
  5752	MOVW $0x0019, (AX)
  5753	MOVW R9, 2(AX)
  5754	ADDQ $0x04, AX
  5755	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
  5756
  5757repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
  5758	LEAL -4(R9), R9
  5759	MOVW $0x0015, (AX)
  5760	MOVB R9, 2(AX)
  5761	ADDQ $0x03, AX
  5762	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
  5763
  5764repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
  5765	SHLL $0x02, R9
  5766	ORL  $0x01, R9
  5767	MOVW R9, (AX)
  5768	ADDQ $0x02, AX
  5769	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
  5770	XORQ SI, SI
  5771	LEAL 1(SI)(R9*4), R9
  5772	MOVB BL, 1(AX)
  5773	SARL $0x08, BX
  5774	SHLL $0x05, BX
  5775	ORL  BX, R9
  5776	MOVB R9, (AX)
  5777	ADDQ $0x02, AX
  5778	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
  5779
  5780two_byte_offset_short_match_nolit_encodeBlockAsm8B:
  5781	MOVL R9, SI
  5782	SHLL $0x02, SI
  5783	CMPL R9, $0x0c
  5784	JAE  emit_copy_three_match_nolit_encodeBlockAsm8B
  5785	LEAL -15(SI), SI
  5786	MOVB BL, 1(AX)
  5787	SHRL $0x08, BX
  5788	SHLL $0x05, BX
  5789	ORL  BX, SI
  5790	MOVB SI, (AX)
  5791	ADDQ $0x02, AX
  5792	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
  5793
  5794emit_copy_three_match_nolit_encodeBlockAsm8B:
  5795	LEAL -2(SI), SI
  5796	MOVB SI, (AX)
  5797	MOVW BX, 1(AX)
  5798	ADDQ $0x03, AX
  5799
  5800match_nolit_emitcopy_end_encodeBlockAsm8B:
  5801	CMPL CX, 8(SP)
  5802	JAE  emit_remainder_encodeBlockAsm8B
  5803	MOVQ -2(DX)(CX*1), SI
  5804	CMPQ AX, (SP)
  5805	JB   match_nolit_dst_ok_encodeBlockAsm8B
  5806	MOVQ $0x00000000, ret+48(FP)
  5807	RET
  5808
  5809match_nolit_dst_ok_encodeBlockAsm8B:
  5810	MOVQ  $0x9e3779b1, R8
  5811	MOVQ  SI, DI
  5812	SHRQ  $0x10, SI
  5813	MOVQ  SI, BX
  5814	SHLQ  $0x20, DI
  5815	IMULQ R8, DI
  5816	SHRQ  $0x38, DI
  5817	SHLQ  $0x20, BX
  5818	IMULQ R8, BX
  5819	SHRQ  $0x38, BX
  5820	LEAL  -2(CX), R8
  5821	LEAQ  24(SP)(BX*4), R9
  5822	MOVL  (R9), BX
  5823	MOVL  R8, 24(SP)(DI*4)
  5824	MOVL  CX, (R9)
  5825	CMPL  (DX)(BX*1), SI
  5826	JEQ   match_nolit_loop_encodeBlockAsm8B
  5827	INCL  CX
  5828	JMP   search_loop_encodeBlockAsm8B
  5829
  5830emit_remainder_encodeBlockAsm8B:
  5831	MOVQ src_len+32(FP), CX
  5832	SUBL 12(SP), CX
  5833	LEAQ 3(AX)(CX*1), CX
  5834	CMPQ CX, (SP)
  5835	JB   emit_remainder_ok_encodeBlockAsm8B
  5836	MOVQ $0x00000000, ret+48(FP)
  5837	RET
  5838
  5839emit_remainder_ok_encodeBlockAsm8B:
  5840	MOVQ src_len+32(FP), CX
  5841	MOVL 12(SP), BX
  5842	CMPL BX, CX
  5843	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm8B
  5844	MOVL CX, SI
  5845	MOVL CX, 12(SP)
  5846	LEAQ (DX)(BX*1), CX
  5847	SUBL BX, SI
  5848	LEAL -1(SI), DX
  5849	CMPL DX, $0x3c
  5850	JB   one_byte_emit_remainder_encodeBlockAsm8B
  5851	CMPL DX, $0x00000100
  5852	JB   two_bytes_emit_remainder_encodeBlockAsm8B
  5853	JB   three_bytes_emit_remainder_encodeBlockAsm8B
  5854
  5855three_bytes_emit_remainder_encodeBlockAsm8B:
  5856	MOVB $0xf4, (AX)
  5857	MOVW DX, 1(AX)
  5858	ADDQ $0x03, AX
  5859	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
  5860
  5861two_bytes_emit_remainder_encodeBlockAsm8B:
  5862	MOVB $0xf0, (AX)
  5863	MOVB DL, 1(AX)
  5864	ADDQ $0x02, AX
  5865	CMPL DX, $0x40
  5866	JB   memmove_emit_remainder_encodeBlockAsm8B
  5867	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
  5868
  5869one_byte_emit_remainder_encodeBlockAsm8B:
  5870	SHLB $0x02, DL
  5871	MOVB DL, (AX)
  5872	ADDQ $0x01, AX
  5873
  5874memmove_emit_remainder_encodeBlockAsm8B:
  5875	LEAQ (AX)(SI*1), DX
  5876	MOVL SI, BX
  5877
  5878	// genMemMoveShort
  5879	CMPQ BX, $0x03
  5880	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
  5881	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
  5882	CMPQ BX, $0x08
  5883	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7
  5884	CMPQ BX, $0x10
  5885	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
  5886	CMPQ BX, $0x20
  5887	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
  5888	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
  5889
  5890emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
  5891	MOVB (CX), SI
  5892	MOVB -1(CX)(BX*1), CL
  5893	MOVB SI, (AX)
  5894	MOVB CL, -1(AX)(BX*1)
  5895	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5896
  5897emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
  5898	MOVW (CX), SI
  5899	MOVB 2(CX), CL
  5900	MOVW SI, (AX)
  5901	MOVB CL, 2(AX)
  5902	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5903
  5904emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
  5905	MOVL (CX), SI
  5906	MOVL -4(CX)(BX*1), CX
  5907	MOVL SI, (AX)
  5908	MOVL CX, -4(AX)(BX*1)
  5909	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5910
  5911emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
  5912	MOVQ (CX), SI
  5913	MOVQ -8(CX)(BX*1), CX
  5914	MOVQ SI, (AX)
  5915	MOVQ CX, -8(AX)(BX*1)
  5916	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5917
  5918emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
  5919	MOVOU (CX), X0
  5920	MOVOU -16(CX)(BX*1), X1
  5921	MOVOU X0, (AX)
  5922	MOVOU X1, -16(AX)(BX*1)
  5923	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5924
  5925emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
  5926	MOVOU (CX), X0
  5927	MOVOU 16(CX), X1
  5928	MOVOU -32(CX)(BX*1), X2
  5929	MOVOU -16(CX)(BX*1), X3
  5930	MOVOU X0, (AX)
  5931	MOVOU X1, 16(AX)
  5932	MOVOU X2, -32(AX)(BX*1)
  5933	MOVOU X3, -16(AX)(BX*1)
  5934
  5935memmove_end_copy_emit_remainder_encodeBlockAsm8B:
  5936	MOVQ DX, AX
  5937	JMP  emit_literal_done_emit_remainder_encodeBlockAsm8B
  5938
  5939memmove_long_emit_remainder_encodeBlockAsm8B:
  5940	LEAQ (AX)(SI*1), DX
  5941	MOVL SI, BX
  5942
  5943	// genMemMoveLong
  5944	MOVOU (CX), X0
  5945	MOVOU 16(CX), X1
  5946	MOVOU -32(CX)(BX*1), X2
  5947	MOVOU -16(CX)(BX*1), X3
  5948	MOVQ  BX, DI
  5949	SHRQ  $0x05, DI
  5950	MOVQ  AX, SI
  5951	ANDL  $0x0000001f, SI
  5952	MOVQ  $0x00000040, R8
  5953	SUBQ  SI, R8
  5954	DECQ  DI
  5955	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
  5956	LEAQ  -32(CX)(R8*1), SI
  5957	LEAQ  -32(AX)(R8*1), R9
  5958
  5959emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
  5960	MOVOU (SI), X4
  5961	MOVOU 16(SI), X5
  5962	MOVOA X4, (R9)
  5963	MOVOA X5, 16(R9)
  5964	ADDQ  $0x20, R9
  5965	ADDQ  $0x20, SI
  5966	ADDQ  $0x20, R8
  5967	DECQ  DI
  5968	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
  5969
  5970emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
  5971	MOVOU -32(CX)(R8*1), X4
  5972	MOVOU -16(CX)(R8*1), X5
  5973	MOVOA X4, -32(AX)(R8*1)
  5974	MOVOA X5, -16(AX)(R8*1)
  5975	ADDQ  $0x20, R8
  5976	CMPQ  BX, R8
  5977	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
  5978	MOVOU X0, (AX)
  5979	MOVOU X1, 16(AX)
  5980	MOVOU X2, -32(AX)(BX*1)
  5981	MOVOU X3, -16(AX)(BX*1)
  5982	MOVQ  DX, AX
  5983
  5984emit_literal_done_emit_remainder_encodeBlockAsm8B:
  5985	MOVQ dst_base+0(FP), CX
  5986	SUBQ CX, AX
  5987	MOVQ AX, ret+48(FP)
  5988	RET
  5989
  5990// func encodeBetterBlockAsm(dst []byte, src []byte) int
  5991// Requires: BMI, SSE2
  5992TEXT ·encodeBetterBlockAsm(SB), $589848-56
  5993	MOVQ dst_base+0(FP), AX
  5994	MOVQ $0x00001200, CX
  5995	LEAQ 24(SP), DX
  5996	PXOR X0, X0
  5997
  5998zero_loop_encodeBetterBlockAsm:
  5999	MOVOU X0, (DX)
  6000	MOVOU X0, 16(DX)
  6001	MOVOU X0, 32(DX)
  6002	MOVOU X0, 48(DX)
  6003	MOVOU X0, 64(DX)
  6004	MOVOU X0, 80(DX)
  6005	MOVOU X0, 96(DX)
  6006	MOVOU X0, 112(DX)
  6007	ADDQ  $0x80, DX
  6008	DECQ  CX
  6009	JNZ   zero_loop_encodeBetterBlockAsm
  6010	MOVL  $0x00000000, 12(SP)
  6011	MOVQ  src_len+32(FP), CX
  6012	LEAQ  -6(CX), DX
  6013	LEAQ  -8(CX), BX
  6014	MOVL  BX, 8(SP)
  6015	SHRQ  $0x05, CX
  6016	SUBL  CX, DX
  6017	LEAQ  (AX)(DX*1), DX
  6018	MOVQ  DX, (SP)
  6019	MOVL  $0x00000001, CX
  6020	MOVL  $0x00000000, 16(SP)
  6021	MOVQ  src_base+24(FP), DX
  6022
  6023search_loop_encodeBetterBlockAsm:
  6024	MOVL CX, BX
  6025	SUBL 12(SP), BX
  6026	SHRL $0x07, BX
  6027	CMPL BX, $0x63
  6028	JBE  check_maxskip_ok_encodeBetterBlockAsm
  6029	LEAL 100(CX), BX
  6030	JMP  check_maxskip_cont_encodeBetterBlockAsm
  6031
  6032check_maxskip_ok_encodeBetterBlockAsm:
  6033	LEAL 1(CX)(BX*1), BX
  6034
  6035check_maxskip_cont_encodeBetterBlockAsm:
  6036	CMPL  BX, 8(SP)
  6037	JAE   emit_remainder_encodeBetterBlockAsm
  6038	MOVQ  (DX)(CX*1), SI
  6039	MOVL  BX, 20(SP)
  6040	MOVQ  $0x00cf1bbcdcbfa563, R8
  6041	MOVQ  $0x9e3779b1, BX
  6042	MOVQ  SI, R9
  6043	MOVQ  SI, R10
  6044	SHLQ  $0x08, R9
  6045	IMULQ R8, R9
  6046	SHRQ  $0x2f, R9
  6047	SHLQ  $0x20, R10
  6048	IMULQ BX, R10
  6049	SHRQ  $0x32, R10
  6050	MOVL  24(SP)(R9*4), BX
  6051	MOVL  524312(SP)(R10*4), DI
  6052	MOVL  CX, 24(SP)(R9*4)
  6053	MOVL  CX, 524312(SP)(R10*4)
  6054	MOVQ  (DX)(BX*1), R9
  6055	MOVQ  (DX)(DI*1), R10
  6056	CMPQ  R9, SI
  6057	JEQ   candidate_match_encodeBetterBlockAsm
  6058	CMPQ  R10, SI
  6059	JNE   no_short_found_encodeBetterBlockAsm
  6060	MOVL  DI, BX
  6061	JMP   candidate_match_encodeBetterBlockAsm
  6062
  6063no_short_found_encodeBetterBlockAsm:
  6064	CMPL R9, SI
  6065	JEQ  candidate_match_encodeBetterBlockAsm
  6066	CMPL R10, SI
  6067	JEQ  candidateS_match_encodeBetterBlockAsm
  6068	MOVL 20(SP), CX
  6069	JMP  search_loop_encodeBetterBlockAsm
  6070
  6071candidateS_match_encodeBetterBlockAsm:
  6072	SHRQ  $0x08, SI
  6073	MOVQ  SI, R9
  6074	SHLQ  $0x08, R9
  6075	IMULQ R8, R9
  6076	SHRQ  $0x2f, R9
  6077	MOVL  24(SP)(R9*4), BX
  6078	INCL  CX
  6079	MOVL  CX, 24(SP)(R9*4)
  6080	CMPL  (DX)(BX*1), SI
  6081	JEQ   candidate_match_encodeBetterBlockAsm
  6082	DECL  CX
  6083	MOVL  DI, BX
  6084
  6085candidate_match_encodeBetterBlockAsm:
  6086	MOVL  12(SP), SI
  6087	TESTL BX, BX
  6088	JZ    match_extend_back_end_encodeBetterBlockAsm
  6089
  6090match_extend_back_loop_encodeBetterBlockAsm:
  6091	CMPL CX, SI
  6092	JBE  match_extend_back_end_encodeBetterBlockAsm
  6093	MOVB -1(DX)(BX*1), DI
  6094	MOVB -1(DX)(CX*1), R8
  6095	CMPB DI, R8
  6096	JNE  match_extend_back_end_encodeBetterBlockAsm
  6097	LEAL -1(CX), CX
  6098	DECL BX
  6099	JZ   match_extend_back_end_encodeBetterBlockAsm
  6100	JMP  match_extend_back_loop_encodeBetterBlockAsm
  6101
  6102match_extend_back_end_encodeBetterBlockAsm:
  6103	MOVL CX, SI
  6104	SUBL 12(SP), SI
  6105	LEAQ 5(AX)(SI*1), SI
  6106	CMPQ SI, (SP)
  6107	JB   match_dst_size_check_encodeBetterBlockAsm
  6108	MOVQ $0x00000000, ret+48(FP)
  6109	RET
  6110
  6111match_dst_size_check_encodeBetterBlockAsm:
  6112	MOVL CX, SI
  6113	ADDL $0x04, CX
  6114	ADDL $0x04, BX
  6115	MOVQ src_len+32(FP), DI
  6116	SUBL CX, DI
  6117	LEAQ (DX)(CX*1), R8
  6118	LEAQ (DX)(BX*1), R9
  6119
  6120	// matchLen
  6121	XORL R11, R11
  6122
  6123matchlen_loopback_16_match_nolit_encodeBetterBlockAsm:
  6124	CMPL DI, $0x10
  6125	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm
  6126	MOVQ (R8)(R11*1), R10
  6127	MOVQ 8(R8)(R11*1), R12
  6128	XORQ (R9)(R11*1), R10
  6129	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
  6130	XORQ 8(R9)(R11*1), R12
  6131	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm
  6132	LEAL -16(DI), DI
  6133	LEAL 16(R11), R11
  6134	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm
  6135
  6136matchlen_bsf_16match_nolit_encodeBetterBlockAsm:
  6137#ifdef GOAMD64_v3
  6138	TZCNTQ R12, R12
  6139
  6140#else
  6141	BSFQ R12, R12
  6142
  6143#endif
  6144	SARQ $0x03, R12
  6145	LEAL 8(R11)(R12*1), R11
  6146	JMP  match_nolit_end_encodeBetterBlockAsm
  6147
  6148matchlen_match8_match_nolit_encodeBetterBlockAsm:
  6149	CMPL DI, $0x08
  6150	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm
  6151	MOVQ (R8)(R11*1), R10
  6152	XORQ (R9)(R11*1), R10
  6153	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
  6154	LEAL -8(DI), DI
  6155	LEAL 8(R11), R11
  6156	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm
  6157
  6158matchlen_bsf_8_match_nolit_encodeBetterBlockAsm:
  6159#ifdef GOAMD64_v3
  6160	TZCNTQ R10, R10
  6161
  6162#else
  6163	BSFQ R10, R10
  6164
  6165#endif
  6166	SARQ $0x03, R10
  6167	LEAL (R11)(R10*1), R11
  6168	JMP  match_nolit_end_encodeBetterBlockAsm
  6169
  6170matchlen_match4_match_nolit_encodeBetterBlockAsm:
  6171	CMPL DI, $0x04
  6172	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm
  6173	MOVL (R8)(R11*1), R10
  6174	CMPL (R9)(R11*1), R10
  6175	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm
  6176	LEAL -4(DI), DI
  6177	LEAL 4(R11), R11
  6178
  6179matchlen_match2_match_nolit_encodeBetterBlockAsm:
  6180	CMPL DI, $0x01
  6181	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm
  6182	JB   match_nolit_end_encodeBetterBlockAsm
  6183	MOVW (R8)(R11*1), R10
  6184	CMPW (R9)(R11*1), R10
  6185	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm
  6186	LEAL 2(R11), R11
  6187	SUBL $0x02, DI
  6188	JZ   match_nolit_end_encodeBetterBlockAsm
  6189
  6190matchlen_match1_match_nolit_encodeBetterBlockAsm:
  6191	MOVB (R8)(R11*1), R10
  6192	CMPB (R9)(R11*1), R10
  6193	JNE  match_nolit_end_encodeBetterBlockAsm
  6194	LEAL 1(R11), R11
  6195
  6196match_nolit_end_encodeBetterBlockAsm:
  6197	MOVL CX, DI
  6198	SUBL BX, DI
  6199
  6200	// Check if repeat
  6201	CMPL 16(SP), DI
  6202	JEQ  match_is_repeat_encodeBetterBlockAsm
  6203	CMPL R11, $0x01
  6204	JA   match_length_ok_encodeBetterBlockAsm
  6205	CMPL DI, $0x0000ffff
  6206	JBE  match_length_ok_encodeBetterBlockAsm
  6207	MOVL 20(SP), CX
  6208	INCL CX
  6209	JMP  search_loop_encodeBetterBlockAsm
  6210
  6211match_length_ok_encodeBetterBlockAsm:
  6212	MOVL DI, 16(SP)
  6213	MOVL 12(SP), BX
  6214	CMPL BX, SI
  6215	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm
  6216	MOVL SI, R8
  6217	MOVL SI, 12(SP)
  6218	LEAQ (DX)(BX*1), R9
  6219	SUBL BX, R8
  6220	LEAL -1(R8), BX
  6221	CMPL BX, $0x3c
  6222	JB   one_byte_match_emit_encodeBetterBlockAsm
  6223	CMPL BX, $0x00000100
  6224	JB   two_bytes_match_emit_encodeBetterBlockAsm
  6225	CMPL BX, $0x00010000
  6226	JB   three_bytes_match_emit_encodeBetterBlockAsm
  6227	CMPL BX, $0x01000000
  6228	JB   four_bytes_match_emit_encodeBetterBlockAsm
  6229	MOVB $0xfc, (AX)
  6230	MOVL BX, 1(AX)
  6231	ADDQ $0x05, AX
  6232	JMP  memmove_long_match_emit_encodeBetterBlockAsm
  6233
  6234four_bytes_match_emit_encodeBetterBlockAsm:
  6235	MOVL BX, R10
  6236	SHRL $0x10, R10
  6237	MOVB $0xf8, (AX)
  6238	MOVW BX, 1(AX)
  6239	MOVB R10, 3(AX)
  6240	ADDQ $0x04, AX
  6241	JMP  memmove_long_match_emit_encodeBetterBlockAsm
  6242
  6243three_bytes_match_emit_encodeBetterBlockAsm:
  6244	MOVB $0xf4, (AX)
  6245	MOVW BX, 1(AX)
  6246	ADDQ $0x03, AX
  6247	JMP  memmove_long_match_emit_encodeBetterBlockAsm
  6248
  6249two_bytes_match_emit_encodeBetterBlockAsm:
  6250	MOVB $0xf0, (AX)
  6251	MOVB BL, 1(AX)
  6252	ADDQ $0x02, AX
  6253	CMPL BX, $0x40
  6254	JB   memmove_match_emit_encodeBetterBlockAsm
  6255	JMP  memmove_long_match_emit_encodeBetterBlockAsm
  6256
  6257one_byte_match_emit_encodeBetterBlockAsm:
  6258	SHLB $0x02, BL
  6259	MOVB BL, (AX)
  6260	ADDQ $0x01, AX
  6261
  6262memmove_match_emit_encodeBetterBlockAsm:
  6263	LEAQ (AX)(R8*1), BX
  6264
  6265	// genMemMoveShort
  6266	CMPQ R8, $0x04
  6267	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4
  6268	CMPQ R8, $0x08
  6269	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
  6270	CMPQ R8, $0x10
  6271	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
  6272	CMPQ R8, $0x20
  6273	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
  6274	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
  6275
  6276emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4:
  6277	MOVL (R9), R10
  6278	MOVL R10, (AX)
  6279	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
  6280
  6281emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
  6282	MOVL (R9), R10
  6283	MOVL -4(R9)(R8*1), R9
  6284	MOVL R10, (AX)
  6285	MOVL R9, -4(AX)(R8*1)
  6286	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
  6287
  6288emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
  6289	MOVQ (R9), R10
  6290	MOVQ -8(R9)(R8*1), R9
  6291	MOVQ R10, (AX)
  6292	MOVQ R9, -8(AX)(R8*1)
  6293	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
  6294
  6295emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
  6296	MOVOU (R9), X0
  6297	MOVOU -16(R9)(R8*1), X1
  6298	MOVOU X0, (AX)
  6299	MOVOU X1, -16(AX)(R8*1)
  6300	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm
  6301
  6302emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
  6303	MOVOU (R9), X0
  6304	MOVOU 16(R9), X1
  6305	MOVOU -32(R9)(R8*1), X2
  6306	MOVOU -16(R9)(R8*1), X3
  6307	MOVOU X0, (AX)
  6308	MOVOU X1, 16(AX)
  6309	MOVOU X2, -32(AX)(R8*1)
  6310	MOVOU X3, -16(AX)(R8*1)
  6311
  6312memmove_end_copy_match_emit_encodeBetterBlockAsm:
  6313	MOVQ BX, AX
  6314	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm
  6315
  6316memmove_long_match_emit_encodeBetterBlockAsm:
  6317	LEAQ (AX)(R8*1), BX
  6318
  6319	// genMemMoveLong
  6320	MOVOU (R9), X0
  6321	MOVOU 16(R9), X1
  6322	MOVOU -32(R9)(R8*1), X2
  6323	MOVOU -16(R9)(R8*1), X3
  6324	MOVQ  R8, R12
  6325	SHRQ  $0x05, R12
  6326	MOVQ  AX, R10
  6327	ANDL  $0x0000001f, R10
  6328	MOVQ  $0x00000040, R13
  6329	SUBQ  R10, R13
  6330	DECQ  R12
  6331	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
  6332	LEAQ  -32(R9)(R13*1), R10
  6333	LEAQ  -32(AX)(R13*1), R14
  6334
  6335emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
  6336	MOVOU (R10), X4
  6337	MOVOU 16(R10), X5
  6338	MOVOA X4, (R14)
  6339	MOVOA X5, 16(R14)
  6340	ADDQ  $0x20, R14
  6341	ADDQ  $0x20, R10
  6342	ADDQ  $0x20, R13
  6343	DECQ  R12
  6344	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
  6345
  6346emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
  6347	MOVOU -32(R9)(R13*1), X4
  6348	MOVOU -16(R9)(R13*1), X5
  6349	MOVOA X4, -32(AX)(R13*1)
  6350	MOVOA X5, -16(AX)(R13*1)
  6351	ADDQ  $0x20, R13
  6352	CMPQ  R8, R13
  6353	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
  6354	MOVOU X0, (AX)
  6355	MOVOU X1, 16(AX)
  6356	MOVOU X2, -32(AX)(R8*1)
  6357	MOVOU X3, -16(AX)(R8*1)
  6358	MOVQ  BX, AX
  6359
  6360emit_literal_done_match_emit_encodeBetterBlockAsm:
  6361	ADDL R11, CX
  6362	ADDL $0x04, R11
  6363	MOVL CX, 12(SP)
  6364
  6365	// emitCopy
  6366	CMPL DI, $0x00010000
  6367	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm
  6368	CMPL R11, $0x40
  6369	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm
  6370	MOVB $0xff, (AX)
  6371	MOVL DI, 1(AX)
  6372	LEAL -64(R11), R11
  6373	ADDQ $0x05, AX
  6374	CMPL R11, $0x04
  6375	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm
  6376
  6377	// emitRepeat
  6378emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
  6379	MOVL R11, BX
  6380	LEAL -4(R11), R11
  6381	CMPL BX, $0x08
  6382	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
  6383	CMPL BX, $0x0c
  6384	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
  6385	CMPL DI, $0x00000800
  6386	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
  6387
  6388cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
  6389	CMPL R11, $0x00000104
  6390	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
  6391	CMPL R11, $0x00010100
  6392	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
  6393	CMPL R11, $0x0100ffff
  6394	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
  6395	LEAL -16842747(R11), R11
  6396	MOVL $0xfffb001d, (AX)
  6397	MOVB $0xff, 4(AX)
  6398	ADDQ $0x05, AX
  6399	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
  6400
  6401repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
  6402	LEAL -65536(R11), R11
  6403	MOVL R11, DI
  6404	MOVW $0x001d, (AX)
  6405	MOVW R11, 2(AX)
  6406	SARL $0x10, DI
  6407	MOVB DI, 4(AX)
  6408	ADDQ $0x05, AX
  6409	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6410
  6411repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
  6412	LEAL -256(R11), R11
  6413	MOVW $0x0019, (AX)
  6414	MOVW R11, 2(AX)
  6415	ADDQ $0x04, AX
  6416	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6417
  6418repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
  6419	LEAL -4(R11), R11
  6420	MOVW $0x0015, (AX)
  6421	MOVB R11, 2(AX)
  6422	ADDQ $0x03, AX
  6423	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6424
  6425repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
  6426	SHLL $0x02, R11
  6427	ORL  $0x01, R11
  6428	MOVW R11, (AX)
  6429	ADDQ $0x02, AX
  6430	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6431
  6432repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
  6433	XORQ BX, BX
  6434	LEAL 1(BX)(R11*4), R11
  6435	MOVB DI, 1(AX)
  6436	SARL $0x08, DI
  6437	SHLL $0x05, DI
  6438	ORL  DI, R11
  6439	MOVB R11, (AX)
  6440	ADDQ $0x02, AX
  6441	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6442
  6443four_bytes_remain_match_nolit_encodeBetterBlockAsm:
  6444	TESTL R11, R11
  6445	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm
  6446	XORL  BX, BX
  6447	LEAL  -1(BX)(R11*4), R11
  6448	MOVB  R11, (AX)
  6449	MOVL  DI, 1(AX)
  6450	ADDQ  $0x05, AX
  6451	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm
  6452
  6453two_byte_offset_match_nolit_encodeBetterBlockAsm:
  6454	CMPL R11, $0x40
  6455	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm
  6456	CMPL DI, $0x00000800
  6457	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm
  6458	MOVL $0x00000001, BX
  6459	LEAL 16(BX), BX
  6460	MOVB DI, 1(AX)
  6461	MOVL DI, R8
  6462	SHRL $0x08, R8
  6463	SHLL $0x05, R8
  6464	ORL  R8, BX
  6465	MOVB BL, (AX)
  6466	ADDQ $0x02, AX
  6467	SUBL $0x08, R11
  6468
  6469	// emitRepeat
  6470	LEAL -4(R11), R11
  6471	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  6472
  6473emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  6474	MOVL R11, BX
  6475	LEAL -4(R11), R11
  6476	CMPL BX, $0x08
  6477	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  6478	CMPL BX, $0x0c
  6479	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  6480	CMPL DI, $0x00000800
  6481	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  6482
  6483cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  6484	CMPL R11, $0x00000104
  6485	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  6486	CMPL R11, $0x00010100
  6487	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  6488	CMPL R11, $0x0100ffff
  6489	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  6490	LEAL -16842747(R11), R11
  6491	MOVL $0xfffb001d, (AX)
  6492	MOVB $0xff, 4(AX)
  6493	ADDQ $0x05, AX
  6494	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  6495
  6496repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  6497	LEAL -65536(R11), R11
  6498	MOVL R11, DI
  6499	MOVW $0x001d, (AX)
  6500	MOVW R11, 2(AX)
  6501	SARL $0x10, DI
  6502	MOVB DI, 4(AX)
  6503	ADDQ $0x05, AX
  6504	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6505
  6506repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  6507	LEAL -256(R11), R11
  6508	MOVW $0x0019, (AX)
  6509	MOVW R11, 2(AX)
  6510	ADDQ $0x04, AX
  6511	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6512
  6513repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  6514	LEAL -4(R11), R11
  6515	MOVW $0x0015, (AX)
  6516	MOVB R11, 2(AX)
  6517	ADDQ $0x03, AX
  6518	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6519
  6520repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  6521	SHLL $0x02, R11
  6522	ORL  $0x01, R11
  6523	MOVW R11, (AX)
  6524	ADDQ $0x02, AX
  6525	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6526
  6527repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  6528	XORQ BX, BX
  6529	LEAL 1(BX)(R11*4), R11
  6530	MOVB DI, 1(AX)
  6531	SARL $0x08, DI
  6532	SHLL $0x05, DI
  6533	ORL  DI, R11
  6534	MOVB R11, (AX)
  6535	ADDQ $0x02, AX
  6536	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6537
  6538long_offset_short_match_nolit_encodeBetterBlockAsm:
  6539	MOVB $0xee, (AX)
  6540	MOVW DI, 1(AX)
  6541	LEAL -60(R11), R11
  6542	ADDQ $0x03, AX
  6543
  6544	// emitRepeat
  6545emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  6546	MOVL R11, BX
  6547	LEAL -4(R11), R11
  6548	CMPL BX, $0x08
  6549	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
  6550	CMPL BX, $0x0c
  6551	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
  6552	CMPL DI, $0x00000800
  6553	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
  6554
  6555cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  6556	CMPL R11, $0x00000104
  6557	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
  6558	CMPL R11, $0x00010100
  6559	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
  6560	CMPL R11, $0x0100ffff
  6561	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
  6562	LEAL -16842747(R11), R11
  6563	MOVL $0xfffb001d, (AX)
  6564	MOVB $0xff, 4(AX)
  6565	ADDQ $0x05, AX
  6566	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
  6567
  6568repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  6569	LEAL -65536(R11), R11
  6570	MOVL R11, DI
  6571	MOVW $0x001d, (AX)
  6572	MOVW R11, 2(AX)
  6573	SARL $0x10, DI
  6574	MOVB DI, 4(AX)
  6575	ADDQ $0x05, AX
  6576	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6577
  6578repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  6579	LEAL -256(R11), R11
  6580	MOVW $0x0019, (AX)
  6581	MOVW R11, 2(AX)
  6582	ADDQ $0x04, AX
  6583	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6584
  6585repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  6586	LEAL -4(R11), R11
  6587	MOVW $0x0015, (AX)
  6588	MOVB R11, 2(AX)
  6589	ADDQ $0x03, AX
  6590	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6591
  6592repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  6593	SHLL $0x02, R11
  6594	ORL  $0x01, R11
  6595	MOVW R11, (AX)
  6596	ADDQ $0x02, AX
  6597	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6598
  6599repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  6600	XORQ BX, BX
  6601	LEAL 1(BX)(R11*4), R11
  6602	MOVB DI, 1(AX)
  6603	SARL $0x08, DI
  6604	SHLL $0x05, DI
  6605	ORL  DI, R11
  6606	MOVB R11, (AX)
  6607	ADDQ $0x02, AX
  6608	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6609
  6610two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
  6611	MOVL R11, BX
  6612	SHLL $0x02, BX
  6613	CMPL R11, $0x0c
  6614	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
  6615	CMPL DI, $0x00000800
  6616	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
  6617	LEAL -15(BX), BX
  6618	MOVB DI, 1(AX)
  6619	SHRL $0x08, DI
  6620	SHLL $0x05, DI
  6621	ORL  DI, BX
  6622	MOVB BL, (AX)
  6623	ADDQ $0x02, AX
  6624	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6625
  6626emit_copy_three_match_nolit_encodeBetterBlockAsm:
  6627	LEAL -2(BX), BX
  6628	MOVB BL, (AX)
  6629	MOVW DI, 1(AX)
  6630	ADDQ $0x03, AX
  6631	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6632
  6633match_is_repeat_encodeBetterBlockAsm:
  6634	MOVL 12(SP), BX
  6635	CMPL BX, SI
  6636	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
  6637	MOVL SI, R8
  6638	MOVL SI, 12(SP)
  6639	LEAQ (DX)(BX*1), R9
  6640	SUBL BX, R8
  6641	LEAL -1(R8), BX
  6642	CMPL BX, $0x3c
  6643	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm
  6644	CMPL BX, $0x00000100
  6645	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm
  6646	CMPL BX, $0x00010000
  6647	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm
  6648	CMPL BX, $0x01000000
  6649	JB   four_bytes_match_emit_repeat_encodeBetterBlockAsm
  6650	MOVB $0xfc, (AX)
  6651	MOVL BX, 1(AX)
  6652	ADDQ $0x05, AX
  6653	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
  6654
  6655four_bytes_match_emit_repeat_encodeBetterBlockAsm:
  6656	MOVL BX, R10
  6657	SHRL $0x10, R10
  6658	MOVB $0xf8, (AX)
  6659	MOVW BX, 1(AX)
  6660	MOVB R10, 3(AX)
  6661	ADDQ $0x04, AX
  6662	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
  6663
  6664three_bytes_match_emit_repeat_encodeBetterBlockAsm:
  6665	MOVB $0xf4, (AX)
  6666	MOVW BX, 1(AX)
  6667	ADDQ $0x03, AX
  6668	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
  6669
  6670two_bytes_match_emit_repeat_encodeBetterBlockAsm:
  6671	MOVB $0xf0, (AX)
  6672	MOVB BL, 1(AX)
  6673	ADDQ $0x02, AX
  6674	CMPL BX, $0x40
  6675	JB   memmove_match_emit_repeat_encodeBetterBlockAsm
  6676	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
  6677
  6678one_byte_match_emit_repeat_encodeBetterBlockAsm:
  6679	SHLB $0x02, BL
  6680	MOVB BL, (AX)
  6681	ADDQ $0x01, AX
  6682
  6683memmove_match_emit_repeat_encodeBetterBlockAsm:
  6684	LEAQ (AX)(R8*1), BX
  6685
  6686	// genMemMoveShort
  6687	CMPQ R8, $0x04
  6688	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4
  6689	CMPQ R8, $0x08
  6690	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
  6691	CMPQ R8, $0x10
  6692	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
  6693	CMPQ R8, $0x20
  6694	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
  6695	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
  6696
  6697emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4:
  6698	MOVL (R9), R10
  6699	MOVL R10, (AX)
  6700	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
  6701
  6702emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
  6703	MOVL (R9), R10
  6704	MOVL -4(R9)(R8*1), R9
  6705	MOVL R10, (AX)
  6706	MOVL R9, -4(AX)(R8*1)
  6707	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
  6708
  6709emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
  6710	MOVQ (R9), R10
  6711	MOVQ -8(R9)(R8*1), R9
  6712	MOVQ R10, (AX)
  6713	MOVQ R9, -8(AX)(R8*1)
  6714	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
  6715
  6716emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
  6717	MOVOU (R9), X0
  6718	MOVOU -16(R9)(R8*1), X1
  6719	MOVOU X0, (AX)
  6720	MOVOU X1, -16(AX)(R8*1)
  6721	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
  6722
  6723emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
  6724	MOVOU (R9), X0
  6725	MOVOU 16(R9), X1
  6726	MOVOU -32(R9)(R8*1), X2
  6727	MOVOU -16(R9)(R8*1), X3
  6728	MOVOU X0, (AX)
  6729	MOVOU X1, 16(AX)
  6730	MOVOU X2, -32(AX)(R8*1)
  6731	MOVOU X3, -16(AX)(R8*1)
  6732
  6733memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
  6734	MOVQ BX, AX
  6735	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
  6736
  6737memmove_long_match_emit_repeat_encodeBetterBlockAsm:
  6738	LEAQ (AX)(R8*1), BX
  6739
  6740	// genMemMoveLong
  6741	MOVOU (R9), X0
  6742	MOVOU 16(R9), X1
  6743	MOVOU -32(R9)(R8*1), X2
  6744	MOVOU -16(R9)(R8*1), X3
  6745	MOVQ  R8, R12
  6746	SHRQ  $0x05, R12
  6747	MOVQ  AX, R10
  6748	ANDL  $0x0000001f, R10
  6749	MOVQ  $0x00000040, R13
  6750	SUBQ  R10, R13
  6751	DECQ  R12
  6752	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
  6753	LEAQ  -32(R9)(R13*1), R10
  6754	LEAQ  -32(AX)(R13*1), R14
  6755
  6756emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
  6757	MOVOU (R10), X4
  6758	MOVOU 16(R10), X5
  6759	MOVOA X4, (R14)
  6760	MOVOA X5, 16(R14)
  6761	ADDQ  $0x20, R14
  6762	ADDQ  $0x20, R10
  6763	ADDQ  $0x20, R13
  6764	DECQ  R12
  6765	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
  6766
  6767emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
  6768	MOVOU -32(R9)(R13*1), X4
  6769	MOVOU -16(R9)(R13*1), X5
  6770	MOVOA X4, -32(AX)(R13*1)
  6771	MOVOA X5, -16(AX)(R13*1)
  6772	ADDQ  $0x20, R13
  6773	CMPQ  R8, R13
  6774	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
  6775	MOVOU X0, (AX)
  6776	MOVOU X1, 16(AX)
  6777	MOVOU X2, -32(AX)(R8*1)
  6778	MOVOU X3, -16(AX)(R8*1)
  6779	MOVQ  BX, AX
  6780
  6781emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
  6782	ADDL R11, CX
  6783	ADDL $0x04, R11
  6784	MOVL CX, 12(SP)
  6785
  6786	// emitRepeat
  6787emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
  6788	MOVL R11, BX
  6789	LEAL -4(R11), R11
  6790	CMPL BX, $0x08
  6791	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm
  6792	CMPL BX, $0x0c
  6793	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
  6794	CMPL DI, $0x00000800
  6795	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
  6796
  6797cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
  6798	CMPL R11, $0x00000104
  6799	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm
  6800	CMPL R11, $0x00010100
  6801	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm
  6802	CMPL R11, $0x0100ffff
  6803	JB   repeat_five_match_nolit_repeat_encodeBetterBlockAsm
  6804	LEAL -16842747(R11), R11
  6805	MOVL $0xfffb001d, (AX)
  6806	MOVB $0xff, 4(AX)
  6807	ADDQ $0x05, AX
  6808	JMP  emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
  6809
  6810repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
  6811	LEAL -65536(R11), R11
  6812	MOVL R11, DI
  6813	MOVW $0x001d, (AX)
  6814	MOVW R11, 2(AX)
  6815	SARL $0x10, DI
  6816	MOVB DI, 4(AX)
  6817	ADDQ $0x05, AX
  6818	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6819
  6820repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
  6821	LEAL -256(R11), R11
  6822	MOVW $0x0019, (AX)
  6823	MOVW R11, 2(AX)
  6824	ADDQ $0x04, AX
  6825	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6826
  6827repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
  6828	LEAL -4(R11), R11
  6829	MOVW $0x0015, (AX)
  6830	MOVB R11, 2(AX)
  6831	ADDQ $0x03, AX
  6832	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6833
  6834repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
  6835	SHLL $0x02, R11
  6836	ORL  $0x01, R11
  6837	MOVW R11, (AX)
  6838	ADDQ $0x02, AX
  6839	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
  6840
  6841repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
  6842	XORQ BX, BX
  6843	LEAL 1(BX)(R11*4), R11
  6844	MOVB DI, 1(AX)
  6845	SARL $0x08, DI
  6846	SHLL $0x05, DI
  6847	ORL  DI, R11
  6848	MOVB R11, (AX)
  6849	ADDQ $0x02, AX
  6850
  6851match_nolit_emitcopy_end_encodeBetterBlockAsm:
  6852	CMPL CX, 8(SP)
  6853	JAE  emit_remainder_encodeBetterBlockAsm
  6854	CMPQ AX, (SP)
  6855	JB   match_nolit_dst_ok_encodeBetterBlockAsm
  6856	MOVQ $0x00000000, ret+48(FP)
  6857	RET
  6858
  6859match_nolit_dst_ok_encodeBetterBlockAsm:
  6860	MOVQ  $0x00cf1bbcdcbfa563, BX
  6861	MOVQ  $0x9e3779b1, DI
  6862	LEAQ  1(SI), SI
  6863	LEAQ  -2(CX), R8
  6864	MOVQ  (DX)(SI*1), R9
  6865	MOVQ  1(DX)(SI*1), R10
  6866	MOVQ  (DX)(R8*1), R11
  6867	MOVQ  1(DX)(R8*1), R12
  6868	SHLQ  $0x08, R9
  6869	IMULQ BX, R9
  6870	SHRQ  $0x2f, R9
  6871	SHLQ  $0x20, R10
  6872	IMULQ DI, R10
  6873	SHRQ  $0x32, R10
  6874	SHLQ  $0x08, R11
  6875	IMULQ BX, R11
  6876	SHRQ  $0x2f, R11
  6877	SHLQ  $0x20, R12
  6878	IMULQ DI, R12
  6879	SHRQ  $0x32, R12
  6880	LEAQ  1(SI), DI
  6881	LEAQ  1(R8), R13
  6882	MOVL  SI, 24(SP)(R9*4)
  6883	MOVL  R8, 24(SP)(R11*4)
  6884	MOVL  DI, 524312(SP)(R10*4)
  6885	MOVL  R13, 524312(SP)(R12*4)
  6886	LEAQ  1(R8)(SI*1), DI
  6887	SHRQ  $0x01, DI
  6888	ADDQ  $0x01, SI
  6889	SUBQ  $0x01, R8
  6890
  6891index_loop_encodeBetterBlockAsm:
  6892	CMPQ  DI, R8
  6893	JAE   search_loop_encodeBetterBlockAsm
  6894	MOVQ  (DX)(SI*1), R9
  6895	MOVQ  (DX)(DI*1), R10
  6896	SHLQ  $0x08, R9
  6897	IMULQ BX, R9
  6898	SHRQ  $0x2f, R9
  6899	SHLQ  $0x08, R10
  6900	IMULQ BX, R10
  6901	SHRQ  $0x2f, R10
  6902	MOVL  SI, 24(SP)(R9*4)
  6903	MOVL  DI, 24(SP)(R10*4)
  6904	ADDQ  $0x02, SI
  6905	ADDQ  $0x02, DI
  6906	JMP   index_loop_encodeBetterBlockAsm
  6907
  6908emit_remainder_encodeBetterBlockAsm:
  6909	MOVQ src_len+32(FP), CX
  6910	SUBL 12(SP), CX
  6911	LEAQ 5(AX)(CX*1), CX
  6912	CMPQ CX, (SP)
  6913	JB   emit_remainder_ok_encodeBetterBlockAsm
  6914	MOVQ $0x00000000, ret+48(FP)
  6915	RET
  6916
  6917emit_remainder_ok_encodeBetterBlockAsm:
  6918	MOVQ src_len+32(FP), CX
  6919	MOVL 12(SP), BX
  6920	CMPL BX, CX
  6921	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm
  6922	MOVL CX, SI
  6923	MOVL CX, 12(SP)
  6924	LEAQ (DX)(BX*1), CX
  6925	SUBL BX, SI
  6926	LEAL -1(SI), DX
  6927	CMPL DX, $0x3c
  6928	JB   one_byte_emit_remainder_encodeBetterBlockAsm
  6929	CMPL DX, $0x00000100
  6930	JB   two_bytes_emit_remainder_encodeBetterBlockAsm
  6931	CMPL DX, $0x00010000
  6932	JB   three_bytes_emit_remainder_encodeBetterBlockAsm
  6933	CMPL DX, $0x01000000
  6934	JB   four_bytes_emit_remainder_encodeBetterBlockAsm
  6935	MOVB $0xfc, (AX)
  6936	MOVL DX, 1(AX)
  6937	ADDQ $0x05, AX
  6938	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
  6939
  6940four_bytes_emit_remainder_encodeBetterBlockAsm:
  6941	MOVL DX, BX
  6942	SHRL $0x10, BX
  6943	MOVB $0xf8, (AX)
  6944	MOVW DX, 1(AX)
  6945	MOVB BL, 3(AX)
  6946	ADDQ $0x04, AX
  6947	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
  6948
  6949three_bytes_emit_remainder_encodeBetterBlockAsm:
  6950	MOVB $0xf4, (AX)
  6951	MOVW DX, 1(AX)
  6952	ADDQ $0x03, AX
  6953	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
  6954
  6955two_bytes_emit_remainder_encodeBetterBlockAsm:
  6956	MOVB $0xf0, (AX)
  6957	MOVB DL, 1(AX)
  6958	ADDQ $0x02, AX
  6959	CMPL DX, $0x40
  6960	JB   memmove_emit_remainder_encodeBetterBlockAsm
  6961	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
  6962
  6963one_byte_emit_remainder_encodeBetterBlockAsm:
  6964	SHLB $0x02, DL
  6965	MOVB DL, (AX)
  6966	ADDQ $0x01, AX
  6967
  6968memmove_emit_remainder_encodeBetterBlockAsm:
  6969	LEAQ (AX)(SI*1), DX
  6970	MOVL SI, BX
  6971
  6972	// genMemMoveShort
  6973	CMPQ BX, $0x03
  6974	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2
  6975	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3
  6976	CMPQ BX, $0x08
  6977	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7
  6978	CMPQ BX, $0x10
  6979	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16
  6980	CMPQ BX, $0x20
  6981	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
  6982	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
  6983
  6984emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2:
  6985	MOVB (CX), SI
  6986	MOVB -1(CX)(BX*1), CL
  6987	MOVB SI, (AX)
  6988	MOVB CL, -1(AX)(BX*1)
  6989	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  6990
  6991emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3:
  6992	MOVW (CX), SI
  6993	MOVB 2(CX), CL
  6994	MOVW SI, (AX)
  6995	MOVB CL, 2(AX)
  6996	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  6997
  6998emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
  6999	MOVL (CX), SI
  7000	MOVL -4(CX)(BX*1), CX
  7001	MOVL SI, (AX)
  7002	MOVL CX, -4(AX)(BX*1)
  7003	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  7004
  7005emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
  7006	MOVQ (CX), SI
  7007	MOVQ -8(CX)(BX*1), CX
  7008	MOVQ SI, (AX)
  7009	MOVQ CX, -8(AX)(BX*1)
  7010	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  7011
  7012emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
  7013	MOVOU (CX), X0
  7014	MOVOU -16(CX)(BX*1), X1
  7015	MOVOU X0, (AX)
  7016	MOVOU X1, -16(AX)(BX*1)
  7017	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  7018
  7019emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
  7020	MOVOU (CX), X0
  7021	MOVOU 16(CX), X1
  7022	MOVOU -32(CX)(BX*1), X2
  7023	MOVOU -16(CX)(BX*1), X3
  7024	MOVOU X0, (AX)
  7025	MOVOU X1, 16(AX)
  7026	MOVOU X2, -32(AX)(BX*1)
  7027	MOVOU X3, -16(AX)(BX*1)
  7028
  7029memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
  7030	MOVQ DX, AX
  7031	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm
  7032
  7033memmove_long_emit_remainder_encodeBetterBlockAsm:
  7034	LEAQ (AX)(SI*1), DX
  7035	MOVL SI, BX
  7036
  7037	// genMemMoveLong
  7038	MOVOU (CX), X0
  7039	MOVOU 16(CX), X1
  7040	MOVOU -32(CX)(BX*1), X2
  7041	MOVOU -16(CX)(BX*1), X3
  7042	MOVQ  BX, DI
  7043	SHRQ  $0x05, DI
  7044	MOVQ  AX, SI
  7045	ANDL  $0x0000001f, SI
  7046	MOVQ  $0x00000040, R8
  7047	SUBQ  SI, R8
  7048	DECQ  DI
  7049	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
  7050	LEAQ  -32(CX)(R8*1), SI
  7051	LEAQ  -32(AX)(R8*1), R9
  7052
  7053emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
  7054	MOVOU (SI), X4
  7055	MOVOU 16(SI), X5
  7056	MOVOA X4, (R9)
  7057	MOVOA X5, 16(R9)
  7058	ADDQ  $0x20, R9
  7059	ADDQ  $0x20, SI
  7060	ADDQ  $0x20, R8
  7061	DECQ  DI
  7062	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
  7063
  7064emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
  7065	MOVOU -32(CX)(R8*1), X4
  7066	MOVOU -16(CX)(R8*1), X5
  7067	MOVOA X4, -32(AX)(R8*1)
  7068	MOVOA X5, -16(AX)(R8*1)
  7069	ADDQ  $0x20, R8
  7070	CMPQ  BX, R8
  7071	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
  7072	MOVOU X0, (AX)
  7073	MOVOU X1, 16(AX)
  7074	MOVOU X2, -32(AX)(BX*1)
  7075	MOVOU X3, -16(AX)(BX*1)
  7076	MOVQ  DX, AX
  7077
  7078emit_literal_done_emit_remainder_encodeBetterBlockAsm:
  7079	MOVQ dst_base+0(FP), CX
  7080	SUBQ CX, AX
  7081	MOVQ AX, ret+48(FP)
  7082	RET
  7083
  7084// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
  7085// Requires: BMI, SSE2
  7086TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56
  7087	MOVQ dst_base+0(FP), AX
  7088	MOVQ $0x00001200, CX
  7089	LEAQ 24(SP), DX
  7090	PXOR X0, X0
  7091
  7092zero_loop_encodeBetterBlockAsm4MB:
  7093	MOVOU X0, (DX)
  7094	MOVOU X0, 16(DX)
  7095	MOVOU X0, 32(DX)
  7096	MOVOU X0, 48(DX)
  7097	MOVOU X0, 64(DX)
  7098	MOVOU X0, 80(DX)
  7099	MOVOU X0, 96(DX)
  7100	MOVOU X0, 112(DX)
  7101	ADDQ  $0x80, DX
  7102	DECQ  CX
  7103	JNZ   zero_loop_encodeBetterBlockAsm4MB
  7104	MOVL  $0x00000000, 12(SP)
  7105	MOVQ  src_len+32(FP), CX
  7106	LEAQ  -6(CX), DX
  7107	LEAQ  -8(CX), BX
  7108	MOVL  BX, 8(SP)
  7109	SHRQ  $0x05, CX
  7110	SUBL  CX, DX
  7111	LEAQ  (AX)(DX*1), DX
  7112	MOVQ  DX, (SP)
  7113	MOVL  $0x00000001, CX
  7114	MOVL  $0x00000000, 16(SP)
  7115	MOVQ  src_base+24(FP), DX
  7116
  7117search_loop_encodeBetterBlockAsm4MB:
  7118	MOVL CX, BX
  7119	SUBL 12(SP), BX
  7120	SHRL $0x07, BX
  7121	CMPL BX, $0x63
  7122	JBE  check_maxskip_ok_encodeBetterBlockAsm4MB
  7123	LEAL 100(CX), BX
  7124	JMP  check_maxskip_cont_encodeBetterBlockAsm4MB
  7125
  7126check_maxskip_ok_encodeBetterBlockAsm4MB:
  7127	LEAL 1(CX)(BX*1), BX
  7128
  7129check_maxskip_cont_encodeBetterBlockAsm4MB:
  7130	CMPL  BX, 8(SP)
  7131	JAE   emit_remainder_encodeBetterBlockAsm4MB
  7132	MOVQ  (DX)(CX*1), SI
  7133	MOVL  BX, 20(SP)
  7134	MOVQ  $0x00cf1bbcdcbfa563, R8
  7135	MOVQ  $0x9e3779b1, BX
  7136	MOVQ  SI, R9
  7137	MOVQ  SI, R10
  7138	SHLQ  $0x08, R9
  7139	IMULQ R8, R9
  7140	SHRQ  $0x2f, R9
  7141	SHLQ  $0x20, R10
  7142	IMULQ BX, R10
  7143	SHRQ  $0x32, R10
  7144	MOVL  24(SP)(R9*4), BX
  7145	MOVL  524312(SP)(R10*4), DI
  7146	MOVL  CX, 24(SP)(R9*4)
  7147	MOVL  CX, 524312(SP)(R10*4)
  7148	MOVQ  (DX)(BX*1), R9
  7149	MOVQ  (DX)(DI*1), R10
  7150	CMPQ  R9, SI
  7151	JEQ   candidate_match_encodeBetterBlockAsm4MB
  7152	CMPQ  R10, SI
  7153	JNE   no_short_found_encodeBetterBlockAsm4MB
  7154	MOVL  DI, BX
  7155	JMP   candidate_match_encodeBetterBlockAsm4MB
  7156
  7157no_short_found_encodeBetterBlockAsm4MB:
  7158	CMPL R9, SI
  7159	JEQ  candidate_match_encodeBetterBlockAsm4MB
  7160	CMPL R10, SI
  7161	JEQ  candidateS_match_encodeBetterBlockAsm4MB
  7162	MOVL 20(SP), CX
  7163	JMP  search_loop_encodeBetterBlockAsm4MB
  7164
  7165candidateS_match_encodeBetterBlockAsm4MB:
  7166	SHRQ  $0x08, SI
  7167	MOVQ  SI, R9
  7168	SHLQ  $0x08, R9
  7169	IMULQ R8, R9
  7170	SHRQ  $0x2f, R9
  7171	MOVL  24(SP)(R9*4), BX
  7172	INCL  CX
  7173	MOVL  CX, 24(SP)(R9*4)
  7174	CMPL  (DX)(BX*1), SI
  7175	JEQ   candidate_match_encodeBetterBlockAsm4MB
  7176	DECL  CX
  7177	MOVL  DI, BX
  7178
  7179candidate_match_encodeBetterBlockAsm4MB:
  7180	MOVL  12(SP), SI
  7181	TESTL BX, BX
  7182	JZ    match_extend_back_end_encodeBetterBlockAsm4MB
  7183
  7184match_extend_back_loop_encodeBetterBlockAsm4MB:
  7185	CMPL CX, SI
  7186	JBE  match_extend_back_end_encodeBetterBlockAsm4MB
  7187	MOVB -1(DX)(BX*1), DI
  7188	MOVB -1(DX)(CX*1), R8
  7189	CMPB DI, R8
  7190	JNE  match_extend_back_end_encodeBetterBlockAsm4MB
  7191	LEAL -1(CX), CX
  7192	DECL BX
  7193	JZ   match_extend_back_end_encodeBetterBlockAsm4MB
  7194	JMP  match_extend_back_loop_encodeBetterBlockAsm4MB
  7195
  7196match_extend_back_end_encodeBetterBlockAsm4MB:
  7197	MOVL CX, SI
  7198	SUBL 12(SP), SI
  7199	LEAQ 4(AX)(SI*1), SI
  7200	CMPQ SI, (SP)
  7201	JB   match_dst_size_check_encodeBetterBlockAsm4MB
  7202	MOVQ $0x00000000, ret+48(FP)
  7203	RET
  7204
  7205match_dst_size_check_encodeBetterBlockAsm4MB:
  7206	MOVL CX, SI
  7207	ADDL $0x04, CX
  7208	ADDL $0x04, BX
  7209	MOVQ src_len+32(FP), DI
  7210	SUBL CX, DI
  7211	LEAQ (DX)(CX*1), R8
  7212	LEAQ (DX)(BX*1), R9
  7213
  7214	// matchLen
  7215	XORL R11, R11
  7216
  7217matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB:
  7218	CMPL DI, $0x10
  7219	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm4MB
  7220	MOVQ (R8)(R11*1), R10
  7221	MOVQ 8(R8)(R11*1), R12
  7222	XORQ (R9)(R11*1), R10
  7223	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
  7224	XORQ 8(R9)(R11*1), R12
  7225	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB
  7226	LEAL -16(DI), DI
  7227	LEAL 16(R11), R11
  7228	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB
  7229
  7230matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB:
  7231#ifdef GOAMD64_v3
  7232	TZCNTQ R12, R12
  7233
  7234#else
  7235	BSFQ R12, R12
  7236
  7237#endif
  7238	SARQ $0x03, R12
  7239	LEAL 8(R11)(R12*1), R11
  7240	JMP  match_nolit_end_encodeBetterBlockAsm4MB
  7241
  7242matchlen_match8_match_nolit_encodeBetterBlockAsm4MB:
  7243	CMPL DI, $0x08
  7244	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
  7245	MOVQ (R8)(R11*1), R10
  7246	XORQ (R9)(R11*1), R10
  7247	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
  7248	LEAL -8(DI), DI
  7249	LEAL 8(R11), R11
  7250	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
  7251
  7252matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB:
  7253#ifdef GOAMD64_v3
  7254	TZCNTQ R10, R10
  7255
  7256#else
  7257	BSFQ R10, R10
  7258
  7259#endif
  7260	SARQ $0x03, R10
  7261	LEAL (R11)(R10*1), R11
  7262	JMP  match_nolit_end_encodeBetterBlockAsm4MB
  7263
  7264matchlen_match4_match_nolit_encodeBetterBlockAsm4MB:
  7265	CMPL DI, $0x04
  7266	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
  7267	MOVL (R8)(R11*1), R10
  7268	CMPL (R9)(R11*1), R10
  7269	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
  7270	LEAL -4(DI), DI
  7271	LEAL 4(R11), R11
  7272
  7273matchlen_match2_match_nolit_encodeBetterBlockAsm4MB:
  7274	CMPL DI, $0x01
  7275	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
  7276	JB   match_nolit_end_encodeBetterBlockAsm4MB
  7277	MOVW (R8)(R11*1), R10
  7278	CMPW (R9)(R11*1), R10
  7279	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
  7280	LEAL 2(R11), R11
  7281	SUBL $0x02, DI
  7282	JZ   match_nolit_end_encodeBetterBlockAsm4MB
  7283
  7284matchlen_match1_match_nolit_encodeBetterBlockAsm4MB:
  7285	MOVB (R8)(R11*1), R10
  7286	CMPB (R9)(R11*1), R10
  7287	JNE  match_nolit_end_encodeBetterBlockAsm4MB
  7288	LEAL 1(R11), R11
  7289
  7290match_nolit_end_encodeBetterBlockAsm4MB:
  7291	MOVL CX, DI
  7292	SUBL BX, DI
  7293
  7294	// Check if repeat
  7295	CMPL 16(SP), DI
  7296	JEQ  match_is_repeat_encodeBetterBlockAsm4MB
  7297	CMPL R11, $0x01
  7298	JA   match_length_ok_encodeBetterBlockAsm4MB
  7299	CMPL DI, $0x0000ffff
  7300	JBE  match_length_ok_encodeBetterBlockAsm4MB
  7301	MOVL 20(SP), CX
  7302	INCL CX
  7303	JMP  search_loop_encodeBetterBlockAsm4MB
  7304
  7305match_length_ok_encodeBetterBlockAsm4MB:
  7306	MOVL DI, 16(SP)
  7307	MOVL 12(SP), BX
  7308	CMPL BX, SI
  7309	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
  7310	MOVL SI, R8
  7311	MOVL SI, 12(SP)
  7312	LEAQ (DX)(BX*1), R9
  7313	SUBL BX, R8
  7314	LEAL -1(R8), BX
  7315	CMPL BX, $0x3c
  7316	JB   one_byte_match_emit_encodeBetterBlockAsm4MB
  7317	CMPL BX, $0x00000100
  7318	JB   two_bytes_match_emit_encodeBetterBlockAsm4MB
  7319	CMPL BX, $0x00010000
  7320	JB   three_bytes_match_emit_encodeBetterBlockAsm4MB
  7321	MOVL BX, R10
  7322	SHRL $0x10, R10
  7323	MOVB $0xf8, (AX)
  7324	MOVW BX, 1(AX)
  7325	MOVB R10, 3(AX)
  7326	ADDQ $0x04, AX
  7327	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
  7328
  7329three_bytes_match_emit_encodeBetterBlockAsm4MB:
  7330	MOVB $0xf4, (AX)
  7331	MOVW BX, 1(AX)
  7332	ADDQ $0x03, AX
  7333	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
  7334
  7335two_bytes_match_emit_encodeBetterBlockAsm4MB:
  7336	MOVB $0xf0, (AX)
  7337	MOVB BL, 1(AX)
  7338	ADDQ $0x02, AX
  7339	CMPL BX, $0x40
  7340	JB   memmove_match_emit_encodeBetterBlockAsm4MB
  7341	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
  7342
  7343one_byte_match_emit_encodeBetterBlockAsm4MB:
  7344	SHLB $0x02, BL
  7345	MOVB BL, (AX)
  7346	ADDQ $0x01, AX
  7347
  7348memmove_match_emit_encodeBetterBlockAsm4MB:
  7349	LEAQ (AX)(R8*1), BX
  7350
  7351	// genMemMoveShort
  7352	CMPQ R8, $0x04
  7353	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
  7354	CMPQ R8, $0x08
  7355	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
  7356	CMPQ R8, $0x10
  7357	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
  7358	CMPQ R8, $0x20
  7359	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
  7360	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64
  7361
  7362emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
  7363	MOVL (R9), R10
  7364	MOVL R10, (AX)
  7365	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
  7366
  7367emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
  7368	MOVL (R9), R10
  7369	MOVL -4(R9)(R8*1), R9
  7370	MOVL R10, (AX)
  7371	MOVL R9, -4(AX)(R8*1)
  7372	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
  7373
  7374emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
  7375	MOVQ (R9), R10
  7376	MOVQ -8(R9)(R8*1), R9
  7377	MOVQ R10, (AX)
  7378	MOVQ R9, -8(AX)(R8*1)
  7379	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
  7380
  7381emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
  7382	MOVOU (R9), X0
  7383	MOVOU -16(R9)(R8*1), X1
  7384	MOVOU X0, (AX)
  7385	MOVOU X1, -16(AX)(R8*1)
  7386	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
  7387
  7388emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
  7389	MOVOU (R9), X0
  7390	MOVOU 16(R9), X1
  7391	MOVOU -32(R9)(R8*1), X2
  7392	MOVOU -16(R9)(R8*1), X3
  7393	MOVOU X0, (AX)
  7394	MOVOU X1, 16(AX)
  7395	MOVOU X2, -32(AX)(R8*1)
  7396	MOVOU X3, -16(AX)(R8*1)
  7397
  7398memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
  7399	MOVQ BX, AX
  7400	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
  7401
  7402memmove_long_match_emit_encodeBetterBlockAsm4MB:
  7403	LEAQ (AX)(R8*1), BX
  7404
  7405	// genMemMoveLong
  7406	MOVOU (R9), X0
  7407	MOVOU 16(R9), X1
  7408	MOVOU -32(R9)(R8*1), X2
  7409	MOVOU -16(R9)(R8*1), X3
  7410	MOVQ  R8, R12
  7411	SHRQ  $0x05, R12
  7412	MOVQ  AX, R10
  7413	ANDL  $0x0000001f, R10
  7414	MOVQ  $0x00000040, R13
  7415	SUBQ  R10, R13
  7416	DECQ  R12
  7417	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  7418	LEAQ  -32(R9)(R13*1), R10
  7419	LEAQ  -32(AX)(R13*1), R14
  7420
  7421emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
  7422	MOVOU (R10), X4
  7423	MOVOU 16(R10), X5
  7424	MOVOA X4, (R14)
  7425	MOVOA X5, 16(R14)
  7426	ADDQ  $0x20, R14
  7427	ADDQ  $0x20, R10
  7428	ADDQ  $0x20, R13
  7429	DECQ  R12
  7430	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back
  7431
  7432emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
  7433	MOVOU -32(R9)(R13*1), X4
  7434	MOVOU -16(R9)(R13*1), X5
  7435	MOVOA X4, -32(AX)(R13*1)
  7436	MOVOA X5, -16(AX)(R13*1)
  7437	ADDQ  $0x20, R13
  7438	CMPQ  R8, R13
  7439	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  7440	MOVOU X0, (AX)
  7441	MOVOU X1, 16(AX)
  7442	MOVOU X2, -32(AX)(R8*1)
  7443	MOVOU X3, -16(AX)(R8*1)
  7444	MOVQ  BX, AX
  7445
  7446emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
  7447	ADDL R11, CX
  7448	ADDL $0x04, R11
  7449	MOVL CX, 12(SP)
  7450
  7451	// emitCopy
  7452	CMPL DI, $0x00010000
  7453	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
  7454	CMPL R11, $0x40
  7455	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
  7456	MOVB $0xff, (AX)
  7457	MOVL DI, 1(AX)
  7458	LEAL -64(R11), R11
  7459	ADDQ $0x05, AX
  7460	CMPL R11, $0x04
  7461	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
  7462
  7463	// emitRepeat
  7464	MOVL R11, BX
  7465	LEAL -4(R11), R11
  7466	CMPL BX, $0x08
  7467	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  7468	CMPL BX, $0x0c
  7469	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  7470	CMPL DI, $0x00000800
  7471	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  7472
  7473cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  7474	CMPL R11, $0x00000104
  7475	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  7476	CMPL R11, $0x00010100
  7477	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  7478	LEAL -65536(R11), R11
  7479	MOVL R11, DI
  7480	MOVW $0x001d, (AX)
  7481	MOVW R11, 2(AX)
  7482	SARL $0x10, DI
  7483	MOVB DI, 4(AX)
  7484	ADDQ $0x05, AX
  7485	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7486
  7487repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  7488	LEAL -256(R11), R11
  7489	MOVW $0x0019, (AX)
  7490	MOVW R11, 2(AX)
  7491	ADDQ $0x04, AX
  7492	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7493
  7494repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  7495	LEAL -4(R11), R11
  7496	MOVW $0x0015, (AX)
  7497	MOVB R11, 2(AX)
  7498	ADDQ $0x03, AX
  7499	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7500
  7501repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  7502	SHLL $0x02, R11
  7503	ORL  $0x01, R11
  7504	MOVW R11, (AX)
  7505	ADDQ $0x02, AX
  7506	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7507
  7508repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  7509	XORQ BX, BX
  7510	LEAL 1(BX)(R11*4), R11
  7511	MOVB DI, 1(AX)
  7512	SARL $0x08, DI
  7513	SHLL $0x05, DI
  7514	ORL  DI, R11
  7515	MOVB R11, (AX)
  7516	ADDQ $0x02, AX
  7517	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7518
  7519four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
  7520	TESTL R11, R11
  7521	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7522	XORL  BX, BX
  7523	LEAL  -1(BX)(R11*4), R11
  7524	MOVB  R11, (AX)
  7525	MOVL  DI, 1(AX)
  7526	ADDQ  $0x05, AX
  7527	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7528
  7529two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
  7530	CMPL R11, $0x40
  7531	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
  7532	CMPL DI, $0x00000800
  7533	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm4MB
  7534	MOVL $0x00000001, BX
  7535	LEAL 16(BX), BX
  7536	MOVB DI, 1(AX)
  7537	SHRL $0x08, DI
  7538	SHLL $0x05, DI
  7539	ORL  DI, BX
  7540	MOVB BL, (AX)
  7541	ADDQ $0x02, AX
  7542	SUBL $0x08, R11
  7543
  7544	// emitRepeat
  7545	LEAL -4(R11), R11
  7546	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  7547	MOVL R11, BX
  7548	LEAL -4(R11), R11
  7549	CMPL BX, $0x08
  7550	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  7551	CMPL BX, $0x0c
  7552	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  7553	CMPL DI, $0x00000800
  7554	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  7555
  7556cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  7557	CMPL R11, $0x00000104
  7558	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  7559	CMPL R11, $0x00010100
  7560	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  7561	LEAL -65536(R11), R11
  7562	MOVL R11, DI
  7563	MOVW $0x001d, (AX)
  7564	MOVW R11, 2(AX)
  7565	SARL $0x10, DI
  7566	MOVB DI, 4(AX)
  7567	ADDQ $0x05, AX
  7568	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7569
  7570repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  7571	LEAL -256(R11), R11
  7572	MOVW $0x0019, (AX)
  7573	MOVW R11, 2(AX)
  7574	ADDQ $0x04, AX
  7575	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7576
  7577repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  7578	LEAL -4(R11), R11
  7579	MOVW $0x0015, (AX)
  7580	MOVB R11, 2(AX)
  7581	ADDQ $0x03, AX
  7582	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7583
  7584repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  7585	SHLL $0x02, R11
  7586	ORL  $0x01, R11
  7587	MOVW R11, (AX)
  7588	ADDQ $0x02, AX
  7589	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7590
  7591repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  7592	XORQ BX, BX
  7593	LEAL 1(BX)(R11*4), R11
  7594	MOVB DI, 1(AX)
  7595	SARL $0x08, DI
  7596	SHLL $0x05, DI
  7597	ORL  DI, R11
  7598	MOVB R11, (AX)
  7599	ADDQ $0x02, AX
  7600	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7601
  7602long_offset_short_match_nolit_encodeBetterBlockAsm4MB:
  7603	MOVB $0xee, (AX)
  7604	MOVW DI, 1(AX)
  7605	LEAL -60(R11), R11
  7606	ADDQ $0x03, AX
  7607
  7608	// emitRepeat
  7609	MOVL R11, BX
  7610	LEAL -4(R11), R11
  7611	CMPL BX, $0x08
  7612	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  7613	CMPL BX, $0x0c
  7614	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  7615	CMPL DI, $0x00000800
  7616	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  7617
  7618cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  7619	CMPL R11, $0x00000104
  7620	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  7621	CMPL R11, $0x00010100
  7622	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  7623	LEAL -65536(R11), R11
  7624	MOVL R11, DI
  7625	MOVW $0x001d, (AX)
  7626	MOVW R11, 2(AX)
  7627	SARL $0x10, DI
  7628	MOVB DI, 4(AX)
  7629	ADDQ $0x05, AX
  7630	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7631
  7632repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  7633	LEAL -256(R11), R11
  7634	MOVW $0x0019, (AX)
  7635	MOVW R11, 2(AX)
  7636	ADDQ $0x04, AX
  7637	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7638
  7639repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  7640	LEAL -4(R11), R11
  7641	MOVW $0x0015, (AX)
  7642	MOVB R11, 2(AX)
  7643	ADDQ $0x03, AX
  7644	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7645
  7646repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  7647	SHLL $0x02, R11
  7648	ORL  $0x01, R11
  7649	MOVW R11, (AX)
  7650	ADDQ $0x02, AX
  7651	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7652
  7653repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  7654	XORQ BX, BX
  7655	LEAL 1(BX)(R11*4), R11
  7656	MOVB DI, 1(AX)
  7657	SARL $0x08, DI
  7658	SHLL $0x05, DI
  7659	ORL  DI, R11
  7660	MOVB R11, (AX)
  7661	ADDQ $0x02, AX
  7662	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7663
  7664two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
  7665	MOVL R11, BX
  7666	SHLL $0x02, BX
  7667	CMPL R11, $0x0c
  7668	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
  7669	CMPL DI, $0x00000800
  7670	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
  7671	LEAL -15(BX), BX
  7672	MOVB DI, 1(AX)
  7673	SHRL $0x08, DI
  7674	SHLL $0x05, DI
  7675	ORL  DI, BX
  7676	MOVB BL, (AX)
  7677	ADDQ $0x02, AX
  7678	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7679
  7680emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
  7681	LEAL -2(BX), BX
  7682	MOVB BL, (AX)
  7683	MOVW DI, 1(AX)
  7684	ADDQ $0x03, AX
  7685	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7686
  7687match_is_repeat_encodeBetterBlockAsm4MB:
  7688	MOVL 12(SP), BX
  7689	CMPL BX, SI
  7690	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
  7691	MOVL SI, R8
  7692	MOVL SI, 12(SP)
  7693	LEAQ (DX)(BX*1), R9
  7694	SUBL BX, R8
  7695	LEAL -1(R8), BX
  7696	CMPL BX, $0x3c
  7697	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
  7698	CMPL BX, $0x00000100
  7699	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
  7700	CMPL BX, $0x00010000
  7701	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
  7702	MOVL BX, R10
  7703	SHRL $0x10, R10
  7704	MOVB $0xf8, (AX)
  7705	MOVW BX, 1(AX)
  7706	MOVB R10, 3(AX)
  7707	ADDQ $0x04, AX
  7708	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
  7709
  7710three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
  7711	MOVB $0xf4, (AX)
  7712	MOVW BX, 1(AX)
  7713	ADDQ $0x03, AX
  7714	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
  7715
  7716two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
  7717	MOVB $0xf0, (AX)
  7718	MOVB BL, 1(AX)
  7719	ADDQ $0x02, AX
  7720	CMPL BX, $0x40
  7721	JB   memmove_match_emit_repeat_encodeBetterBlockAsm4MB
  7722	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
  7723
  7724one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
  7725	SHLB $0x02, BL
  7726	MOVB BL, (AX)
  7727	ADDQ $0x01, AX
  7728
  7729memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
  7730	LEAQ (AX)(R8*1), BX
  7731
  7732	// genMemMoveShort
  7733	CMPQ R8, $0x04
  7734	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
  7735	CMPQ R8, $0x08
  7736	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
  7737	CMPQ R8, $0x10
  7738	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
  7739	CMPQ R8, $0x20
  7740	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
  7741	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64
  7742
  7743emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
  7744	MOVL (R9), R10
  7745	MOVL R10, (AX)
  7746	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
  7747
  7748emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
  7749	MOVL (R9), R10
  7750	MOVL -4(R9)(R8*1), R9
  7751	MOVL R10, (AX)
  7752	MOVL R9, -4(AX)(R8*1)
  7753	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
  7754
  7755emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
  7756	MOVQ (R9), R10
  7757	MOVQ -8(R9)(R8*1), R9
  7758	MOVQ R10, (AX)
  7759	MOVQ R9, -8(AX)(R8*1)
  7760	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
  7761
  7762emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
  7763	MOVOU (R9), X0
  7764	MOVOU -16(R9)(R8*1), X1
  7765	MOVOU X0, (AX)
  7766	MOVOU X1, -16(AX)(R8*1)
  7767	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
  7768
  7769emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
  7770	MOVOU (R9), X0
  7771	MOVOU 16(R9), X1
  7772	MOVOU -32(R9)(R8*1), X2
  7773	MOVOU -16(R9)(R8*1), X3
  7774	MOVOU X0, (AX)
  7775	MOVOU X1, 16(AX)
  7776	MOVOU X2, -32(AX)(R8*1)
  7777	MOVOU X3, -16(AX)(R8*1)
  7778
  7779memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
  7780	MOVQ BX, AX
  7781	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
  7782
  7783memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
  7784	LEAQ (AX)(R8*1), BX
  7785
  7786	// genMemMoveLong
  7787	MOVOU (R9), X0
  7788	MOVOU 16(R9), X1
  7789	MOVOU -32(R9)(R8*1), X2
  7790	MOVOU -16(R9)(R8*1), X3
  7791	MOVQ  R8, R12
  7792	SHRQ  $0x05, R12
  7793	MOVQ  AX, R10
  7794	ANDL  $0x0000001f, R10
  7795	MOVQ  $0x00000040, R13
  7796	SUBQ  R10, R13
  7797	DECQ  R12
  7798	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  7799	LEAQ  -32(R9)(R13*1), R10
  7800	LEAQ  -32(AX)(R13*1), R14
  7801
  7802emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
  7803	MOVOU (R10), X4
  7804	MOVOU 16(R10), X5
  7805	MOVOA X4, (R14)
  7806	MOVOA X5, 16(R14)
  7807	ADDQ  $0x20, R14
  7808	ADDQ  $0x20, R10
  7809	ADDQ  $0x20, R13
  7810	DECQ  R12
  7811	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back
  7812
  7813emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
  7814	MOVOU -32(R9)(R13*1), X4
  7815	MOVOU -16(R9)(R13*1), X5
  7816	MOVOA X4, -32(AX)(R13*1)
  7817	MOVOA X5, -16(AX)(R13*1)
  7818	ADDQ  $0x20, R13
  7819	CMPQ  R8, R13
  7820	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  7821	MOVOU X0, (AX)
  7822	MOVOU X1, 16(AX)
  7823	MOVOU X2, -32(AX)(R8*1)
  7824	MOVOU X3, -16(AX)(R8*1)
  7825	MOVQ  BX, AX
  7826
  7827emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
  7828	ADDL R11, CX
  7829	ADDL $0x04, R11
  7830	MOVL CX, 12(SP)
  7831
  7832	// emitRepeat
  7833	MOVL R11, BX
  7834	LEAL -4(R11), R11
  7835	CMPL BX, $0x08
  7836	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
  7837	CMPL BX, $0x0c
  7838	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
  7839	CMPL DI, $0x00000800
  7840	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
  7841
  7842cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
  7843	CMPL R11, $0x00000104
  7844	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
  7845	CMPL R11, $0x00010100
  7846	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
  7847	LEAL -65536(R11), R11
  7848	MOVL R11, DI
  7849	MOVW $0x001d, (AX)
  7850	MOVW R11, 2(AX)
  7851	SARL $0x10, DI
  7852	MOVB DI, 4(AX)
  7853	ADDQ $0x05, AX
  7854	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7855
  7856repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
  7857	LEAL -256(R11), R11
  7858	MOVW $0x0019, (AX)
  7859	MOVW R11, 2(AX)
  7860	ADDQ $0x04, AX
  7861	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7862
  7863repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
  7864	LEAL -4(R11), R11
  7865	MOVW $0x0015, (AX)
  7866	MOVB R11, 2(AX)
  7867	ADDQ $0x03, AX
  7868	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7869
  7870repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
  7871	SHLL $0x02, R11
  7872	ORL  $0x01, R11
  7873	MOVW R11, (AX)
  7874	ADDQ $0x02, AX
  7875	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7876
  7877repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
  7878	XORQ BX, BX
  7879	LEAL 1(BX)(R11*4), R11
  7880	MOVB DI, 1(AX)
  7881	SARL $0x08, DI
  7882	SHLL $0x05, DI
  7883	ORL  DI, R11
  7884	MOVB R11, (AX)
  7885	ADDQ $0x02, AX
  7886
  7887match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
  7888	CMPL CX, 8(SP)
  7889	JAE  emit_remainder_encodeBetterBlockAsm4MB
  7890	CMPQ AX, (SP)
  7891	JB   match_nolit_dst_ok_encodeBetterBlockAsm4MB
  7892	MOVQ $0x00000000, ret+48(FP)
  7893	RET
  7894
  7895match_nolit_dst_ok_encodeBetterBlockAsm4MB:
  7896	MOVQ  $0x00cf1bbcdcbfa563, BX
  7897	MOVQ  $0x9e3779b1, DI
  7898	LEAQ  1(SI), SI
  7899	LEAQ  -2(CX), R8
  7900	MOVQ  (DX)(SI*1), R9
  7901	MOVQ  1(DX)(SI*1), R10
  7902	MOVQ  (DX)(R8*1), R11
  7903	MOVQ  1(DX)(R8*1), R12
  7904	SHLQ  $0x08, R9
  7905	IMULQ BX, R9
  7906	SHRQ  $0x2f, R9
  7907	SHLQ  $0x20, R10
  7908	IMULQ DI, R10
  7909	SHRQ  $0x32, R10
  7910	SHLQ  $0x08, R11
  7911	IMULQ BX, R11
  7912	SHRQ  $0x2f, R11
  7913	SHLQ  $0x20, R12
  7914	IMULQ DI, R12
  7915	SHRQ  $0x32, R12
  7916	LEAQ  1(SI), DI
  7917	LEAQ  1(R8), R13
  7918	MOVL  SI, 24(SP)(R9*4)
  7919	MOVL  R8, 24(SP)(R11*4)
  7920	MOVL  DI, 524312(SP)(R10*4)
  7921	MOVL  R13, 524312(SP)(R12*4)
  7922	LEAQ  1(R8)(SI*1), DI
  7923	SHRQ  $0x01, DI
  7924	ADDQ  $0x01, SI
  7925	SUBQ  $0x01, R8
  7926
  7927index_loop_encodeBetterBlockAsm4MB:
  7928	CMPQ  DI, R8
  7929	JAE   search_loop_encodeBetterBlockAsm4MB
  7930	MOVQ  (DX)(SI*1), R9
  7931	MOVQ  (DX)(DI*1), R10
  7932	SHLQ  $0x08, R9
  7933	IMULQ BX, R9
  7934	SHRQ  $0x2f, R9
  7935	SHLQ  $0x08, R10
  7936	IMULQ BX, R10
  7937	SHRQ  $0x2f, R10
  7938	MOVL  SI, 24(SP)(R9*4)
  7939	MOVL  DI, 24(SP)(R10*4)
  7940	ADDQ  $0x02, SI
  7941	ADDQ  $0x02, DI
  7942	JMP   index_loop_encodeBetterBlockAsm4MB
  7943
  7944emit_remainder_encodeBetterBlockAsm4MB:
  7945	MOVQ src_len+32(FP), CX
  7946	SUBL 12(SP), CX
  7947	LEAQ 4(AX)(CX*1), CX
  7948	CMPQ CX, (SP)
  7949	JB   emit_remainder_ok_encodeBetterBlockAsm4MB
  7950	MOVQ $0x00000000, ret+48(FP)
  7951	RET
  7952
  7953emit_remainder_ok_encodeBetterBlockAsm4MB:
  7954	MOVQ src_len+32(FP), CX
  7955	MOVL 12(SP), BX
  7956	CMPL BX, CX
  7957	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
  7958	MOVL CX, SI
  7959	MOVL CX, 12(SP)
  7960	LEAQ (DX)(BX*1), CX
  7961	SUBL BX, SI
  7962	LEAL -1(SI), DX
  7963	CMPL DX, $0x3c
  7964	JB   one_byte_emit_remainder_encodeBetterBlockAsm4MB
  7965	CMPL DX, $0x00000100
  7966	JB   two_bytes_emit_remainder_encodeBetterBlockAsm4MB
  7967	CMPL DX, $0x00010000
  7968	JB   three_bytes_emit_remainder_encodeBetterBlockAsm4MB
  7969	MOVL DX, BX
  7970	SHRL $0x10, BX
  7971	MOVB $0xf8, (AX)
  7972	MOVW DX, 1(AX)
  7973	MOVB BL, 3(AX)
  7974	ADDQ $0x04, AX
  7975	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
  7976
  7977three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
  7978	MOVB $0xf4, (AX)
  7979	MOVW DX, 1(AX)
  7980	ADDQ $0x03, AX
  7981	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
  7982
  7983two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
  7984	MOVB $0xf0, (AX)
  7985	MOVB DL, 1(AX)
  7986	ADDQ $0x02, AX
  7987	CMPL DX, $0x40
  7988	JB   memmove_emit_remainder_encodeBetterBlockAsm4MB
  7989	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
  7990
  7991one_byte_emit_remainder_encodeBetterBlockAsm4MB:
  7992	SHLB $0x02, DL
  7993	MOVB DL, (AX)
  7994	ADDQ $0x01, AX
  7995
  7996memmove_emit_remainder_encodeBetterBlockAsm4MB:
  7997	LEAQ (AX)(SI*1), DX
  7998	MOVL SI, BX
  7999
  8000	// genMemMoveShort
  8001	CMPQ BX, $0x03
  8002	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2
  8003	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3
  8004	CMPQ BX, $0x08
  8005	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
  8006	CMPQ BX, $0x10
  8007	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
  8008	CMPQ BX, $0x20
  8009	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
  8010	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64
  8011
  8012emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2:
  8013	MOVB (CX), SI
  8014	MOVB -1(CX)(BX*1), CL
  8015	MOVB SI, (AX)
  8016	MOVB CL, -1(AX)(BX*1)
  8017	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  8018
  8019emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3:
  8020	MOVW (CX), SI
  8021	MOVB 2(CX), CL
  8022	MOVW SI, (AX)
  8023	MOVB CL, 2(AX)
  8024	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  8025
  8026emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
  8027	MOVL (CX), SI
  8028	MOVL -4(CX)(BX*1), CX
  8029	MOVL SI, (AX)
  8030	MOVL CX, -4(AX)(BX*1)
  8031	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  8032
  8033emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
  8034	MOVQ (CX), SI
  8035	MOVQ -8(CX)(BX*1), CX
  8036	MOVQ SI, (AX)
  8037	MOVQ CX, -8(AX)(BX*1)
  8038	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  8039
  8040emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
  8041	MOVOU (CX), X0
  8042	MOVOU -16(CX)(BX*1), X1
  8043	MOVOU X0, (AX)
  8044	MOVOU X1, -16(AX)(BX*1)
  8045	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  8046
  8047emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
  8048	MOVOU (CX), X0
  8049	MOVOU 16(CX), X1
  8050	MOVOU -32(CX)(BX*1), X2
  8051	MOVOU -16(CX)(BX*1), X3
  8052	MOVOU X0, (AX)
  8053	MOVOU X1, 16(AX)
  8054	MOVOU X2, -32(AX)(BX*1)
  8055	MOVOU X3, -16(AX)(BX*1)
  8056
  8057memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
  8058	MOVQ DX, AX
  8059	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
  8060
  8061memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
  8062	LEAQ (AX)(SI*1), DX
  8063	MOVL SI, BX
  8064
  8065	// genMemMoveLong
  8066	MOVOU (CX), X0
  8067	MOVOU 16(CX), X1
  8068	MOVOU -32(CX)(BX*1), X2
  8069	MOVOU -16(CX)(BX*1), X3
  8070	MOVQ  BX, DI
  8071	SHRQ  $0x05, DI
  8072	MOVQ  AX, SI
  8073	ANDL  $0x0000001f, SI
  8074	MOVQ  $0x00000040, R8
  8075	SUBQ  SI, R8
  8076	DECQ  DI
  8077	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  8078	LEAQ  -32(CX)(R8*1), SI
  8079	LEAQ  -32(AX)(R8*1), R9
  8080
  8081emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
  8082	MOVOU (SI), X4
  8083	MOVOU 16(SI), X5
  8084	MOVOA X4, (R9)
  8085	MOVOA X5, 16(R9)
  8086	ADDQ  $0x20, R9
  8087	ADDQ  $0x20, SI
  8088	ADDQ  $0x20, R8
  8089	DECQ  DI
  8090	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back
  8091
  8092emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
  8093	MOVOU -32(CX)(R8*1), X4
  8094	MOVOU -16(CX)(R8*1), X5
  8095	MOVOA X4, -32(AX)(R8*1)
  8096	MOVOA X5, -16(AX)(R8*1)
  8097	ADDQ  $0x20, R8
  8098	CMPQ  BX, R8
  8099	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  8100	MOVOU X0, (AX)
  8101	MOVOU X1, 16(AX)
  8102	MOVOU X2, -32(AX)(BX*1)
  8103	MOVOU X3, -16(AX)(BX*1)
  8104	MOVQ  DX, AX
  8105
  8106emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
  8107	MOVQ dst_base+0(FP), CX
  8108	SUBQ CX, AX
  8109	MOVQ AX, ret+48(FP)
  8110	RET
  8111
  8112// func encodeBetterBlockAsm12B(dst []byte, src []byte) int
  8113// Requires: BMI, SSE2
  8114TEXT ·encodeBetterBlockAsm12B(SB), $81944-56
  8115	MOVQ dst_base+0(FP), AX
  8116	MOVQ $0x00000280, CX
  8117	LEAQ 24(SP), DX
  8118	PXOR X0, X0
  8119
  8120zero_loop_encodeBetterBlockAsm12B:
  8121	MOVOU X0, (DX)
  8122	MOVOU X0, 16(DX)
  8123	MOVOU X0, 32(DX)
  8124	MOVOU X0, 48(DX)
  8125	MOVOU X0, 64(DX)
  8126	MOVOU X0, 80(DX)
  8127	MOVOU X0, 96(DX)
  8128	MOVOU X0, 112(DX)
  8129	ADDQ  $0x80, DX
  8130	DECQ  CX
  8131	JNZ   zero_loop_encodeBetterBlockAsm12B
  8132	MOVL  $0x00000000, 12(SP)
  8133	MOVQ  src_len+32(FP), CX
  8134	LEAQ  -6(CX), DX
  8135	LEAQ  -8(CX), BX
  8136	MOVL  BX, 8(SP)
  8137	SHRQ  $0x05, CX
  8138	SUBL  CX, DX
  8139	LEAQ  (AX)(DX*1), DX
  8140	MOVQ  DX, (SP)
  8141	MOVL  $0x00000001, CX
  8142	MOVL  $0x00000000, 16(SP)
  8143	MOVQ  src_base+24(FP), DX
  8144
  8145search_loop_encodeBetterBlockAsm12B:
  8146	MOVL  CX, BX
  8147	SUBL  12(SP), BX
  8148	SHRL  $0x06, BX
  8149	LEAL  1(CX)(BX*1), BX
  8150	CMPL  BX, 8(SP)
  8151	JAE   emit_remainder_encodeBetterBlockAsm12B
  8152	MOVQ  (DX)(CX*1), SI
  8153	MOVL  BX, 20(SP)
  8154	MOVQ  $0x0000cf1bbcdcbf9b, R8
  8155	MOVQ  $0x9e3779b1, BX
  8156	MOVQ  SI, R9
  8157	MOVQ  SI, R10
  8158	SHLQ  $0x10, R9
  8159	IMULQ R8, R9
  8160	SHRQ  $0x32, R9
  8161	SHLQ  $0x20, R10
  8162	IMULQ BX, R10
  8163	SHRQ  $0x34, R10
  8164	MOVL  24(SP)(R9*4), BX
  8165	MOVL  65560(SP)(R10*4), DI
  8166	MOVL  CX, 24(SP)(R9*4)
  8167	MOVL  CX, 65560(SP)(R10*4)
  8168	MOVQ  (DX)(BX*1), R9
  8169	MOVQ  (DX)(DI*1), R10
  8170	CMPQ  R9, SI
  8171	JEQ   candidate_match_encodeBetterBlockAsm12B
  8172	CMPQ  R10, SI
  8173	JNE   no_short_found_encodeBetterBlockAsm12B
  8174	MOVL  DI, BX
  8175	JMP   candidate_match_encodeBetterBlockAsm12B
  8176
  8177no_short_found_encodeBetterBlockAsm12B:
  8178	CMPL R9, SI
  8179	JEQ  candidate_match_encodeBetterBlockAsm12B
  8180	CMPL R10, SI
  8181	JEQ  candidateS_match_encodeBetterBlockAsm12B
  8182	MOVL 20(SP), CX
  8183	JMP  search_loop_encodeBetterBlockAsm12B
  8184
  8185candidateS_match_encodeBetterBlockAsm12B:
  8186	SHRQ  $0x08, SI
  8187	MOVQ  SI, R9
  8188	SHLQ  $0x10, R9
  8189	IMULQ R8, R9
  8190	SHRQ  $0x32, R9
  8191	MOVL  24(SP)(R9*4), BX
  8192	INCL  CX
  8193	MOVL  CX, 24(SP)(R9*4)
  8194	CMPL  (DX)(BX*1), SI
  8195	JEQ   candidate_match_encodeBetterBlockAsm12B
  8196	DECL  CX
  8197	MOVL  DI, BX
  8198
  8199candidate_match_encodeBetterBlockAsm12B:
  8200	MOVL  12(SP), SI
  8201	TESTL BX, BX
  8202	JZ    match_extend_back_end_encodeBetterBlockAsm12B
  8203
  8204match_extend_back_loop_encodeBetterBlockAsm12B:
  8205	CMPL CX, SI
  8206	JBE  match_extend_back_end_encodeBetterBlockAsm12B
  8207	MOVB -1(DX)(BX*1), DI
  8208	MOVB -1(DX)(CX*1), R8
  8209	CMPB DI, R8
  8210	JNE  match_extend_back_end_encodeBetterBlockAsm12B
  8211	LEAL -1(CX), CX
  8212	DECL BX
  8213	JZ   match_extend_back_end_encodeBetterBlockAsm12B
  8214	JMP  match_extend_back_loop_encodeBetterBlockAsm12B
  8215
  8216match_extend_back_end_encodeBetterBlockAsm12B:
  8217	MOVL CX, SI
  8218	SUBL 12(SP), SI
  8219	LEAQ 3(AX)(SI*1), SI
  8220	CMPQ SI, (SP)
  8221	JB   match_dst_size_check_encodeBetterBlockAsm12B
  8222	MOVQ $0x00000000, ret+48(FP)
  8223	RET
  8224
  8225match_dst_size_check_encodeBetterBlockAsm12B:
  8226	MOVL CX, SI
  8227	ADDL $0x04, CX
  8228	ADDL $0x04, BX
  8229	MOVQ src_len+32(FP), DI
  8230	SUBL CX, DI
  8231	LEAQ (DX)(CX*1), R8
  8232	LEAQ (DX)(BX*1), R9
  8233
  8234	// matchLen
  8235	XORL R11, R11
  8236
  8237matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B:
  8238	CMPL DI, $0x10
  8239	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm12B
  8240	MOVQ (R8)(R11*1), R10
  8241	MOVQ 8(R8)(R11*1), R12
  8242	XORQ (R9)(R11*1), R10
  8243	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
  8244	XORQ 8(R9)(R11*1), R12
  8245	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B
  8246	LEAL -16(DI), DI
  8247	LEAL 16(R11), R11
  8248	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B
  8249
  8250matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B:
  8251#ifdef GOAMD64_v3
  8252	TZCNTQ R12, R12
  8253
  8254#else
  8255	BSFQ R12, R12
  8256
  8257#endif
  8258	SARQ $0x03, R12
  8259	LEAL 8(R11)(R12*1), R11
  8260	JMP  match_nolit_end_encodeBetterBlockAsm12B
  8261
  8262matchlen_match8_match_nolit_encodeBetterBlockAsm12B:
  8263	CMPL DI, $0x08
  8264	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm12B
  8265	MOVQ (R8)(R11*1), R10
  8266	XORQ (R9)(R11*1), R10
  8267	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
  8268	LEAL -8(DI), DI
  8269	LEAL 8(R11), R11
  8270	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm12B
  8271
  8272matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B:
  8273#ifdef GOAMD64_v3
  8274	TZCNTQ R10, R10
  8275
  8276#else
  8277	BSFQ R10, R10
  8278
  8279#endif
  8280	SARQ $0x03, R10
  8281	LEAL (R11)(R10*1), R11
  8282	JMP  match_nolit_end_encodeBetterBlockAsm12B
  8283
  8284matchlen_match4_match_nolit_encodeBetterBlockAsm12B:
  8285	CMPL DI, $0x04
  8286	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm12B
  8287	MOVL (R8)(R11*1), R10
  8288	CMPL (R9)(R11*1), R10
  8289	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm12B
  8290	LEAL -4(DI), DI
  8291	LEAL 4(R11), R11
  8292
  8293matchlen_match2_match_nolit_encodeBetterBlockAsm12B:
  8294	CMPL DI, $0x01
  8295	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm12B
  8296	JB   match_nolit_end_encodeBetterBlockAsm12B
  8297	MOVW (R8)(R11*1), R10
  8298	CMPW (R9)(R11*1), R10
  8299	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm12B
  8300	LEAL 2(R11), R11
  8301	SUBL $0x02, DI
  8302	JZ   match_nolit_end_encodeBetterBlockAsm12B
  8303
  8304matchlen_match1_match_nolit_encodeBetterBlockAsm12B:
  8305	MOVB (R8)(R11*1), R10
  8306	CMPB (R9)(R11*1), R10
  8307	JNE  match_nolit_end_encodeBetterBlockAsm12B
  8308	LEAL 1(R11), R11
  8309
  8310match_nolit_end_encodeBetterBlockAsm12B:
  8311	MOVL CX, DI
  8312	SUBL BX, DI
  8313
  8314	// Check if repeat
  8315	CMPL 16(SP), DI
  8316	JEQ  match_is_repeat_encodeBetterBlockAsm12B
  8317	MOVL DI, 16(SP)
  8318	MOVL 12(SP), BX
  8319	CMPL BX, SI
  8320	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm12B
  8321	MOVL SI, R8
  8322	MOVL SI, 12(SP)
  8323	LEAQ (DX)(BX*1), R9
  8324	SUBL BX, R8
  8325	LEAL -1(R8), BX
  8326	CMPL BX, $0x3c
  8327	JB   one_byte_match_emit_encodeBetterBlockAsm12B
  8328	CMPL BX, $0x00000100
  8329	JB   two_bytes_match_emit_encodeBetterBlockAsm12B
  8330	JB   three_bytes_match_emit_encodeBetterBlockAsm12B
  8331
  8332three_bytes_match_emit_encodeBetterBlockAsm12B:
  8333	MOVB $0xf4, (AX)
  8334	MOVW BX, 1(AX)
  8335	ADDQ $0x03, AX
  8336	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
  8337
  8338two_bytes_match_emit_encodeBetterBlockAsm12B:
  8339	MOVB $0xf0, (AX)
  8340	MOVB BL, 1(AX)
  8341	ADDQ $0x02, AX
  8342	CMPL BX, $0x40
  8343	JB   memmove_match_emit_encodeBetterBlockAsm12B
  8344	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
  8345
  8346one_byte_match_emit_encodeBetterBlockAsm12B:
  8347	SHLB $0x02, BL
  8348	MOVB BL, (AX)
  8349	ADDQ $0x01, AX
  8350
  8351memmove_match_emit_encodeBetterBlockAsm12B:
  8352	LEAQ (AX)(R8*1), BX
  8353
  8354	// genMemMoveShort
  8355	CMPQ R8, $0x04
  8356	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4
  8357	CMPQ R8, $0x08
  8358	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7
  8359	CMPQ R8, $0x10
  8360	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16
  8361	CMPQ R8, $0x20
  8362	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32
  8363	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64
  8364
  8365emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4:
  8366	MOVL (R9), R10
  8367	MOVL R10, (AX)
  8368	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
  8369
  8370emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7:
  8371	MOVL (R9), R10
  8372	MOVL -4(R9)(R8*1), R9
  8373	MOVL R10, (AX)
  8374	MOVL R9, -4(AX)(R8*1)
  8375	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
  8376
  8377emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16:
  8378	MOVQ (R9), R10
  8379	MOVQ -8(R9)(R8*1), R9
  8380	MOVQ R10, (AX)
  8381	MOVQ R9, -8(AX)(R8*1)
  8382	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
  8383
  8384emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32:
  8385	MOVOU (R9), X0
  8386	MOVOU -16(R9)(R8*1), X1
  8387	MOVOU X0, (AX)
  8388	MOVOU X1, -16(AX)(R8*1)
  8389	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm12B
  8390
  8391emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64:
  8392	MOVOU (R9), X0
  8393	MOVOU 16(R9), X1
  8394	MOVOU -32(R9)(R8*1), X2
  8395	MOVOU -16(R9)(R8*1), X3
  8396	MOVOU X0, (AX)
  8397	MOVOU X1, 16(AX)
  8398	MOVOU X2, -32(AX)(R8*1)
  8399	MOVOU X3, -16(AX)(R8*1)
  8400
  8401memmove_end_copy_match_emit_encodeBetterBlockAsm12B:
  8402	MOVQ BX, AX
  8403	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm12B
  8404
  8405memmove_long_match_emit_encodeBetterBlockAsm12B:
  8406	LEAQ (AX)(R8*1), BX
  8407
  8408	// genMemMoveLong
  8409	MOVOU (R9), X0
  8410	MOVOU 16(R9), X1
  8411	MOVOU -32(R9)(R8*1), X2
  8412	MOVOU -16(R9)(R8*1), X3
  8413	MOVQ  R8, R12
  8414	SHRQ  $0x05, R12
  8415	MOVQ  AX, R10
  8416	ANDL  $0x0000001f, R10
  8417	MOVQ  $0x00000040, R13
  8418	SUBQ  R10, R13
  8419	DECQ  R12
  8420	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  8421	LEAQ  -32(R9)(R13*1), R10
  8422	LEAQ  -32(AX)(R13*1), R14
  8423
  8424emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back:
  8425	MOVOU (R10), X4
  8426	MOVOU 16(R10), X5
  8427	MOVOA X4, (R14)
  8428	MOVOA X5, 16(R14)
  8429	ADDQ  $0x20, R14
  8430	ADDQ  $0x20, R10
  8431	ADDQ  $0x20, R13
  8432	DECQ  R12
  8433	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back
  8434
  8435emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
  8436	MOVOU -32(R9)(R13*1), X4
  8437	MOVOU -16(R9)(R13*1), X5
  8438	MOVOA X4, -32(AX)(R13*1)
  8439	MOVOA X5, -16(AX)(R13*1)
  8440	ADDQ  $0x20, R13
  8441	CMPQ  R8, R13
  8442	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  8443	MOVOU X0, (AX)
  8444	MOVOU X1, 16(AX)
  8445	MOVOU X2, -32(AX)(R8*1)
  8446	MOVOU X3, -16(AX)(R8*1)
  8447	MOVQ  BX, AX
  8448
  8449emit_literal_done_match_emit_encodeBetterBlockAsm12B:
  8450	ADDL R11, CX
  8451	ADDL $0x04, R11
  8452	MOVL CX, 12(SP)
  8453
  8454	// emitCopy
  8455	CMPL R11, $0x40
  8456	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B
  8457	CMPL DI, $0x00000800
  8458	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm12B
  8459	MOVL $0x00000001, BX
  8460	LEAL 16(BX), BX
  8461	MOVB DI, 1(AX)
  8462	SHRL $0x08, DI
  8463	SHLL $0x05, DI
  8464	ORL  DI, BX
  8465	MOVB BL, (AX)
  8466	ADDQ $0x02, AX
  8467	SUBL $0x08, R11
  8468
  8469	// emitRepeat
  8470	LEAL -4(R11), R11
  8471	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  8472	MOVL R11, BX
  8473	LEAL -4(R11), R11
  8474	CMPL BX, $0x08
  8475	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  8476	CMPL BX, $0x0c
  8477	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  8478	CMPL DI, $0x00000800
  8479	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  8480
  8481cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
  8482	CMPL R11, $0x00000104
  8483	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  8484	LEAL -256(R11), R11
  8485	MOVW $0x0019, (AX)
  8486	MOVW R11, 2(AX)
  8487	ADDQ $0x04, AX
  8488	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8489
  8490repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
  8491	LEAL -4(R11), R11
  8492	MOVW $0x0015, (AX)
  8493	MOVB R11, 2(AX)
  8494	ADDQ $0x03, AX
  8495	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8496
  8497repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
  8498	SHLL $0x02, R11
  8499	ORL  $0x01, R11
  8500	MOVW R11, (AX)
  8501	ADDQ $0x02, AX
  8502	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8503
  8504repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
  8505	XORQ BX, BX
  8506	LEAL 1(BX)(R11*4), R11
  8507	MOVB DI, 1(AX)
  8508	SARL $0x08, DI
  8509	SHLL $0x05, DI
  8510	ORL  DI, R11
  8511	MOVB R11, (AX)
  8512	ADDQ $0x02, AX
  8513	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8514
  8515long_offset_short_match_nolit_encodeBetterBlockAsm12B:
  8516	MOVB $0xee, (AX)
  8517	MOVW DI, 1(AX)
  8518	LEAL -60(R11), R11
  8519	ADDQ $0x03, AX
  8520
  8521	// emitRepeat
  8522	MOVL R11, BX
  8523	LEAL -4(R11), R11
  8524	CMPL BX, $0x08
  8525	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
  8526	CMPL BX, $0x0c
  8527	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
  8528	CMPL DI, $0x00000800
  8529	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
  8530
  8531cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
  8532	CMPL R11, $0x00000104
  8533	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
  8534	LEAL -256(R11), R11
  8535	MOVW $0x0019, (AX)
  8536	MOVW R11, 2(AX)
  8537	ADDQ $0x04, AX
  8538	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8539
  8540repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
  8541	LEAL -4(R11), R11
  8542	MOVW $0x0015, (AX)
  8543	MOVB R11, 2(AX)
  8544	ADDQ $0x03, AX
  8545	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8546
  8547repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
  8548	SHLL $0x02, R11
  8549	ORL  $0x01, R11
  8550	MOVW R11, (AX)
  8551	ADDQ $0x02, AX
  8552	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8553
  8554repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
  8555	XORQ BX, BX
  8556	LEAL 1(BX)(R11*4), R11
  8557	MOVB DI, 1(AX)
  8558	SARL $0x08, DI
  8559	SHLL $0x05, DI
  8560	ORL  DI, R11
  8561	MOVB R11, (AX)
  8562	ADDQ $0x02, AX
  8563	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8564
  8565two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B:
  8566	MOVL R11, BX
  8567	SHLL $0x02, BX
  8568	CMPL R11, $0x0c
  8569	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B
  8570	CMPL DI, $0x00000800
  8571	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B
  8572	LEAL -15(BX), BX
  8573	MOVB DI, 1(AX)
  8574	SHRL $0x08, DI
  8575	SHLL $0x05, DI
  8576	ORL  DI, BX
  8577	MOVB BL, (AX)
  8578	ADDQ $0x02, AX
  8579	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8580
  8581emit_copy_three_match_nolit_encodeBetterBlockAsm12B:
  8582	LEAL -2(BX), BX
  8583	MOVB BL, (AX)
  8584	MOVW DI, 1(AX)
  8585	ADDQ $0x03, AX
  8586	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8587
  8588match_is_repeat_encodeBetterBlockAsm12B:
  8589	MOVL 12(SP), BX
  8590	CMPL BX, SI
  8591	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
  8592	MOVL SI, R8
  8593	MOVL SI, 12(SP)
  8594	LEAQ (DX)(BX*1), R9
  8595	SUBL BX, R8
  8596	LEAL -1(R8), BX
  8597	CMPL BX, $0x3c
  8598	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm12B
  8599	CMPL BX, $0x00000100
  8600	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm12B
  8601	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm12B
  8602
  8603three_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
  8604	MOVB $0xf4, (AX)
  8605	MOVW BX, 1(AX)
  8606	ADDQ $0x03, AX
  8607	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
  8608
  8609two_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
  8610	MOVB $0xf0, (AX)
  8611	MOVB BL, 1(AX)
  8612	ADDQ $0x02, AX
  8613	CMPL BX, $0x40
  8614	JB   memmove_match_emit_repeat_encodeBetterBlockAsm12B
  8615	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
  8616
  8617one_byte_match_emit_repeat_encodeBetterBlockAsm12B:
  8618	SHLB $0x02, BL
  8619	MOVB BL, (AX)
  8620	ADDQ $0x01, AX
  8621
  8622memmove_match_emit_repeat_encodeBetterBlockAsm12B:
  8623	LEAQ (AX)(R8*1), BX
  8624
  8625	// genMemMoveShort
  8626	CMPQ R8, $0x04
  8627	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4
  8628	CMPQ R8, $0x08
  8629	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7
  8630	CMPQ R8, $0x10
  8631	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16
  8632	CMPQ R8, $0x20
  8633	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32
  8634	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64
  8635
  8636emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4:
  8637	MOVL (R9), R10
  8638	MOVL R10, (AX)
  8639	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
  8640
  8641emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7:
  8642	MOVL (R9), R10
  8643	MOVL -4(R9)(R8*1), R9
  8644	MOVL R10, (AX)
  8645	MOVL R9, -4(AX)(R8*1)
  8646	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
  8647
  8648emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16:
  8649	MOVQ (R9), R10
  8650	MOVQ -8(R9)(R8*1), R9
  8651	MOVQ R10, (AX)
  8652	MOVQ R9, -8(AX)(R8*1)
  8653	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
  8654
  8655emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32:
  8656	MOVOU (R9), X0
  8657	MOVOU -16(R9)(R8*1), X1
  8658	MOVOU X0, (AX)
  8659	MOVOU X1, -16(AX)(R8*1)
  8660	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
  8661
  8662emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64:
  8663	MOVOU (R9), X0
  8664	MOVOU 16(R9), X1
  8665	MOVOU -32(R9)(R8*1), X2
  8666	MOVOU -16(R9)(R8*1), X3
  8667	MOVOU X0, (AX)
  8668	MOVOU X1, 16(AX)
  8669	MOVOU X2, -32(AX)(R8*1)
  8670	MOVOU X3, -16(AX)(R8*1)
  8671
  8672memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B:
  8673	MOVQ BX, AX
  8674	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
  8675
  8676memmove_long_match_emit_repeat_encodeBetterBlockAsm12B:
  8677	LEAQ (AX)(R8*1), BX
  8678
  8679	// genMemMoveLong
  8680	MOVOU (R9), X0
  8681	MOVOU 16(R9), X1
  8682	MOVOU -32(R9)(R8*1), X2
  8683	MOVOU -16(R9)(R8*1), X3
  8684	MOVQ  R8, R12
  8685	SHRQ  $0x05, R12
  8686	MOVQ  AX, R10
  8687	ANDL  $0x0000001f, R10
  8688	MOVQ  $0x00000040, R13
  8689	SUBQ  R10, R13
  8690	DECQ  R12
  8691	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  8692	LEAQ  -32(R9)(R13*1), R10
  8693	LEAQ  -32(AX)(R13*1), R14
  8694
  8695emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back:
  8696	MOVOU (R10), X4
  8697	MOVOU 16(R10), X5
  8698	MOVOA X4, (R14)
  8699	MOVOA X5, 16(R14)
  8700	ADDQ  $0x20, R14
  8701	ADDQ  $0x20, R10
  8702	ADDQ  $0x20, R13
  8703	DECQ  R12
  8704	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back
  8705
  8706emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
  8707	MOVOU -32(R9)(R13*1), X4
  8708	MOVOU -16(R9)(R13*1), X5
  8709	MOVOA X4, -32(AX)(R13*1)
  8710	MOVOA X5, -16(AX)(R13*1)
  8711	ADDQ  $0x20, R13
  8712	CMPQ  R8, R13
  8713	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  8714	MOVOU X0, (AX)
  8715	MOVOU X1, 16(AX)
  8716	MOVOU X2, -32(AX)(R8*1)
  8717	MOVOU X3, -16(AX)(R8*1)
  8718	MOVQ  BX, AX
  8719
  8720emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B:
  8721	ADDL R11, CX
  8722	ADDL $0x04, R11
  8723	MOVL CX, 12(SP)
  8724
  8725	// emitRepeat
  8726	MOVL R11, BX
  8727	LEAL -4(R11), R11
  8728	CMPL BX, $0x08
  8729	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B
  8730	CMPL BX, $0x0c
  8731	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
  8732	CMPL DI, $0x00000800
  8733	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
  8734
  8735cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
  8736	CMPL R11, $0x00000104
  8737	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B
  8738	LEAL -256(R11), R11
  8739	MOVW $0x0019, (AX)
  8740	MOVW R11, 2(AX)
  8741	ADDQ $0x04, AX
  8742	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8743
  8744repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B:
  8745	LEAL -4(R11), R11
  8746	MOVW $0x0015, (AX)
  8747	MOVB R11, 2(AX)
  8748	ADDQ $0x03, AX
  8749	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8750
  8751repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B:
  8752	SHLL $0x02, R11
  8753	ORL  $0x01, R11
  8754	MOVW R11, (AX)
  8755	ADDQ $0x02, AX
  8756	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  8757
  8758repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
  8759	XORQ BX, BX
  8760	LEAL 1(BX)(R11*4), R11
  8761	MOVB DI, 1(AX)
  8762	SARL $0x08, DI
  8763	SHLL $0x05, DI
  8764	ORL  DI, R11
  8765	MOVB R11, (AX)
  8766	ADDQ $0x02, AX
  8767
  8768match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
  8769	CMPL CX, 8(SP)
  8770	JAE  emit_remainder_encodeBetterBlockAsm12B
  8771	CMPQ AX, (SP)
  8772	JB   match_nolit_dst_ok_encodeBetterBlockAsm12B
  8773	MOVQ $0x00000000, ret+48(FP)
  8774	RET
  8775
  8776match_nolit_dst_ok_encodeBetterBlockAsm12B:
  8777	MOVQ  $0x0000cf1bbcdcbf9b, BX
  8778	MOVQ  $0x9e3779b1, DI
  8779	LEAQ  1(SI), SI
  8780	LEAQ  -2(CX), R8
  8781	MOVQ  (DX)(SI*1), R9
  8782	MOVQ  1(DX)(SI*1), R10
  8783	MOVQ  (DX)(R8*1), R11
  8784	MOVQ  1(DX)(R8*1), R12
  8785	SHLQ  $0x10, R9
  8786	IMULQ BX, R9
  8787	SHRQ  $0x32, R9
  8788	SHLQ  $0x20, R10
  8789	IMULQ DI, R10
  8790	SHRQ  $0x34, R10
  8791	SHLQ  $0x10, R11
  8792	IMULQ BX, R11
  8793	SHRQ  $0x32, R11
  8794	SHLQ  $0x20, R12
  8795	IMULQ DI, R12
  8796	SHRQ  $0x34, R12
  8797	LEAQ  1(SI), DI
  8798	LEAQ  1(R8), R13
  8799	MOVL  SI, 24(SP)(R9*4)
  8800	MOVL  R8, 24(SP)(R11*4)
  8801	MOVL  DI, 65560(SP)(R10*4)
  8802	MOVL  R13, 65560(SP)(R12*4)
  8803	LEAQ  1(R8)(SI*1), DI
  8804	SHRQ  $0x01, DI
  8805	ADDQ  $0x01, SI
  8806	SUBQ  $0x01, R8
  8807
  8808index_loop_encodeBetterBlockAsm12B:
  8809	CMPQ  DI, R8
  8810	JAE   search_loop_encodeBetterBlockAsm12B
  8811	MOVQ  (DX)(SI*1), R9
  8812	MOVQ  (DX)(DI*1), R10
  8813	SHLQ  $0x10, R9
  8814	IMULQ BX, R9
  8815	SHRQ  $0x32, R9
  8816	SHLQ  $0x10, R10
  8817	IMULQ BX, R10
  8818	SHRQ  $0x32, R10
  8819	MOVL  SI, 24(SP)(R9*4)
  8820	MOVL  DI, 24(SP)(R10*4)
  8821	ADDQ  $0x02, SI
  8822	ADDQ  $0x02, DI
  8823	JMP   index_loop_encodeBetterBlockAsm12B
  8824
  8825emit_remainder_encodeBetterBlockAsm12B:
  8826	MOVQ src_len+32(FP), CX
  8827	SUBL 12(SP), CX
  8828	LEAQ 3(AX)(CX*1), CX
  8829	CMPQ CX, (SP)
  8830	JB   emit_remainder_ok_encodeBetterBlockAsm12B
  8831	MOVQ $0x00000000, ret+48(FP)
  8832	RET
  8833
  8834emit_remainder_ok_encodeBetterBlockAsm12B:
  8835	MOVQ src_len+32(FP), CX
  8836	MOVL 12(SP), BX
  8837	CMPL BX, CX
  8838	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
  8839	MOVL CX, SI
  8840	MOVL CX, 12(SP)
  8841	LEAQ (DX)(BX*1), CX
  8842	SUBL BX, SI
  8843	LEAL -1(SI), DX
  8844	CMPL DX, $0x3c
  8845	JB   one_byte_emit_remainder_encodeBetterBlockAsm12B
  8846	CMPL DX, $0x00000100
  8847	JB   two_bytes_emit_remainder_encodeBetterBlockAsm12B
  8848	JB   three_bytes_emit_remainder_encodeBetterBlockAsm12B
  8849
  8850three_bytes_emit_remainder_encodeBetterBlockAsm12B:
  8851	MOVB $0xf4, (AX)
  8852	MOVW DX, 1(AX)
  8853	ADDQ $0x03, AX
  8854	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
  8855
  8856two_bytes_emit_remainder_encodeBetterBlockAsm12B:
  8857	MOVB $0xf0, (AX)
  8858	MOVB DL, 1(AX)
  8859	ADDQ $0x02, AX
  8860	CMPL DX, $0x40
  8861	JB   memmove_emit_remainder_encodeBetterBlockAsm12B
  8862	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
  8863
  8864one_byte_emit_remainder_encodeBetterBlockAsm12B:
  8865	SHLB $0x02, DL
  8866	MOVB DL, (AX)
  8867	ADDQ $0x01, AX
  8868
  8869memmove_emit_remainder_encodeBetterBlockAsm12B:
  8870	LEAQ (AX)(SI*1), DX
  8871	MOVL SI, BX
  8872
  8873	// genMemMoveShort
  8874	CMPQ BX, $0x03
  8875	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2
  8876	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3
  8877	CMPQ BX, $0x08
  8878	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7
  8879	CMPQ BX, $0x10
  8880	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16
  8881	CMPQ BX, $0x20
  8882	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32
  8883	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64
  8884
  8885emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2:
  8886	MOVB (CX), SI
  8887	MOVB -1(CX)(BX*1), CL
  8888	MOVB SI, (AX)
  8889	MOVB CL, -1(AX)(BX*1)
  8890	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  8891
  8892emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3:
  8893	MOVW (CX), SI
  8894	MOVB 2(CX), CL
  8895	MOVW SI, (AX)
  8896	MOVB CL, 2(AX)
  8897	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  8898
  8899emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7:
  8900	MOVL (CX), SI
  8901	MOVL -4(CX)(BX*1), CX
  8902	MOVL SI, (AX)
  8903	MOVL CX, -4(AX)(BX*1)
  8904	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  8905
  8906emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16:
  8907	MOVQ (CX), SI
  8908	MOVQ -8(CX)(BX*1), CX
  8909	MOVQ SI, (AX)
  8910	MOVQ CX, -8(AX)(BX*1)
  8911	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  8912
  8913emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32:
  8914	MOVOU (CX), X0
  8915	MOVOU -16(CX)(BX*1), X1
  8916	MOVOU X0, (AX)
  8917	MOVOU X1, -16(AX)(BX*1)
  8918	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  8919
  8920emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64:
  8921	MOVOU (CX), X0
  8922	MOVOU 16(CX), X1
  8923	MOVOU -32(CX)(BX*1), X2
  8924	MOVOU -16(CX)(BX*1), X3
  8925	MOVOU X0, (AX)
  8926	MOVOU X1, 16(AX)
  8927	MOVOU X2, -32(AX)(BX*1)
  8928	MOVOU X3, -16(AX)(BX*1)
  8929
  8930memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
  8931	MOVQ DX, AX
  8932	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
  8933
  8934memmove_long_emit_remainder_encodeBetterBlockAsm12B:
  8935	LEAQ (AX)(SI*1), DX
  8936	MOVL SI, BX
  8937
  8938	// genMemMoveLong
  8939	MOVOU (CX), X0
  8940	MOVOU 16(CX), X1
  8941	MOVOU -32(CX)(BX*1), X2
  8942	MOVOU -16(CX)(BX*1), X3
  8943	MOVQ  BX, DI
  8944	SHRQ  $0x05, DI
  8945	MOVQ  AX, SI
  8946	ANDL  $0x0000001f, SI
  8947	MOVQ  $0x00000040, R8
  8948	SUBQ  SI, R8
  8949	DECQ  DI
  8950	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  8951	LEAQ  -32(CX)(R8*1), SI
  8952	LEAQ  -32(AX)(R8*1), R9
  8953
  8954emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
  8955	MOVOU (SI), X4
  8956	MOVOU 16(SI), X5
  8957	MOVOA X4, (R9)
  8958	MOVOA X5, 16(R9)
  8959	ADDQ  $0x20, R9
  8960	ADDQ  $0x20, SI
  8961	ADDQ  $0x20, R8
  8962	DECQ  DI
  8963	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back
  8964
  8965emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
  8966	MOVOU -32(CX)(R8*1), X4
  8967	MOVOU -16(CX)(R8*1), X5
  8968	MOVOA X4, -32(AX)(R8*1)
  8969	MOVOA X5, -16(AX)(R8*1)
  8970	ADDQ  $0x20, R8
  8971	CMPQ  BX, R8
  8972	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  8973	MOVOU X0, (AX)
  8974	MOVOU X1, 16(AX)
  8975	MOVOU X2, -32(AX)(BX*1)
  8976	MOVOU X3, -16(AX)(BX*1)
  8977	MOVQ  DX, AX
  8978
  8979emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
  8980	MOVQ dst_base+0(FP), CX
  8981	SUBQ CX, AX
  8982	MOVQ AX, ret+48(FP)
  8983	RET
  8984
  8985// func encodeBetterBlockAsm10B(dst []byte, src []byte) int
  8986// Requires: BMI, SSE2
  8987TEXT ·encodeBetterBlockAsm10B(SB), $20504-56
  8988	MOVQ dst_base+0(FP), AX
  8989	MOVQ $0x000000a0, CX
  8990	LEAQ 24(SP), DX
  8991	PXOR X0, X0
  8992
  8993zero_loop_encodeBetterBlockAsm10B:
  8994	MOVOU X0, (DX)
  8995	MOVOU X0, 16(DX)
  8996	MOVOU X0, 32(DX)
  8997	MOVOU X0, 48(DX)
  8998	MOVOU X0, 64(DX)
  8999	MOVOU X0, 80(DX)
  9000	MOVOU X0, 96(DX)
  9001	MOVOU X0, 112(DX)
  9002	ADDQ  $0x80, DX
  9003	DECQ  CX
  9004	JNZ   zero_loop_encodeBetterBlockAsm10B
  9005	MOVL  $0x00000000, 12(SP)
  9006	MOVQ  src_len+32(FP), CX
  9007	LEAQ  -6(CX), DX
  9008	LEAQ  -8(CX), BX
  9009	MOVL  BX, 8(SP)
  9010	SHRQ  $0x05, CX
  9011	SUBL  CX, DX
  9012	LEAQ  (AX)(DX*1), DX
  9013	MOVQ  DX, (SP)
  9014	MOVL  $0x00000001, CX
  9015	MOVL  $0x00000000, 16(SP)
  9016	MOVQ  src_base+24(FP), DX
  9017
  9018search_loop_encodeBetterBlockAsm10B:
  9019	MOVL  CX, BX
  9020	SUBL  12(SP), BX
  9021	SHRL  $0x05, BX
  9022	LEAL  1(CX)(BX*1), BX
  9023	CMPL  BX, 8(SP)
  9024	JAE   emit_remainder_encodeBetterBlockAsm10B
  9025	MOVQ  (DX)(CX*1), SI
  9026	MOVL  BX, 20(SP)
  9027	MOVQ  $0x0000cf1bbcdcbf9b, R8
  9028	MOVQ  $0x9e3779b1, BX
  9029	MOVQ  SI, R9
  9030	MOVQ  SI, R10
  9031	SHLQ  $0x10, R9
  9032	IMULQ R8, R9
  9033	SHRQ  $0x34, R9
  9034	SHLQ  $0x20, R10
  9035	IMULQ BX, R10
  9036	SHRQ  $0x36, R10
  9037	MOVL  24(SP)(R9*4), BX
  9038	MOVL  16408(SP)(R10*4), DI
  9039	MOVL  CX, 24(SP)(R9*4)
  9040	MOVL  CX, 16408(SP)(R10*4)
  9041	MOVQ  (DX)(BX*1), R9
  9042	MOVQ  (DX)(DI*1), R10
  9043	CMPQ  R9, SI
  9044	JEQ   candidate_match_encodeBetterBlockAsm10B
  9045	CMPQ  R10, SI
  9046	JNE   no_short_found_encodeBetterBlockAsm10B
  9047	MOVL  DI, BX
  9048	JMP   candidate_match_encodeBetterBlockAsm10B
  9049
  9050no_short_found_encodeBetterBlockAsm10B:
  9051	CMPL R9, SI
  9052	JEQ  candidate_match_encodeBetterBlockAsm10B
  9053	CMPL R10, SI
  9054	JEQ  candidateS_match_encodeBetterBlockAsm10B
  9055	MOVL 20(SP), CX
  9056	JMP  search_loop_encodeBetterBlockAsm10B
  9057
  9058candidateS_match_encodeBetterBlockAsm10B:
  9059	SHRQ  $0x08, SI
  9060	MOVQ  SI, R9
  9061	SHLQ  $0x10, R9
  9062	IMULQ R8, R9
  9063	SHRQ  $0x34, R9
  9064	MOVL  24(SP)(R9*4), BX
  9065	INCL  CX
  9066	MOVL  CX, 24(SP)(R9*4)
  9067	CMPL  (DX)(BX*1), SI
  9068	JEQ   candidate_match_encodeBetterBlockAsm10B
  9069	DECL  CX
  9070	MOVL  DI, BX
  9071
  9072candidate_match_encodeBetterBlockAsm10B:
  9073	MOVL  12(SP), SI
  9074	TESTL BX, BX
  9075	JZ    match_extend_back_end_encodeBetterBlockAsm10B
  9076
  9077match_extend_back_loop_encodeBetterBlockAsm10B:
  9078	CMPL CX, SI
  9079	JBE  match_extend_back_end_encodeBetterBlockAsm10B
  9080	MOVB -1(DX)(BX*1), DI
  9081	MOVB -1(DX)(CX*1), R8
  9082	CMPB DI, R8
  9083	JNE  match_extend_back_end_encodeBetterBlockAsm10B
  9084	LEAL -1(CX), CX
  9085	DECL BX
  9086	JZ   match_extend_back_end_encodeBetterBlockAsm10B
  9087	JMP  match_extend_back_loop_encodeBetterBlockAsm10B
  9088
  9089match_extend_back_end_encodeBetterBlockAsm10B:
  9090	MOVL CX, SI
  9091	SUBL 12(SP), SI
  9092	LEAQ 3(AX)(SI*1), SI
  9093	CMPQ SI, (SP)
  9094	JB   match_dst_size_check_encodeBetterBlockAsm10B
  9095	MOVQ $0x00000000, ret+48(FP)
  9096	RET
  9097
  9098match_dst_size_check_encodeBetterBlockAsm10B:
  9099	MOVL CX, SI
  9100	ADDL $0x04, CX
  9101	ADDL $0x04, BX
  9102	MOVQ src_len+32(FP), DI
  9103	SUBL CX, DI
  9104	LEAQ (DX)(CX*1), R8
  9105	LEAQ (DX)(BX*1), R9
  9106
  9107	// matchLen
  9108	XORL R11, R11
  9109
  9110matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B:
  9111	CMPL DI, $0x10
  9112	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm10B
  9113	MOVQ (R8)(R11*1), R10
  9114	MOVQ 8(R8)(R11*1), R12
  9115	XORQ (R9)(R11*1), R10
  9116	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
  9117	XORQ 8(R9)(R11*1), R12
  9118	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B
  9119	LEAL -16(DI), DI
  9120	LEAL 16(R11), R11
  9121	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B
  9122
  9123matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B:
  9124#ifdef GOAMD64_v3
  9125	TZCNTQ R12, R12
  9126
  9127#else
  9128	BSFQ R12, R12
  9129
  9130#endif
  9131	SARQ $0x03, R12
  9132	LEAL 8(R11)(R12*1), R11
  9133	JMP  match_nolit_end_encodeBetterBlockAsm10B
  9134
  9135matchlen_match8_match_nolit_encodeBetterBlockAsm10B:
  9136	CMPL DI, $0x08
  9137	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm10B
  9138	MOVQ (R8)(R11*1), R10
  9139	XORQ (R9)(R11*1), R10
  9140	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
  9141	LEAL -8(DI), DI
  9142	LEAL 8(R11), R11
  9143	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm10B
  9144
  9145matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B:
  9146#ifdef GOAMD64_v3
  9147	TZCNTQ R10, R10
  9148
  9149#else
  9150	BSFQ R10, R10
  9151
  9152#endif
  9153	SARQ $0x03, R10
  9154	LEAL (R11)(R10*1), R11
  9155	JMP  match_nolit_end_encodeBetterBlockAsm10B
  9156
  9157matchlen_match4_match_nolit_encodeBetterBlockAsm10B:
  9158	CMPL DI, $0x04
  9159	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm10B
  9160	MOVL (R8)(R11*1), R10
  9161	CMPL (R9)(R11*1), R10
  9162	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm10B
  9163	LEAL -4(DI), DI
  9164	LEAL 4(R11), R11
  9165
  9166matchlen_match2_match_nolit_encodeBetterBlockAsm10B:
  9167	CMPL DI, $0x01
  9168	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm10B
  9169	JB   match_nolit_end_encodeBetterBlockAsm10B
  9170	MOVW (R8)(R11*1), R10
  9171	CMPW (R9)(R11*1), R10
  9172	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm10B
  9173	LEAL 2(R11), R11
  9174	SUBL $0x02, DI
  9175	JZ   match_nolit_end_encodeBetterBlockAsm10B
  9176
  9177matchlen_match1_match_nolit_encodeBetterBlockAsm10B:
  9178	MOVB (R8)(R11*1), R10
  9179	CMPB (R9)(R11*1), R10
  9180	JNE  match_nolit_end_encodeBetterBlockAsm10B
  9181	LEAL 1(R11), R11
  9182
  9183match_nolit_end_encodeBetterBlockAsm10B:
  9184	MOVL CX, DI
  9185	SUBL BX, DI
  9186
  9187	// Check if repeat
  9188	CMPL 16(SP), DI
  9189	JEQ  match_is_repeat_encodeBetterBlockAsm10B
  9190	MOVL DI, 16(SP)
  9191	MOVL 12(SP), BX
  9192	CMPL BX, SI
  9193	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm10B
  9194	MOVL SI, R8
  9195	MOVL SI, 12(SP)
  9196	LEAQ (DX)(BX*1), R9
  9197	SUBL BX, R8
  9198	LEAL -1(R8), BX
  9199	CMPL BX, $0x3c
  9200	JB   one_byte_match_emit_encodeBetterBlockAsm10B
  9201	CMPL BX, $0x00000100
  9202	JB   two_bytes_match_emit_encodeBetterBlockAsm10B
  9203	JB   three_bytes_match_emit_encodeBetterBlockAsm10B
  9204
  9205three_bytes_match_emit_encodeBetterBlockAsm10B:
  9206	MOVB $0xf4, (AX)
  9207	MOVW BX, 1(AX)
  9208	ADDQ $0x03, AX
  9209	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
  9210
  9211two_bytes_match_emit_encodeBetterBlockAsm10B:
  9212	MOVB $0xf0, (AX)
  9213	MOVB BL, 1(AX)
  9214	ADDQ $0x02, AX
  9215	CMPL BX, $0x40
  9216	JB   memmove_match_emit_encodeBetterBlockAsm10B
  9217	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
  9218
  9219one_byte_match_emit_encodeBetterBlockAsm10B:
  9220	SHLB $0x02, BL
  9221	MOVB BL, (AX)
  9222	ADDQ $0x01, AX
  9223
  9224memmove_match_emit_encodeBetterBlockAsm10B:
  9225	LEAQ (AX)(R8*1), BX
  9226
  9227	// genMemMoveShort
  9228	CMPQ R8, $0x04
  9229	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
  9230	CMPQ R8, $0x08
  9231	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
  9232	CMPQ R8, $0x10
  9233	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
  9234	CMPQ R8, $0x20
  9235	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
  9236	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64
  9237
  9238emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
  9239	MOVL (R9), R10
  9240	MOVL R10, (AX)
  9241	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
  9242
  9243emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
  9244	MOVL (R9), R10
  9245	MOVL -4(R9)(R8*1), R9
  9246	MOVL R10, (AX)
  9247	MOVL R9, -4(AX)(R8*1)
  9248	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
  9249
  9250emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
  9251	MOVQ (R9), R10
  9252	MOVQ -8(R9)(R8*1), R9
  9253	MOVQ R10, (AX)
  9254	MOVQ R9, -8(AX)(R8*1)
  9255	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
  9256
  9257emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
  9258	MOVOU (R9), X0
  9259	MOVOU -16(R9)(R8*1), X1
  9260	MOVOU X0, (AX)
  9261	MOVOU X1, -16(AX)(R8*1)
  9262	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm10B
  9263
  9264emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
  9265	MOVOU (R9), X0
  9266	MOVOU 16(R9), X1
  9267	MOVOU -32(R9)(R8*1), X2
  9268	MOVOU -16(R9)(R8*1), X3
  9269	MOVOU X0, (AX)
  9270	MOVOU X1, 16(AX)
  9271	MOVOU X2, -32(AX)(R8*1)
  9272	MOVOU X3, -16(AX)(R8*1)
  9273
  9274memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
  9275	MOVQ BX, AX
  9276	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm10B
  9277
  9278memmove_long_match_emit_encodeBetterBlockAsm10B:
  9279	LEAQ (AX)(R8*1), BX
  9280
  9281	// genMemMoveLong
  9282	MOVOU (R9), X0
  9283	MOVOU 16(R9), X1
  9284	MOVOU -32(R9)(R8*1), X2
  9285	MOVOU -16(R9)(R8*1), X3
  9286	MOVQ  R8, R12
  9287	SHRQ  $0x05, R12
  9288	MOVQ  AX, R10
  9289	ANDL  $0x0000001f, R10
  9290	MOVQ  $0x00000040, R13
  9291	SUBQ  R10, R13
  9292	DECQ  R12
  9293	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  9294	LEAQ  -32(R9)(R13*1), R10
  9295	LEAQ  -32(AX)(R13*1), R14
  9296
  9297emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
  9298	MOVOU (R10), X4
  9299	MOVOU 16(R10), X5
  9300	MOVOA X4, (R14)
  9301	MOVOA X5, 16(R14)
  9302	ADDQ  $0x20, R14
  9303	ADDQ  $0x20, R10
  9304	ADDQ  $0x20, R13
  9305	DECQ  R12
  9306	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back
  9307
  9308emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
  9309	MOVOU -32(R9)(R13*1), X4
  9310	MOVOU -16(R9)(R13*1), X5
  9311	MOVOA X4, -32(AX)(R13*1)
  9312	MOVOA X5, -16(AX)(R13*1)
  9313	ADDQ  $0x20, R13
  9314	CMPQ  R8, R13
  9315	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  9316	MOVOU X0, (AX)
  9317	MOVOU X1, 16(AX)
  9318	MOVOU X2, -32(AX)(R8*1)
  9319	MOVOU X3, -16(AX)(R8*1)
  9320	MOVQ  BX, AX
  9321
  9322emit_literal_done_match_emit_encodeBetterBlockAsm10B:
  9323	ADDL R11, CX
  9324	ADDL $0x04, R11
  9325	MOVL CX, 12(SP)
  9326
  9327	// emitCopy
  9328	CMPL R11, $0x40
  9329	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B
  9330	CMPL DI, $0x00000800
  9331	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm10B
  9332	MOVL $0x00000001, BX
  9333	LEAL 16(BX), BX
  9334	MOVB DI, 1(AX)
  9335	SHRL $0x08, DI
  9336	SHLL $0x05, DI
  9337	ORL  DI, BX
  9338	MOVB BL, (AX)
  9339	ADDQ $0x02, AX
  9340	SUBL $0x08, R11
  9341
  9342	// emitRepeat
  9343	LEAL -4(R11), R11
  9344	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  9345	MOVL R11, BX
  9346	LEAL -4(R11), R11
  9347	CMPL BX, $0x08
  9348	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  9349	CMPL BX, $0x0c
  9350	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  9351	CMPL DI, $0x00000800
  9352	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  9353
  9354cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
  9355	CMPL R11, $0x00000104
  9356	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  9357	LEAL -256(R11), R11
  9358	MOVW $0x0019, (AX)
  9359	MOVW R11, 2(AX)
  9360	ADDQ $0x04, AX
  9361	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9362
  9363repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
  9364	LEAL -4(R11), R11
  9365	MOVW $0x0015, (AX)
  9366	MOVB R11, 2(AX)
  9367	ADDQ $0x03, AX
  9368	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9369
  9370repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
  9371	SHLL $0x02, R11
  9372	ORL  $0x01, R11
  9373	MOVW R11, (AX)
  9374	ADDQ $0x02, AX
  9375	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9376
  9377repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
  9378	XORQ BX, BX
  9379	LEAL 1(BX)(R11*4), R11
  9380	MOVB DI, 1(AX)
  9381	SARL $0x08, DI
  9382	SHLL $0x05, DI
  9383	ORL  DI, R11
  9384	MOVB R11, (AX)
  9385	ADDQ $0x02, AX
  9386	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9387
  9388long_offset_short_match_nolit_encodeBetterBlockAsm10B:
  9389	MOVB $0xee, (AX)
  9390	MOVW DI, 1(AX)
  9391	LEAL -60(R11), R11
  9392	ADDQ $0x03, AX
  9393
  9394	// emitRepeat
  9395	MOVL R11, BX
  9396	LEAL -4(R11), R11
  9397	CMPL BX, $0x08
  9398	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
  9399	CMPL BX, $0x0c
  9400	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
  9401	CMPL DI, $0x00000800
  9402	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
  9403
  9404cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
  9405	CMPL R11, $0x00000104
  9406	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
  9407	LEAL -256(R11), R11
  9408	MOVW $0x0019, (AX)
  9409	MOVW R11, 2(AX)
  9410	ADDQ $0x04, AX
  9411	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9412
  9413repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
  9414	LEAL -4(R11), R11
  9415	MOVW $0x0015, (AX)
  9416	MOVB R11, 2(AX)
  9417	ADDQ $0x03, AX
  9418	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9419
  9420repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
  9421	SHLL $0x02, R11
  9422	ORL  $0x01, R11
  9423	MOVW R11, (AX)
  9424	ADDQ $0x02, AX
  9425	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9426
  9427repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
  9428	XORQ BX, BX
  9429	LEAL 1(BX)(R11*4), R11
  9430	MOVB DI, 1(AX)
  9431	SARL $0x08, DI
  9432	SHLL $0x05, DI
  9433	ORL  DI, R11
  9434	MOVB R11, (AX)
  9435	ADDQ $0x02, AX
  9436	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9437
  9438two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
  9439	MOVL R11, BX
  9440	SHLL $0x02, BX
  9441	CMPL R11, $0x0c
  9442	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B
  9443	CMPL DI, $0x00000800
  9444	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B
  9445	LEAL -15(BX), BX
  9446	MOVB DI, 1(AX)
  9447	SHRL $0x08, DI
  9448	SHLL $0x05, DI
  9449	ORL  DI, BX
  9450	MOVB BL, (AX)
  9451	ADDQ $0x02, AX
  9452	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9453
  9454emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
  9455	LEAL -2(BX), BX
  9456	MOVB BL, (AX)
  9457	MOVW DI, 1(AX)
  9458	ADDQ $0x03, AX
  9459	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9460
  9461match_is_repeat_encodeBetterBlockAsm10B:
  9462	MOVL 12(SP), BX
  9463	CMPL BX, SI
  9464	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
  9465	MOVL SI, R8
  9466	MOVL SI, 12(SP)
  9467	LEAQ (DX)(BX*1), R9
  9468	SUBL BX, R8
  9469	LEAL -1(R8), BX
  9470	CMPL BX, $0x3c
  9471	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm10B
  9472	CMPL BX, $0x00000100
  9473	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
  9474	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm10B
  9475
  9476three_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
  9477	MOVB $0xf4, (AX)
  9478	MOVW BX, 1(AX)
  9479	ADDQ $0x03, AX
  9480	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
  9481
  9482two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
  9483	MOVB $0xf0, (AX)
  9484	MOVB BL, 1(AX)
  9485	ADDQ $0x02, AX
  9486	CMPL BX, $0x40
  9487	JB   memmove_match_emit_repeat_encodeBetterBlockAsm10B
  9488	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
  9489
  9490one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
  9491	SHLB $0x02, BL
  9492	MOVB BL, (AX)
  9493	ADDQ $0x01, AX
  9494
  9495memmove_match_emit_repeat_encodeBetterBlockAsm10B:
  9496	LEAQ (AX)(R8*1), BX
  9497
  9498	// genMemMoveShort
  9499	CMPQ R8, $0x04
  9500	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
  9501	CMPQ R8, $0x08
  9502	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
  9503	CMPQ R8, $0x10
  9504	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
  9505	CMPQ R8, $0x20
  9506	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
  9507	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64
  9508
  9509emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
  9510	MOVL (R9), R10
  9511	MOVL R10, (AX)
  9512	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
  9513
  9514emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
  9515	MOVL (R9), R10
  9516	MOVL -4(R9)(R8*1), R9
  9517	MOVL R10, (AX)
  9518	MOVL R9, -4(AX)(R8*1)
  9519	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
  9520
  9521emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
  9522	MOVQ (R9), R10
  9523	MOVQ -8(R9)(R8*1), R9
  9524	MOVQ R10, (AX)
  9525	MOVQ R9, -8(AX)(R8*1)
  9526	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
  9527
  9528emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
  9529	MOVOU (R9), X0
  9530	MOVOU -16(R9)(R8*1), X1
  9531	MOVOU X0, (AX)
  9532	MOVOU X1, -16(AX)(R8*1)
  9533	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
  9534
  9535emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
  9536	MOVOU (R9), X0
  9537	MOVOU 16(R9), X1
  9538	MOVOU -32(R9)(R8*1), X2
  9539	MOVOU -16(R9)(R8*1), X3
  9540	MOVOU X0, (AX)
  9541	MOVOU X1, 16(AX)
  9542	MOVOU X2, -32(AX)(R8*1)
  9543	MOVOU X3, -16(AX)(R8*1)
  9544
  9545memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
  9546	MOVQ BX, AX
  9547	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
  9548
  9549memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
  9550	LEAQ (AX)(R8*1), BX
  9551
  9552	// genMemMoveLong
  9553	MOVOU (R9), X0
  9554	MOVOU 16(R9), X1
  9555	MOVOU -32(R9)(R8*1), X2
  9556	MOVOU -16(R9)(R8*1), X3
  9557	MOVQ  R8, R12
  9558	SHRQ  $0x05, R12
  9559	MOVQ  AX, R10
  9560	ANDL  $0x0000001f, R10
  9561	MOVQ  $0x00000040, R13
  9562	SUBQ  R10, R13
  9563	DECQ  R12
  9564	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  9565	LEAQ  -32(R9)(R13*1), R10
  9566	LEAQ  -32(AX)(R13*1), R14
  9567
  9568emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
  9569	MOVOU (R10), X4
  9570	MOVOU 16(R10), X5
  9571	MOVOA X4, (R14)
  9572	MOVOA X5, 16(R14)
  9573	ADDQ  $0x20, R14
  9574	ADDQ  $0x20, R10
  9575	ADDQ  $0x20, R13
  9576	DECQ  R12
  9577	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back
  9578
  9579emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
  9580	MOVOU -32(R9)(R13*1), X4
  9581	MOVOU -16(R9)(R13*1), X5
  9582	MOVOA X4, -32(AX)(R13*1)
  9583	MOVOA X5, -16(AX)(R13*1)
  9584	ADDQ  $0x20, R13
  9585	CMPQ  R8, R13
  9586	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  9587	MOVOU X0, (AX)
  9588	MOVOU X1, 16(AX)
  9589	MOVOU X2, -32(AX)(R8*1)
  9590	MOVOU X3, -16(AX)(R8*1)
  9591	MOVQ  BX, AX
  9592
  9593emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
  9594	ADDL R11, CX
  9595	ADDL $0x04, R11
  9596	MOVL CX, 12(SP)
  9597
  9598	// emitRepeat
  9599	MOVL R11, BX
  9600	LEAL -4(R11), R11
  9601	CMPL BX, $0x08
  9602	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
  9603	CMPL BX, $0x0c
  9604	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
  9605	CMPL DI, $0x00000800
  9606	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
  9607
  9608cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
  9609	CMPL R11, $0x00000104
  9610	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
  9611	LEAL -256(R11), R11
  9612	MOVW $0x0019, (AX)
  9613	MOVW R11, 2(AX)
  9614	ADDQ $0x04, AX
  9615	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9616
  9617repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
  9618	LEAL -4(R11), R11
  9619	MOVW $0x0015, (AX)
  9620	MOVB R11, 2(AX)
  9621	ADDQ $0x03, AX
  9622	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9623
  9624repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
  9625	SHLL $0x02, R11
  9626	ORL  $0x01, R11
  9627	MOVW R11, (AX)
  9628	ADDQ $0x02, AX
  9629	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  9630
  9631repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
  9632	XORQ BX, BX
  9633	LEAL 1(BX)(R11*4), R11
  9634	MOVB DI, 1(AX)
  9635	SARL $0x08, DI
  9636	SHLL $0x05, DI
  9637	ORL  DI, R11
  9638	MOVB R11, (AX)
  9639	ADDQ $0x02, AX
  9640
  9641match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
  9642	CMPL CX, 8(SP)
  9643	JAE  emit_remainder_encodeBetterBlockAsm10B
  9644	CMPQ AX, (SP)
  9645	JB   match_nolit_dst_ok_encodeBetterBlockAsm10B
  9646	MOVQ $0x00000000, ret+48(FP)
  9647	RET
  9648
  9649match_nolit_dst_ok_encodeBetterBlockAsm10B:
  9650	MOVQ  $0x0000cf1bbcdcbf9b, BX
  9651	MOVQ  $0x9e3779b1, DI
  9652	LEAQ  1(SI), SI
  9653	LEAQ  -2(CX), R8
  9654	MOVQ  (DX)(SI*1), R9
  9655	MOVQ  1(DX)(SI*1), R10
  9656	MOVQ  (DX)(R8*1), R11
  9657	MOVQ  1(DX)(R8*1), R12
  9658	SHLQ  $0x10, R9
  9659	IMULQ BX, R9
  9660	SHRQ  $0x34, R9
  9661	SHLQ  $0x20, R10
  9662	IMULQ DI, R10
  9663	SHRQ  $0x36, R10
  9664	SHLQ  $0x10, R11
  9665	IMULQ BX, R11
  9666	SHRQ  $0x34, R11
  9667	SHLQ  $0x20, R12
  9668	IMULQ DI, R12
  9669	SHRQ  $0x36, R12
  9670	LEAQ  1(SI), DI
  9671	LEAQ  1(R8), R13
  9672	MOVL  SI, 24(SP)(R9*4)
  9673	MOVL  R8, 24(SP)(R11*4)
  9674	MOVL  DI, 16408(SP)(R10*4)
  9675	MOVL  R13, 16408(SP)(R12*4)
  9676	LEAQ  1(R8)(SI*1), DI
  9677	SHRQ  $0x01, DI
  9678	ADDQ  $0x01, SI
  9679	SUBQ  $0x01, R8
  9680
  9681index_loop_encodeBetterBlockAsm10B:
  9682	CMPQ  DI, R8
  9683	JAE   search_loop_encodeBetterBlockAsm10B
  9684	MOVQ  (DX)(SI*1), R9
  9685	MOVQ  (DX)(DI*1), R10
  9686	SHLQ  $0x10, R9
  9687	IMULQ BX, R9
  9688	SHRQ  $0x34, R9
  9689	SHLQ  $0x10, R10
  9690	IMULQ BX, R10
  9691	SHRQ  $0x34, R10
  9692	MOVL  SI, 24(SP)(R9*4)
  9693	MOVL  DI, 24(SP)(R10*4)
  9694	ADDQ  $0x02, SI
  9695	ADDQ  $0x02, DI
  9696	JMP   index_loop_encodeBetterBlockAsm10B
  9697
  9698emit_remainder_encodeBetterBlockAsm10B:
  9699	MOVQ src_len+32(FP), CX
  9700	SUBL 12(SP), CX
  9701	LEAQ 3(AX)(CX*1), CX
  9702	CMPQ CX, (SP)
  9703	JB   emit_remainder_ok_encodeBetterBlockAsm10B
  9704	MOVQ $0x00000000, ret+48(FP)
  9705	RET
  9706
  9707emit_remainder_ok_encodeBetterBlockAsm10B:
  9708	MOVQ src_len+32(FP), CX
  9709	MOVL 12(SP), BX
  9710	CMPL BX, CX
  9711	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
  9712	MOVL CX, SI
  9713	MOVL CX, 12(SP)
  9714	LEAQ (DX)(BX*1), CX
  9715	SUBL BX, SI
  9716	LEAL -1(SI), DX
  9717	CMPL DX, $0x3c
  9718	JB   one_byte_emit_remainder_encodeBetterBlockAsm10B
  9719	CMPL DX, $0x00000100
  9720	JB   two_bytes_emit_remainder_encodeBetterBlockAsm10B
  9721	JB   three_bytes_emit_remainder_encodeBetterBlockAsm10B
  9722
  9723three_bytes_emit_remainder_encodeBetterBlockAsm10B:
  9724	MOVB $0xf4, (AX)
  9725	MOVW DX, 1(AX)
  9726	ADDQ $0x03, AX
  9727	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
  9728
  9729two_bytes_emit_remainder_encodeBetterBlockAsm10B:
  9730	MOVB $0xf0, (AX)
  9731	MOVB DL, 1(AX)
  9732	ADDQ $0x02, AX
  9733	CMPL DX, $0x40
  9734	JB   memmove_emit_remainder_encodeBetterBlockAsm10B
  9735	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
  9736
  9737one_byte_emit_remainder_encodeBetterBlockAsm10B:
  9738	SHLB $0x02, DL
  9739	MOVB DL, (AX)
  9740	ADDQ $0x01, AX
  9741
  9742memmove_emit_remainder_encodeBetterBlockAsm10B:
  9743	LEAQ (AX)(SI*1), DX
  9744	MOVL SI, BX
  9745
  9746	// genMemMoveShort
  9747	CMPQ BX, $0x03
  9748	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2
  9749	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3
  9750	CMPQ BX, $0x08
  9751	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7
  9752	CMPQ BX, $0x10
  9753	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16
  9754	CMPQ BX, $0x20
  9755	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32
  9756	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64
  9757
  9758emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2:
  9759	MOVB (CX), SI
  9760	MOVB -1(CX)(BX*1), CL
  9761	MOVB SI, (AX)
  9762	MOVB CL, -1(AX)(BX*1)
  9763	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  9764
  9765emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3:
  9766	MOVW (CX), SI
  9767	MOVB 2(CX), CL
  9768	MOVW SI, (AX)
  9769	MOVB CL, 2(AX)
  9770	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  9771
  9772emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
  9773	MOVL (CX), SI
  9774	MOVL -4(CX)(BX*1), CX
  9775	MOVL SI, (AX)
  9776	MOVL CX, -4(AX)(BX*1)
  9777	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  9778
  9779emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
  9780	MOVQ (CX), SI
  9781	MOVQ -8(CX)(BX*1), CX
  9782	MOVQ SI, (AX)
  9783	MOVQ CX, -8(AX)(BX*1)
  9784	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  9785
  9786emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
  9787	MOVOU (CX), X0
  9788	MOVOU -16(CX)(BX*1), X1
  9789	MOVOU X0, (AX)
  9790	MOVOU X1, -16(AX)(BX*1)
  9791	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  9792
  9793emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
  9794	MOVOU (CX), X0
  9795	MOVOU 16(CX), X1
  9796	MOVOU -32(CX)(BX*1), X2
  9797	MOVOU -16(CX)(BX*1), X3
  9798	MOVOU X0, (AX)
  9799	MOVOU X1, 16(AX)
  9800	MOVOU X2, -32(AX)(BX*1)
  9801	MOVOU X3, -16(AX)(BX*1)
  9802
  9803memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
  9804	MOVQ DX, AX
  9805	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
  9806
  9807memmove_long_emit_remainder_encodeBetterBlockAsm10B:
  9808	LEAQ (AX)(SI*1), DX
  9809	MOVL SI, BX
  9810
  9811	// genMemMoveLong
  9812	MOVOU (CX), X0
  9813	MOVOU 16(CX), X1
  9814	MOVOU -32(CX)(BX*1), X2
  9815	MOVOU -16(CX)(BX*1), X3
  9816	MOVQ  BX, DI
  9817	SHRQ  $0x05, DI
  9818	MOVQ  AX, SI
  9819	ANDL  $0x0000001f, SI
  9820	MOVQ  $0x00000040, R8
  9821	SUBQ  SI, R8
  9822	DECQ  DI
  9823	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  9824	LEAQ  -32(CX)(R8*1), SI
  9825	LEAQ  -32(AX)(R8*1), R9
  9826
  9827emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
  9828	MOVOU (SI), X4
  9829	MOVOU 16(SI), X5
  9830	MOVOA X4, (R9)
  9831	MOVOA X5, 16(R9)
  9832	ADDQ  $0x20, R9
  9833	ADDQ  $0x20, SI
  9834	ADDQ  $0x20, R8
  9835	DECQ  DI
  9836	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back
  9837
  9838emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
  9839	MOVOU -32(CX)(R8*1), X4
  9840	MOVOU -16(CX)(R8*1), X5
  9841	MOVOA X4, -32(AX)(R8*1)
  9842	MOVOA X5, -16(AX)(R8*1)
  9843	ADDQ  $0x20, R8
  9844	CMPQ  BX, R8
  9845	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  9846	MOVOU X0, (AX)
  9847	MOVOU X1, 16(AX)
  9848	MOVOU X2, -32(AX)(BX*1)
  9849	MOVOU X3, -16(AX)(BX*1)
  9850	MOVQ  DX, AX
  9851
  9852emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
  9853	MOVQ dst_base+0(FP), CX
  9854	SUBQ CX, AX
  9855	MOVQ AX, ret+48(FP)
  9856	RET
  9857
  9858// func encodeBetterBlockAsm8B(dst []byte, src []byte) int
  9859// Requires: BMI, SSE2
  9860TEXT ·encodeBetterBlockAsm8B(SB), $5144-56
  9861	MOVQ dst_base+0(FP), AX
  9862	MOVQ $0x00000028, CX
  9863	LEAQ 24(SP), DX
  9864	PXOR X0, X0
  9865
  9866zero_loop_encodeBetterBlockAsm8B:
  9867	MOVOU X0, (DX)
  9868	MOVOU X0, 16(DX)
  9869	MOVOU X0, 32(DX)
  9870	MOVOU X0, 48(DX)
  9871	MOVOU X0, 64(DX)
  9872	MOVOU X0, 80(DX)
  9873	MOVOU X0, 96(DX)
  9874	MOVOU X0, 112(DX)
  9875	ADDQ  $0x80, DX
  9876	DECQ  CX
  9877	JNZ   zero_loop_encodeBetterBlockAsm8B
  9878	MOVL  $0x00000000, 12(SP)
  9879	MOVQ  src_len+32(FP), CX
  9880	LEAQ  -6(CX), DX
  9881	LEAQ  -8(CX), BX
  9882	MOVL  BX, 8(SP)
  9883	SHRQ  $0x05, CX
  9884	SUBL  CX, DX
  9885	LEAQ  (AX)(DX*1), DX
  9886	MOVQ  DX, (SP)
  9887	MOVL  $0x00000001, CX
  9888	MOVL  $0x00000000, 16(SP)
  9889	MOVQ  src_base+24(FP), DX
  9890
  9891search_loop_encodeBetterBlockAsm8B:
  9892	MOVL  CX, BX
  9893	SUBL  12(SP), BX
  9894	SHRL  $0x04, BX
  9895	LEAL  1(CX)(BX*1), BX
  9896	CMPL  BX, 8(SP)
  9897	JAE   emit_remainder_encodeBetterBlockAsm8B
  9898	MOVQ  (DX)(CX*1), SI
  9899	MOVL  BX, 20(SP)
  9900	MOVQ  $0x0000cf1bbcdcbf9b, R8
  9901	MOVQ  $0x9e3779b1, BX
  9902	MOVQ  SI, R9
  9903	MOVQ  SI, R10
  9904	SHLQ  $0x10, R9
  9905	IMULQ R8, R9
  9906	SHRQ  $0x36, R9
  9907	SHLQ  $0x20, R10
  9908	IMULQ BX, R10
  9909	SHRQ  $0x38, R10
  9910	MOVL  24(SP)(R9*4), BX
  9911	MOVL  4120(SP)(R10*4), DI
  9912	MOVL  CX, 24(SP)(R9*4)
  9913	MOVL  CX, 4120(SP)(R10*4)
  9914	MOVQ  (DX)(BX*1), R9
  9915	MOVQ  (DX)(DI*1), R10
  9916	CMPQ  R9, SI
  9917	JEQ   candidate_match_encodeBetterBlockAsm8B
  9918	CMPQ  R10, SI
  9919	JNE   no_short_found_encodeBetterBlockAsm8B
  9920	MOVL  DI, BX
  9921	JMP   candidate_match_encodeBetterBlockAsm8B
  9922
  9923no_short_found_encodeBetterBlockAsm8B:
  9924	CMPL R9, SI
  9925	JEQ  candidate_match_encodeBetterBlockAsm8B
  9926	CMPL R10, SI
  9927	JEQ  candidateS_match_encodeBetterBlockAsm8B
  9928	MOVL 20(SP), CX
  9929	JMP  search_loop_encodeBetterBlockAsm8B
  9930
  9931candidateS_match_encodeBetterBlockAsm8B:
  9932	SHRQ  $0x08, SI
  9933	MOVQ  SI, R9
  9934	SHLQ  $0x10, R9
  9935	IMULQ R8, R9
  9936	SHRQ  $0x36, R9
  9937	MOVL  24(SP)(R9*4), BX
  9938	INCL  CX
  9939	MOVL  CX, 24(SP)(R9*4)
  9940	CMPL  (DX)(BX*1), SI
  9941	JEQ   candidate_match_encodeBetterBlockAsm8B
  9942	DECL  CX
  9943	MOVL  DI, BX
  9944
  9945candidate_match_encodeBetterBlockAsm8B:
  9946	MOVL  12(SP), SI
  9947	TESTL BX, BX
  9948	JZ    match_extend_back_end_encodeBetterBlockAsm8B
  9949
  9950match_extend_back_loop_encodeBetterBlockAsm8B:
  9951	CMPL CX, SI
  9952	JBE  match_extend_back_end_encodeBetterBlockAsm8B
  9953	MOVB -1(DX)(BX*1), DI
  9954	MOVB -1(DX)(CX*1), R8
  9955	CMPB DI, R8
  9956	JNE  match_extend_back_end_encodeBetterBlockAsm8B
  9957	LEAL -1(CX), CX
  9958	DECL BX
  9959	JZ   match_extend_back_end_encodeBetterBlockAsm8B
  9960	JMP  match_extend_back_loop_encodeBetterBlockAsm8B
  9961
  9962match_extend_back_end_encodeBetterBlockAsm8B:
  9963	MOVL CX, SI
  9964	SUBL 12(SP), SI
  9965	LEAQ 3(AX)(SI*1), SI
  9966	CMPQ SI, (SP)
  9967	JB   match_dst_size_check_encodeBetterBlockAsm8B
  9968	MOVQ $0x00000000, ret+48(FP)
  9969	RET
  9970
  9971match_dst_size_check_encodeBetterBlockAsm8B:
  9972	MOVL CX, SI
  9973	ADDL $0x04, CX
  9974	ADDL $0x04, BX
  9975	MOVQ src_len+32(FP), DI
  9976	SUBL CX, DI
  9977	LEAQ (DX)(CX*1), R8
  9978	LEAQ (DX)(BX*1), R9
  9979
  9980	// matchLen
  9981	XORL R11, R11
  9982
  9983matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B:
  9984	CMPL DI, $0x10
  9985	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm8B
  9986	MOVQ (R8)(R11*1), R10
  9987	MOVQ 8(R8)(R11*1), R12
  9988	XORQ (R9)(R11*1), R10
  9989	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
  9990	XORQ 8(R9)(R11*1), R12
  9991	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B
  9992	LEAL -16(DI), DI
  9993	LEAL 16(R11), R11
  9994	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B
  9995
  9996matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B:
  9997#ifdef GOAMD64_v3
  9998	TZCNTQ R12, R12
  9999
 10000#else
 10001	BSFQ R12, R12
 10002
 10003#endif
 10004	SARQ $0x03, R12
 10005	LEAL 8(R11)(R12*1), R11
 10006	JMP  match_nolit_end_encodeBetterBlockAsm8B
 10007
 10008matchlen_match8_match_nolit_encodeBetterBlockAsm8B:
 10009	CMPL DI, $0x08
 10010	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm8B
 10011	MOVQ (R8)(R11*1), R10
 10012	XORQ (R9)(R11*1), R10
 10013	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
 10014	LEAL -8(DI), DI
 10015	LEAL 8(R11), R11
 10016	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm8B
 10017
 10018matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B:
 10019#ifdef GOAMD64_v3
 10020	TZCNTQ R10, R10
 10021
 10022#else
 10023	BSFQ R10, R10
 10024
 10025#endif
 10026	SARQ $0x03, R10
 10027	LEAL (R11)(R10*1), R11
 10028	JMP  match_nolit_end_encodeBetterBlockAsm8B
 10029
 10030matchlen_match4_match_nolit_encodeBetterBlockAsm8B:
 10031	CMPL DI, $0x04
 10032	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm8B
 10033	MOVL (R8)(R11*1), R10
 10034	CMPL (R9)(R11*1), R10
 10035	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm8B
 10036	LEAL -4(DI), DI
 10037	LEAL 4(R11), R11
 10038
 10039matchlen_match2_match_nolit_encodeBetterBlockAsm8B:
 10040	CMPL DI, $0x01
 10041	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm8B
 10042	JB   match_nolit_end_encodeBetterBlockAsm8B
 10043	MOVW (R8)(R11*1), R10
 10044	CMPW (R9)(R11*1), R10
 10045	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm8B
 10046	LEAL 2(R11), R11
 10047	SUBL $0x02, DI
 10048	JZ   match_nolit_end_encodeBetterBlockAsm8B
 10049
 10050matchlen_match1_match_nolit_encodeBetterBlockAsm8B:
 10051	MOVB (R8)(R11*1), R10
 10052	CMPB (R9)(R11*1), R10
 10053	JNE  match_nolit_end_encodeBetterBlockAsm8B
 10054	LEAL 1(R11), R11
 10055
 10056match_nolit_end_encodeBetterBlockAsm8B:
 10057	MOVL CX, DI
 10058	SUBL BX, DI
 10059
 10060	// Check if repeat
 10061	CMPL 16(SP), DI
 10062	JEQ  match_is_repeat_encodeBetterBlockAsm8B
 10063	MOVL DI, 16(SP)
 10064	MOVL 12(SP), BX
 10065	CMPL BX, SI
 10066	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm8B
 10067	MOVL SI, R8
 10068	MOVL SI, 12(SP)
 10069	LEAQ (DX)(BX*1), R9
 10070	SUBL BX, R8
 10071	LEAL -1(R8), BX
 10072	CMPL BX, $0x3c
 10073	JB   one_byte_match_emit_encodeBetterBlockAsm8B
 10074	CMPL BX, $0x00000100
 10075	JB   two_bytes_match_emit_encodeBetterBlockAsm8B
 10076	JB   three_bytes_match_emit_encodeBetterBlockAsm8B
 10077
 10078three_bytes_match_emit_encodeBetterBlockAsm8B:
 10079	MOVB $0xf4, (AX)
 10080	MOVW BX, 1(AX)
 10081	ADDQ $0x03, AX
 10082	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
 10083
 10084two_bytes_match_emit_encodeBetterBlockAsm8B:
 10085	MOVB $0xf0, (AX)
 10086	MOVB BL, 1(AX)
 10087	ADDQ $0x02, AX
 10088	CMPL BX, $0x40
 10089	JB   memmove_match_emit_encodeBetterBlockAsm8B
 10090	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
 10091
 10092one_byte_match_emit_encodeBetterBlockAsm8B:
 10093	SHLB $0x02, BL
 10094	MOVB BL, (AX)
 10095	ADDQ $0x01, AX
 10096
 10097memmove_match_emit_encodeBetterBlockAsm8B:
 10098	LEAQ (AX)(R8*1), BX
 10099
 10100	// genMemMoveShort
 10101	CMPQ R8, $0x04
 10102	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4
 10103	CMPQ R8, $0x08
 10104	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
 10105	CMPQ R8, $0x10
 10106	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
 10107	CMPQ R8, $0x20
 10108	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
 10109	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64
 10110
 10111emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4:
 10112	MOVL (R9), R10
 10113	MOVL R10, (AX)
 10114	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 10115
 10116emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
 10117	MOVL (R9), R10
 10118	MOVL -4(R9)(R8*1), R9
 10119	MOVL R10, (AX)
 10120	MOVL R9, -4(AX)(R8*1)
 10121	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 10122
 10123emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
 10124	MOVQ (R9), R10
 10125	MOVQ -8(R9)(R8*1), R9
 10126	MOVQ R10, (AX)
 10127	MOVQ R9, -8(AX)(R8*1)
 10128	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 10129
 10130emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
 10131	MOVOU (R9), X0
 10132	MOVOU -16(R9)(R8*1), X1
 10133	MOVOU X0, (AX)
 10134	MOVOU X1, -16(AX)(R8*1)
 10135	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 10136
 10137emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
 10138	MOVOU (R9), X0
 10139	MOVOU 16(R9), X1
 10140	MOVOU -32(R9)(R8*1), X2
 10141	MOVOU -16(R9)(R8*1), X3
 10142	MOVOU X0, (AX)
 10143	MOVOU X1, 16(AX)
 10144	MOVOU X2, -32(AX)(R8*1)
 10145	MOVOU X3, -16(AX)(R8*1)
 10146
 10147memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
 10148	MOVQ BX, AX
 10149	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm8B
 10150
 10151memmove_long_match_emit_encodeBetterBlockAsm8B:
 10152	LEAQ (AX)(R8*1), BX
 10153
 10154	// genMemMoveLong
 10155	MOVOU (R9), X0
 10156	MOVOU 16(R9), X1
 10157	MOVOU -32(R9)(R8*1), X2
 10158	MOVOU -16(R9)(R8*1), X3
 10159	MOVQ  R8, R12
 10160	SHRQ  $0x05, R12
 10161	MOVQ  AX, R10
 10162	ANDL  $0x0000001f, R10
 10163	MOVQ  $0x00000040, R13
 10164	SUBQ  R10, R13
 10165	DECQ  R12
 10166	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
 10167	LEAQ  -32(R9)(R13*1), R10
 10168	LEAQ  -32(AX)(R13*1), R14
 10169
 10170emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
 10171	MOVOU (R10), X4
 10172	MOVOU 16(R10), X5
 10173	MOVOA X4, (R14)
 10174	MOVOA X5, 16(R14)
 10175	ADDQ  $0x20, R14
 10176	ADDQ  $0x20, R10
 10177	ADDQ  $0x20, R13
 10178	DECQ  R12
 10179	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back
 10180
 10181emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
 10182	MOVOU -32(R9)(R13*1), X4
 10183	MOVOU -16(R9)(R13*1), X5
 10184	MOVOA X4, -32(AX)(R13*1)
 10185	MOVOA X5, -16(AX)(R13*1)
 10186	ADDQ  $0x20, R13
 10187	CMPQ  R8, R13
 10188	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
 10189	MOVOU X0, (AX)
 10190	MOVOU X1, 16(AX)
 10191	MOVOU X2, -32(AX)(R8*1)
 10192	MOVOU X3, -16(AX)(R8*1)
 10193	MOVQ  BX, AX
 10194
 10195emit_literal_done_match_emit_encodeBetterBlockAsm8B:
 10196	ADDL R11, CX
 10197	ADDL $0x04, R11
 10198	MOVL CX, 12(SP)
 10199
 10200	// emitCopy
 10201	CMPL R11, $0x40
 10202	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B
 10203	CMPL DI, $0x00000800
 10204	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm8B
 10205	MOVL $0x00000001, BX
 10206	LEAL 16(BX), BX
 10207	MOVB DI, 1(AX)
 10208	SHRL $0x08, DI
 10209	SHLL $0x05, DI
 10210	ORL  DI, BX
 10211	MOVB BL, (AX)
 10212	ADDQ $0x02, AX
 10213	SUBL $0x08, R11
 10214
 10215	// emitRepeat
 10216	LEAL -4(R11), R11
 10217	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
 10218	MOVL R11, BX
 10219	LEAL -4(R11), R11
 10220	CMPL BX, $0x08
 10221	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
 10222	CMPL BX, $0x0c
 10223	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
 10224
 10225cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
 10226	CMPL R11, $0x00000104
 10227	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
 10228	LEAL -256(R11), R11
 10229	MOVW $0x0019, (AX)
 10230	MOVW R11, 2(AX)
 10231	ADDQ $0x04, AX
 10232	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10233
 10234repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
 10235	LEAL -4(R11), R11
 10236	MOVW $0x0015, (AX)
 10237	MOVB R11, 2(AX)
 10238	ADDQ $0x03, AX
 10239	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10240
 10241repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
 10242	SHLL $0x02, R11
 10243	ORL  $0x01, R11
 10244	MOVW R11, (AX)
 10245	ADDQ $0x02, AX
 10246	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10247	XORQ BX, BX
 10248	LEAL 1(BX)(R11*4), R11
 10249	MOVB DI, 1(AX)
 10250	SARL $0x08, DI
 10251	SHLL $0x05, DI
 10252	ORL  DI, R11
 10253	MOVB R11, (AX)
 10254	ADDQ $0x02, AX
 10255	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10256
 10257long_offset_short_match_nolit_encodeBetterBlockAsm8B:
 10258	MOVB $0xee, (AX)
 10259	MOVW DI, 1(AX)
 10260	LEAL -60(R11), R11
 10261	ADDQ $0x03, AX
 10262
 10263	// emitRepeat
 10264	MOVL R11, BX
 10265	LEAL -4(R11), R11
 10266	CMPL BX, $0x08
 10267	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
 10268	CMPL BX, $0x0c
 10269	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
 10270
 10271cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
 10272	CMPL R11, $0x00000104
 10273	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
 10274	LEAL -256(R11), R11
 10275	MOVW $0x0019, (AX)
 10276	MOVW R11, 2(AX)
 10277	ADDQ $0x04, AX
 10278	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10279
 10280repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
 10281	LEAL -4(R11), R11
 10282	MOVW $0x0015, (AX)
 10283	MOVB R11, 2(AX)
 10284	ADDQ $0x03, AX
 10285	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10286
 10287repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
 10288	SHLL $0x02, R11
 10289	ORL  $0x01, R11
 10290	MOVW R11, (AX)
 10291	ADDQ $0x02, AX
 10292	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10293	XORQ BX, BX
 10294	LEAL 1(BX)(R11*4), R11
 10295	MOVB DI, 1(AX)
 10296	SARL $0x08, DI
 10297	SHLL $0x05, DI
 10298	ORL  DI, R11
 10299	MOVB R11, (AX)
 10300	ADDQ $0x02, AX
 10301	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10302
 10303two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B:
 10304	MOVL R11, BX
 10305	SHLL $0x02, BX
 10306	CMPL R11, $0x0c
 10307	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm8B
 10308	LEAL -15(BX), BX
 10309	MOVB DI, 1(AX)
 10310	SHRL $0x08, DI
 10311	SHLL $0x05, DI
 10312	ORL  DI, BX
 10313	MOVB BL, (AX)
 10314	ADDQ $0x02, AX
 10315	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10316
 10317emit_copy_three_match_nolit_encodeBetterBlockAsm8B:
 10318	LEAL -2(BX), BX
 10319	MOVB BL, (AX)
 10320	MOVW DI, 1(AX)
 10321	ADDQ $0x03, AX
 10322	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10323
 10324match_is_repeat_encodeBetterBlockAsm8B:
 10325	MOVL 12(SP), BX
 10326	CMPL BX, SI
 10327	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
 10328	MOVL SI, DI
 10329	MOVL SI, 12(SP)
 10330	LEAQ (DX)(BX*1), R8
 10331	SUBL BX, DI
 10332	LEAL -1(DI), BX
 10333	CMPL BX, $0x3c
 10334	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm8B
 10335	CMPL BX, $0x00000100
 10336	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
 10337	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm8B
 10338
 10339three_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
 10340	MOVB $0xf4, (AX)
 10341	MOVW BX, 1(AX)
 10342	ADDQ $0x03, AX
 10343	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
 10344
 10345two_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
 10346	MOVB $0xf0, (AX)
 10347	MOVB BL, 1(AX)
 10348	ADDQ $0x02, AX
 10349	CMPL BX, $0x40
 10350	JB   memmove_match_emit_repeat_encodeBetterBlockAsm8B
 10351	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
 10352
 10353one_byte_match_emit_repeat_encodeBetterBlockAsm8B:
 10354	SHLB $0x02, BL
 10355	MOVB BL, (AX)
 10356	ADDQ $0x01, AX
 10357
 10358memmove_match_emit_repeat_encodeBetterBlockAsm8B:
 10359	LEAQ (AX)(DI*1), BX
 10360
 10361	// genMemMoveShort
 10362	CMPQ DI, $0x04
 10363	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4
 10364	CMPQ DI, $0x08
 10365	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
 10366	CMPQ DI, $0x10
 10367	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
 10368	CMPQ DI, $0x20
 10369	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
 10370	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64
 10371
 10372emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4:
 10373	MOVL (R8), R9
 10374	MOVL R9, (AX)
 10375	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 10376
 10377emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
 10378	MOVL (R8), R9
 10379	MOVL -4(R8)(DI*1), R8
 10380	MOVL R9, (AX)
 10381	MOVL R8, -4(AX)(DI*1)
 10382	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 10383
 10384emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
 10385	MOVQ (R8), R9
 10386	MOVQ -8(R8)(DI*1), R8
 10387	MOVQ R9, (AX)
 10388	MOVQ R8, -8(AX)(DI*1)
 10389	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 10390
 10391emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
 10392	MOVOU (R8), X0
 10393	MOVOU -16(R8)(DI*1), X1
 10394	MOVOU X0, (AX)
 10395	MOVOU X1, -16(AX)(DI*1)
 10396	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 10397
 10398emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
 10399	MOVOU (R8), X0
 10400	MOVOU 16(R8), X1
 10401	MOVOU -32(R8)(DI*1), X2
 10402	MOVOU -16(R8)(DI*1), X3
 10403	MOVOU X0, (AX)
 10404	MOVOU X1, 16(AX)
 10405	MOVOU X2, -32(AX)(DI*1)
 10406	MOVOU X3, -16(AX)(DI*1)
 10407
 10408memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
 10409	MOVQ BX, AX
 10410	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
 10411
 10412memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
 10413	LEAQ (AX)(DI*1), BX
 10414
 10415	// genMemMoveLong
 10416	MOVOU (R8), X0
 10417	MOVOU 16(R8), X1
 10418	MOVOU -32(R8)(DI*1), X2
 10419	MOVOU -16(R8)(DI*1), X3
 10420	MOVQ  DI, R10
 10421	SHRQ  $0x05, R10
 10422	MOVQ  AX, R9
 10423	ANDL  $0x0000001f, R9
 10424	MOVQ  $0x00000040, R12
 10425	SUBQ  R9, R12
 10426	DECQ  R10
 10427	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
 10428	LEAQ  -32(R8)(R12*1), R9
 10429	LEAQ  -32(AX)(R12*1), R13
 10430
 10431emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
 10432	MOVOU (R9), X4
 10433	MOVOU 16(R9), X5
 10434	MOVOA X4, (R13)
 10435	MOVOA X5, 16(R13)
 10436	ADDQ  $0x20, R13
 10437	ADDQ  $0x20, R9
 10438	ADDQ  $0x20, R12
 10439	DECQ  R10
 10440	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back
 10441
 10442emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
 10443	MOVOU -32(R8)(R12*1), X4
 10444	MOVOU -16(R8)(R12*1), X5
 10445	MOVOA X4, -32(AX)(R12*1)
 10446	MOVOA X5, -16(AX)(R12*1)
 10447	ADDQ  $0x20, R12
 10448	CMPQ  DI, R12
 10449	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
 10450	MOVOU X0, (AX)
 10451	MOVOU X1, 16(AX)
 10452	MOVOU X2, -32(AX)(DI*1)
 10453	MOVOU X3, -16(AX)(DI*1)
 10454	MOVQ  BX, AX
 10455
 10456emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B:
 10457	ADDL R11, CX
 10458	ADDL $0x04, R11
 10459	MOVL CX, 12(SP)
 10460
 10461	// emitRepeat
 10462	MOVL R11, BX
 10463	LEAL -4(R11), R11
 10464	CMPL BX, $0x08
 10465	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
 10466	CMPL BX, $0x0c
 10467	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B
 10468
 10469cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B:
 10470	CMPL R11, $0x00000104
 10471	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
 10472	LEAL -256(R11), R11
 10473	MOVW $0x0019, (AX)
 10474	MOVW R11, 2(AX)
 10475	ADDQ $0x04, AX
 10476	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10477
 10478repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
 10479	LEAL -4(R11), R11
 10480	MOVW $0x0015, (AX)
 10481	MOVB R11, 2(AX)
 10482	ADDQ $0x03, AX
 10483	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10484
 10485repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B:
 10486	SHLL $0x02, R11
 10487	ORL  $0x01, R11
 10488	MOVW R11, (AX)
 10489	ADDQ $0x02, AX
 10490	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 10491	XORQ BX, BX
 10492	LEAL 1(BX)(R11*4), R11
 10493	MOVB DI, 1(AX)
 10494	SARL $0x08, DI
 10495	SHLL $0x05, DI
 10496	ORL  DI, R11
 10497	MOVB R11, (AX)
 10498	ADDQ $0x02, AX
 10499
 10500match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
 10501	CMPL CX, 8(SP)
 10502	JAE  emit_remainder_encodeBetterBlockAsm8B
 10503	CMPQ AX, (SP)
 10504	JB   match_nolit_dst_ok_encodeBetterBlockAsm8B
 10505	MOVQ $0x00000000, ret+48(FP)
 10506	RET
 10507
 10508match_nolit_dst_ok_encodeBetterBlockAsm8B:
 10509	MOVQ  $0x0000cf1bbcdcbf9b, BX
 10510	MOVQ  $0x9e3779b1, DI
 10511	LEAQ  1(SI), SI
 10512	LEAQ  -2(CX), R8
 10513	MOVQ  (DX)(SI*1), R9
 10514	MOVQ  1(DX)(SI*1), R10
 10515	MOVQ  (DX)(R8*1), R11
 10516	MOVQ  1(DX)(R8*1), R12
 10517	SHLQ  $0x10, R9
 10518	IMULQ BX, R9
 10519	SHRQ  $0x36, R9
 10520	SHLQ  $0x20, R10
 10521	IMULQ DI, R10
 10522	SHRQ  $0x38, R10
 10523	SHLQ  $0x10, R11
 10524	IMULQ BX, R11
 10525	SHRQ  $0x36, R11
 10526	SHLQ  $0x20, R12
 10527	IMULQ DI, R12
 10528	SHRQ  $0x38, R12
 10529	LEAQ  1(SI), DI
 10530	LEAQ  1(R8), R13
 10531	MOVL  SI, 24(SP)(R9*4)
 10532	MOVL  R8, 24(SP)(R11*4)
 10533	MOVL  DI, 4120(SP)(R10*4)
 10534	MOVL  R13, 4120(SP)(R12*4)
 10535	LEAQ  1(R8)(SI*1), DI
 10536	SHRQ  $0x01, DI
 10537	ADDQ  $0x01, SI
 10538	SUBQ  $0x01, R8
 10539
 10540index_loop_encodeBetterBlockAsm8B:
 10541	CMPQ  DI, R8
 10542	JAE   search_loop_encodeBetterBlockAsm8B
 10543	MOVQ  (DX)(SI*1), R9
 10544	MOVQ  (DX)(DI*1), R10
 10545	SHLQ  $0x10, R9
 10546	IMULQ BX, R9
 10547	SHRQ  $0x36, R9
 10548	SHLQ  $0x10, R10
 10549	IMULQ BX, R10
 10550	SHRQ  $0x36, R10
 10551	MOVL  SI, 24(SP)(R9*4)
 10552	MOVL  DI, 24(SP)(R10*4)
 10553	ADDQ  $0x02, SI
 10554	ADDQ  $0x02, DI
 10555	JMP   index_loop_encodeBetterBlockAsm8B
 10556
 10557emit_remainder_encodeBetterBlockAsm8B:
 10558	MOVQ src_len+32(FP), CX
 10559	SUBL 12(SP), CX
 10560	LEAQ 3(AX)(CX*1), CX
 10561	CMPQ CX, (SP)
 10562	JB   emit_remainder_ok_encodeBetterBlockAsm8B
 10563	MOVQ $0x00000000, ret+48(FP)
 10564	RET
 10565
 10566emit_remainder_ok_encodeBetterBlockAsm8B:
 10567	MOVQ src_len+32(FP), CX
 10568	MOVL 12(SP), BX
 10569	CMPL BX, CX
 10570	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
 10571	MOVL CX, SI
 10572	MOVL CX, 12(SP)
 10573	LEAQ (DX)(BX*1), CX
 10574	SUBL BX, SI
 10575	LEAL -1(SI), DX
 10576	CMPL DX, $0x3c
 10577	JB   one_byte_emit_remainder_encodeBetterBlockAsm8B
 10578	CMPL DX, $0x00000100
 10579	JB   two_bytes_emit_remainder_encodeBetterBlockAsm8B
 10580	JB   three_bytes_emit_remainder_encodeBetterBlockAsm8B
 10581
 10582three_bytes_emit_remainder_encodeBetterBlockAsm8B:
 10583	MOVB $0xf4, (AX)
 10584	MOVW DX, 1(AX)
 10585	ADDQ $0x03, AX
 10586	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
 10587
 10588two_bytes_emit_remainder_encodeBetterBlockAsm8B:
 10589	MOVB $0xf0, (AX)
 10590	MOVB DL, 1(AX)
 10591	ADDQ $0x02, AX
 10592	CMPL DX, $0x40
 10593	JB   memmove_emit_remainder_encodeBetterBlockAsm8B
 10594	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
 10595
 10596one_byte_emit_remainder_encodeBetterBlockAsm8B:
 10597	SHLB $0x02, DL
 10598	MOVB DL, (AX)
 10599	ADDQ $0x01, AX
 10600
 10601memmove_emit_remainder_encodeBetterBlockAsm8B:
 10602	LEAQ (AX)(SI*1), DX
 10603	MOVL SI, BX
 10604
 10605	// genMemMoveShort
 10606	CMPQ BX, $0x03
 10607	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2
 10608	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3
 10609	CMPQ BX, $0x08
 10610	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7
 10611	CMPQ BX, $0x10
 10612	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16
 10613	CMPQ BX, $0x20
 10614	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32
 10615	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64
 10616
 10617emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2:
 10618	MOVB (CX), SI
 10619	MOVB -1(CX)(BX*1), CL
 10620	MOVB SI, (AX)
 10621	MOVB CL, -1(AX)(BX*1)
 10622	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 10623
 10624emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3:
 10625	MOVW (CX), SI
 10626	MOVB 2(CX), CL
 10627	MOVW SI, (AX)
 10628	MOVB CL, 2(AX)
 10629	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 10630
 10631emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
 10632	MOVL (CX), SI
 10633	MOVL -4(CX)(BX*1), CX
 10634	MOVL SI, (AX)
 10635	MOVL CX, -4(AX)(BX*1)
 10636	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 10637
 10638emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
 10639	MOVQ (CX), SI
 10640	MOVQ -8(CX)(BX*1), CX
 10641	MOVQ SI, (AX)
 10642	MOVQ CX, -8(AX)(BX*1)
 10643	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 10644
 10645emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
 10646	MOVOU (CX), X0
 10647	MOVOU -16(CX)(BX*1), X1
 10648	MOVOU X0, (AX)
 10649	MOVOU X1, -16(AX)(BX*1)
 10650	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 10651
 10652emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
 10653	MOVOU (CX), X0
 10654	MOVOU 16(CX), X1
 10655	MOVOU -32(CX)(BX*1), X2
 10656	MOVOU -16(CX)(BX*1), X3
 10657	MOVOU X0, (AX)
 10658	MOVOU X1, 16(AX)
 10659	MOVOU X2, -32(AX)(BX*1)
 10660	MOVOU X3, -16(AX)(BX*1)
 10661
 10662memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
 10663	MOVQ DX, AX
 10664	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
 10665
 10666memmove_long_emit_remainder_encodeBetterBlockAsm8B:
 10667	LEAQ (AX)(SI*1), DX
 10668	MOVL SI, BX
 10669
 10670	// genMemMoveLong
 10671	MOVOU (CX), X0
 10672	MOVOU 16(CX), X1
 10673	MOVOU -32(CX)(BX*1), X2
 10674	MOVOU -16(CX)(BX*1), X3
 10675	MOVQ  BX, DI
 10676	SHRQ  $0x05, DI
 10677	MOVQ  AX, SI
 10678	ANDL  $0x0000001f, SI
 10679	MOVQ  $0x00000040, R8
 10680	SUBQ  SI, R8
 10681	DECQ  DI
 10682	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
 10683	LEAQ  -32(CX)(R8*1), SI
 10684	LEAQ  -32(AX)(R8*1), R9
 10685
 10686emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
 10687	MOVOU (SI), X4
 10688	MOVOU 16(SI), X5
 10689	MOVOA X4, (R9)
 10690	MOVOA X5, 16(R9)
 10691	ADDQ  $0x20, R9
 10692	ADDQ  $0x20, SI
 10693	ADDQ  $0x20, R8
 10694	DECQ  DI
 10695	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back
 10696
 10697emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
 10698	MOVOU -32(CX)(R8*1), X4
 10699	MOVOU -16(CX)(R8*1), X5
 10700	MOVOA X4, -32(AX)(R8*1)
 10701	MOVOA X5, -16(AX)(R8*1)
 10702	ADDQ  $0x20, R8
 10703	CMPQ  BX, R8
 10704	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
 10705	MOVOU X0, (AX)
 10706	MOVOU X1, 16(AX)
 10707	MOVOU X2, -32(AX)(BX*1)
 10708	MOVOU X3, -16(AX)(BX*1)
 10709	MOVQ  DX, AX
 10710
 10711emit_literal_done_emit_remainder_encodeBetterBlockAsm8B:
 10712	MOVQ dst_base+0(FP), CX
 10713	SUBQ CX, AX
 10714	MOVQ AX, ret+48(FP)
 10715	RET
 10716
 10717// func encodeSnappyBlockAsm(dst []byte, src []byte) int
 10718// Requires: BMI, SSE2
 10719TEXT ·encodeSnappyBlockAsm(SB), $65560-56
 10720	MOVQ dst_base+0(FP), AX
 10721	MOVQ $0x00000200, CX
 10722	LEAQ 24(SP), DX
 10723	PXOR X0, X0
 10724
 10725zero_loop_encodeSnappyBlockAsm:
 10726	MOVOU X0, (DX)
 10727	MOVOU X0, 16(DX)
 10728	MOVOU X0, 32(DX)
 10729	MOVOU X0, 48(DX)
 10730	MOVOU X0, 64(DX)
 10731	MOVOU X0, 80(DX)
 10732	MOVOU X0, 96(DX)
 10733	MOVOU X0, 112(DX)
 10734	ADDQ  $0x80, DX
 10735	DECQ  CX
 10736	JNZ   zero_loop_encodeSnappyBlockAsm
 10737	MOVL  $0x00000000, 12(SP)
 10738	MOVQ  src_len+32(FP), CX
 10739	LEAQ  -9(CX), DX
 10740	LEAQ  -8(CX), BX
 10741	MOVL  BX, 8(SP)
 10742	SHRQ  $0x05, CX
 10743	SUBL  CX, DX
 10744	LEAQ  (AX)(DX*1), DX
 10745	MOVQ  DX, (SP)
 10746	MOVL  $0x00000001, CX
 10747	MOVL  CX, 16(SP)
 10748	MOVQ  src_base+24(FP), DX
 10749
 10750search_loop_encodeSnappyBlockAsm:
 10751	MOVL  CX, BX
 10752	SUBL  12(SP), BX
 10753	SHRL  $0x06, BX
 10754	LEAL  4(CX)(BX*1), BX
 10755	CMPL  BX, 8(SP)
 10756	JAE   emit_remainder_encodeSnappyBlockAsm
 10757	MOVQ  (DX)(CX*1), SI
 10758	MOVL  BX, 20(SP)
 10759	MOVQ  $0x0000cf1bbcdcbf9b, R8
 10760	MOVQ  SI, R9
 10761	MOVQ  SI, R10
 10762	SHRQ  $0x08, R10
 10763	SHLQ  $0x10, R9
 10764	IMULQ R8, R9
 10765	SHRQ  $0x32, R9
 10766	SHLQ  $0x10, R10
 10767	IMULQ R8, R10
 10768	SHRQ  $0x32, R10
 10769	MOVL  24(SP)(R9*4), BX
 10770	MOVL  24(SP)(R10*4), DI
 10771	MOVL  CX, 24(SP)(R9*4)
 10772	LEAL  1(CX), R9
 10773	MOVL  R9, 24(SP)(R10*4)
 10774	MOVQ  SI, R9
 10775	SHRQ  $0x10, R9
 10776	SHLQ  $0x10, R9
 10777	IMULQ R8, R9
 10778	SHRQ  $0x32, R9
 10779	MOVL  CX, R8
 10780	SUBL  16(SP), R8
 10781	MOVL  1(DX)(R8*1), R10
 10782	MOVQ  SI, R8
 10783	SHRQ  $0x08, R8
 10784	CMPL  R8, R10
 10785	JNE   no_repeat_found_encodeSnappyBlockAsm
 10786	LEAL  1(CX), SI
 10787	MOVL  12(SP), BX
 10788	MOVL  SI, DI
 10789	SUBL  16(SP), DI
 10790	JZ    repeat_extend_back_end_encodeSnappyBlockAsm
 10791
 10792repeat_extend_back_loop_encodeSnappyBlockAsm:
 10793	CMPL SI, BX
 10794	JBE  repeat_extend_back_end_encodeSnappyBlockAsm
 10795	MOVB -1(DX)(DI*1), R8
 10796	MOVB -1(DX)(SI*1), R9
 10797	CMPB R8, R9
 10798	JNE  repeat_extend_back_end_encodeSnappyBlockAsm
 10799	LEAL -1(SI), SI
 10800	DECL DI
 10801	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm
 10802
 10803repeat_extend_back_end_encodeSnappyBlockAsm:
 10804	MOVL SI, BX
 10805	SUBL 12(SP), BX
 10806	LEAQ 5(AX)(BX*1), BX
 10807	CMPQ BX, (SP)
 10808	JB   repeat_dst_size_check_encodeSnappyBlockAsm
 10809	MOVQ $0x00000000, ret+48(FP)
 10810	RET
 10811
 10812repeat_dst_size_check_encodeSnappyBlockAsm:
 10813	MOVL 12(SP), BX
 10814	CMPL BX, SI
 10815	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
 10816	MOVL SI, DI
 10817	MOVL SI, 12(SP)
 10818	LEAQ (DX)(BX*1), R8
 10819	SUBL BX, DI
 10820	LEAL -1(DI), BX
 10821	CMPL BX, $0x3c
 10822	JB   one_byte_repeat_emit_encodeSnappyBlockAsm
 10823	CMPL BX, $0x00000100
 10824	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm
 10825	CMPL BX, $0x00010000
 10826	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm
 10827	CMPL BX, $0x01000000
 10828	JB   four_bytes_repeat_emit_encodeSnappyBlockAsm
 10829	MOVB $0xfc, (AX)
 10830	MOVL BX, 1(AX)
 10831	ADDQ $0x05, AX
 10832	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 10833
 10834four_bytes_repeat_emit_encodeSnappyBlockAsm:
 10835	MOVL BX, R9
 10836	SHRL $0x10, R9
 10837	MOVB $0xf8, (AX)
 10838	MOVW BX, 1(AX)
 10839	MOVB R9, 3(AX)
 10840	ADDQ $0x04, AX
 10841	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 10842
 10843three_bytes_repeat_emit_encodeSnappyBlockAsm:
 10844	MOVB $0xf4, (AX)
 10845	MOVW BX, 1(AX)
 10846	ADDQ $0x03, AX
 10847	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 10848
 10849two_bytes_repeat_emit_encodeSnappyBlockAsm:
 10850	MOVB $0xf0, (AX)
 10851	MOVB BL, 1(AX)
 10852	ADDQ $0x02, AX
 10853	CMPL BX, $0x40
 10854	JB   memmove_repeat_emit_encodeSnappyBlockAsm
 10855	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 10856
 10857one_byte_repeat_emit_encodeSnappyBlockAsm:
 10858	SHLB $0x02, BL
 10859	MOVB BL, (AX)
 10860	ADDQ $0x01, AX
 10861
 10862memmove_repeat_emit_encodeSnappyBlockAsm:
 10863	LEAQ (AX)(DI*1), BX
 10864
 10865	// genMemMoveShort
 10866	CMPQ DI, $0x08
 10867	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
 10868	CMPQ DI, $0x10
 10869	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
 10870	CMPQ DI, $0x20
 10871	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
 10872	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
 10873
 10874emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
 10875	MOVQ (R8), R9
 10876	MOVQ R9, (AX)
 10877	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
 10878
 10879emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
 10880	MOVQ (R8), R9
 10881	MOVQ -8(R8)(DI*1), R8
 10882	MOVQ R9, (AX)
 10883	MOVQ R8, -8(AX)(DI*1)
 10884	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
 10885
 10886emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
 10887	MOVOU (R8), X0
 10888	MOVOU -16(R8)(DI*1), X1
 10889	MOVOU X0, (AX)
 10890	MOVOU X1, -16(AX)(DI*1)
 10891	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
 10892
 10893emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
 10894	MOVOU (R8), X0
 10895	MOVOU 16(R8), X1
 10896	MOVOU -32(R8)(DI*1), X2
 10897	MOVOU -16(R8)(DI*1), X3
 10898	MOVOU X0, (AX)
 10899	MOVOU X1, 16(AX)
 10900	MOVOU X2, -32(AX)(DI*1)
 10901	MOVOU X3, -16(AX)(DI*1)
 10902
 10903memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
 10904	MOVQ BX, AX
 10905	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
 10906
 10907memmove_long_repeat_emit_encodeSnappyBlockAsm:
 10908	LEAQ (AX)(DI*1), BX
 10909
 10910	// genMemMoveLong
 10911	MOVOU (R8), X0
 10912	MOVOU 16(R8), X1
 10913	MOVOU -32(R8)(DI*1), X2
 10914	MOVOU -16(R8)(DI*1), X3
 10915	MOVQ  DI, R10
 10916	SHRQ  $0x05, R10
 10917	MOVQ  AX, R9
 10918	ANDL  $0x0000001f, R9
 10919	MOVQ  $0x00000040, R11
 10920	SUBQ  R9, R11
 10921	DECQ  R10
 10922	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 10923	LEAQ  -32(R8)(R11*1), R9
 10924	LEAQ  -32(AX)(R11*1), R12
 10925
 10926emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
 10927	MOVOU (R9), X4
 10928	MOVOU 16(R9), X5
 10929	MOVOA X4, (R12)
 10930	MOVOA X5, 16(R12)
 10931	ADDQ  $0x20, R12
 10932	ADDQ  $0x20, R9
 10933	ADDQ  $0x20, R11
 10934	DECQ  R10
 10935	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back
 10936
 10937emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
 10938	MOVOU -32(R8)(R11*1), X4
 10939	MOVOU -16(R8)(R11*1), X5
 10940	MOVOA X4, -32(AX)(R11*1)
 10941	MOVOA X5, -16(AX)(R11*1)
 10942	ADDQ  $0x20, R11
 10943	CMPQ  DI, R11
 10944	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 10945	MOVOU X0, (AX)
 10946	MOVOU X1, 16(AX)
 10947	MOVOU X2, -32(AX)(DI*1)
 10948	MOVOU X3, -16(AX)(DI*1)
 10949	MOVQ  BX, AX
 10950
 10951emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
 10952	ADDL $0x05, CX
 10953	MOVL CX, BX
 10954	SUBL 16(SP), BX
 10955	MOVQ src_len+32(FP), DI
 10956	SUBL CX, DI
 10957	LEAQ (DX)(CX*1), R8
 10958	LEAQ (DX)(BX*1), BX
 10959
 10960	// matchLen
 10961	XORL R10, R10
 10962
 10963matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm:
 10964	CMPL DI, $0x10
 10965	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm
 10966	MOVQ (R8)(R10*1), R9
 10967	MOVQ 8(R8)(R10*1), R11
 10968	XORQ (BX)(R10*1), R9
 10969	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
 10970	XORQ 8(BX)(R10*1), R11
 10971	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm
 10972	LEAL -16(DI), DI
 10973	LEAL 16(R10), R10
 10974	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm
 10975
 10976matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm:
 10977#ifdef GOAMD64_v3
 10978	TZCNTQ R11, R11
 10979
 10980#else
 10981	BSFQ R11, R11
 10982
 10983#endif
 10984	SARQ $0x03, R11
 10985	LEAL 8(R10)(R11*1), R10
 10986	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm
 10987
 10988matchlen_match8_repeat_extend_encodeSnappyBlockAsm:
 10989	CMPL DI, $0x08
 10990	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm
 10991	MOVQ (R8)(R10*1), R9
 10992	XORQ (BX)(R10*1), R9
 10993	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
 10994	LEAL -8(DI), DI
 10995	LEAL 8(R10), R10
 10996	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm
 10997
 10998matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm:
 10999#ifdef GOAMD64_v3
 11000	TZCNTQ R9, R9
 11001
 11002#else
 11003	BSFQ R9, R9
 11004
 11005#endif
 11006	SARQ $0x03, R9
 11007	LEAL (R10)(R9*1), R10
 11008	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm
 11009
 11010matchlen_match4_repeat_extend_encodeSnappyBlockAsm:
 11011	CMPL DI, $0x04
 11012	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm
 11013	MOVL (R8)(R10*1), R9
 11014	CMPL (BX)(R10*1), R9
 11015	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm
 11016	LEAL -4(DI), DI
 11017	LEAL 4(R10), R10
 11018
 11019matchlen_match2_repeat_extend_encodeSnappyBlockAsm:
 11020	CMPL DI, $0x01
 11021	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm
 11022	JB   repeat_extend_forward_end_encodeSnappyBlockAsm
 11023	MOVW (R8)(R10*1), R9
 11024	CMPW (BX)(R10*1), R9
 11025	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm
 11026	LEAL 2(R10), R10
 11027	SUBL $0x02, DI
 11028	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm
 11029
 11030matchlen_match1_repeat_extend_encodeSnappyBlockAsm:
 11031	MOVB (R8)(R10*1), R9
 11032	CMPB (BX)(R10*1), R9
 11033	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm
 11034	LEAL 1(R10), R10
 11035
 11036repeat_extend_forward_end_encodeSnappyBlockAsm:
 11037	ADDL R10, CX
 11038	MOVL CX, BX
 11039	SUBL SI, BX
 11040	MOVL 16(SP), SI
 11041
 11042	// emitCopy
 11043	CMPL SI, $0x00010000
 11044	JB   two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
 11045
 11046four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
 11047	CMPL BX, $0x40
 11048	JBE  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
 11049	MOVB $0xff, (AX)
 11050	MOVL SI, 1(AX)
 11051	LEAL -64(BX), BX
 11052	ADDQ $0x05, AX
 11053	CMPL BX, $0x04
 11054	JB   four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
 11055	JMP  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
 11056
 11057four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
 11058	TESTL BX, BX
 11059	JZ    repeat_end_emit_encodeSnappyBlockAsm
 11060	XORL  DI, DI
 11061	LEAL  -1(DI)(BX*4), BX
 11062	MOVB  BL, (AX)
 11063	MOVL  SI, 1(AX)
 11064	ADDQ  $0x05, AX
 11065	JMP   repeat_end_emit_encodeSnappyBlockAsm
 11066
 11067two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
 11068	CMPL BX, $0x40
 11069	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
 11070	MOVB $0xee, (AX)
 11071	MOVW SI, 1(AX)
 11072	LEAL -60(BX), BX
 11073	ADDQ $0x03, AX
 11074	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
 11075
 11076two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
 11077	MOVL BX, DI
 11078	SHLL $0x02, DI
 11079	CMPL BX, $0x0c
 11080	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
 11081	CMPL SI, $0x00000800
 11082	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
 11083	LEAL -15(DI), DI
 11084	MOVB SI, 1(AX)
 11085	SHRL $0x08, SI
 11086	SHLL $0x05, SI
 11087	ORL  SI, DI
 11088	MOVB DI, (AX)
 11089	ADDQ $0x02, AX
 11090	JMP  repeat_end_emit_encodeSnappyBlockAsm
 11091
 11092emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
 11093	LEAL -2(DI), DI
 11094	MOVB DI, (AX)
 11095	MOVW SI, 1(AX)
 11096	ADDQ $0x03, AX
 11097
 11098repeat_end_emit_encodeSnappyBlockAsm:
 11099	MOVL CX, 12(SP)
 11100	JMP  search_loop_encodeSnappyBlockAsm
 11101
 11102no_repeat_found_encodeSnappyBlockAsm:
 11103	CMPL (DX)(BX*1), SI
 11104	JEQ  candidate_match_encodeSnappyBlockAsm
 11105	SHRQ $0x08, SI
 11106	MOVL 24(SP)(R9*4), BX
 11107	LEAL 2(CX), R8
 11108	CMPL (DX)(DI*1), SI
 11109	JEQ  candidate2_match_encodeSnappyBlockAsm
 11110	MOVL R8, 24(SP)(R9*4)
 11111	SHRQ $0x08, SI
 11112	CMPL (DX)(BX*1), SI
 11113	JEQ  candidate3_match_encodeSnappyBlockAsm
 11114	MOVL 20(SP), CX
 11115	JMP  search_loop_encodeSnappyBlockAsm
 11116
 11117candidate3_match_encodeSnappyBlockAsm:
 11118	ADDL $0x02, CX
 11119	JMP  candidate_match_encodeSnappyBlockAsm
 11120
 11121candidate2_match_encodeSnappyBlockAsm:
 11122	MOVL R8, 24(SP)(R9*4)
 11123	INCL CX
 11124	MOVL DI, BX
 11125
 11126candidate_match_encodeSnappyBlockAsm:
 11127	MOVL  12(SP), SI
 11128	TESTL BX, BX
 11129	JZ    match_extend_back_end_encodeSnappyBlockAsm
 11130
 11131match_extend_back_loop_encodeSnappyBlockAsm:
 11132	CMPL CX, SI
 11133	JBE  match_extend_back_end_encodeSnappyBlockAsm
 11134	MOVB -1(DX)(BX*1), DI
 11135	MOVB -1(DX)(CX*1), R8
 11136	CMPB DI, R8
 11137	JNE  match_extend_back_end_encodeSnappyBlockAsm
 11138	LEAL -1(CX), CX
 11139	DECL BX
 11140	JZ   match_extend_back_end_encodeSnappyBlockAsm
 11141	JMP  match_extend_back_loop_encodeSnappyBlockAsm
 11142
 11143match_extend_back_end_encodeSnappyBlockAsm:
 11144	MOVL CX, SI
 11145	SUBL 12(SP), SI
 11146	LEAQ 5(AX)(SI*1), SI
 11147	CMPQ SI, (SP)
 11148	JB   match_dst_size_check_encodeSnappyBlockAsm
 11149	MOVQ $0x00000000, ret+48(FP)
 11150	RET
 11151
 11152match_dst_size_check_encodeSnappyBlockAsm:
 11153	MOVL CX, SI
 11154	MOVL 12(SP), DI
 11155	CMPL DI, SI
 11156	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm
 11157	MOVL SI, R8
 11158	MOVL SI, 12(SP)
 11159	LEAQ (DX)(DI*1), SI
 11160	SUBL DI, R8
 11161	LEAL -1(R8), DI
 11162	CMPL DI, $0x3c
 11163	JB   one_byte_match_emit_encodeSnappyBlockAsm
 11164	CMPL DI, $0x00000100
 11165	JB   two_bytes_match_emit_encodeSnappyBlockAsm
 11166	CMPL DI, $0x00010000
 11167	JB   three_bytes_match_emit_encodeSnappyBlockAsm
 11168	CMPL DI, $0x01000000
 11169	JB   four_bytes_match_emit_encodeSnappyBlockAsm
 11170	MOVB $0xfc, (AX)
 11171	MOVL DI, 1(AX)
 11172	ADDQ $0x05, AX
 11173	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 11174
 11175four_bytes_match_emit_encodeSnappyBlockAsm:
 11176	MOVL DI, R9
 11177	SHRL $0x10, R9
 11178	MOVB $0xf8, (AX)
 11179	MOVW DI, 1(AX)
 11180	MOVB R9, 3(AX)
 11181	ADDQ $0x04, AX
 11182	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 11183
 11184three_bytes_match_emit_encodeSnappyBlockAsm:
 11185	MOVB $0xf4, (AX)
 11186	MOVW DI, 1(AX)
 11187	ADDQ $0x03, AX
 11188	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 11189
 11190two_bytes_match_emit_encodeSnappyBlockAsm:
 11191	MOVB $0xf0, (AX)
 11192	MOVB DI, 1(AX)
 11193	ADDQ $0x02, AX
 11194	CMPL DI, $0x40
 11195	JB   memmove_match_emit_encodeSnappyBlockAsm
 11196	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 11197
 11198one_byte_match_emit_encodeSnappyBlockAsm:
 11199	SHLB $0x02, DI
 11200	MOVB DI, (AX)
 11201	ADDQ $0x01, AX
 11202
 11203memmove_match_emit_encodeSnappyBlockAsm:
 11204	LEAQ (AX)(R8*1), DI
 11205
 11206	// genMemMoveShort
 11207	CMPQ R8, $0x08
 11208	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
 11209	CMPQ R8, $0x10
 11210	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
 11211	CMPQ R8, $0x20
 11212	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
 11213	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
 11214
 11215emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
 11216	MOVQ (SI), R9
 11217	MOVQ R9, (AX)
 11218	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
 11219
 11220emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
 11221	MOVQ (SI), R9
 11222	MOVQ -8(SI)(R8*1), SI
 11223	MOVQ R9, (AX)
 11224	MOVQ SI, -8(AX)(R8*1)
 11225	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
 11226
 11227emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
 11228	MOVOU (SI), X0
 11229	MOVOU -16(SI)(R8*1), X1
 11230	MOVOU X0, (AX)
 11231	MOVOU X1, -16(AX)(R8*1)
 11232	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm
 11233
 11234emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
 11235	MOVOU (SI), X0
 11236	MOVOU 16(SI), X1
 11237	MOVOU -32(SI)(R8*1), X2
 11238	MOVOU -16(SI)(R8*1), X3
 11239	MOVOU X0, (AX)
 11240	MOVOU X1, 16(AX)
 11241	MOVOU X2, -32(AX)(R8*1)
 11242	MOVOU X3, -16(AX)(R8*1)
 11243
 11244memmove_end_copy_match_emit_encodeSnappyBlockAsm:
 11245	MOVQ DI, AX
 11246	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm
 11247
 11248memmove_long_match_emit_encodeSnappyBlockAsm:
 11249	LEAQ (AX)(R8*1), DI
 11250
 11251	// genMemMoveLong
 11252	MOVOU (SI), X0
 11253	MOVOU 16(SI), X1
 11254	MOVOU -32(SI)(R8*1), X2
 11255	MOVOU -16(SI)(R8*1), X3
 11256	MOVQ  R8, R10
 11257	SHRQ  $0x05, R10
 11258	MOVQ  AX, R9
 11259	ANDL  $0x0000001f, R9
 11260	MOVQ  $0x00000040, R11
 11261	SUBQ  R9, R11
 11262	DECQ  R10
 11263	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 11264	LEAQ  -32(SI)(R11*1), R9
 11265	LEAQ  -32(AX)(R11*1), R12
 11266
 11267emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
 11268	MOVOU (R9), X4
 11269	MOVOU 16(R9), X5
 11270	MOVOA X4, (R12)
 11271	MOVOA X5, 16(R12)
 11272	ADDQ  $0x20, R12
 11273	ADDQ  $0x20, R9
 11274	ADDQ  $0x20, R11
 11275	DECQ  R10
 11276	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back
 11277
 11278emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
 11279	MOVOU -32(SI)(R11*1), X4
 11280	MOVOU -16(SI)(R11*1), X5
 11281	MOVOA X4, -32(AX)(R11*1)
 11282	MOVOA X5, -16(AX)(R11*1)
 11283	ADDQ  $0x20, R11
 11284	CMPQ  R8, R11
 11285	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 11286	MOVOU X0, (AX)
 11287	MOVOU X1, 16(AX)
 11288	MOVOU X2, -32(AX)(R8*1)
 11289	MOVOU X3, -16(AX)(R8*1)
 11290	MOVQ  DI, AX
 11291
 11292emit_literal_done_match_emit_encodeSnappyBlockAsm:
 11293match_nolit_loop_encodeSnappyBlockAsm:
 11294	MOVL CX, SI
 11295	SUBL BX, SI
 11296	MOVL SI, 16(SP)
 11297	ADDL $0x04, CX
 11298	ADDL $0x04, BX
 11299	MOVQ src_len+32(FP), SI
 11300	SUBL CX, SI
 11301	LEAQ (DX)(CX*1), DI
 11302	LEAQ (DX)(BX*1), BX
 11303
 11304	// matchLen
 11305	XORL R9, R9
 11306
 11307matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm:
 11308	CMPL SI, $0x10
 11309	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm
 11310	MOVQ (DI)(R9*1), R8
 11311	MOVQ 8(DI)(R9*1), R10
 11312	XORQ (BX)(R9*1), R8
 11313	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
 11314	XORQ 8(BX)(R9*1), R10
 11315	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm
 11316	LEAL -16(SI), SI
 11317	LEAL 16(R9), R9
 11318	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm
 11319
 11320matchlen_bsf_16match_nolit_encodeSnappyBlockAsm:
 11321#ifdef GOAMD64_v3
 11322	TZCNTQ R10, R10
 11323
 11324#else
 11325	BSFQ R10, R10
 11326
 11327#endif
 11328	SARQ $0x03, R10
 11329	LEAL 8(R9)(R10*1), R9
 11330	JMP  match_nolit_end_encodeSnappyBlockAsm
 11331
 11332matchlen_match8_match_nolit_encodeSnappyBlockAsm:
 11333	CMPL SI, $0x08
 11334	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm
 11335	MOVQ (DI)(R9*1), R8
 11336	XORQ (BX)(R9*1), R8
 11337	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
 11338	LEAL -8(SI), SI
 11339	LEAL 8(R9), R9
 11340	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm
 11341
 11342matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm:
 11343#ifdef GOAMD64_v3
 11344	TZCNTQ R8, R8
 11345
 11346#else
 11347	BSFQ R8, R8
 11348
 11349#endif
 11350	SARQ $0x03, R8
 11351	LEAL (R9)(R8*1), R9
 11352	JMP  match_nolit_end_encodeSnappyBlockAsm
 11353
 11354matchlen_match4_match_nolit_encodeSnappyBlockAsm:
 11355	CMPL SI, $0x04
 11356	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm
 11357	MOVL (DI)(R9*1), R8
 11358	CMPL (BX)(R9*1), R8
 11359	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm
 11360	LEAL -4(SI), SI
 11361	LEAL 4(R9), R9
 11362
 11363matchlen_match2_match_nolit_encodeSnappyBlockAsm:
 11364	CMPL SI, $0x01
 11365	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm
 11366	JB   match_nolit_end_encodeSnappyBlockAsm
 11367	MOVW (DI)(R9*1), R8
 11368	CMPW (BX)(R9*1), R8
 11369	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm
 11370	LEAL 2(R9), R9
 11371	SUBL $0x02, SI
 11372	JZ   match_nolit_end_encodeSnappyBlockAsm
 11373
 11374matchlen_match1_match_nolit_encodeSnappyBlockAsm:
 11375	MOVB (DI)(R9*1), R8
 11376	CMPB (BX)(R9*1), R8
 11377	JNE  match_nolit_end_encodeSnappyBlockAsm
 11378	LEAL 1(R9), R9
 11379
 11380match_nolit_end_encodeSnappyBlockAsm:
 11381	ADDL R9, CX
 11382	MOVL 16(SP), BX
 11383	ADDL $0x04, R9
 11384	MOVL CX, 12(SP)
 11385
 11386	// emitCopy
 11387	CMPL BX, $0x00010000
 11388	JB   two_byte_offset_match_nolit_encodeSnappyBlockAsm
 11389
 11390four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
 11391	CMPL R9, $0x40
 11392	JBE  four_bytes_remain_match_nolit_encodeSnappyBlockAsm
 11393	MOVB $0xff, (AX)
 11394	MOVL BX, 1(AX)
 11395	LEAL -64(R9), R9
 11396	ADDQ $0x05, AX
 11397	CMPL R9, $0x04
 11398	JB   four_bytes_remain_match_nolit_encodeSnappyBlockAsm
 11399	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
 11400
 11401four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
 11402	TESTL R9, R9
 11403	JZ    match_nolit_emitcopy_end_encodeSnappyBlockAsm
 11404	XORL  SI, SI
 11405	LEAL  -1(SI)(R9*4), R9
 11406	MOVB  R9, (AX)
 11407	MOVL  BX, 1(AX)
 11408	ADDQ  $0x05, AX
 11409	JMP   match_nolit_emitcopy_end_encodeSnappyBlockAsm
 11410
 11411two_byte_offset_match_nolit_encodeSnappyBlockAsm:
 11412	CMPL R9, $0x40
 11413	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
 11414	MOVB $0xee, (AX)
 11415	MOVW BX, 1(AX)
 11416	LEAL -60(R9), R9
 11417	ADDQ $0x03, AX
 11418	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm
 11419
 11420two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
 11421	MOVL R9, SI
 11422	SHLL $0x02, SI
 11423	CMPL R9, $0x0c
 11424	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
 11425	CMPL BX, $0x00000800
 11426	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
 11427	LEAL -15(SI), SI
 11428	MOVB BL, 1(AX)
 11429	SHRL $0x08, BX
 11430	SHLL $0x05, BX
 11431	ORL  BX, SI
 11432	MOVB SI, (AX)
 11433	ADDQ $0x02, AX
 11434	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm
 11435
 11436emit_copy_three_match_nolit_encodeSnappyBlockAsm:
 11437	LEAL -2(SI), SI
 11438	MOVB SI, (AX)
 11439	MOVW BX, 1(AX)
 11440	ADDQ $0x03, AX
 11441
 11442match_nolit_emitcopy_end_encodeSnappyBlockAsm:
 11443	CMPL CX, 8(SP)
 11444	JAE  emit_remainder_encodeSnappyBlockAsm
 11445	MOVQ -2(DX)(CX*1), SI
 11446	CMPQ AX, (SP)
 11447	JB   match_nolit_dst_ok_encodeSnappyBlockAsm
 11448	MOVQ $0x00000000, ret+48(FP)
 11449	RET
 11450
 11451match_nolit_dst_ok_encodeSnappyBlockAsm:
 11452	MOVQ  $0x0000cf1bbcdcbf9b, R8
 11453	MOVQ  SI, DI
 11454	SHRQ  $0x10, SI
 11455	MOVQ  SI, BX
 11456	SHLQ  $0x10, DI
 11457	IMULQ R8, DI
 11458	SHRQ  $0x32, DI
 11459	SHLQ  $0x10, BX
 11460	IMULQ R8, BX
 11461	SHRQ  $0x32, BX
 11462	LEAL  -2(CX), R8
 11463	LEAQ  24(SP)(BX*4), R9
 11464	MOVL  (R9), BX
 11465	MOVL  R8, 24(SP)(DI*4)
 11466	MOVL  CX, (R9)
 11467	CMPL  (DX)(BX*1), SI
 11468	JEQ   match_nolit_loop_encodeSnappyBlockAsm
 11469	INCL  CX
 11470	JMP   search_loop_encodeSnappyBlockAsm
 11471
 11472emit_remainder_encodeSnappyBlockAsm:
 11473	MOVQ src_len+32(FP), CX
 11474	SUBL 12(SP), CX
 11475	LEAQ 5(AX)(CX*1), CX
 11476	CMPQ CX, (SP)
 11477	JB   emit_remainder_ok_encodeSnappyBlockAsm
 11478	MOVQ $0x00000000, ret+48(FP)
 11479	RET
 11480
 11481emit_remainder_ok_encodeSnappyBlockAsm:
 11482	MOVQ src_len+32(FP), CX
 11483	MOVL 12(SP), BX
 11484	CMPL BX, CX
 11485	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
 11486	MOVL CX, SI
 11487	MOVL CX, 12(SP)
 11488	LEAQ (DX)(BX*1), CX
 11489	SUBL BX, SI
 11490	LEAL -1(SI), DX
 11491	CMPL DX, $0x3c
 11492	JB   one_byte_emit_remainder_encodeSnappyBlockAsm
 11493	CMPL DX, $0x00000100
 11494	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm
 11495	CMPL DX, $0x00010000
 11496	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm
 11497	CMPL DX, $0x01000000
 11498	JB   four_bytes_emit_remainder_encodeSnappyBlockAsm
 11499	MOVB $0xfc, (AX)
 11500	MOVL DX, 1(AX)
 11501	ADDQ $0x05, AX
 11502	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 11503
 11504four_bytes_emit_remainder_encodeSnappyBlockAsm:
 11505	MOVL DX, BX
 11506	SHRL $0x10, BX
 11507	MOVB $0xf8, (AX)
 11508	MOVW DX, 1(AX)
 11509	MOVB BL, 3(AX)
 11510	ADDQ $0x04, AX
 11511	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 11512
 11513three_bytes_emit_remainder_encodeSnappyBlockAsm:
 11514	MOVB $0xf4, (AX)
 11515	MOVW DX, 1(AX)
 11516	ADDQ $0x03, AX
 11517	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 11518
 11519two_bytes_emit_remainder_encodeSnappyBlockAsm:
 11520	MOVB $0xf0, (AX)
 11521	MOVB DL, 1(AX)
 11522	ADDQ $0x02, AX
 11523	CMPL DX, $0x40
 11524	JB   memmove_emit_remainder_encodeSnappyBlockAsm
 11525	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 11526
 11527one_byte_emit_remainder_encodeSnappyBlockAsm:
 11528	SHLB $0x02, DL
 11529	MOVB DL, (AX)
 11530	ADDQ $0x01, AX
 11531
 11532memmove_emit_remainder_encodeSnappyBlockAsm:
 11533	LEAQ (AX)(SI*1), DX
 11534	MOVL SI, BX
 11535
 11536	// genMemMoveShort
 11537	CMPQ BX, $0x03
 11538	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2
 11539	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3
 11540	CMPQ BX, $0x08
 11541	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7
 11542	CMPQ BX, $0x10
 11543	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16
 11544	CMPQ BX, $0x20
 11545	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
 11546	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
 11547
 11548emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
 11549	MOVB (CX), SI
 11550	MOVB -1(CX)(BX*1), CL
 11551	MOVB SI, (AX)
 11552	MOVB CL, -1(AX)(BX*1)
 11553	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 11554
 11555emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
 11556	MOVW (CX), SI
 11557	MOVB 2(CX), CL
 11558	MOVW SI, (AX)
 11559	MOVB CL, 2(AX)
 11560	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 11561
 11562emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7:
 11563	MOVL (CX), SI
 11564	MOVL -4(CX)(BX*1), CX
 11565	MOVL SI, (AX)
 11566	MOVL CX, -4(AX)(BX*1)
 11567	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 11568
 11569emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
 11570	MOVQ (CX), SI
 11571	MOVQ -8(CX)(BX*1), CX
 11572	MOVQ SI, (AX)
 11573	MOVQ CX, -8(AX)(BX*1)
 11574	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 11575
 11576emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
 11577	MOVOU (CX), X0
 11578	MOVOU -16(CX)(BX*1), X1
 11579	MOVOU X0, (AX)
 11580	MOVOU X1, -16(AX)(BX*1)
 11581	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 11582
 11583emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
 11584	MOVOU (CX), X0
 11585	MOVOU 16(CX), X1
 11586	MOVOU -32(CX)(BX*1), X2
 11587	MOVOU -16(CX)(BX*1), X3
 11588	MOVOU X0, (AX)
 11589	MOVOU X1, 16(AX)
 11590	MOVOU X2, -32(AX)(BX*1)
 11591	MOVOU X3, -16(AX)(BX*1)
 11592
 11593memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
 11594	MOVQ DX, AX
 11595	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
 11596
 11597memmove_long_emit_remainder_encodeSnappyBlockAsm:
 11598	LEAQ (AX)(SI*1), DX
 11599	MOVL SI, BX
 11600
 11601	// genMemMoveLong
 11602	MOVOU (CX), X0
 11603	MOVOU 16(CX), X1
 11604	MOVOU -32(CX)(BX*1), X2
 11605	MOVOU -16(CX)(BX*1), X3
 11606	MOVQ  BX, DI
 11607	SHRQ  $0x05, DI
 11608	MOVQ  AX, SI
 11609	ANDL  $0x0000001f, SI
 11610	MOVQ  $0x00000040, R8
 11611	SUBQ  SI, R8
 11612	DECQ  DI
 11613	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 11614	LEAQ  -32(CX)(R8*1), SI
 11615	LEAQ  -32(AX)(R8*1), R9
 11616
 11617emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
 11618	MOVOU (SI), X4
 11619	MOVOU 16(SI), X5
 11620	MOVOA X4, (R9)
 11621	MOVOA X5, 16(R9)
 11622	ADDQ  $0x20, R9
 11623	ADDQ  $0x20, SI
 11624	ADDQ  $0x20, R8
 11625	DECQ  DI
 11626	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back
 11627
 11628emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
 11629	MOVOU -32(CX)(R8*1), X4
 11630	MOVOU -16(CX)(R8*1), X5
 11631	MOVOA X4, -32(AX)(R8*1)
 11632	MOVOA X5, -16(AX)(R8*1)
 11633	ADDQ  $0x20, R8
 11634	CMPQ  BX, R8
 11635	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 11636	MOVOU X0, (AX)
 11637	MOVOU X1, 16(AX)
 11638	MOVOU X2, -32(AX)(BX*1)
 11639	MOVOU X3, -16(AX)(BX*1)
 11640	MOVQ  DX, AX
 11641
 11642emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
 11643	MOVQ dst_base+0(FP), CX
 11644	SUBQ CX, AX
 11645	MOVQ AX, ret+48(FP)
 11646	RET
 11647
 11648// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
 11649// Requires: BMI, SSE2
 11650TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56
 11651	MOVQ dst_base+0(FP), AX
 11652	MOVQ $0x00000200, CX
 11653	LEAQ 24(SP), DX
 11654	PXOR X0, X0
 11655
 11656zero_loop_encodeSnappyBlockAsm64K:
 11657	MOVOU X0, (DX)
 11658	MOVOU X0, 16(DX)
 11659	MOVOU X0, 32(DX)
 11660	MOVOU X0, 48(DX)
 11661	MOVOU X0, 64(DX)
 11662	MOVOU X0, 80(DX)
 11663	MOVOU X0, 96(DX)
 11664	MOVOU X0, 112(DX)
 11665	ADDQ  $0x80, DX
 11666	DECQ  CX
 11667	JNZ   zero_loop_encodeSnappyBlockAsm64K
 11668	MOVL  $0x00000000, 12(SP)
 11669	MOVQ  src_len+32(FP), CX
 11670	LEAQ  -9(CX), DX
 11671	LEAQ  -8(CX), BX
 11672	MOVL  BX, 8(SP)
 11673	SHRQ  $0x05, CX
 11674	SUBL  CX, DX
 11675	LEAQ  (AX)(DX*1), DX
 11676	MOVQ  DX, (SP)
 11677	MOVL  $0x00000001, CX
 11678	MOVL  CX, 16(SP)
 11679	MOVQ  src_base+24(FP), DX
 11680
 11681search_loop_encodeSnappyBlockAsm64K:
 11682	MOVL  CX, BX
 11683	SUBL  12(SP), BX
 11684	SHRL  $0x06, BX
 11685	LEAL  4(CX)(BX*1), BX
 11686	CMPL  BX, 8(SP)
 11687	JAE   emit_remainder_encodeSnappyBlockAsm64K
 11688	MOVQ  (DX)(CX*1), SI
 11689	MOVL  BX, 20(SP)
 11690	MOVQ  $0x0000cf1bbcdcbf9b, R8
 11691	MOVQ  SI, R9
 11692	MOVQ  SI, R10
 11693	SHRQ  $0x08, R10
 11694	SHLQ  $0x10, R9
 11695	IMULQ R8, R9
 11696	SHRQ  $0x32, R9
 11697	SHLQ  $0x10, R10
 11698	IMULQ R8, R10
 11699	SHRQ  $0x32, R10
 11700	MOVL  24(SP)(R9*4), BX
 11701	MOVL  24(SP)(R10*4), DI
 11702	MOVL  CX, 24(SP)(R9*4)
 11703	LEAL  1(CX), R9
 11704	MOVL  R9, 24(SP)(R10*4)
 11705	MOVQ  SI, R9
 11706	SHRQ  $0x10, R9
 11707	SHLQ  $0x10, R9
 11708	IMULQ R8, R9
 11709	SHRQ  $0x32, R9
 11710	MOVL  CX, R8
 11711	SUBL  16(SP), R8
 11712	MOVL  1(DX)(R8*1), R10
 11713	MOVQ  SI, R8
 11714	SHRQ  $0x08, R8
 11715	CMPL  R8, R10
 11716	JNE   no_repeat_found_encodeSnappyBlockAsm64K
 11717	LEAL  1(CX), SI
 11718	MOVL  12(SP), BX
 11719	MOVL  SI, DI
 11720	SUBL  16(SP), DI
 11721	JZ    repeat_extend_back_end_encodeSnappyBlockAsm64K
 11722
 11723repeat_extend_back_loop_encodeSnappyBlockAsm64K:
 11724	CMPL SI, BX
 11725	JBE  repeat_extend_back_end_encodeSnappyBlockAsm64K
 11726	MOVB -1(DX)(DI*1), R8
 11727	MOVB -1(DX)(SI*1), R9
 11728	CMPB R8, R9
 11729	JNE  repeat_extend_back_end_encodeSnappyBlockAsm64K
 11730	LEAL -1(SI), SI
 11731	DECL DI
 11732	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm64K
 11733
 11734repeat_extend_back_end_encodeSnappyBlockAsm64K:
 11735	MOVL SI, BX
 11736	SUBL 12(SP), BX
 11737	LEAQ 3(AX)(BX*1), BX
 11738	CMPQ BX, (SP)
 11739	JB   repeat_dst_size_check_encodeSnappyBlockAsm64K
 11740	MOVQ $0x00000000, ret+48(FP)
 11741	RET
 11742
 11743repeat_dst_size_check_encodeSnappyBlockAsm64K:
 11744	MOVL 12(SP), BX
 11745	CMPL BX, SI
 11746	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
 11747	MOVL SI, DI
 11748	MOVL SI, 12(SP)
 11749	LEAQ (DX)(BX*1), R8
 11750	SUBL BX, DI
 11751	LEAL -1(DI), BX
 11752	CMPL BX, $0x3c
 11753	JB   one_byte_repeat_emit_encodeSnappyBlockAsm64K
 11754	CMPL BX, $0x00000100
 11755	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm64K
 11756	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm64K
 11757
 11758three_bytes_repeat_emit_encodeSnappyBlockAsm64K:
 11759	MOVB $0xf4, (AX)
 11760	MOVW BX, 1(AX)
 11761	ADDQ $0x03, AX
 11762	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
 11763
 11764two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
 11765	MOVB $0xf0, (AX)
 11766	MOVB BL, 1(AX)
 11767	ADDQ $0x02, AX
 11768	CMPL BX, $0x40
 11769	JB   memmove_repeat_emit_encodeSnappyBlockAsm64K
 11770	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
 11771
 11772one_byte_repeat_emit_encodeSnappyBlockAsm64K:
 11773	SHLB $0x02, BL
 11774	MOVB BL, (AX)
 11775	ADDQ $0x01, AX
 11776
 11777memmove_repeat_emit_encodeSnappyBlockAsm64K:
 11778	LEAQ (AX)(DI*1), BX
 11779
 11780	// genMemMoveShort
 11781	CMPQ DI, $0x08
 11782	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
 11783	CMPQ DI, $0x10
 11784	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
 11785	CMPQ DI, $0x20
 11786	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
 11787	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
 11788
 11789emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
 11790	MOVQ (R8), R9
 11791	MOVQ R9, (AX)
 11792	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
 11793
 11794emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
 11795	MOVQ (R8), R9
 11796	MOVQ -8(R8)(DI*1), R8
 11797	MOVQ R9, (AX)
 11798	MOVQ R8, -8(AX)(DI*1)
 11799	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
 11800
 11801emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
 11802	MOVOU (R8), X0
 11803	MOVOU -16(R8)(DI*1), X1
 11804	MOVOU X0, (AX)
 11805	MOVOU X1, -16(AX)(DI*1)
 11806	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
 11807
 11808emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
 11809	MOVOU (R8), X0
 11810	MOVOU 16(R8), X1
 11811	MOVOU -32(R8)(DI*1), X2
 11812	MOVOU -16(R8)(DI*1), X3
 11813	MOVOU X0, (AX)
 11814	MOVOU X1, 16(AX)
 11815	MOVOU X2, -32(AX)(DI*1)
 11816	MOVOU X3, -16(AX)(DI*1)
 11817
 11818memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
 11819	MOVQ BX, AX
 11820	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
 11821
 11822memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
 11823	LEAQ (AX)(DI*1), BX
 11824
 11825	// genMemMoveLong
 11826	MOVOU (R8), X0
 11827	MOVOU 16(R8), X1
 11828	MOVOU -32(R8)(DI*1), X2
 11829	MOVOU -16(R8)(DI*1), X3
 11830	MOVQ  DI, R10
 11831	SHRQ  $0x05, R10
 11832	MOVQ  AX, R9
 11833	ANDL  $0x0000001f, R9
 11834	MOVQ  $0x00000040, R11
 11835	SUBQ  R9, R11
 11836	DECQ  R10
 11837	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 11838	LEAQ  -32(R8)(R11*1), R9
 11839	LEAQ  -32(AX)(R11*1), R12
 11840
 11841emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
 11842	MOVOU (R9), X4
 11843	MOVOU 16(R9), X5
 11844	MOVOA X4, (R12)
 11845	MOVOA X5, 16(R12)
 11846	ADDQ  $0x20, R12
 11847	ADDQ  $0x20, R9
 11848	ADDQ  $0x20, R11
 11849	DECQ  R10
 11850	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
 11851
 11852emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
 11853	MOVOU -32(R8)(R11*1), X4
 11854	MOVOU -16(R8)(R11*1), X5
 11855	MOVOA X4, -32(AX)(R11*1)
 11856	MOVOA X5, -16(AX)(R11*1)
 11857	ADDQ  $0x20, R11
 11858	CMPQ  DI, R11
 11859	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 11860	MOVOU X0, (AX)
 11861	MOVOU X1, 16(AX)
 11862	MOVOU X2, -32(AX)(DI*1)
 11863	MOVOU X3, -16(AX)(DI*1)
 11864	MOVQ  BX, AX
 11865
 11866emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
 11867	ADDL $0x05, CX
 11868	MOVL CX, BX
 11869	SUBL 16(SP), BX
 11870	MOVQ src_len+32(FP), DI
 11871	SUBL CX, DI
 11872	LEAQ (DX)(CX*1), R8
 11873	LEAQ (DX)(BX*1), BX
 11874
 11875	// matchLen
 11876	XORL R10, R10
 11877
 11878matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K:
 11879	CMPL DI, $0x10
 11880	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K
 11881	MOVQ (R8)(R10*1), R9
 11882	MOVQ 8(R8)(R10*1), R11
 11883	XORQ (BX)(R10*1), R9
 11884	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
 11885	XORQ 8(BX)(R10*1), R11
 11886	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K
 11887	LEAL -16(DI), DI
 11888	LEAL 16(R10), R10
 11889	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K
 11890
 11891matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K:
 11892#ifdef GOAMD64_v3
 11893	TZCNTQ R11, R11
 11894
 11895#else
 11896	BSFQ R11, R11
 11897
 11898#endif
 11899	SARQ $0x03, R11
 11900	LEAL 8(R10)(R11*1), R10
 11901	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm64K
 11902
 11903matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K:
 11904	CMPL DI, $0x08
 11905	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
 11906	MOVQ (R8)(R10*1), R9
 11907	XORQ (BX)(R10*1), R9
 11908	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
 11909	LEAL -8(DI), DI
 11910	LEAL 8(R10), R10
 11911	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
 11912
 11913matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K:
 11914#ifdef GOAMD64_v3
 11915	TZCNTQ R9, R9
 11916
 11917#else
 11918	BSFQ R9, R9
 11919
 11920#endif
 11921	SARQ $0x03, R9
 11922	LEAL (R10)(R9*1), R10
 11923	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm64K
 11924
 11925matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K:
 11926	CMPL DI, $0x04
 11927	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
 11928	MOVL (R8)(R10*1), R9
 11929	CMPL (BX)(R10*1), R9
 11930	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
 11931	LEAL -4(DI), DI
 11932	LEAL 4(R10), R10
 11933
 11934matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K:
 11935	CMPL DI, $0x01
 11936	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
 11937	JB   repeat_extend_forward_end_encodeSnappyBlockAsm64K
 11938	MOVW (R8)(R10*1), R9
 11939	CMPW (BX)(R10*1), R9
 11940	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
 11941	LEAL 2(R10), R10
 11942	SUBL $0x02, DI
 11943	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm64K
 11944
 11945matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K:
 11946	MOVB (R8)(R10*1), R9
 11947	CMPB (BX)(R10*1), R9
 11948	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm64K
 11949	LEAL 1(R10), R10
 11950
 11951repeat_extend_forward_end_encodeSnappyBlockAsm64K:
 11952	ADDL R10, CX
 11953	MOVL CX, BX
 11954	SUBL SI, BX
 11955	MOVL 16(SP), SI
 11956
 11957	// emitCopy
 11958two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
 11959	CMPL BX, $0x40
 11960	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
 11961	MOVB $0xee, (AX)
 11962	MOVW SI, 1(AX)
 11963	LEAL -60(BX), BX
 11964	ADDQ $0x03, AX
 11965	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K
 11966
 11967two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
 11968	MOVL BX, DI
 11969	SHLL $0x02, DI
 11970	CMPL BX, $0x0c
 11971	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
 11972	CMPL SI, $0x00000800
 11973	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
 11974	LEAL -15(DI), DI
 11975	MOVB SI, 1(AX)
 11976	SHRL $0x08, SI
 11977	SHLL $0x05, SI
 11978	ORL  SI, DI
 11979	MOVB DI, (AX)
 11980	ADDQ $0x02, AX
 11981	JMP  repeat_end_emit_encodeSnappyBlockAsm64K
 11982
 11983emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
 11984	LEAL -2(DI), DI
 11985	MOVB DI, (AX)
 11986	MOVW SI, 1(AX)
 11987	ADDQ $0x03, AX
 11988
 11989repeat_end_emit_encodeSnappyBlockAsm64K:
 11990	MOVL CX, 12(SP)
 11991	JMP  search_loop_encodeSnappyBlockAsm64K
 11992
 11993no_repeat_found_encodeSnappyBlockAsm64K:
 11994	CMPL (DX)(BX*1), SI
 11995	JEQ  candidate_match_encodeSnappyBlockAsm64K
 11996	SHRQ $0x08, SI
 11997	MOVL 24(SP)(R9*4), BX
 11998	LEAL 2(CX), R8
 11999	CMPL (DX)(DI*1), SI
 12000	JEQ  candidate2_match_encodeSnappyBlockAsm64K
 12001	MOVL R8, 24(SP)(R9*4)
 12002	SHRQ $0x08, SI
 12003	CMPL (DX)(BX*1), SI
 12004	JEQ  candidate3_match_encodeSnappyBlockAsm64K
 12005	MOVL 20(SP), CX
 12006	JMP  search_loop_encodeSnappyBlockAsm64K
 12007
 12008candidate3_match_encodeSnappyBlockAsm64K:
 12009	ADDL $0x02, CX
 12010	JMP  candidate_match_encodeSnappyBlockAsm64K
 12011
 12012candidate2_match_encodeSnappyBlockAsm64K:
 12013	MOVL R8, 24(SP)(R9*4)
 12014	INCL CX
 12015	MOVL DI, BX
 12016
 12017candidate_match_encodeSnappyBlockAsm64K:
 12018	MOVL  12(SP), SI
 12019	TESTL BX, BX
 12020	JZ    match_extend_back_end_encodeSnappyBlockAsm64K
 12021
 12022match_extend_back_loop_encodeSnappyBlockAsm64K:
 12023	CMPL CX, SI
 12024	JBE  match_extend_back_end_encodeSnappyBlockAsm64K
 12025	MOVB -1(DX)(BX*1), DI
 12026	MOVB -1(DX)(CX*1), R8
 12027	CMPB DI, R8
 12028	JNE  match_extend_back_end_encodeSnappyBlockAsm64K
 12029	LEAL -1(CX), CX
 12030	DECL BX
 12031	JZ   match_extend_back_end_encodeSnappyBlockAsm64K
 12032	JMP  match_extend_back_loop_encodeSnappyBlockAsm64K
 12033
 12034match_extend_back_end_encodeSnappyBlockAsm64K:
 12035	MOVL CX, SI
 12036	SUBL 12(SP), SI
 12037	LEAQ 3(AX)(SI*1), SI
 12038	CMPQ SI, (SP)
 12039	JB   match_dst_size_check_encodeSnappyBlockAsm64K
 12040	MOVQ $0x00000000, ret+48(FP)
 12041	RET
 12042
 12043match_dst_size_check_encodeSnappyBlockAsm64K:
 12044	MOVL CX, SI
 12045	MOVL 12(SP), DI
 12046	CMPL DI, SI
 12047	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
 12048	MOVL SI, R8
 12049	MOVL SI, 12(SP)
 12050	LEAQ (DX)(DI*1), SI
 12051	SUBL DI, R8
 12052	LEAL -1(R8), DI
 12053	CMPL DI, $0x3c
 12054	JB   one_byte_match_emit_encodeSnappyBlockAsm64K
 12055	CMPL DI, $0x00000100
 12056	JB   two_bytes_match_emit_encodeSnappyBlockAsm64K
 12057	JB   three_bytes_match_emit_encodeSnappyBlockAsm64K
 12058
 12059three_bytes_match_emit_encodeSnappyBlockAsm64K:
 12060	MOVB $0xf4, (AX)
 12061	MOVW DI, 1(AX)
 12062	ADDQ $0x03, AX
 12063	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
 12064
 12065two_bytes_match_emit_encodeSnappyBlockAsm64K:
 12066	MOVB $0xf0, (AX)
 12067	MOVB DI, 1(AX)
 12068	ADDQ $0x02, AX
 12069	CMPL DI, $0x40
 12070	JB   memmove_match_emit_encodeSnappyBlockAsm64K
 12071	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
 12072
 12073one_byte_match_emit_encodeSnappyBlockAsm64K:
 12074	SHLB $0x02, DI
 12075	MOVB DI, (AX)
 12076	ADDQ $0x01, AX
 12077
 12078memmove_match_emit_encodeSnappyBlockAsm64K:
 12079	LEAQ (AX)(R8*1), DI
 12080
 12081	// genMemMoveShort
 12082	CMPQ R8, $0x08
 12083	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
 12084	CMPQ R8, $0x10
 12085	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
 12086	CMPQ R8, $0x20
 12087	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
 12088	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
 12089
 12090emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
 12091	MOVQ (SI), R9
 12092	MOVQ R9, (AX)
 12093	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
 12094
 12095emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
 12096	MOVQ (SI), R9
 12097	MOVQ -8(SI)(R8*1), SI
 12098	MOVQ R9, (AX)
 12099	MOVQ SI, -8(AX)(R8*1)
 12100	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
 12101
 12102emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
 12103	MOVOU (SI), X0
 12104	MOVOU -16(SI)(R8*1), X1
 12105	MOVOU X0, (AX)
 12106	MOVOU X1, -16(AX)(R8*1)
 12107	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
 12108
 12109emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
 12110	MOVOU (SI), X0
 12111	MOVOU 16(SI), X1
 12112	MOVOU -32(SI)(R8*1), X2
 12113	MOVOU -16(SI)(R8*1), X3
 12114	MOVOU X0, (AX)
 12115	MOVOU X1, 16(AX)
 12116	MOVOU X2, -32(AX)(R8*1)
 12117	MOVOU X3, -16(AX)(R8*1)
 12118
 12119memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
 12120	MOVQ DI, AX
 12121	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
 12122
 12123memmove_long_match_emit_encodeSnappyBlockAsm64K:
 12124	LEAQ (AX)(R8*1), DI
 12125
 12126	// genMemMoveLong
 12127	MOVOU (SI), X0
 12128	MOVOU 16(SI), X1
 12129	MOVOU -32(SI)(R8*1), X2
 12130	MOVOU -16(SI)(R8*1), X3
 12131	MOVQ  R8, R10
 12132	SHRQ  $0x05, R10
 12133	MOVQ  AX, R9
 12134	ANDL  $0x0000001f, R9
 12135	MOVQ  $0x00000040, R11
 12136	SUBQ  R9, R11
 12137	DECQ  R10
 12138	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 12139	LEAQ  -32(SI)(R11*1), R9
 12140	LEAQ  -32(AX)(R11*1), R12
 12141
 12142emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
 12143	MOVOU (R9), X4
 12144	MOVOU 16(R9), X5
 12145	MOVOA X4, (R12)
 12146	MOVOA X5, 16(R12)
 12147	ADDQ  $0x20, R12
 12148	ADDQ  $0x20, R9
 12149	ADDQ  $0x20, R11
 12150	DECQ  R10
 12151	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
 12152
 12153emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
 12154	MOVOU -32(SI)(R11*1), X4
 12155	MOVOU -16(SI)(R11*1), X5
 12156	MOVOA X4, -32(AX)(R11*1)
 12157	MOVOA X5, -16(AX)(R11*1)
 12158	ADDQ  $0x20, R11
 12159	CMPQ  R8, R11
 12160	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 12161	MOVOU X0, (AX)
 12162	MOVOU X1, 16(AX)
 12163	MOVOU X2, -32(AX)(R8*1)
 12164	MOVOU X3, -16(AX)(R8*1)
 12165	MOVQ  DI, AX
 12166
 12167emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
 12168match_nolit_loop_encodeSnappyBlockAsm64K:
 12169	MOVL CX, SI
 12170	SUBL BX, SI
 12171	MOVL SI, 16(SP)
 12172	ADDL $0x04, CX
 12173	ADDL $0x04, BX
 12174	MOVQ src_len+32(FP), SI
 12175	SUBL CX, SI
 12176	LEAQ (DX)(CX*1), DI
 12177	LEAQ (DX)(BX*1), BX
 12178
 12179	// matchLen
 12180	XORL R9, R9
 12181
 12182matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K:
 12183	CMPL SI, $0x10
 12184	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm64K
 12185	MOVQ (DI)(R9*1), R8
 12186	MOVQ 8(DI)(R9*1), R10
 12187	XORQ (BX)(R9*1), R8
 12188	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
 12189	XORQ 8(BX)(R9*1), R10
 12190	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K
 12191	LEAL -16(SI), SI
 12192	LEAL 16(R9), R9
 12193	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K
 12194
 12195matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K:
 12196#ifdef GOAMD64_v3
 12197	TZCNTQ R10, R10
 12198
 12199#else
 12200	BSFQ R10, R10
 12201
 12202#endif
 12203	SARQ $0x03, R10
 12204	LEAL 8(R9)(R10*1), R9
 12205	JMP  match_nolit_end_encodeSnappyBlockAsm64K
 12206
 12207matchlen_match8_match_nolit_encodeSnappyBlockAsm64K:
 12208	CMPL SI, $0x08
 12209	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
 12210	MOVQ (DI)(R9*1), R8
 12211	XORQ (BX)(R9*1), R8
 12212	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
 12213	LEAL -8(SI), SI
 12214	LEAL 8(R9), R9
 12215	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
 12216
 12217matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K:
 12218#ifdef GOAMD64_v3
 12219	TZCNTQ R8, R8
 12220
 12221#else
 12222	BSFQ R8, R8
 12223
 12224#endif
 12225	SARQ $0x03, R8
 12226	LEAL (R9)(R8*1), R9
 12227	JMP  match_nolit_end_encodeSnappyBlockAsm64K
 12228
 12229matchlen_match4_match_nolit_encodeSnappyBlockAsm64K:
 12230	CMPL SI, $0x04
 12231	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
 12232	MOVL (DI)(R9*1), R8
 12233	CMPL (BX)(R9*1), R8
 12234	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
 12235	LEAL -4(SI), SI
 12236	LEAL 4(R9), R9
 12237
 12238matchlen_match2_match_nolit_encodeSnappyBlockAsm64K:
 12239	CMPL SI, $0x01
 12240	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
 12241	JB   match_nolit_end_encodeSnappyBlockAsm64K
 12242	MOVW (DI)(R9*1), R8
 12243	CMPW (BX)(R9*1), R8
 12244	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
 12245	LEAL 2(R9), R9
 12246	SUBL $0x02, SI
 12247	JZ   match_nolit_end_encodeSnappyBlockAsm64K
 12248
 12249matchlen_match1_match_nolit_encodeSnappyBlockAsm64K:
 12250	MOVB (DI)(R9*1), R8
 12251	CMPB (BX)(R9*1), R8
 12252	JNE  match_nolit_end_encodeSnappyBlockAsm64K
 12253	LEAL 1(R9), R9
 12254
 12255match_nolit_end_encodeSnappyBlockAsm64K:
 12256	ADDL R9, CX
 12257	MOVL 16(SP), BX
 12258	ADDL $0x04, R9
 12259	MOVL CX, 12(SP)
 12260
 12261	// emitCopy
 12262two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
 12263	CMPL R9, $0x40
 12264	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
 12265	MOVB $0xee, (AX)
 12266	MOVW BX, 1(AX)
 12267	LEAL -60(R9), R9
 12268	ADDQ $0x03, AX
 12269	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm64K
 12270
 12271two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
 12272	MOVL R9, SI
 12273	SHLL $0x02, SI
 12274	CMPL R9, $0x0c
 12275	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
 12276	CMPL BX, $0x00000800
 12277	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
 12278	LEAL -15(SI), SI
 12279	MOVB BL, 1(AX)
 12280	SHRL $0x08, BX
 12281	SHLL $0x05, BX
 12282	ORL  BX, SI
 12283	MOVB SI, (AX)
 12284	ADDQ $0x02, AX
 12285	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm64K
 12286
 12287emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
 12288	LEAL -2(SI), SI
 12289	MOVB SI, (AX)
 12290	MOVW BX, 1(AX)
 12291	ADDQ $0x03, AX
 12292
 12293match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
 12294	CMPL CX, 8(SP)
 12295	JAE  emit_remainder_encodeSnappyBlockAsm64K
 12296	MOVQ -2(DX)(CX*1), SI
 12297	CMPQ AX, (SP)
 12298	JB   match_nolit_dst_ok_encodeSnappyBlockAsm64K
 12299	MOVQ $0x00000000, ret+48(FP)
 12300	RET
 12301
 12302match_nolit_dst_ok_encodeSnappyBlockAsm64K:
 12303	MOVQ  $0x0000cf1bbcdcbf9b, R8
 12304	MOVQ  SI, DI
 12305	SHRQ  $0x10, SI
 12306	MOVQ  SI, BX
 12307	SHLQ  $0x10, DI
 12308	IMULQ R8, DI
 12309	SHRQ  $0x32, DI
 12310	SHLQ  $0x10, BX
 12311	IMULQ R8, BX
 12312	SHRQ  $0x32, BX
 12313	LEAL  -2(CX), R8
 12314	LEAQ  24(SP)(BX*4), R9
 12315	MOVL  (R9), BX
 12316	MOVL  R8, 24(SP)(DI*4)
 12317	MOVL  CX, (R9)
 12318	CMPL  (DX)(BX*1), SI
 12319	JEQ   match_nolit_loop_encodeSnappyBlockAsm64K
 12320	INCL  CX
 12321	JMP   search_loop_encodeSnappyBlockAsm64K
 12322
 12323emit_remainder_encodeSnappyBlockAsm64K:
 12324	MOVQ src_len+32(FP), CX
 12325	SUBL 12(SP), CX
 12326	LEAQ 3(AX)(CX*1), CX
 12327	CMPQ CX, (SP)
 12328	JB   emit_remainder_ok_encodeSnappyBlockAsm64K
 12329	MOVQ $0x00000000, ret+48(FP)
 12330	RET
 12331
 12332emit_remainder_ok_encodeSnappyBlockAsm64K:
 12333	MOVQ src_len+32(FP), CX
 12334	MOVL 12(SP), BX
 12335	CMPL BX, CX
 12336	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
 12337	MOVL CX, SI
 12338	MOVL CX, 12(SP)
 12339	LEAQ (DX)(BX*1), CX
 12340	SUBL BX, SI
 12341	LEAL -1(SI), DX
 12342	CMPL DX, $0x3c
 12343	JB   one_byte_emit_remainder_encodeSnappyBlockAsm64K
 12344	CMPL DX, $0x00000100
 12345	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm64K
 12346	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm64K
 12347
 12348three_bytes_emit_remainder_encodeSnappyBlockAsm64K:
 12349	MOVB $0xf4, (AX)
 12350	MOVW DX, 1(AX)
 12351	ADDQ $0x03, AX
 12352	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
 12353
 12354two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
 12355	MOVB $0xf0, (AX)
 12356	MOVB DL, 1(AX)
 12357	ADDQ $0x02, AX
 12358	CMPL DX, $0x40
 12359	JB   memmove_emit_remainder_encodeSnappyBlockAsm64K
 12360	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
 12361
 12362one_byte_emit_remainder_encodeSnappyBlockAsm64K:
 12363	SHLB $0x02, DL
 12364	MOVB DL, (AX)
 12365	ADDQ $0x01, AX
 12366
 12367memmove_emit_remainder_encodeSnappyBlockAsm64K:
 12368	LEAQ (AX)(SI*1), DX
 12369	MOVL SI, BX
 12370
 12371	// genMemMoveShort
 12372	CMPQ BX, $0x03
 12373	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2
 12374	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3
 12375	CMPQ BX, $0x08
 12376	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7
 12377	CMPQ BX, $0x10
 12378	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16
 12379	CMPQ BX, $0x20
 12380	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32
 12381	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64
 12382
 12383emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2:
 12384	MOVB (CX), SI
 12385	MOVB -1(CX)(BX*1), CL
 12386	MOVB SI, (AX)
 12387	MOVB CL, -1(AX)(BX*1)
 12388	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 12389
 12390emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3:
 12391	MOVW (CX), SI
 12392	MOVB 2(CX), CL
 12393	MOVW SI, (AX)
 12394	MOVB CL, 2(AX)
 12395	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 12396
 12397emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7:
 12398	MOVL (CX), SI
 12399	MOVL -4(CX)(BX*1), CX
 12400	MOVL SI, (AX)
 12401	MOVL CX, -4(AX)(BX*1)
 12402	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 12403
 12404emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
 12405	MOVQ (CX), SI
 12406	MOVQ -8(CX)(BX*1), CX
 12407	MOVQ SI, (AX)
 12408	MOVQ CX, -8(AX)(BX*1)
 12409	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 12410
 12411emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
 12412	MOVOU (CX), X0
 12413	MOVOU -16(CX)(BX*1), X1
 12414	MOVOU X0, (AX)
 12415	MOVOU X1, -16(AX)(BX*1)
 12416	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 12417
 12418emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
 12419	MOVOU (CX), X0
 12420	MOVOU 16(CX), X1
 12421	MOVOU -32(CX)(BX*1), X2
 12422	MOVOU -16(CX)(BX*1), X3
 12423	MOVOU X0, (AX)
 12424	MOVOU X1, 16(AX)
 12425	MOVOU X2, -32(AX)(BX*1)
 12426	MOVOU X3, -16(AX)(BX*1)
 12427
 12428memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
 12429	MOVQ DX, AX
 12430	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
 12431
 12432memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
 12433	LEAQ (AX)(SI*1), DX
 12434	MOVL SI, BX
 12435
 12436	// genMemMoveLong
 12437	MOVOU (CX), X0
 12438	MOVOU 16(CX), X1
 12439	MOVOU -32(CX)(BX*1), X2
 12440	MOVOU -16(CX)(BX*1), X3
 12441	MOVQ  BX, DI
 12442	SHRQ  $0x05, DI
 12443	MOVQ  AX, SI
 12444	ANDL  $0x0000001f, SI
 12445	MOVQ  $0x00000040, R8
 12446	SUBQ  SI, R8
 12447	DECQ  DI
 12448	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 12449	LEAQ  -32(CX)(R8*1), SI
 12450	LEAQ  -32(AX)(R8*1), R9
 12451
 12452emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
 12453	MOVOU (SI), X4
 12454	MOVOU 16(SI), X5
 12455	MOVOA X4, (R9)
 12456	MOVOA X5, 16(R9)
 12457	ADDQ  $0x20, R9
 12458	ADDQ  $0x20, SI
 12459	ADDQ  $0x20, R8
 12460	DECQ  DI
 12461	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back
 12462
 12463emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
 12464	MOVOU -32(CX)(R8*1), X4
 12465	MOVOU -16(CX)(R8*1), X5
 12466	MOVOA X4, -32(AX)(R8*1)
 12467	MOVOA X5, -16(AX)(R8*1)
 12468	ADDQ  $0x20, R8
 12469	CMPQ  BX, R8
 12470	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 12471	MOVOU X0, (AX)
 12472	MOVOU X1, 16(AX)
 12473	MOVOU X2, -32(AX)(BX*1)
 12474	MOVOU X3, -16(AX)(BX*1)
 12475	MOVQ  DX, AX
 12476
 12477emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
 12478	MOVQ dst_base+0(FP), CX
 12479	SUBQ CX, AX
 12480	MOVQ AX, ret+48(FP)
 12481	RET
 12482
 12483// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
 12484// Requires: BMI, SSE2
 12485TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56
 12486	MOVQ dst_base+0(FP), AX
 12487	MOVQ $0x00000080, CX
 12488	LEAQ 24(SP), DX
 12489	PXOR X0, X0
 12490
 12491zero_loop_encodeSnappyBlockAsm12B:
 12492	MOVOU X0, (DX)
 12493	MOVOU X0, 16(DX)
 12494	MOVOU X0, 32(DX)
 12495	MOVOU X0, 48(DX)
 12496	MOVOU X0, 64(DX)
 12497	MOVOU X0, 80(DX)
 12498	MOVOU X0, 96(DX)
 12499	MOVOU X0, 112(DX)
 12500	ADDQ  $0x80, DX
 12501	DECQ  CX
 12502	JNZ   zero_loop_encodeSnappyBlockAsm12B
 12503	MOVL  $0x00000000, 12(SP)
 12504	MOVQ  src_len+32(FP), CX
 12505	LEAQ  -9(CX), DX
 12506	LEAQ  -8(CX), BX
 12507	MOVL  BX, 8(SP)
 12508	SHRQ  $0x05, CX
 12509	SUBL  CX, DX
 12510	LEAQ  (AX)(DX*1), DX
 12511	MOVQ  DX, (SP)
 12512	MOVL  $0x00000001, CX
 12513	MOVL  CX, 16(SP)
 12514	MOVQ  src_base+24(FP), DX
 12515
 12516search_loop_encodeSnappyBlockAsm12B:
 12517	MOVL  CX, BX
 12518	SUBL  12(SP), BX
 12519	SHRL  $0x05, BX
 12520	LEAL  4(CX)(BX*1), BX
 12521	CMPL  BX, 8(SP)
 12522	JAE   emit_remainder_encodeSnappyBlockAsm12B
 12523	MOVQ  (DX)(CX*1), SI
 12524	MOVL  BX, 20(SP)
 12525	MOVQ  $0x000000cf1bbcdcbb, R8
 12526	MOVQ  SI, R9
 12527	MOVQ  SI, R10
 12528	SHRQ  $0x08, R10
 12529	SHLQ  $0x18, R9
 12530	IMULQ R8, R9
 12531	SHRQ  $0x34, R9
 12532	SHLQ  $0x18, R10
 12533	IMULQ R8, R10
 12534	SHRQ  $0x34, R10
 12535	MOVL  24(SP)(R9*4), BX
 12536	MOVL  24(SP)(R10*4), DI
 12537	MOVL  CX, 24(SP)(R9*4)
 12538	LEAL  1(CX), R9
 12539	MOVL  R9, 24(SP)(R10*4)
 12540	MOVQ  SI, R9
 12541	SHRQ  $0x10, R9
 12542	SHLQ  $0x18, R9
 12543	IMULQ R8, R9
 12544	SHRQ  $0x34, R9
 12545	MOVL  CX, R8
 12546	SUBL  16(SP), R8
 12547	MOVL  1(DX)(R8*1), R10
 12548	MOVQ  SI, R8
 12549	SHRQ  $0x08, R8
 12550	CMPL  R8, R10
 12551	JNE   no_repeat_found_encodeSnappyBlockAsm12B
 12552	LEAL  1(CX), SI
 12553	MOVL  12(SP), BX
 12554	MOVL  SI, DI
 12555	SUBL  16(SP), DI
 12556	JZ    repeat_extend_back_end_encodeSnappyBlockAsm12B
 12557
 12558repeat_extend_back_loop_encodeSnappyBlockAsm12B:
 12559	CMPL SI, BX
 12560	JBE  repeat_extend_back_end_encodeSnappyBlockAsm12B
 12561	MOVB -1(DX)(DI*1), R8
 12562	MOVB -1(DX)(SI*1), R9
 12563	CMPB R8, R9
 12564	JNE  repeat_extend_back_end_encodeSnappyBlockAsm12B
 12565	LEAL -1(SI), SI
 12566	DECL DI
 12567	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm12B
 12568
 12569repeat_extend_back_end_encodeSnappyBlockAsm12B:
 12570	MOVL SI, BX
 12571	SUBL 12(SP), BX
 12572	LEAQ 3(AX)(BX*1), BX
 12573	CMPQ BX, (SP)
 12574	JB   repeat_dst_size_check_encodeSnappyBlockAsm12B
 12575	MOVQ $0x00000000, ret+48(FP)
 12576	RET
 12577
 12578repeat_dst_size_check_encodeSnappyBlockAsm12B:
 12579	MOVL 12(SP), BX
 12580	CMPL BX, SI
 12581	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
 12582	MOVL SI, DI
 12583	MOVL SI, 12(SP)
 12584	LEAQ (DX)(BX*1), R8
 12585	SUBL BX, DI
 12586	LEAL -1(DI), BX
 12587	CMPL BX, $0x3c
 12588	JB   one_byte_repeat_emit_encodeSnappyBlockAsm12B
 12589	CMPL BX, $0x00000100
 12590	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm12B
 12591	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm12B
 12592
 12593three_bytes_repeat_emit_encodeSnappyBlockAsm12B:
 12594	MOVB $0xf4, (AX)
 12595	MOVW BX, 1(AX)
 12596	ADDQ $0x03, AX
 12597	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
 12598
 12599two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
 12600	MOVB $0xf0, (AX)
 12601	MOVB BL, 1(AX)
 12602	ADDQ $0x02, AX
 12603	CMPL BX, $0x40
 12604	JB   memmove_repeat_emit_encodeSnappyBlockAsm12B
 12605	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
 12606
 12607one_byte_repeat_emit_encodeSnappyBlockAsm12B:
 12608	SHLB $0x02, BL
 12609	MOVB BL, (AX)
 12610	ADDQ $0x01, AX
 12611
 12612memmove_repeat_emit_encodeSnappyBlockAsm12B:
 12613	LEAQ (AX)(DI*1), BX
 12614
 12615	// genMemMoveShort
 12616	CMPQ DI, $0x08
 12617	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
 12618	CMPQ DI, $0x10
 12619	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
 12620	CMPQ DI, $0x20
 12621	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
 12622	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
 12623
 12624emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
 12625	MOVQ (R8), R9
 12626	MOVQ R9, (AX)
 12627	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
 12628
 12629emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
 12630	MOVQ (R8), R9
 12631	MOVQ -8(R8)(DI*1), R8
 12632	MOVQ R9, (AX)
 12633	MOVQ R8, -8(AX)(DI*1)
 12634	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
 12635
 12636emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
 12637	MOVOU (R8), X0
 12638	MOVOU -16(R8)(DI*1), X1
 12639	MOVOU X0, (AX)
 12640	MOVOU X1, -16(AX)(DI*1)
 12641	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
 12642
 12643emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
 12644	MOVOU (R8), X0
 12645	MOVOU 16(R8), X1
 12646	MOVOU -32(R8)(DI*1), X2
 12647	MOVOU -16(R8)(DI*1), X3
 12648	MOVOU X0, (AX)
 12649	MOVOU X1, 16(AX)
 12650	MOVOU X2, -32(AX)(DI*1)
 12651	MOVOU X3, -16(AX)(DI*1)
 12652
 12653memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
 12654	MOVQ BX, AX
 12655	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
 12656
 12657memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
 12658	LEAQ (AX)(DI*1), BX
 12659
 12660	// genMemMoveLong
 12661	MOVOU (R8), X0
 12662	MOVOU 16(R8), X1
 12663	MOVOU -32(R8)(DI*1), X2
 12664	MOVOU -16(R8)(DI*1), X3
 12665	MOVQ  DI, R10
 12666	SHRQ  $0x05, R10
 12667	MOVQ  AX, R9
 12668	ANDL  $0x0000001f, R9
 12669	MOVQ  $0x00000040, R11
 12670	SUBQ  R9, R11
 12671	DECQ  R10
 12672	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 12673	LEAQ  -32(R8)(R11*1), R9
 12674	LEAQ  -32(AX)(R11*1), R12
 12675
 12676emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
 12677	MOVOU (R9), X4
 12678	MOVOU 16(R9), X5
 12679	MOVOA X4, (R12)
 12680	MOVOA X5, 16(R12)
 12681	ADDQ  $0x20, R12
 12682	ADDQ  $0x20, R9
 12683	ADDQ  $0x20, R11
 12684	DECQ  R10
 12685	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
 12686
 12687emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
 12688	MOVOU -32(R8)(R11*1), X4
 12689	MOVOU -16(R8)(R11*1), X5
 12690	MOVOA X4, -32(AX)(R11*1)
 12691	MOVOA X5, -16(AX)(R11*1)
 12692	ADDQ  $0x20, R11
 12693	CMPQ  DI, R11
 12694	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 12695	MOVOU X0, (AX)
 12696	MOVOU X1, 16(AX)
 12697	MOVOU X2, -32(AX)(DI*1)
 12698	MOVOU X3, -16(AX)(DI*1)
 12699	MOVQ  BX, AX
 12700
 12701emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
 12702	ADDL $0x05, CX
 12703	MOVL CX, BX
 12704	SUBL 16(SP), BX
 12705	MOVQ src_len+32(FP), DI
 12706	SUBL CX, DI
 12707	LEAQ (DX)(CX*1), R8
 12708	LEAQ (DX)(BX*1), BX
 12709
 12710	// matchLen
 12711	XORL R10, R10
 12712
 12713matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B:
 12714	CMPL DI, $0x10
 12715	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B
 12716	MOVQ (R8)(R10*1), R9
 12717	MOVQ 8(R8)(R10*1), R11
 12718	XORQ (BX)(R10*1), R9
 12719	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
 12720	XORQ 8(BX)(R10*1), R11
 12721	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B
 12722	LEAL -16(DI), DI
 12723	LEAL 16(R10), R10
 12724	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B
 12725
 12726matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B:
 12727#ifdef GOAMD64_v3
 12728	TZCNTQ R11, R11
 12729
 12730#else
 12731	BSFQ R11, R11
 12732
 12733#endif
 12734	SARQ $0x03, R11
 12735	LEAL 8(R10)(R11*1), R10
 12736	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm12B
 12737
 12738matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B:
 12739	CMPL DI, $0x08
 12740	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
 12741	MOVQ (R8)(R10*1), R9
 12742	XORQ (BX)(R10*1), R9
 12743	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
 12744	LEAL -8(DI), DI
 12745	LEAL 8(R10), R10
 12746	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
 12747
 12748matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B:
 12749#ifdef GOAMD64_v3
 12750	TZCNTQ R9, R9
 12751
 12752#else
 12753	BSFQ R9, R9
 12754
 12755#endif
 12756	SARQ $0x03, R9
 12757	LEAL (R10)(R9*1), R10
 12758	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm12B
 12759
 12760matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B:
 12761	CMPL DI, $0x04
 12762	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
 12763	MOVL (R8)(R10*1), R9
 12764	CMPL (BX)(R10*1), R9
 12765	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
 12766	LEAL -4(DI), DI
 12767	LEAL 4(R10), R10
 12768
 12769matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B:
 12770	CMPL DI, $0x01
 12771	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
 12772	JB   repeat_extend_forward_end_encodeSnappyBlockAsm12B
 12773	MOVW (R8)(R10*1), R9
 12774	CMPW (BX)(R10*1), R9
 12775	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
 12776	LEAL 2(R10), R10
 12777	SUBL $0x02, DI
 12778	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm12B
 12779
 12780matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B:
 12781	MOVB (R8)(R10*1), R9
 12782	CMPB (BX)(R10*1), R9
 12783	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm12B
 12784	LEAL 1(R10), R10
 12785
 12786repeat_extend_forward_end_encodeSnappyBlockAsm12B:
 12787	ADDL R10, CX
 12788	MOVL CX, BX
 12789	SUBL SI, BX
 12790	MOVL 16(SP), SI
 12791
 12792	// emitCopy
 12793two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
 12794	CMPL BX, $0x40
 12795	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
 12796	MOVB $0xee, (AX)
 12797	MOVW SI, 1(AX)
 12798	LEAL -60(BX), BX
 12799	ADDQ $0x03, AX
 12800	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
 12801
 12802two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
 12803	MOVL BX, DI
 12804	SHLL $0x02, DI
 12805	CMPL BX, $0x0c
 12806	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
 12807	CMPL SI, $0x00000800
 12808	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
 12809	LEAL -15(DI), DI
 12810	MOVB SI, 1(AX)
 12811	SHRL $0x08, SI
 12812	SHLL $0x05, SI
 12813	ORL  SI, DI
 12814	MOVB DI, (AX)
 12815	ADDQ $0x02, AX
 12816	JMP  repeat_end_emit_encodeSnappyBlockAsm12B
 12817
 12818emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
 12819	LEAL -2(DI), DI
 12820	MOVB DI, (AX)
 12821	MOVW SI, 1(AX)
 12822	ADDQ $0x03, AX
 12823
 12824repeat_end_emit_encodeSnappyBlockAsm12B:
 12825	MOVL CX, 12(SP)
 12826	JMP  search_loop_encodeSnappyBlockAsm12B
 12827
 12828no_repeat_found_encodeSnappyBlockAsm12B:
 12829	CMPL (DX)(BX*1), SI
 12830	JEQ  candidate_match_encodeSnappyBlockAsm12B
 12831	SHRQ $0x08, SI
 12832	MOVL 24(SP)(R9*4), BX
 12833	LEAL 2(CX), R8
 12834	CMPL (DX)(DI*1), SI
 12835	JEQ  candidate2_match_encodeSnappyBlockAsm12B
 12836	MOVL R8, 24(SP)(R9*4)
 12837	SHRQ $0x08, SI
 12838	CMPL (DX)(BX*1), SI
 12839	JEQ  candidate3_match_encodeSnappyBlockAsm12B
 12840	MOVL 20(SP), CX
 12841	JMP  search_loop_encodeSnappyBlockAsm12B
 12842
 12843candidate3_match_encodeSnappyBlockAsm12B:
 12844	ADDL $0x02, CX
 12845	JMP  candidate_match_encodeSnappyBlockAsm12B
 12846
 12847candidate2_match_encodeSnappyBlockAsm12B:
 12848	MOVL R8, 24(SP)(R9*4)
 12849	INCL CX
 12850	MOVL DI, BX
 12851
 12852candidate_match_encodeSnappyBlockAsm12B:
 12853	MOVL  12(SP), SI
 12854	TESTL BX, BX
 12855	JZ    match_extend_back_end_encodeSnappyBlockAsm12B
 12856
 12857match_extend_back_loop_encodeSnappyBlockAsm12B:
 12858	CMPL CX, SI
 12859	JBE  match_extend_back_end_encodeSnappyBlockAsm12B
 12860	MOVB -1(DX)(BX*1), DI
 12861	MOVB -1(DX)(CX*1), R8
 12862	CMPB DI, R8
 12863	JNE  match_extend_back_end_encodeSnappyBlockAsm12B
 12864	LEAL -1(CX), CX
 12865	DECL BX
 12866	JZ   match_extend_back_end_encodeSnappyBlockAsm12B
 12867	JMP  match_extend_back_loop_encodeSnappyBlockAsm12B
 12868
 12869match_extend_back_end_encodeSnappyBlockAsm12B:
 12870	MOVL CX, SI
 12871	SUBL 12(SP), SI
 12872	LEAQ 3(AX)(SI*1), SI
 12873	CMPQ SI, (SP)
 12874	JB   match_dst_size_check_encodeSnappyBlockAsm12B
 12875	MOVQ $0x00000000, ret+48(FP)
 12876	RET
 12877
 12878match_dst_size_check_encodeSnappyBlockAsm12B:
 12879	MOVL CX, SI
 12880	MOVL 12(SP), DI
 12881	CMPL DI, SI
 12882	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
 12883	MOVL SI, R8
 12884	MOVL SI, 12(SP)
 12885	LEAQ (DX)(DI*1), SI
 12886	SUBL DI, R8
 12887	LEAL -1(R8), DI
 12888	CMPL DI, $0x3c
 12889	JB   one_byte_match_emit_encodeSnappyBlockAsm12B
 12890	CMPL DI, $0x00000100
 12891	JB   two_bytes_match_emit_encodeSnappyBlockAsm12B
 12892	JB   three_bytes_match_emit_encodeSnappyBlockAsm12B
 12893
 12894three_bytes_match_emit_encodeSnappyBlockAsm12B:
 12895	MOVB $0xf4, (AX)
 12896	MOVW DI, 1(AX)
 12897	ADDQ $0x03, AX
 12898	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
 12899
 12900two_bytes_match_emit_encodeSnappyBlockAsm12B:
 12901	MOVB $0xf0, (AX)
 12902	MOVB DI, 1(AX)
 12903	ADDQ $0x02, AX
 12904	CMPL DI, $0x40
 12905	JB   memmove_match_emit_encodeSnappyBlockAsm12B
 12906	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
 12907
 12908one_byte_match_emit_encodeSnappyBlockAsm12B:
 12909	SHLB $0x02, DI
 12910	MOVB DI, (AX)
 12911	ADDQ $0x01, AX
 12912
 12913memmove_match_emit_encodeSnappyBlockAsm12B:
 12914	LEAQ (AX)(R8*1), DI
 12915
 12916	// genMemMoveShort
 12917	CMPQ R8, $0x08
 12918	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
 12919	CMPQ R8, $0x10
 12920	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
 12921	CMPQ R8, $0x20
 12922	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
 12923	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
 12924
 12925emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
 12926	MOVQ (SI), R9
 12927	MOVQ R9, (AX)
 12928	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
 12929
 12930emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
 12931	MOVQ (SI), R9
 12932	MOVQ -8(SI)(R8*1), SI
 12933	MOVQ R9, (AX)
 12934	MOVQ SI, -8(AX)(R8*1)
 12935	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
 12936
 12937emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
 12938	MOVOU (SI), X0
 12939	MOVOU -16(SI)(R8*1), X1
 12940	MOVOU X0, (AX)
 12941	MOVOU X1, -16(AX)(R8*1)
 12942	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
 12943
 12944emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
 12945	MOVOU (SI), X0
 12946	MOVOU 16(SI), X1
 12947	MOVOU -32(SI)(R8*1), X2
 12948	MOVOU -16(SI)(R8*1), X3
 12949	MOVOU X0, (AX)
 12950	MOVOU X1, 16(AX)
 12951	MOVOU X2, -32(AX)(R8*1)
 12952	MOVOU X3, -16(AX)(R8*1)
 12953
 12954memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
 12955	MOVQ DI, AX
 12956	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
 12957
 12958memmove_long_match_emit_encodeSnappyBlockAsm12B:
 12959	LEAQ (AX)(R8*1), DI
 12960
 12961	// genMemMoveLong
 12962	MOVOU (SI), X0
 12963	MOVOU 16(SI), X1
 12964	MOVOU -32(SI)(R8*1), X2
 12965	MOVOU -16(SI)(R8*1), X3
 12966	MOVQ  R8, R10
 12967	SHRQ  $0x05, R10
 12968	MOVQ  AX, R9
 12969	ANDL  $0x0000001f, R9
 12970	MOVQ  $0x00000040, R11
 12971	SUBQ  R9, R11
 12972	DECQ  R10
 12973	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 12974	LEAQ  -32(SI)(R11*1), R9
 12975	LEAQ  -32(AX)(R11*1), R12
 12976
 12977emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
 12978	MOVOU (R9), X4
 12979	MOVOU 16(R9), X5
 12980	MOVOA X4, (R12)
 12981	MOVOA X5, 16(R12)
 12982	ADDQ  $0x20, R12
 12983	ADDQ  $0x20, R9
 12984	ADDQ  $0x20, R11
 12985	DECQ  R10
 12986	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
 12987
 12988emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
 12989	MOVOU -32(SI)(R11*1), X4
 12990	MOVOU -16(SI)(R11*1), X5
 12991	MOVOA X4, -32(AX)(R11*1)
 12992	MOVOA X5, -16(AX)(R11*1)
 12993	ADDQ  $0x20, R11
 12994	CMPQ  R8, R11
 12995	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 12996	MOVOU X0, (AX)
 12997	MOVOU X1, 16(AX)
 12998	MOVOU X2, -32(AX)(R8*1)
 12999	MOVOU X3, -16(AX)(R8*1)
 13000	MOVQ  DI, AX
 13001
 13002emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
 13003match_nolit_loop_encodeSnappyBlockAsm12B:
 13004	MOVL CX, SI
 13005	SUBL BX, SI
 13006	MOVL SI, 16(SP)
 13007	ADDL $0x04, CX
 13008	ADDL $0x04, BX
 13009	MOVQ src_len+32(FP), SI
 13010	SUBL CX, SI
 13011	LEAQ (DX)(CX*1), DI
 13012	LEAQ (DX)(BX*1), BX
 13013
 13014	// matchLen
 13015	XORL R9, R9
 13016
 13017matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B:
 13018	CMPL SI, $0x10
 13019	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm12B
 13020	MOVQ (DI)(R9*1), R8
 13021	MOVQ 8(DI)(R9*1), R10
 13022	XORQ (BX)(R9*1), R8
 13023	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
 13024	XORQ 8(BX)(R9*1), R10
 13025	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B
 13026	LEAL -16(SI), SI
 13027	LEAL 16(R9), R9
 13028	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B
 13029
 13030matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B:
 13031#ifdef GOAMD64_v3
 13032	TZCNTQ R10, R10
 13033
 13034#else
 13035	BSFQ R10, R10
 13036
 13037#endif
 13038	SARQ $0x03, R10
 13039	LEAL 8(R9)(R10*1), R9
 13040	JMP  match_nolit_end_encodeSnappyBlockAsm12B
 13041
 13042matchlen_match8_match_nolit_encodeSnappyBlockAsm12B:
 13043	CMPL SI, $0x08
 13044	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
 13045	MOVQ (DI)(R9*1), R8
 13046	XORQ (BX)(R9*1), R8
 13047	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
 13048	LEAL -8(SI), SI
 13049	LEAL 8(R9), R9
 13050	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
 13051
 13052matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B:
 13053#ifdef GOAMD64_v3
 13054	TZCNTQ R8, R8
 13055
 13056#else
 13057	BSFQ R8, R8
 13058
 13059#endif
 13060	SARQ $0x03, R8
 13061	LEAL (R9)(R8*1), R9
 13062	JMP  match_nolit_end_encodeSnappyBlockAsm12B
 13063
 13064matchlen_match4_match_nolit_encodeSnappyBlockAsm12B:
 13065	CMPL SI, $0x04
 13066	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
 13067	MOVL (DI)(R9*1), R8
 13068	CMPL (BX)(R9*1), R8
 13069	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
 13070	LEAL -4(SI), SI
 13071	LEAL 4(R9), R9
 13072
 13073matchlen_match2_match_nolit_encodeSnappyBlockAsm12B:
 13074	CMPL SI, $0x01
 13075	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
 13076	JB   match_nolit_end_encodeSnappyBlockAsm12B
 13077	MOVW (DI)(R9*1), R8
 13078	CMPW (BX)(R9*1), R8
 13079	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
 13080	LEAL 2(R9), R9
 13081	SUBL $0x02, SI
 13082	JZ   match_nolit_end_encodeSnappyBlockAsm12B
 13083
 13084matchlen_match1_match_nolit_encodeSnappyBlockAsm12B:
 13085	MOVB (DI)(R9*1), R8
 13086	CMPB (BX)(R9*1), R8
 13087	JNE  match_nolit_end_encodeSnappyBlockAsm12B
 13088	LEAL 1(R9), R9
 13089
 13090match_nolit_end_encodeSnappyBlockAsm12B:
 13091	ADDL R9, CX
 13092	MOVL 16(SP), BX
 13093	ADDL $0x04, R9
 13094	MOVL CX, 12(SP)
 13095
 13096	// emitCopy
 13097two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
 13098	CMPL R9, $0x40
 13099	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
 13100	MOVB $0xee, (AX)
 13101	MOVW BX, 1(AX)
 13102	LEAL -60(R9), R9
 13103	ADDQ $0x03, AX
 13104	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
 13105
 13106two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
 13107	MOVL R9, SI
 13108	SHLL $0x02, SI
 13109	CMPL R9, $0x0c
 13110	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
 13111	CMPL BX, $0x00000800
 13112	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
 13113	LEAL -15(SI), SI
 13114	MOVB BL, 1(AX)
 13115	SHRL $0x08, BX
 13116	SHLL $0x05, BX
 13117	ORL  BX, SI
 13118	MOVB SI, (AX)
 13119	ADDQ $0x02, AX
 13120	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
 13121
 13122emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
 13123	LEAL -2(SI), SI
 13124	MOVB SI, (AX)
 13125	MOVW BX, 1(AX)
 13126	ADDQ $0x03, AX
 13127
 13128match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
 13129	CMPL CX, 8(SP)
 13130	JAE  emit_remainder_encodeSnappyBlockAsm12B
 13131	MOVQ -2(DX)(CX*1), SI
 13132	CMPQ AX, (SP)
 13133	JB   match_nolit_dst_ok_encodeSnappyBlockAsm12B
 13134	MOVQ $0x00000000, ret+48(FP)
 13135	RET
 13136
 13137match_nolit_dst_ok_encodeSnappyBlockAsm12B:
 13138	MOVQ  $0x000000cf1bbcdcbb, R8
 13139	MOVQ  SI, DI
 13140	SHRQ  $0x10, SI
 13141	MOVQ  SI, BX
 13142	SHLQ  $0x18, DI
 13143	IMULQ R8, DI
 13144	SHRQ  $0x34, DI
 13145	SHLQ  $0x18, BX
 13146	IMULQ R8, BX
 13147	SHRQ  $0x34, BX
 13148	LEAL  -2(CX), R8
 13149	LEAQ  24(SP)(BX*4), R9
 13150	MOVL  (R9), BX
 13151	MOVL  R8, 24(SP)(DI*4)
 13152	MOVL  CX, (R9)
 13153	CMPL  (DX)(BX*1), SI
 13154	JEQ   match_nolit_loop_encodeSnappyBlockAsm12B
 13155	INCL  CX
 13156	JMP   search_loop_encodeSnappyBlockAsm12B
 13157
 13158emit_remainder_encodeSnappyBlockAsm12B:
 13159	MOVQ src_len+32(FP), CX
 13160	SUBL 12(SP), CX
 13161	LEAQ 3(AX)(CX*1), CX
 13162	CMPQ CX, (SP)
 13163	JB   emit_remainder_ok_encodeSnappyBlockAsm12B
 13164	MOVQ $0x00000000, ret+48(FP)
 13165	RET
 13166
 13167emit_remainder_ok_encodeSnappyBlockAsm12B:
 13168	MOVQ src_len+32(FP), CX
 13169	MOVL 12(SP), BX
 13170	CMPL BX, CX
 13171	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
 13172	MOVL CX, SI
 13173	MOVL CX, 12(SP)
 13174	LEAQ (DX)(BX*1), CX
 13175	SUBL BX, SI
 13176	LEAL -1(SI), DX
 13177	CMPL DX, $0x3c
 13178	JB   one_byte_emit_remainder_encodeSnappyBlockAsm12B
 13179	CMPL DX, $0x00000100
 13180	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm12B
 13181	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm12B
 13182
 13183three_bytes_emit_remainder_encodeSnappyBlockAsm12B:
 13184	MOVB $0xf4, (AX)
 13185	MOVW DX, 1(AX)
 13186	ADDQ $0x03, AX
 13187	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
 13188
 13189two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
 13190	MOVB $0xf0, (AX)
 13191	MOVB DL, 1(AX)
 13192	ADDQ $0x02, AX
 13193	CMPL DX, $0x40
 13194	JB   memmove_emit_remainder_encodeSnappyBlockAsm12B
 13195	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
 13196
 13197one_byte_emit_remainder_encodeSnappyBlockAsm12B:
 13198	SHLB $0x02, DL
 13199	MOVB DL, (AX)
 13200	ADDQ $0x01, AX
 13201
 13202memmove_emit_remainder_encodeSnappyBlockAsm12B:
 13203	LEAQ (AX)(SI*1), DX
 13204	MOVL SI, BX
 13205
 13206	// genMemMoveShort
 13207	CMPQ BX, $0x03
 13208	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2
 13209	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3
 13210	CMPQ BX, $0x08
 13211	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7
 13212	CMPQ BX, $0x10
 13213	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16
 13214	CMPQ BX, $0x20
 13215	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
 13216	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
 13217
 13218emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
 13219	MOVB (CX), SI
 13220	MOVB -1(CX)(BX*1), CL
 13221	MOVB SI, (AX)
 13222	MOVB CL, -1(AX)(BX*1)
 13223	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 13224
 13225emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
 13226	MOVW (CX), SI
 13227	MOVB 2(CX), CL
 13228	MOVW SI, (AX)
 13229	MOVB CL, 2(AX)
 13230	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 13231
 13232emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7:
 13233	MOVL (CX), SI
 13234	MOVL -4(CX)(BX*1), CX
 13235	MOVL SI, (AX)
 13236	MOVL CX, -4(AX)(BX*1)
 13237	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 13238
 13239emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
 13240	MOVQ (CX), SI
 13241	MOVQ -8(CX)(BX*1), CX
 13242	MOVQ SI, (AX)
 13243	MOVQ CX, -8(AX)(BX*1)
 13244	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 13245
 13246emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
 13247	MOVOU (CX), X0
 13248	MOVOU -16(CX)(BX*1), X1
 13249	MOVOU X0, (AX)
 13250	MOVOU X1, -16(AX)(BX*1)
 13251	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 13252
 13253emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
 13254	MOVOU (CX), X0
 13255	MOVOU 16(CX), X1
 13256	MOVOU -32(CX)(BX*1), X2
 13257	MOVOU -16(CX)(BX*1), X3
 13258	MOVOU X0, (AX)
 13259	MOVOU X1, 16(AX)
 13260	MOVOU X2, -32(AX)(BX*1)
 13261	MOVOU X3, -16(AX)(BX*1)
 13262
 13263memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
 13264	MOVQ DX, AX
 13265	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
 13266
 13267memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
 13268	LEAQ (AX)(SI*1), DX
 13269	MOVL SI, BX
 13270
 13271	// genMemMoveLong
 13272	MOVOU (CX), X0
 13273	MOVOU 16(CX), X1
 13274	MOVOU -32(CX)(BX*1), X2
 13275	MOVOU -16(CX)(BX*1), X3
 13276	MOVQ  BX, DI
 13277	SHRQ  $0x05, DI
 13278	MOVQ  AX, SI
 13279	ANDL  $0x0000001f, SI
 13280	MOVQ  $0x00000040, R8
 13281	SUBQ  SI, R8
 13282	DECQ  DI
 13283	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 13284	LEAQ  -32(CX)(R8*1), SI
 13285	LEAQ  -32(AX)(R8*1), R9
 13286
 13287emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
 13288	MOVOU (SI), X4
 13289	MOVOU 16(SI), X5
 13290	MOVOA X4, (R9)
 13291	MOVOA X5, 16(R9)
 13292	ADDQ  $0x20, R9
 13293	ADDQ  $0x20, SI
 13294	ADDQ  $0x20, R8
 13295	DECQ  DI
 13296	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back
 13297
 13298emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
 13299	MOVOU -32(CX)(R8*1), X4
 13300	MOVOU -16(CX)(R8*1), X5
 13301	MOVOA X4, -32(AX)(R8*1)
 13302	MOVOA X5, -16(AX)(R8*1)
 13303	ADDQ  $0x20, R8
 13304	CMPQ  BX, R8
 13305	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 13306	MOVOU X0, (AX)
 13307	MOVOU X1, 16(AX)
 13308	MOVOU X2, -32(AX)(BX*1)
 13309	MOVOU X3, -16(AX)(BX*1)
 13310	MOVQ  DX, AX
 13311
 13312emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
 13313	MOVQ dst_base+0(FP), CX
 13314	SUBQ CX, AX
 13315	MOVQ AX, ret+48(FP)
 13316	RET
 13317
 13318// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
 13319// Requires: BMI, SSE2
      //
      // Encodes src into dst and stores the number of bytes written (final AX
      // minus dst_base) in ret. Every output-space check that fails stores 0 in
      // ret and returns immediately.
      //
      // Frame layout (4120 bytes):
      //   0(SP)   8 bytes     output limit: dst_base + (src_len - 9 - src_len/32)
      //   8(SP)   4 bytes     src_len - 8, the search limit for src positions
      //   12(SP)  4 bytes     src position of the first byte not yet emitted
      //   16(SP)  4 bytes     offset of the most recent match (starts at 1)
      //   20(SP)  4 bytes     position the search resumes at after a miss
      //   24(SP)  4096 bytes  1024-entry table of 32-bit src positions, indexed
      //                       by a 10-bit hash of 4 source bytes
      //
      // Long-lived registers: AX = dst write pointer, DX = src base,
      // CX = current src position. CX and DX are reused as scratch once
      // emit_remainder_ok is reached.
 13320TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
 13321	MOVQ dst_base+0(FP), AX // AX = dst write pointer
 13322	MOVQ $0x00000020, CX // 32 iterations x 128 bytes clear the 4096-byte table
 13323	LEAQ 24(SP), DX // DX = start of the table
 13324	PXOR X0, X0
 13325
 13326zero_loop_encodeSnappyBlockAsm10B:
 13327	MOVOU X0, (DX)
 13328	MOVOU X0, 16(DX)
 13329	MOVOU X0, 32(DX)
 13330	MOVOU X0, 48(DX)
 13331	MOVOU X0, 64(DX)
 13332	MOVOU X0, 80(DX)
 13333	MOVOU X0, 96(DX)
 13334	MOVOU X0, 112(DX)
 13335	ADDQ  $0x80, DX
 13336	DECQ  CX
 13337	JNZ   zero_loop_encodeSnappyBlockAsm10B
 13338	MOVL  $0x00000000, 12(SP) // nothing emitted yet
 13339	MOVQ  src_len+32(FP), CX
 13340	LEAQ  -9(CX), DX
 13341	LEAQ  -8(CX), BX
 13342	MOVL  BX, 8(SP) // search limit = src_len - 8
 13343	SHRQ  $0x05, CX
 13344	SUBL  CX, DX // DX = src_len - 9 - src_len/32 (32-bit subtract)
 13345	LEAQ  (AX)(DX*1), DX
 13346	MOVQ  DX, (SP) // output limit pointer
 13347	MOVL  $0x00000001, CX // searching starts at src position 1
 13348	MOVL  CX, 16(SP) // initial match offset = 1
 13349	MOVQ  src_base+24(FP), DX // DX = src base from here on
 13350
 13351search_loop_encodeSnappyBlockAsm10B:
 13352	MOVL  CX, BX
 13353	SUBL  12(SP), BX
 13354	SHRL  $0x05, BX // skip grows with the distance since the last emit
 13355	LEAL  4(CX)(BX*1), BX // BX = CX + 4 + (CX - 12(SP))/32
 13356	CMPL  BX, 8(SP)
 13357	JAE   emit_remainder_encodeSnappyBlockAsm10B
 13358	MOVQ  (DX)(CX*1), SI // SI = 8 source bytes at CX
 13359	MOVL  BX, 20(SP) // remember where to resume after a miss
 13360	MOVQ  $0x9e3779b1, R8 // hash multiplier
 13361	MOVQ  SI, R9
 13362	MOVQ  SI, R10
 13363	SHRQ  $0x08, R10 // R10 = bytes starting at CX+1
 13364	SHLQ  $0x20, R9 // keep only the low 4 bytes
 13365	IMULQ R8, R9
 13366	SHRQ  $0x36, R9 // top 10 bits -> table index for CX
 13367	SHLQ  $0x20, R10
 13368	IMULQ R8, R10
 13369	SHRQ  $0x36, R10 // table index for CX+1
 13370	MOVL  24(SP)(R9*4), BX // BX = candidate for CX
 13371	MOVL  24(SP)(R10*4), DI // DI = candidate for CX+1
 13372	MOVL  CX, 24(SP)(R9*4)
 13373	LEAL  1(CX), R9
 13374	MOVL  R9, 24(SP)(R10*4)
 13375	MOVQ  SI, R9
 13376	SHRQ  $0x10, R9
 13377	SHLQ  $0x20, R9
 13378	IMULQ R8, R9
 13379	SHRQ  $0x36, R9 // R9 = table index for CX+2 (used in no_repeat_found)
 13380	MOVL  CX, R8
 13381	SUBL  16(SP), R8
 13382	MOVL  1(DX)(R8*1), R10 // 4 bytes at CX + 1 - offset
 13383	MOVQ  SI, R8
 13384	SHRQ  $0x08, R8
 13385	CMPL  R8, R10 // do the 4 bytes at CX+1 repeat at the last offset?
 13386	JNE   no_repeat_found_encodeSnappyBlockAsm10B
 13387	LEAL  1(CX), SI // SI = start of the repeat match
 13388	MOVL  12(SP), BX // BX = lower bound for backward extension
 13389	MOVL  SI, DI
 13390	SUBL  16(SP), DI // DI = position the match copies from
 13391	JZ    repeat_extend_back_end_encodeSnappyBlockAsm10B
 13392
 13393repeat_extend_back_loop_encodeSnappyBlockAsm10B:
 13394	CMPL SI, BX
 13395	JBE  repeat_extend_back_end_encodeSnappyBlockAsm10B
 13396	MOVB -1(DX)(DI*1), R8
 13397	MOVB -1(DX)(SI*1), R9
 13398	CMPB R8, R9
 13399	JNE  repeat_extend_back_end_encodeSnappyBlockAsm10B
 13400	LEAL -1(SI), SI
 13401	DECL DI
 13402	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm10B
 13403
 13404repeat_extend_back_end_encodeSnappyBlockAsm10B:
 13405	MOVL SI, BX
 13406	SUBL 12(SP), BX
 13407	LEAQ 3(AX)(BX*1), BX // AX + pending literal length + 3
 13408	CMPQ BX, (SP)
 13409	JB   repeat_dst_size_check_encodeSnappyBlockAsm10B
 13410	MOVQ $0x00000000, ret+48(FP) // output limit reached: return 0
 13411	RET
 13412
 13413repeat_dst_size_check_encodeSnappyBlockAsm10B:
 13414	MOVL 12(SP), BX
 13415	CMPL BX, SI
 13416	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B // no pending literals
 13417	MOVL SI, DI
 13418	MOVL SI, 12(SP)
 13419	LEAQ (DX)(BX*1), R8 // R8 = pointer to the literal bytes
 13420	SUBL BX, DI // DI = literal length
 13421	LEAL -1(DI), BX // BX = length - 1
 13422	CMPL BX, $0x3c
 13423	JB   one_byte_repeat_emit_encodeSnappyBlockAsm10B
 13424	CMPL BX, $0x00000100
 13425	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm10B
 13426	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm10B // never taken after the JB above; execution falls through
 13427
 13428three_bytes_repeat_emit_encodeSnappyBlockAsm10B:
 13429	MOVB $0xf4, (AX) // header byte 0xf4 followed by 16-bit length-1
 13430	MOVW BX, 1(AX)
 13431	ADDQ $0x03, AX
 13432	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
 13433
 13434two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
 13435	MOVB $0xf0, (AX) // header byte 0xf0 followed by 8-bit length-1
 13436	MOVB BL, 1(AX)
 13437	ADDQ $0x02, AX
 13438	CMPL BX, $0x40
 13439	JB   memmove_repeat_emit_encodeSnappyBlockAsm10B
 13440	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
 13441
 13442one_byte_repeat_emit_encodeSnappyBlockAsm10B:
 13443	SHLB $0x02, BL // single header byte: (length-1) << 2
 13444	MOVB BL, (AX)
 13445	ADDQ $0x01, AX
 13446
 13447memmove_repeat_emit_encodeSnappyBlockAsm10B:
 13448	LEAQ (AX)(DI*1), BX // BX = dst pointer after the literal bytes
 13449
 13450	// genMemMoveShort: copy DI (at most 64) bytes from (R8) to (AX)
 13451	CMPQ DI, $0x08
 13452	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
 13453	CMPQ DI, $0x10
 13454	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
 13455	CMPQ DI, $0x20
 13456	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
 13457	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
 13458
 13459emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
 13460	MOVQ (R8), R9 // always moves 8 bytes, even when DI < 8; AX still advances by DI only
 13461	MOVQ R9, (AX)
 13462	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
 13463
 13464emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
 13465	MOVQ (R8), R9 // first 8 and last 8 bytes (may overlap)
 13466	MOVQ -8(R8)(DI*1), R8
 13467	MOVQ R9, (AX)
 13468	MOVQ R8, -8(AX)(DI*1)
 13469	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
 13470
 13471emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
 13472	MOVOU (R8), X0 // first 16 and last 16 bytes (may overlap)
 13473	MOVOU -16(R8)(DI*1), X1
 13474	MOVOU X0, (AX)
 13475	MOVOU X1, -16(AX)(DI*1)
 13476	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
 13477
 13478emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
 13479	MOVOU (R8), X0 // first 32 and last 32 bytes (may overlap)
 13480	MOVOU 16(R8), X1
 13481	MOVOU -32(R8)(DI*1), X2
 13482	MOVOU -16(R8)(DI*1), X3
 13483	MOVOU X0, (AX)
 13484	MOVOU X1, 16(AX)
 13485	MOVOU X2, -32(AX)(DI*1)
 13486	MOVOU X3, -16(AX)(DI*1)
 13487
 13488memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
 13489	MOVQ BX, AX
 13490	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
 13491
 13492memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
 13493	LEAQ (AX)(DI*1), BX // BX = dst pointer after the literal bytes
 13494
 13495	// genMemMoveLong: copy DI (at least 64) bytes from (R8) to (AX)
 13496	MOVOU (R8), X0 // X0..X3 = first 32 and last 32 bytes, stored after the loops
 13497	MOVOU 16(R8), X1
 13498	MOVOU -32(R8)(DI*1), X2
 13499	MOVOU -16(R8)(DI*1), X3
 13500	MOVQ  DI, R10
 13501	SHRQ  $0x05, R10 // R10 = length / 32
 13502	MOVQ  AX, R9
 13503	ANDL  $0x0000001f, R9
 13504	MOVQ  $0x00000040, R11
 13505	SUBQ  R9, R11 // R11 = 64 - (AX & 31), so AX + R11 - 32 is 32-byte aligned
 13506	DECQ  R10
 13507	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 // NOTE(review): DECQ leaves CF as set by SUBQ above, so this tests R10 != 0 - confirm intended
 13508	LEAQ  -32(R8)(R11*1), R9
 13509	LEAQ  -32(AX)(R11*1), R12
 13510
 13511emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
 13512	MOVOU (R9), X4
 13513	MOVOU 16(R9), X5
 13514	MOVOA X4, (R12) // aligned store: R12 = AX + R11 - 32
 13515	MOVOA X5, 16(R12)
 13516	ADDQ  $0x20, R12
 13517	ADDQ  $0x20, R9
 13518	ADDQ  $0x20, R11
 13519	DECQ  R10
 13520	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
 13521
 13522emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
 13523	MOVOU -32(R8)(R11*1), X4
 13524	MOVOU -16(R8)(R11*1), X5
 13525	MOVOA X4, -32(AX)(R11*1)
 13526	MOVOA X5, -16(AX)(R11*1)
 13527	ADDQ  $0x20, R11
 13528	CMPQ  DI, R11
 13529	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
 13530	MOVOU X0, (AX) // unaligned head and tail written last
 13531	MOVOU X1, 16(AX)
 13532	MOVOU X2, -32(AX)(DI*1)
 13533	MOVOU X3, -16(AX)(DI*1)
 13534	MOVQ  BX, AX
 13535
 13536emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
 13537	ADDL $0x05, CX // skip the 4 bytes already compared at CX+1
 13538	MOVL CX, BX
 13539	SUBL 16(SP), BX
 13540	MOVQ src_len+32(FP), DI
 13541	SUBL CX, DI // DI = bytes remaining in src
 13542	LEAQ (DX)(CX*1), R8 // R8 = pointer at current position
 13543	LEAQ (DX)(BX*1), BX // BX = pointer one match offset earlier
 13544
 13545	// matchLen: R10 = number of equal bytes at (R8) and (BX), at most DI
 13546	XORL R10, R10
 13547
 13548matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B:
 13549	CMPL DI, $0x10
 13550	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B
 13551	MOVQ (R8)(R10*1), R9
 13552	MOVQ 8(R8)(R10*1), R11
 13553	XORQ (BX)(R10*1), R9 // non-zero bits mark differing bytes
 13554	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
 13555	XORQ 8(BX)(R10*1), R11
 13556	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B
 13557	LEAL -16(DI), DI
 13558	LEAL 16(R10), R10
 13559	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B
 13560
 13561matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B:
 13562#ifdef GOAMD64_v3
 13563	TZCNTQ R11, R11
 13564
 13565#else
 13566	BSFQ R11, R11
 13567
 13568#endif
 13569	SARQ $0x03, R11 // bit index -> byte index of the first mismatch
 13570	LEAL 8(R10)(R11*1), R10
 13571	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm10B
 13572
 13573matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B:
 13574	CMPL DI, $0x08
 13575	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
 13576	MOVQ (R8)(R10*1), R9
 13577	XORQ (BX)(R10*1), R9
 13578	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
 13579	LEAL -8(DI), DI
 13580	LEAL 8(R10), R10
 13581	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
 13582
 13583matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B:
 13584#ifdef GOAMD64_v3
 13585	TZCNTQ R9, R9
 13586
 13587#else
 13588	BSFQ R9, R9
 13589
 13590#endif
 13591	SARQ $0x03, R9 // bit index -> byte index of the first mismatch
 13592	LEAL (R10)(R9*1), R10
 13593	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm10B
 13594
 13595matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B:
 13596	CMPL DI, $0x04
 13597	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
 13598	MOVL (R8)(R10*1), R9
 13599	CMPL (BX)(R10*1), R9
 13600	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
 13601	LEAL -4(DI), DI
 13602	LEAL 4(R10), R10
 13603
 13604matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B:
 13605	CMPL DI, $0x01
 13606	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
 13607	JB   repeat_extend_forward_end_encodeSnappyBlockAsm10B // DI == 0: nothing left to compare
 13608	MOVW (R8)(R10*1), R9
 13609	CMPW (BX)(R10*1), R9
 13610	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
 13611	LEAL 2(R10), R10
 13612	SUBL $0x02, DI
 13613	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm10B
 13614
 13615matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B:
 13616	MOVB (R8)(R10*1), R9
 13617	CMPB (BX)(R10*1), R9
 13618	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm10B
 13619	LEAL 1(R10), R10
 13620
 13621repeat_extend_forward_end_encodeSnappyBlockAsm10B:
 13622	ADDL R10, CX // CX = end of the match
 13623	MOVL CX, BX
 13624	SUBL SI, BX // BX = match length (SI = match start after back-extension)
 13625	MOVL 16(SP), SI // SI = match offset
 13626
 13627	// emitCopy: BX = length, SI = offset
 13628two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
 13629	CMPL BX, $0x40
 13630	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
 13631	MOVB $0xee, (AX) // 3-byte op: tag 0xee + 16-bit offset, accounting for 60 bytes
 13632	MOVW SI, 1(AX)
 13633	LEAL -60(BX), BX
 13634	ADDQ $0x03, AX
 13635	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
 13636
 13637two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
 13638	MOVL BX, DI
 13639	SHLL $0x02, DI // DI = length << 2
 13640	CMPL BX, $0x0c
 13641	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
 13642	CMPL SI, $0x00000800
 13643	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
 13644	LEAL -15(DI), DI // 2-byte op (length < 12, offset < 2048): (length<<2) - 15
 13645	MOVB SI, 1(AX) // low 8 bits of the offset
 13646	SHRL $0x08, SI
 13647	SHLL $0x05, SI // offset bits above the low 8 go into tag bits 5 and up
 13648	ORL  SI, DI
 13649	MOVB DI, (AX)
 13650	ADDQ $0x02, AX
 13651	JMP  repeat_end_emit_encodeSnappyBlockAsm10B
 13652
 13653emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
 13654	LEAL -2(DI), DI // 3-byte op: tag = (length<<2) - 2, then 16-bit offset
 13655	MOVB DI, (AX)
 13656	MOVW SI, 1(AX)
 13657	ADDQ $0x03, AX
 13658
 13659repeat_end_emit_encodeSnappyBlockAsm10B:
 13660	MOVL CX, 12(SP) // everything before CX is now emitted
 13661	JMP  search_loop_encodeSnappyBlockAsm10B
 13662
 13663no_repeat_found_encodeSnappyBlockAsm10B:
 13664	CMPL (DX)(BX*1), SI // candidate for CX: compare 4 bytes
 13665	JEQ  candidate_match_encodeSnappyBlockAsm10B
 13666	SHRQ $0x08, SI // SI now starts at CX+1
 13667	MOVL 24(SP)(R9*4), BX // BX = candidate for CX+2
 13668	LEAL 2(CX), R8
 13669	CMPL (DX)(DI*1), SI // candidate for CX+1
 13670	JEQ  candidate2_match_encodeSnappyBlockAsm10B
 13671	MOVL R8, 24(SP)(R9*4) // record CX+2 in the table
 13672	SHRQ $0x08, SI // SI now starts at CX+2
 13673	CMPL (DX)(BX*1), SI // candidate for CX+2
 13674	JEQ  candidate3_match_encodeSnappyBlockAsm10B
 13675	MOVL 20(SP), CX // miss: resume at the position saved in search_loop
 13676	JMP  search_loop_encodeSnappyBlockAsm10B
 13677
 13678candidate3_match_encodeSnappyBlockAsm10B:
 13679	ADDL $0x02, CX // match begins at CX+2
 13680	JMP  candidate_match_encodeSnappyBlockAsm10B
 13681
 13682candidate2_match_encodeSnappyBlockAsm10B:
 13683	MOVL R8, 24(SP)(R9*4) // record CX+2 in the table
 13684	INCL CX // match begins at CX+1
 13685	MOVL DI, BX // BX = matching candidate position
 13686
 13687candidate_match_encodeSnappyBlockAsm10B:
 13688	MOVL  12(SP), SI // SI = lower bound for backward extension
 13689	TESTL BX, BX
 13690	JZ    match_extend_back_end_encodeSnappyBlockAsm10B
 13691
 13692match_extend_back_loop_encodeSnappyBlockAsm10B:
 13693	CMPL CX, SI
 13694	JBE  match_extend_back_end_encodeSnappyBlockAsm10B
 13695	MOVB -1(DX)(BX*1), DI
 13696	MOVB -1(DX)(CX*1), R8
 13697	CMPB DI, R8
 13698	JNE  match_extend_back_end_encodeSnappyBlockAsm10B
 13699	LEAL -1(CX), CX
 13700	DECL BX
 13701	JZ   match_extend_back_end_encodeSnappyBlockAsm10B
 13702	JMP  match_extend_back_loop_encodeSnappyBlockAsm10B
 13703
 13704match_extend_back_end_encodeSnappyBlockAsm10B:
 13705	MOVL CX, SI
 13706	SUBL 12(SP), SI
 13707	LEAQ 3(AX)(SI*1), SI // AX + pending literal length + 3
 13708	CMPQ SI, (SP)
 13709	JB   match_dst_size_check_encodeSnappyBlockAsm10B
 13710	MOVQ $0x00000000, ret+48(FP) // output limit reached: return 0
 13711	RET
 13712
 13713match_dst_size_check_encodeSnappyBlockAsm10B:
 13714	MOVL CX, SI
 13715	MOVL 12(SP), DI
 13716	CMPL DI, SI
 13717	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm10B // no pending literals
 13718	MOVL SI, R8
 13719	MOVL SI, 12(SP)
 13720	LEAQ (DX)(DI*1), SI // SI = pointer to the literal bytes
 13721	SUBL DI, R8 // R8 = literal length
 13722	LEAL -1(R8), DI // DI = length - 1
 13723	CMPL DI, $0x3c
 13724	JB   one_byte_match_emit_encodeSnappyBlockAsm10B
 13725	CMPL DI, $0x00000100
 13726	JB   two_bytes_match_emit_encodeSnappyBlockAsm10B
 13727	JB   three_bytes_match_emit_encodeSnappyBlockAsm10B // never taken after the JB above; execution falls through
 13728
 13729three_bytes_match_emit_encodeSnappyBlockAsm10B:
 13730	MOVB $0xf4, (AX) // header byte 0xf4 followed by 16-bit length-1
 13731	MOVW DI, 1(AX)
 13732	ADDQ $0x03, AX
 13733	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
 13734
 13735two_bytes_match_emit_encodeSnappyBlockAsm10B:
 13736	MOVB $0xf0, (AX) // header byte 0xf0 followed by 8-bit length-1
 13737	MOVB DI, 1(AX)
 13738	ADDQ $0x02, AX
 13739	CMPL DI, $0x40
 13740	JB   memmove_match_emit_encodeSnappyBlockAsm10B
 13741	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
 13742
 13743one_byte_match_emit_encodeSnappyBlockAsm10B:
 13744	SHLB $0x02, DI // single header byte: (length-1) << 2
 13745	MOVB DI, (AX)
 13746	ADDQ $0x01, AX
 13747
 13748memmove_match_emit_encodeSnappyBlockAsm10B:
 13749	LEAQ (AX)(R8*1), DI // DI = dst pointer after the literal bytes
 13750
 13751	// genMemMoveShort: copy R8 (at most 64) bytes from (SI) to (AX)
 13752	CMPQ R8, $0x08
 13753	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
 13754	CMPQ R8, $0x10
 13755	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
 13756	CMPQ R8, $0x20
 13757	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
 13758	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
 13759
 13760emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
 13761	MOVQ (SI), R9 // always moves 8 bytes, even when R8 < 8; AX still advances by R8 only
 13762	MOVQ R9, (AX)
 13763	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
 13764
 13765emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
 13766	MOVQ (SI), R9 // first 8 and last 8 bytes (may overlap)
 13767	MOVQ -8(SI)(R8*1), SI
 13768	MOVQ R9, (AX)
 13769	MOVQ SI, -8(AX)(R8*1)
 13770	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
 13771
 13772emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
 13773	MOVOU (SI), X0 // first 16 and last 16 bytes (may overlap)
 13774	MOVOU -16(SI)(R8*1), X1
 13775	MOVOU X0, (AX)
 13776	MOVOU X1, -16(AX)(R8*1)
 13777	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
 13778
 13779emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
 13780	MOVOU (SI), X0 // first 32 and last 32 bytes (may overlap)
 13781	MOVOU 16(SI), X1
 13782	MOVOU -32(SI)(R8*1), X2
 13783	MOVOU -16(SI)(R8*1), X3
 13784	MOVOU X0, (AX)
 13785	MOVOU X1, 16(AX)
 13786	MOVOU X2, -32(AX)(R8*1)
 13787	MOVOU X3, -16(AX)(R8*1)
 13788
 13789memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
 13790	MOVQ DI, AX
 13791	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
 13792
 13793memmove_long_match_emit_encodeSnappyBlockAsm10B:
 13794	LEAQ (AX)(R8*1), DI // DI = dst pointer after the literal bytes
 13795
 13796	// genMemMoveLong: copy R8 (at least 64) bytes from (SI) to (AX)
 13797	MOVOU (SI), X0 // X0..X3 = first 32 and last 32 bytes, stored after the loops
 13798	MOVOU 16(SI), X1
 13799	MOVOU -32(SI)(R8*1), X2
 13800	MOVOU -16(SI)(R8*1), X3
 13801	MOVQ  R8, R10
 13802	SHRQ  $0x05, R10 // R10 = length / 32
 13803	MOVQ  AX, R9
 13804	ANDL  $0x0000001f, R9
 13805	MOVQ  $0x00000040, R11
 13806	SUBQ  R9, R11 // R11 = 64 - (AX & 31), so AX + R11 - 32 is 32-byte aligned
 13807	DECQ  R10
 13808	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 // NOTE(review): DECQ leaves CF as set by SUBQ above, so this tests R10 != 0 - confirm intended
 13809	LEAQ  -32(SI)(R11*1), R9
 13810	LEAQ  -32(AX)(R11*1), R12
 13811
 13812emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
 13813	MOVOU (R9), X4
 13814	MOVOU 16(R9), X5
 13815	MOVOA X4, (R12) // aligned store: R12 = AX + R11 - 32
 13816	MOVOA X5, 16(R12)
 13817	ADDQ  $0x20, R12
 13818	ADDQ  $0x20, R9
 13819	ADDQ  $0x20, R11
 13820	DECQ  R10
 13821	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
 13822
 13823emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
 13824	MOVOU -32(SI)(R11*1), X4
 13825	MOVOU -16(SI)(R11*1), X5
 13826	MOVOA X4, -32(AX)(R11*1)
 13827	MOVOA X5, -16(AX)(R11*1)
 13828	ADDQ  $0x20, R11
 13829	CMPQ  R8, R11
 13830	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
 13831	MOVOU X0, (AX) // unaligned head and tail written last
 13832	MOVOU X1, 16(AX)
 13833	MOVOU X2, -32(AX)(R8*1)
 13834	MOVOU X3, -16(AX)(R8*1)
 13835	MOVQ  DI, AX
 13836
 13837emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
 13838match_nolit_loop_encodeSnappyBlockAsm10B:
 13839	MOVL CX, SI
 13840	SUBL BX, SI
 13841	MOVL SI, 16(SP) // new match offset = CX - candidate position
 13842	ADDL $0x04, CX // the first 4 bytes are already known to match
 13843	ADDL $0x04, BX
 13844	MOVQ src_len+32(FP), SI
 13845	SUBL CX, SI // SI = bytes remaining in src
 13846	LEAQ (DX)(CX*1), DI // DI = pointer at current position
 13847	LEAQ (DX)(BX*1), BX // BX = pointer into the candidate
 13848
 13849	// matchLen: R9 = number of equal bytes at (DI) and (BX), at most SI
 13850	XORL R9, R9
 13851
 13852matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B:
 13853	CMPL SI, $0x10
 13854	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm10B
 13855	MOVQ (DI)(R9*1), R8
 13856	MOVQ 8(DI)(R9*1), R10
 13857	XORQ (BX)(R9*1), R8 // non-zero bits mark differing bytes
 13858	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
 13859	XORQ 8(BX)(R9*1), R10
 13860	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B
 13861	LEAL -16(SI), SI
 13862	LEAL 16(R9), R9
 13863	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B
 13864
 13865matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B:
 13866#ifdef GOAMD64_v3
 13867	TZCNTQ R10, R10
 13868
 13869#else
 13870	BSFQ R10, R10
 13871
 13872#endif
 13873	SARQ $0x03, R10 // bit index -> byte index of the first mismatch
 13874	LEAL 8(R9)(R10*1), R9
 13875	JMP  match_nolit_end_encodeSnappyBlockAsm10B
 13876
 13877matchlen_match8_match_nolit_encodeSnappyBlockAsm10B:
 13878	CMPL SI, $0x08
 13879	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
 13880	MOVQ (DI)(R9*1), R8
 13881	XORQ (BX)(R9*1), R8
 13882	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
 13883	LEAL -8(SI), SI
 13884	LEAL 8(R9), R9
 13885	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
 13886
 13887matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B:
 13888#ifdef GOAMD64_v3
 13889	TZCNTQ R8, R8
 13890
 13891#else
 13892	BSFQ R8, R8
 13893
 13894#endif
 13895	SARQ $0x03, R8 // bit index -> byte index of the first mismatch
 13896	LEAL (R9)(R8*1), R9
 13897	JMP  match_nolit_end_encodeSnappyBlockAsm10B
 13898
 13899matchlen_match4_match_nolit_encodeSnappyBlockAsm10B:
 13900	CMPL SI, $0x04
 13901	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
 13902	MOVL (DI)(R9*1), R8
 13903	CMPL (BX)(R9*1), R8
 13904	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
 13905	LEAL -4(SI), SI
 13906	LEAL 4(R9), R9
 13907
 13908matchlen_match2_match_nolit_encodeSnappyBlockAsm10B:
 13909	CMPL SI, $0x01
 13910	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
 13911	JB   match_nolit_end_encodeSnappyBlockAsm10B // SI == 0: nothing left to compare
 13912	MOVW (DI)(R9*1), R8
 13913	CMPW (BX)(R9*1), R8
 13914	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
 13915	LEAL 2(R9), R9
 13916	SUBL $0x02, SI
 13917	JZ   match_nolit_end_encodeSnappyBlockAsm10B
 13918
 13919matchlen_match1_match_nolit_encodeSnappyBlockAsm10B:
 13920	MOVB (DI)(R9*1), R8
 13921	CMPB (BX)(R9*1), R8
 13922	JNE  match_nolit_end_encodeSnappyBlockAsm10B
 13923	LEAL 1(R9), R9
 13924
 13925match_nolit_end_encodeSnappyBlockAsm10B:
 13926	ADDL R9, CX // CX = end of the match
 13927	MOVL 16(SP), BX // BX = match offset
 13928	ADDL $0x04, R9 // R9 = total match length including the first 4 bytes
 13929	MOVL CX, 12(SP) // everything before CX counts as emitted
 13930
 13931	// emitCopy: R9 = length, BX = offset
 13932two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
 13933	CMPL R9, $0x40
 13934	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
 13935	MOVB $0xee, (AX) // 3-byte op: tag 0xee + 16-bit offset, accounting for 60 bytes
 13936	MOVW BX, 1(AX)
 13937	LEAL -60(R9), R9
 13938	ADDQ $0x03, AX
 13939	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
 13940
 13941two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
 13942	MOVL R9, SI
 13943	SHLL $0x02, SI // SI = length << 2
 13944	CMPL R9, $0x0c
 13945	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
 13946	CMPL BX, $0x00000800
 13947	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
 13948	LEAL -15(SI), SI // 2-byte op (length < 12, offset < 2048): (length<<2) - 15
 13949	MOVB BL, 1(AX) // low 8 bits of the offset
 13950	SHRL $0x08, BX
 13951	SHLL $0x05, BX // offset bits above the low 8 go into tag bits 5 and up
 13952	ORL  BX, SI
 13953	MOVB SI, (AX)
 13954	ADDQ $0x02, AX
 13955	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
 13956
 13957emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
 13958	LEAL -2(SI), SI // 3-byte op: tag = (length<<2) - 2, then 16-bit offset
 13959	MOVB SI, (AX)
 13960	MOVW BX, 1(AX)
 13961	ADDQ $0x03, AX
 13962
 13963match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
 13964	CMPL CX, 8(SP)
 13965	JAE  emit_remainder_encodeSnappyBlockAsm10B // past the search limit
 13966	MOVQ -2(DX)(CX*1), SI // SI = 8 source bytes starting at CX-2
 13967	CMPQ AX, (SP)
 13968	JB   match_nolit_dst_ok_encodeSnappyBlockAsm10B
 13969	MOVQ $0x00000000, ret+48(FP) // output limit reached: return 0
 13970	RET
 13971
 13972match_nolit_dst_ok_encodeSnappyBlockAsm10B:
 13973	MOVQ  $0x9e3779b1, R8 // hash multiplier
 13974	MOVQ  SI, DI
 13975	SHRQ  $0x10, SI // SI = bytes starting at CX
 13976	MOVQ  SI, BX
 13977	SHLQ  $0x20, DI
 13978	IMULQ R8, DI
 13979	SHRQ  $0x36, DI // DI = table index for CX-2
 13980	SHLQ  $0x20, BX
 13981	IMULQ R8, BX
 13982	SHRQ  $0x36, BX // BX = table index for CX
 13983	LEAL  -2(CX), R8
 13984	LEAQ  24(SP)(BX*4), R9 // R9 = address of the table slot for CX
 13985	MOVL  (R9), BX // BX = candidate for CX
 13986	MOVL  R8, 24(SP)(DI*4) // record CX-2
 13987	MOVL  CX, (R9) // record CX
 13988	CMPL  (DX)(BX*1), SI // candidate matches 4 bytes at CX: extend again without literals
 13989	JEQ   match_nolit_loop_encodeSnappyBlockAsm10B
 13990	INCL  CX
 13991	JMP   search_loop_encodeSnappyBlockAsm10B
 13992
 13993emit_remainder_encodeSnappyBlockAsm10B:
 13994	MOVQ src_len+32(FP), CX
 13995	SUBL 12(SP), CX // CX = number of bytes not yet emitted
 13996	LEAQ 3(AX)(CX*1), CX
 13997	CMPQ CX, (SP)
 13998	JB   emit_remainder_ok_encodeSnappyBlockAsm10B
 13999	MOVQ $0x00000000, ret+48(FP) // output limit reached: return 0
 14000	RET
 14001
 14002emit_remainder_ok_encodeSnappyBlockAsm10B:
 14003	MOVQ src_len+32(FP), CX
 14004	MOVL 12(SP), BX
 14005	CMPL BX, CX
 14006	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B // nothing left to emit
 14007	MOVL CX, SI
 14008	MOVL CX, 12(SP)
 14009	LEAQ (DX)(BX*1), CX // CX = pointer to the remaining bytes
 14010	SUBL BX, SI // SI = remaining length
 14011	LEAL -1(SI), DX // DX = length - 1 (the src base in DX is not read again)
 14012	CMPL DX, $0x3c
 14013	JB   one_byte_emit_remainder_encodeSnappyBlockAsm10B
 14014	CMPL DX, $0x00000100
 14015	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm10B
 14016	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm10B // never taken after the JB above; execution falls through
 14017
 14018three_bytes_emit_remainder_encodeSnappyBlockAsm10B:
 14019	MOVB $0xf4, (AX) // header byte 0xf4 followed by 16-bit length-1
 14020	MOVW DX, 1(AX)
 14021	ADDQ $0x03, AX
 14022	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
 14023
 14024two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
 14025	MOVB $0xf0, (AX) // header byte 0xf0 followed by 8-bit length-1
 14026	MOVB DL, 1(AX)
 14027	ADDQ $0x02, AX
 14028	CMPL DX, $0x40
 14029	JB   memmove_emit_remainder_encodeSnappyBlockAsm10B
 14030	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
 14031
 14032one_byte_emit_remainder_encodeSnappyBlockAsm10B:
 14033	SHLB $0x02, DL // single header byte: (length-1) << 2
 14034	MOVB DL, (AX)
 14035	ADDQ $0x01, AX
 14036
 14037memmove_emit_remainder_encodeSnappyBlockAsm10B:
 14038	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literal bytes
 14039	MOVL SI, BX // BX = length
 14040
 14041	// genMemMoveShort: copy exactly BX (at most 64) bytes from (CX) to (AX)
 14042	CMPQ BX, $0x03
 14043	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2
 14044	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3
 14045	CMPQ BX, $0x08
 14046	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7
 14047	CMPQ BX, $0x10
 14048	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
 14049	CMPQ BX, $0x20
 14050	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
 14051	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
 14052
 14053emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
 14054	MOVB (CX), SI // first and last byte (the same byte when BX == 1)
 14055	MOVB -1(CX)(BX*1), CL
 14056	MOVB SI, (AX)
 14057	MOVB CL, -1(AX)(BX*1)
 14058	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 14059
 14060emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
 14061	MOVW (CX), SI
 14062	MOVB 2(CX), CL
 14063	MOVW SI, (AX)
 14064	MOVB CL, 2(AX)
 14065	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 14066
 14067emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
 14068	MOVL (CX), SI // first 4 and last 4 bytes (may overlap)
 14069	MOVL -4(CX)(BX*1), CX
 14070	MOVL SI, (AX)
 14071	MOVL CX, -4(AX)(BX*1)
 14072	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 14073
 14074emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
 14075	MOVQ (CX), SI // first 8 and last 8 bytes (may overlap)
 14076	MOVQ -8(CX)(BX*1), CX
 14077	MOVQ SI, (AX)
 14078	MOVQ CX, -8(AX)(BX*1)
 14079	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 14080
 14081emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
 14082	MOVOU (CX), X0 // first 16 and last 16 bytes (may overlap)
 14083	MOVOU -16(CX)(BX*1), X1
 14084	MOVOU X0, (AX)
 14085	MOVOU X1, -16(AX)(BX*1)
 14086	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 14087
 14088emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
 14089	MOVOU (CX), X0 // first 32 and last 32 bytes (may overlap)
 14090	MOVOU 16(CX), X1
 14091	MOVOU -32(CX)(BX*1), X2
 14092	MOVOU -16(CX)(BX*1), X3
 14093	MOVOU X0, (AX)
 14094	MOVOU X1, 16(AX)
 14095	MOVOU X2, -32(AX)(BX*1)
 14096	MOVOU X3, -16(AX)(BX*1)
 14097
 14098memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
 14099	MOVQ DX, AX
 14100	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
 14101
 14102memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
 14103	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literal bytes
 14104	MOVL SI, BX // BX = length
 14105
 14106	// genMemMoveLong: copy BX (at least 64) bytes from (CX) to (AX)
 14107	MOVOU (CX), X0 // X0..X3 = first 32 and last 32 bytes, stored after the loops
 14108	MOVOU 16(CX), X1
 14109	MOVOU -32(CX)(BX*1), X2
 14110	MOVOU -16(CX)(BX*1), X3
 14111	MOVQ  BX, DI
 14112	SHRQ  $0x05, DI // DI = length / 32
 14113	MOVQ  AX, SI
 14114	ANDL  $0x0000001f, SI
 14115	MOVQ  $0x00000040, R8
 14116	SUBQ  SI, R8 // R8 = 64 - (AX & 31), so AX + R8 - 32 is 32-byte aligned
 14117	DECQ  DI
 14118	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 // NOTE(review): DECQ leaves CF as set by SUBQ above, so this tests DI != 0 - confirm intended
 14119	LEAQ  -32(CX)(R8*1), SI
 14120	LEAQ  -32(AX)(R8*1), R9
 14121
 14122emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
 14123	MOVOU (SI), X4
 14124	MOVOU 16(SI), X5
 14125	MOVOA X4, (R9) // aligned store: R9 = AX + R8 - 32
 14126	MOVOA X5, 16(R9)
 14127	ADDQ  $0x20, R9
 14128	ADDQ  $0x20, SI
 14129	ADDQ  $0x20, R8
 14130	DECQ  DI
 14131	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back
 14132
 14133emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
 14134	MOVOU -32(CX)(R8*1), X4
 14135	MOVOU -16(CX)(R8*1), X5
 14136	MOVOA X4, -32(AX)(R8*1)
 14137	MOVOA X5, -16(AX)(R8*1)
 14138	ADDQ  $0x20, R8
 14139	CMPQ  BX, R8
 14140	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
 14141	MOVOU X0, (AX) // unaligned head and tail written last
 14142	MOVOU X1, 16(AX)
 14143	MOVOU X2, -32(AX)(BX*1)
 14144	MOVOU X3, -16(AX)(BX*1)
 14145	MOVQ  DX, AX
 14146
 14147emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
 14148	MOVQ dst_base+0(FP), CX
 14149	SUBQ CX, AX // bytes written = write pointer - dst_base
 14150	MOVQ AX, ret+48(FP)
 14151	RET
 14152
 14153// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
 14154// Requires: BMI, SSE2
      //
      // Encodes src into dst and stores the number of bytes written (final dst
      // pointer minus dst_base) in ret. There are four dst-limit checks; when one
      // fails, 0 is stored in ret and the function returns immediately.
      //
      // Frame layout ($1048):
      //   0(SP)   8 bytes     dst limit = dst_base + (src_len - 9 - src_len>>5)
      //   8(SP)   4 bytes     search limit = src_len - 8
      //   12(SP)  4 bytes     src index of the first byte not yet emitted
      //   16(SP)  4 bytes     offset of the most recent match (starts at 1)
      //   20(SP)  4 bytes     src index to resume at when no candidate matches
      //   24(SP)  1024 bytes  hash table, 256 entries of 4-byte src indexes
      //
      // Main-loop registers: AX = dst write pointer, DX = src base, CX = current
      // src index. Hash used throughout: ((x << 32) * 0x9e3779b1) >> 56, i.e. an
      // 8-bit table index derived from the low 4 bytes of x.
      //
      // NOTE(review): the tag bytes written below (0xf0, 0xf4, 0xee, ...) presumably
      // follow the Snappy block format - confirm against the format description.
 14155TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
 14156	MOVQ dst_base+0(FP), AX
      	// Zero the hash table: 8 iterations x 128 bytes = 1024 bytes from 24(SP).
 14157	MOVQ $0x00000008, CX
 14158	LEAQ 24(SP), DX
 14159	PXOR X0, X0
 14160
 14161zero_loop_encodeSnappyBlockAsm8B:
 14162	MOVOU X0, (DX)
 14163	MOVOU X0, 16(DX)
 14164	MOVOU X0, 32(DX)
 14165	MOVOU X0, 48(DX)
 14166	MOVOU X0, 64(DX)
 14167	MOVOU X0, 80(DX)
 14168	MOVOU X0, 96(DX)
 14169	MOVOU X0, 112(DX)
 14170	ADDQ  $0x80, DX
 14171	DECQ  CX
 14172	JNZ   zero_loop_encodeSnappyBlockAsm8B
      	// Initialise the scalar slots described in the header comment.
 14173	MOVL  $0x00000000, 12(SP)
 14174	MOVQ  src_len+32(FP), CX
 14175	LEAQ  -9(CX), DX
 14176	LEAQ  -8(CX), BX
 14177	MOVL  BX, 8(SP)
 14178	SHRQ  $0x05, CX
 14179	SUBL  CX, DX
 14180	LEAQ  (AX)(DX*1), DX
 14181	MOVQ  DX, (SP)
      	// Start searching at src index 1 with a match offset of 1.
 14182	MOVL  $0x00000001, CX
 14183	MOVL  CX, 16(SP)
 14184	MOVQ  src_base+24(FP), DX
 14185
      // Top of the search loop. The next position to try is
      // CX + 4 + ((CX - 12(SP)) >> 4); it is saved in 20(SP). When it reaches the
      // search limit in 8(SP) the rest of src is emitted as a literal.
 14186search_loop_encodeSnappyBlockAsm8B:
 14187	MOVL  CX, BX
 14188	SUBL  12(SP), BX
 14189	SHRL  $0x04, BX
 14190	LEAL  4(CX)(BX*1), BX
 14191	CMPL  BX, 8(SP)
 14192	JAE   emit_remainder_encodeSnappyBlockAsm8B
      	// SI = 8 bytes of src at CX. R9 = hash(SI), R10 = hash(SI >> 8).
 14193	MOVQ  (DX)(CX*1), SI
 14194	MOVL  BX, 20(SP)
 14195	MOVQ  $0x9e3779b1, R8
 14196	MOVQ  SI, R9
 14197	MOVQ  SI, R10
 14198	SHRQ  $0x08, R10
 14199	SHLQ  $0x20, R9
 14200	IMULQ R8, R9
 14201	SHRQ  $0x38, R9
 14202	SHLQ  $0x20, R10
 14203	IMULQ R8, R10
 14204	SHRQ  $0x38, R10
      	// Load the previous table entries (BX, DI) as candidates, then record
      	// CX and CX+1 in their place.
 14205	MOVL  24(SP)(R9*4), BX
 14206	MOVL  24(SP)(R10*4), DI
 14207	MOVL  CX, 24(SP)(R9*4)
 14208	LEAL  1(CX), R9
 14209	MOVL  R9, 24(SP)(R10*4)
      	// R9 = hash(SI >> 16); it is used by no_repeat_found below.
 14210	MOVQ  SI, R9
 14211	SHRQ  $0x10, R9
 14212	SHLQ  $0x20, R9
 14213	IMULQ R8, R9
 14214	SHRQ  $0x38, R9
      	// Repeat check: compare the 4 bytes at CX+1 (SI >> 8) with the 4 bytes
      	// at CX+1-16(SP).
 14215	MOVL  CX, R8
 14216	SUBL  16(SP), R8
 14217	MOVL  1(DX)(R8*1), R10
 14218	MOVQ  SI, R8
 14219	SHRQ  $0x08, R8
 14220	CMPL  R8, R10
 14221	JNE   no_repeat_found_encodeSnappyBlockAsm8B
      	// Repeat found at SI = CX+1. DI = SI - offset is the matching earlier
      	// position; BX = 12(SP) bounds the backward extension.
 14222	LEAL  1(CX), SI
 14223	MOVL  12(SP), BX
 14224	MOVL  SI, DI
 14225	SUBL  16(SP), DI
 14226	JZ    repeat_extend_back_end_encodeSnappyBlockAsm8B
 14227
      // Step SI and DI backwards while SI > BX, the preceding bytes are equal
      // and DI has not reached 0.
 14228repeat_extend_back_loop_encodeSnappyBlockAsm8B:
 14229	CMPL SI, BX
 14230	JBE  repeat_extend_back_end_encodeSnappyBlockAsm8B
 14231	MOVB -1(DX)(DI*1), R8
 14232	MOVB -1(DX)(SI*1), R9
 14233	CMPB R8, R9
 14234	JNE  repeat_extend_back_end_encodeSnappyBlockAsm8B
 14235	LEAL -1(SI), SI
 14236	DECL DI
 14237	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm8B
 14238
      // dst check: AX + (SI - 12(SP)) + 3 must be below the limit in (SP),
      // otherwise store 0 in ret and return.
 14239repeat_extend_back_end_encodeSnappyBlockAsm8B:
 14240	MOVL SI, BX
 14241	SUBL 12(SP), BX
 14242	LEAQ 3(AX)(BX*1), BX
 14243	CMPQ BX, (SP)
 14244	JB   repeat_dst_size_check_encodeSnappyBlockAsm8B
 14245	MOVQ $0x00000000, ret+48(FP)
 14246	RET
 14247
      // Emit src[12(SP):SI] as a literal (skipped when the range is empty).
      // R8 = source pointer, DI = length, BX = length - 1; 12(SP) is set to SI.
 14248repeat_dst_size_check_encodeSnappyBlockAsm8B:
 14249	MOVL 12(SP), BX
 14250	CMPL BX, SI
 14251	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
 14252	MOVL SI, DI
 14253	MOVL SI, 12(SP)
 14254	LEAQ (DX)(BX*1), R8
 14255	SUBL BX, DI
 14256	LEAL -1(DI), BX
      	// Header size depends on length-1: < 60 one byte, < 256 two bytes,
      	// otherwise three bytes.
 14257	CMPL BX, $0x3c
 14258	JB   one_byte_repeat_emit_encodeSnappyBlockAsm8B
 14259	CMPL BX, $0x00000100
 14260	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm8B
      	// Flags are unchanged since the JB above, so this JB is never taken;
      	// the three-byte case is reached by fall-through.
 14261	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm8B
 14262
      // Three-byte header: 0xf4, then length-1 as a 16-bit store.
 14263three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
 14264	MOVB $0xf4, (AX)
 14265	MOVW BX, 1(AX)
 14266	ADDQ $0x03, AX
 14267	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
 14268
      // Two-byte header: 0xf0, then length-1 as one byte. Lengths with
      // length-1 >= 64 use the long copy.
 14269two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
 14270	MOVB $0xf0, (AX)
 14271	MOVB BL, 1(AX)
 14272	ADDQ $0x02, AX
 14273	CMPL BX, $0x40
 14274	JB   memmove_repeat_emit_encodeSnappyBlockAsm8B
 14275	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
 14276
      // One-byte header: (length-1) << 2.
 14277one_byte_repeat_emit_encodeSnappyBlockAsm8B:
 14278	SHLB $0x02, BL
 14279	MOVB BL, (AX)
 14280	ADDQ $0x01, AX
 14281
      // Short copy of DI bytes from (R8) to (AX). BX = AX + DI becomes the new
      // write pointer afterwards.
 14282memmove_repeat_emit_encodeSnappyBlockAsm8B:
 14283	LEAQ (AX)(DI*1), BX
 14284
 14285	// genMemMoveShort
 14286	CMPQ DI, $0x08
 14287	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
 14288	CMPQ DI, $0x10
 14289	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
 14290	CMPQ DI, $0x20
 14291	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
 14292	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
 14293
      // Lengths <= 8: always moves a full 8 bytes, even when DI is smaller;
      // AX still advances by DI only.
 14294emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
 14295	MOVQ (R8), R9
 14296	MOVQ R9, (AX)
 14297	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
 14298
      // Lengths 9..16: first and last 8 bytes, overlapping in the middle.
 14299emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
 14300	MOVQ (R8), R9
 14301	MOVQ -8(R8)(DI*1), R8
 14302	MOVQ R9, (AX)
 14303	MOVQ R8, -8(AX)(DI*1)
 14304	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
 14305
      // Lengths 17..32: first and last 16 bytes, overlapping in the middle.
 14306emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
 14307	MOVOU (R8), X0
 14308	MOVOU -16(R8)(DI*1), X1
 14309	MOVOU X0, (AX)
 14310	MOVOU X1, -16(AX)(DI*1)
 14311	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
 14312
      // Remaining short lengths: first 32 and last 32 bytes.
 14313emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
 14314	MOVOU (R8), X0
 14315	MOVOU 16(R8), X1
 14316	MOVOU -32(R8)(DI*1), X2
 14317	MOVOU -16(R8)(DI*1), X3
 14318	MOVOU X0, (AX)
 14319	MOVOU X1, 16(AX)
 14320	MOVOU X2, -32(AX)(DI*1)
 14321	MOVOU X3, -16(AX)(DI*1)
 14322
 14323memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
 14324	MOVQ BX, AX
 14325	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
 14326
      // Long copy of DI bytes from (R8) to (AX). BX = AX + DI becomes the new
      // write pointer afterwards.
 14327memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
 14328	LEAQ (AX)(DI*1), BX
 14329
 14330	// genMemMoveLong
      	// The first and last 32 source bytes are held in X0-X3 and stored last
      	// with unaligned moves. The loops in between store with MOVOA at
      	// AX + R11 - 32, where R11 starts at 64 - (AX & 31), so those
      	// destinations are 32-byte aligned.
 14331	MOVOU (R8), X0
 14332	MOVOU 16(R8), X1
 14333	MOVOU -32(R8)(DI*1), X2
 14334	MOVOU -16(R8)(DI*1), X3
 14335	MOVQ  DI, R10
 14336	SHRQ  $0x05, R10
 14337	MOVQ  AX, R9
 14338	ANDL  $0x0000001f, R9
 14339	MOVQ  $0x00000040, R11
 14340	SUBQ  R9, R11
      	// NOTE(review): DECQ leaves CF untouched, so the JA here and the JNA
      	// below combine ZF from DECQ with CF from the preceding SUBQ/ADDQ.
      	// Confirm against the generator before reordering anything here.
 14341	DECQ  R10
 14342	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 14343	LEAQ  -32(R8)(R11*1), R9
 14344	LEAQ  -32(AX)(R11*1), R12
 14345
 14346emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
 14347	MOVOU (R9), X4
 14348	MOVOU 16(R9), X5
 14349	MOVOA X4, (R12)
 14350	MOVOA X5, 16(R12)
 14351	ADDQ  $0x20, R12
 14352	ADDQ  $0x20, R9
 14353	ADDQ  $0x20, R11
 14354	DECQ  R10
 14355	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
 14356
      // 32 bytes per iteration while DI >= R11 (checked after R11 += 32).
 14357emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
 14358	MOVOU -32(R8)(R11*1), X4
 14359	MOVOU -16(R8)(R11*1), X5
 14360	MOVOA X4, -32(AX)(R11*1)
 14361	MOVOA X5, -16(AX)(R11*1)
 14362	ADDQ  $0x20, R11
 14363	CMPQ  DI, R11
 14364	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 14365	MOVOU X0, (AX)
 14366	MOVOU X1, 16(AX)
 14367	MOVOU X2, -32(AX)(DI*1)
 14368	MOVOU X3, -16(AX)(DI*1)
 14369	MOVQ  BX, AX
 14370
      // Extend the repeat forward. The 4 bytes at CX+1 already matched, so
      // comparison starts at CX+5. R8 = &src[CX], BX = &src[CX - offset],
      // DI = bytes left in src.
 14371emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
 14372	ADDL $0x05, CX
 14373	MOVL CX, BX
 14374	SUBL 16(SP), BX
 14375	MOVQ src_len+32(FP), DI
 14376	SUBL CX, DI
 14377	LEAQ (DX)(CX*1), R8
 14378	LEAQ (DX)(BX*1), BX
 14379
 14380	// matchLen
      	// R10 counts equal bytes at (R8) and (BX), comparing at most DI bytes.
 14381	XORL R10, R10
 14382
      // 16 bytes per iteration as two 8-byte XORs; a non-zero XOR result marks
      // the first difference.
 14383matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B:
 14384	CMPL DI, $0x10
 14385	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B
 14386	MOVQ (R8)(R10*1), R9
 14387	MOVQ 8(R8)(R10*1), R11
 14388	XORQ (BX)(R10*1), R9
 14389	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
 14390	XORQ 8(BX)(R10*1), R11
 14391	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B
 14392	LEAL -16(DI), DI
 14393	LEAL 16(R10), R10
 14394	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B
 14395
      // Difference in the second 8-byte word: lowest set bit index / 8 equal
      // bytes, plus the 8 bytes of the first word.
 14396matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B:
 14397#ifdef GOAMD64_v3
 14398	TZCNTQ R11, R11
 14399
 14400#else
 14401	BSFQ R11, R11
 14402
 14403#endif
 14404	SARQ $0x03, R11
 14405	LEAL 8(R10)(R11*1), R10
 14406	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm8B
 14407
 14408matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B:
 14409	CMPL DI, $0x08
 14410	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
 14411	MOVQ (R8)(R10*1), R9
 14412	XORQ (BX)(R10*1), R9
 14413	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
 14414	LEAL -8(DI), DI
 14415	LEAL 8(R10), R10
 14416	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
 14417
      // Difference in an 8-byte word: lowest set bit index / 8 equal bytes.
 14418matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B:
 14419#ifdef GOAMD64_v3
 14420	TZCNTQ R9, R9
 14421
 14422#else
 14423	BSFQ R9, R9
 14424
 14425#endif
 14426	SARQ $0x03, R9
 14427	LEAL (R10)(R9*1), R10
 14428	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm8B
 14429
      // Tail: try 4 bytes, then 2, then 1.
 14430matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B:
 14431	CMPL DI, $0x04
 14432	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
 14433	MOVL (R8)(R10*1), R9
 14434	CMPL (BX)(R10*1), R9
 14435	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
 14436	LEAL -4(DI), DI
 14437	LEAL 4(R10), R10
 14438
 14439matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B:
 14440	CMPL DI, $0x01
 14441	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
 14442	JB   repeat_extend_forward_end_encodeSnappyBlockAsm8B
 14443	MOVW (R8)(R10*1), R9
 14444	CMPW (BX)(R10*1), R9
 14445	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
 14446	LEAL 2(R10), R10
 14447	SUBL $0x02, DI
 14448	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm8B
 14449
 14450matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B:
 14451	MOVB (R8)(R10*1), R9
 14452	CMPB (BX)(R10*1), R9
 14453	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm8B
 14454	LEAL 1(R10), R10
 14455
      // CX moves past the match. BX = CX - SI is the match length measured from
      // the back-extended start; SI is reloaded with the offset from 16(SP).
 14456repeat_extend_forward_end_encodeSnappyBlockAsm8B:
 14457	ADDL R10, CX
 14458	MOVL CX, BX
 14459	SUBL SI, BX
 14460	MOVL 16(SP), SI
 14461
 14462	// emitCopy
      // While the length exceeds 64: write 0xee (= (60-1)<<2 | 2) plus the
      // 16-bit offset, and reduce the length by 60.
 14463two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
 14464	CMPL BX, $0x40
 14465	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
 14466	MOVB $0xee, (AX)
 14467	MOVW SI, 1(AX)
 14468	LEAL -60(BX), BX
 14469	ADDQ $0x03, AX
 14470	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
 14471
      // Length <= 64. Lengths below 12 take the 2-byte form: tag =
      // (len*4 - 15) | ((offset >> 8) << 5), followed by the low offset byte.
 14472two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
 14473	MOVL BX, DI
 14474	SHLL $0x02, DI
 14475	CMPL BX, $0x0c
 14476	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
 14477	LEAL -15(DI), DI
 14478	MOVB SI, 1(AX)
 14479	SHRL $0x08, SI
 14480	SHLL $0x05, SI
 14481	ORL  SI, DI
 14482	MOVB DI, (AX)
 14483	ADDQ $0x02, AX
 14484	JMP  repeat_end_emit_encodeSnappyBlockAsm8B
 14485
      // 3-byte form: tag = len*4 - 2, followed by the 16-bit offset.
 14486emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
 14487	LEAL -2(DI), DI
 14488	MOVB DI, (AX)
 14489	MOVW SI, 1(AX)
 14490	ADDQ $0x03, AX
 14491
      // Everything up to CX has been emitted; search again from CX.
 14492repeat_end_emit_encodeSnappyBlockAsm8B:
 14493	MOVL CX, 12(SP)
 14494	JMP  search_loop_encodeSnappyBlockAsm8B
 14495
      // No repeat. Try the table candidates in turn:
      //   BX (old entry for the hash at CX)        against the 4 bytes at CX
      //   DI (old entry for the hash at CX+1)      against the 4 bytes at CX+1
      //   BX (current entry for the hash at CX+2)  against the 4 bytes at CX+2
      // CX+2 is written into the third slot on both remaining paths. If none
      // match, resume at the position saved in 20(SP).
 14496no_repeat_found_encodeSnappyBlockAsm8B:
 14497	CMPL (DX)(BX*1), SI
 14498	JEQ  candidate_match_encodeSnappyBlockAsm8B
 14499	SHRQ $0x08, SI
 14500	MOVL 24(SP)(R9*4), BX
 14501	LEAL 2(CX), R8
 14502	CMPL (DX)(DI*1), SI
 14503	JEQ  candidate2_match_encodeSnappyBlockAsm8B
 14504	MOVL R8, 24(SP)(R9*4)
 14505	SHRQ $0x08, SI
 14506	CMPL (DX)(BX*1), SI
 14507	JEQ  candidate3_match_encodeSnappyBlockAsm8B
 14508	MOVL 20(SP), CX
 14509	JMP  search_loop_encodeSnappyBlockAsm8B
 14510
      // Match at CX+2 with candidate BX.
 14511candidate3_match_encodeSnappyBlockAsm8B:
 14512	ADDL $0x02, CX
 14513	JMP  candidate_match_encodeSnappyBlockAsm8B
 14514
      // Match at CX+1 with candidate DI.
 14515candidate2_match_encodeSnappyBlockAsm8B:
 14516	MOVL R8, 24(SP)(R9*4)
 14517	INCL CX
 14518	MOVL DI, BX
 14519
      // CX = match position, BX = candidate position. Extend backwards unless
      // BX is 0.
 14520candidate_match_encodeSnappyBlockAsm8B:
 14521	MOVL  12(SP), SI
 14522	TESTL BX, BX
 14523	JZ    match_extend_back_end_encodeSnappyBlockAsm8B
 14524
      // Step CX and BX backwards while CX > 12(SP), the preceding bytes are
      // equal and BX has not reached 0.
 14525match_extend_back_loop_encodeSnappyBlockAsm8B:
 14526	CMPL CX, SI
 14527	JBE  match_extend_back_end_encodeSnappyBlockAsm8B
 14528	MOVB -1(DX)(BX*1), DI
 14529	MOVB -1(DX)(CX*1), R8
 14530	CMPB DI, R8
 14531	JNE  match_extend_back_end_encodeSnappyBlockAsm8B
 14532	LEAL -1(CX), CX
 14533	DECL BX
 14534	JZ   match_extend_back_end_encodeSnappyBlockAsm8B
 14535	JMP  match_extend_back_loop_encodeSnappyBlockAsm8B
 14536
      // dst check: AX + (CX - 12(SP)) + 3 must be below the limit in (SP),
      // otherwise store 0 in ret and return.
 14537match_extend_back_end_encodeSnappyBlockAsm8B:
 14538	MOVL CX, SI
 14539	SUBL 12(SP), SI
 14540	LEAQ 3(AX)(SI*1), SI
 14541	CMPQ SI, (SP)
 14542	JB   match_dst_size_check_encodeSnappyBlockAsm8B
 14543	MOVQ $0x00000000, ret+48(FP)
 14544	RET
 14545
      // Emit src[12(SP):CX] as a literal (skipped when the range is empty).
      // SI = source pointer, R8 = length, DI = length - 1; 12(SP) is set to CX.
 14546match_dst_size_check_encodeSnappyBlockAsm8B:
 14547	MOVL CX, SI
 14548	MOVL 12(SP), DI
 14549	CMPL DI, SI
 14550	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
 14551	MOVL SI, R8
 14552	MOVL SI, 12(SP)
 14553	LEAQ (DX)(DI*1), SI
 14554	SUBL DI, R8
 14555	LEAL -1(R8), DI
      	// Header size depends on length-1: < 60 one byte, < 256 two bytes,
      	// otherwise three bytes.
 14556	CMPL DI, $0x3c
 14557	JB   one_byte_match_emit_encodeSnappyBlockAsm8B
 14558	CMPL DI, $0x00000100
 14559	JB   two_bytes_match_emit_encodeSnappyBlockAsm8B
      	// Flags are unchanged since the JB above, so this JB is never taken;
      	// the three-byte case is reached by fall-through.
 14560	JB   three_bytes_match_emit_encodeSnappyBlockAsm8B
 14561
      // Three-byte header: 0xf4, then length-1 as a 16-bit store.
 14562three_bytes_match_emit_encodeSnappyBlockAsm8B:
 14563	MOVB $0xf4, (AX)
 14564	MOVW DI, 1(AX)
 14565	ADDQ $0x03, AX
 14566	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
 14567
      // Two-byte header: 0xf0, then length-1 as one byte. Lengths with
      // length-1 >= 64 use the long copy.
 14568two_bytes_match_emit_encodeSnappyBlockAsm8B:
 14569	MOVB $0xf0, (AX)
 14570	MOVB DI, 1(AX)
 14571	ADDQ $0x02, AX
 14572	CMPL DI, $0x40
 14573	JB   memmove_match_emit_encodeSnappyBlockAsm8B
 14574	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
 14575
      // One-byte header: (length-1) << 2.
 14576one_byte_match_emit_encodeSnappyBlockAsm8B:
 14577	SHLB $0x02, DI
 14578	MOVB DI, (AX)
 14579	ADDQ $0x01, AX
 14580
      // Short copy of R8 bytes from (SI) to (AX). DI = AX + R8 becomes the new
      // write pointer afterwards.
 14581memmove_match_emit_encodeSnappyBlockAsm8B:
 14582	LEAQ (AX)(R8*1), DI
 14583
 14584	// genMemMoveShort
 14585	CMPQ R8, $0x08
 14586	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
 14587	CMPQ R8, $0x10
 14588	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
 14589	CMPQ R8, $0x20
 14590	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
 14591	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
 14592
      // Lengths <= 8: always moves a full 8 bytes, even when R8 is smaller;
      // AX still advances by R8 only.
 14593emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
 14594	MOVQ (SI), R9
 14595	MOVQ R9, (AX)
 14596	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
 14597
      // Lengths 9..16: first and last 8 bytes, overlapping in the middle.
 14598emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
 14599	MOVQ (SI), R9
 14600	MOVQ -8(SI)(R8*1), SI
 14601	MOVQ R9, (AX)
 14602	MOVQ SI, -8(AX)(R8*1)
 14603	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
 14604
      // Lengths 17..32: first and last 16 bytes, overlapping in the middle.
 14605emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
 14606	MOVOU (SI), X0
 14607	MOVOU -16(SI)(R8*1), X1
 14608	MOVOU X0, (AX)
 14609	MOVOU X1, -16(AX)(R8*1)
 14610	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
 14611
      // Remaining short lengths: first 32 and last 32 bytes.
 14612emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
 14613	MOVOU (SI), X0
 14614	MOVOU 16(SI), X1
 14615	MOVOU -32(SI)(R8*1), X2
 14616	MOVOU -16(SI)(R8*1), X3
 14617	MOVOU X0, (AX)
 14618	MOVOU X1, 16(AX)
 14619	MOVOU X2, -32(AX)(R8*1)
 14620	MOVOU X3, -16(AX)(R8*1)
 14621
 14622memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
 14623	MOVQ DI, AX
 14624	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
 14625
      // Long copy of R8 bytes from (SI) to (AX). DI = AX + R8 becomes the new
      // write pointer afterwards.
 14626memmove_long_match_emit_encodeSnappyBlockAsm8B:
 14627	LEAQ (AX)(R8*1), DI
 14628
 14629	// genMemMoveLong
      	// The first and last 32 source bytes are held in X0-X3 and stored last
      	// with unaligned moves. The loops in between store with MOVOA at
      	// AX + R11 - 32, where R11 starts at 64 - (AX & 31), so those
      	// destinations are 32-byte aligned.
 14630	MOVOU (SI), X0
 14631	MOVOU 16(SI), X1
 14632	MOVOU -32(SI)(R8*1), X2
 14633	MOVOU -16(SI)(R8*1), X3
 14634	MOVQ  R8, R10
 14635	SHRQ  $0x05, R10
 14636	MOVQ  AX, R9
 14637	ANDL  $0x0000001f, R9
 14638	MOVQ  $0x00000040, R11
 14639	SUBQ  R9, R11
      	// NOTE(review): DECQ leaves CF untouched, so the JA here and the JNA
      	// below combine ZF from DECQ with CF from the preceding SUBQ/ADDQ.
      	// Confirm against the generator before reordering anything here.
 14640	DECQ  R10
 14641	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 14642	LEAQ  -32(SI)(R11*1), R9
 14643	LEAQ  -32(AX)(R11*1), R12
 14644
 14645emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
 14646	MOVOU (R9), X4
 14647	MOVOU 16(R9), X5
 14648	MOVOA X4, (R12)
 14649	MOVOA X5, 16(R12)
 14650	ADDQ  $0x20, R12
 14651	ADDQ  $0x20, R9
 14652	ADDQ  $0x20, R11
 14653	DECQ  R10
 14654	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
 14655
      // 32 bytes per iteration while R8 >= R11 (checked after R11 += 32).
 14656emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
 14657	MOVOU -32(SI)(R11*1), X4
 14658	MOVOU -16(SI)(R11*1), X5
 14659	MOVOA X4, -32(AX)(R11*1)
 14660	MOVOA X5, -16(AX)(R11*1)
 14661	ADDQ  $0x20, R11
 14662	CMPQ  R8, R11
 14663	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 14664	MOVOU X0, (AX)
 14665	MOVOU X1, 16(AX)
 14666	MOVOU X2, -32(AX)(R8*1)
 14667	MOVOU X3, -16(AX)(R8*1)
 14668	MOVQ  DI, AX
 14669
      // Match with no pending literal. Store the offset CX - BX in 16(SP) and
      // skip the 4 bytes already compared. DI = &src[CX], BX = &src[candidate],
      // SI = bytes left in src.
 14670emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
 14671match_nolit_loop_encodeSnappyBlockAsm8B:
 14672	MOVL CX, SI
 14673	SUBL BX, SI
 14674	MOVL SI, 16(SP)
 14675	ADDL $0x04, CX
 14676	ADDL $0x04, BX
 14677	MOVQ src_len+32(FP), SI
 14678	SUBL CX, SI
 14679	LEAQ (DX)(CX*1), DI
 14680	LEAQ (DX)(BX*1), BX
 14681
 14682	// matchLen
      	// R9 counts equal bytes at (DI) and (BX), comparing at most SI bytes.
 14683	XORL R9, R9
 14684
      // 16 bytes per iteration as two 8-byte XORs; a non-zero XOR result marks
      // the first difference.
 14685matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B:
 14686	CMPL SI, $0x10
 14687	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm8B
 14688	MOVQ (DI)(R9*1), R8
 14689	MOVQ 8(DI)(R9*1), R10
 14690	XORQ (BX)(R9*1), R8
 14691	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
 14692	XORQ 8(BX)(R9*1), R10
 14693	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B
 14694	LEAL -16(SI), SI
 14695	LEAL 16(R9), R9
 14696	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B
 14697
      // Difference in the second 8-byte word: lowest set bit index / 8 equal
      // bytes, plus the 8 bytes of the first word.
 14698matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B:
 14699#ifdef GOAMD64_v3
 14700	TZCNTQ R10, R10
 14701
 14702#else
 14703	BSFQ R10, R10
 14704
 14705#endif
 14706	SARQ $0x03, R10
 14707	LEAL 8(R9)(R10*1), R9
 14708	JMP  match_nolit_end_encodeSnappyBlockAsm8B
 14709
 14710matchlen_match8_match_nolit_encodeSnappyBlockAsm8B:
 14711	CMPL SI, $0x08
 14712	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
 14713	MOVQ (DI)(R9*1), R8
 14714	XORQ (BX)(R9*1), R8
 14715	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
 14716	LEAL -8(SI), SI
 14717	LEAL 8(R9), R9
 14718	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
 14719
      // Difference in an 8-byte word: lowest set bit index / 8 equal bytes.
 14720matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B:
 14721#ifdef GOAMD64_v3
 14722	TZCNTQ R8, R8
 14723
 14724#else
 14725	BSFQ R8, R8
 14726
 14727#endif
 14728	SARQ $0x03, R8
 14729	LEAL (R9)(R8*1), R9
 14730	JMP  match_nolit_end_encodeSnappyBlockAsm8B
 14731
      // Tail: try 4 bytes, then 2, then 1.
 14732matchlen_match4_match_nolit_encodeSnappyBlockAsm8B:
 14733	CMPL SI, $0x04
 14734	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
 14735	MOVL (DI)(R9*1), R8
 14736	CMPL (BX)(R9*1), R8
 14737	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
 14738	LEAL -4(SI), SI
 14739	LEAL 4(R9), R9
 14740
 14741matchlen_match2_match_nolit_encodeSnappyBlockAsm8B:
 14742	CMPL SI, $0x01
 14743	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
 14744	JB   match_nolit_end_encodeSnappyBlockAsm8B
 14745	MOVW (DI)(R9*1), R8
 14746	CMPW (BX)(R9*1), R8
 14747	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
 14748	LEAL 2(R9), R9
 14749	SUBL $0x02, SI
 14750	JZ   match_nolit_end_encodeSnappyBlockAsm8B
 14751
 14752matchlen_match1_match_nolit_encodeSnappyBlockAsm8B:
 14753	MOVB (DI)(R9*1), R8
 14754	CMPB (BX)(R9*1), R8
 14755	JNE  match_nolit_end_encodeSnappyBlockAsm8B
 14756	LEAL 1(R9), R9
 14757
      // CX moves past the match. BX = offset, R9 = total match length including
      // the 4 bytes skipped above; 12(SP) is set to the new CX.
 14758match_nolit_end_encodeSnappyBlockAsm8B:
 14759	ADDL R9, CX
 14760	MOVL 16(SP), BX
 14761	ADDL $0x04, R9
 14762	MOVL CX, 12(SP)
 14763
 14764	// emitCopy
      // While the length exceeds 64: write 0xee (= (60-1)<<2 | 2) plus the
      // 16-bit offset, and reduce the length by 60.
 14765two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
 14766	CMPL R9, $0x40
 14767	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
 14768	MOVB $0xee, (AX)
 14769	MOVW BX, 1(AX)
 14770	LEAL -60(R9), R9
 14771	ADDQ $0x03, AX
 14772	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
 14773
      // Length <= 64. Lengths below 12 take the 2-byte form: tag =
      // (len*4 - 15) | ((offset >> 8) << 5), followed by the low offset byte.
 14774two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
 14775	MOVL R9, SI
 14776	SHLL $0x02, SI
 14777	CMPL R9, $0x0c
 14778	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
 14779	LEAL -15(SI), SI
 14780	MOVB BL, 1(AX)
 14781	SHRL $0x08, BX
 14782	SHLL $0x05, BX
 14783	ORL  BX, SI
 14784	MOVB SI, (AX)
 14785	ADDQ $0x02, AX
 14786	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
 14787
      // 3-byte form: tag = len*4 - 2, followed by the 16-bit offset.
 14788emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
 14789	LEAL -2(SI), SI
 14790	MOVB SI, (AX)
 14791	MOVW BX, 1(AX)
 14792	ADDQ $0x03, AX
 14793
      // After the copy: stop searching once CX reaches the search limit, and
      // store 0 in ret and return if AX has reached the dst limit. SI is loaded
      // with the 8 bytes at CX-2 for the table update below.
 14794match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
 14795	CMPL CX, 8(SP)
 14796	JAE  emit_remainder_encodeSnappyBlockAsm8B
 14797	MOVQ -2(DX)(CX*1), SI
 14798	CMPQ AX, (SP)
 14799	JB   match_nolit_dst_ok_encodeSnappyBlockAsm8B
 14800	MOVQ $0x00000000, ret+48(FP)
 14801	RET
 14802
      // Record CX-2 in the slot for the hash of the bytes at CX-2. Then read the
      // old entry for the hash of the bytes at CX (SI >> 16) into BX and replace
      // it with CX. If the 4 bytes at that old entry equal the 4 bytes at CX,
      // loop straight into another match; otherwise continue searching at CX+1.
 14803match_nolit_dst_ok_encodeSnappyBlockAsm8B:
 14804	MOVQ  $0x9e3779b1, R8
 14805	MOVQ  SI, DI
 14806	SHRQ  $0x10, SI
 14807	MOVQ  SI, BX
 14808	SHLQ  $0x20, DI
 14809	IMULQ R8, DI
 14810	SHRQ  $0x38, DI
 14811	SHLQ  $0x20, BX
 14812	IMULQ R8, BX
 14813	SHRQ  $0x38, BX
 14814	LEAL  -2(CX), R8
 14815	LEAQ  24(SP)(BX*4), R9
 14816	MOVL  (R9), BX
 14817	MOVL  R8, 24(SP)(DI*4)
 14818	MOVL  CX, (R9)
 14819	CMPL  (DX)(BX*1), SI
 14820	JEQ   match_nolit_loop_encodeSnappyBlockAsm8B
 14821	INCL  CX
 14822	JMP   search_loop_encodeSnappyBlockAsm8B
 14823
      // End of search. dst check: AX + (src_len - 12(SP)) + 3 must be below the
      // limit in (SP), otherwise store 0 in ret and return.
 14824emit_remainder_encodeSnappyBlockAsm8B:
 14825	MOVQ src_len+32(FP), CX
 14826	SUBL 12(SP), CX
 14827	LEAQ 3(AX)(CX*1), CX
 14828	CMPQ CX, (SP)
 14829	JB   emit_remainder_ok_encodeSnappyBlockAsm8B
 14830	MOVQ $0x00000000, ret+48(FP)
 14831	RET
 14832
      // Emit src[12(SP):src_len] as the final literal (skipped when the range is
      // empty). CX = source pointer, SI = length. DX is reused for length - 1
      // here, so it no longer holds the src base.
 14833emit_remainder_ok_encodeSnappyBlockAsm8B:
 14834	MOVQ src_len+32(FP), CX
 14835	MOVL 12(SP), BX
 14836	CMPL BX, CX
 14837	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
 14838	MOVL CX, SI
 14839	MOVL CX, 12(SP)
 14840	LEAQ (DX)(BX*1), CX
 14841	SUBL BX, SI
 14842	LEAL -1(SI), DX
      	// Header size depends on length-1: < 60 one byte, < 256 two bytes,
      	// otherwise three bytes.
 14843	CMPL DX, $0x3c
 14844	JB   one_byte_emit_remainder_encodeSnappyBlockAsm8B
 14845	CMPL DX, $0x00000100
 14846	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm8B
      	// Flags are unchanged since the JB above, so this JB is never taken;
      	// the three-byte case is reached by fall-through.
 14847	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm8B
 14848
      // Three-byte header: 0xf4, then length-1 as a 16-bit store.
 14849three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
 14850	MOVB $0xf4, (AX)
 14851	MOVW DX, 1(AX)
 14852	ADDQ $0x03, AX
 14853	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
 14854
      // Two-byte header: 0xf0, then length-1 as one byte. Lengths with
      // length-1 >= 64 use the long copy.
 14855two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
 14856	MOVB $0xf0, (AX)
 14857	MOVB DL, 1(AX)
 14858	ADDQ $0x02, AX
 14859	CMPL DX, $0x40
 14860	JB   memmove_emit_remainder_encodeSnappyBlockAsm8B
 14861	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
 14862
      // One-byte header: (length-1) << 2.
 14863one_byte_emit_remainder_encodeSnappyBlockAsm8B:
 14864	SHLB $0x02, DL
 14865	MOVB DL, (AX)
 14866	ADDQ $0x01, AX
 14867
      // Short copy of BX bytes from (CX) to (AX). DX = AX + length becomes the
      // new write pointer afterwards. Unlike the in-loop copies, this one has
      // cases for lengths below 8 and never touches bytes outside the range.
 14868memmove_emit_remainder_encodeSnappyBlockAsm8B:
 14869	LEAQ (AX)(SI*1), DX
 14870	MOVL SI, BX
 14871
 14872	// genMemMoveShort
 14873	CMPQ BX, $0x03
 14874	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2
 14875	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3
 14876	CMPQ BX, $0x08
 14877	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7
 14878	CMPQ BX, $0x10
 14879	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
 14880	CMPQ BX, $0x20
 14881	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
 14882	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
 14883
      // Lengths 1..2: first and last byte (the same byte when the length is 1).
 14884emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
 14885	MOVB (CX), SI
 14886	MOVB -1(CX)(BX*1), CL
 14887	MOVB SI, (AX)
 14888	MOVB CL, -1(AX)(BX*1)
 14889	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 14890
      // Length 3: a 2-byte move plus a 1-byte move.
 14891emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
 14892	MOVW (CX), SI
 14893	MOVB 2(CX), CL
 14894	MOVW SI, (AX)
 14895	MOVB CL, 2(AX)
 14896	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 14897
      // Lengths 4..7: first and last 4 bytes, overlapping in the middle.
 14898emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7:
 14899	MOVL (CX), SI
 14900	MOVL -4(CX)(BX*1), CX
 14901	MOVL SI, (AX)
 14902	MOVL CX, -4(AX)(BX*1)
 14903	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 14904
      // Lengths 8..16: first and last 8 bytes, overlapping in the middle.
 14905emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
 14906	MOVQ (CX), SI
 14907	MOVQ -8(CX)(BX*1), CX
 14908	MOVQ SI, (AX)
 14909	MOVQ CX, -8(AX)(BX*1)
 14910	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 14911
      // Lengths 17..32: first and last 16 bytes, overlapping in the middle.
 14912emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
 14913	MOVOU (CX), X0
 14914	MOVOU -16(CX)(BX*1), X1
 14915	MOVOU X0, (AX)
 14916	MOVOU X1, -16(AX)(BX*1)
 14917	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 14918
      // Remaining short lengths: first 32 and last 32 bytes.
 14919emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
 14920	MOVOU (CX), X0
 14921	MOVOU 16(CX), X1
 14922	MOVOU -32(CX)(BX*1), X2
 14923	MOVOU -16(CX)(BX*1), X3
 14924	MOVOU X0, (AX)
 14925	MOVOU X1, 16(AX)
 14926	MOVOU X2, -32(AX)(BX*1)
 14927	MOVOU X3, -16(AX)(BX*1)
 14928
 14929memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
 14930	MOVQ DX, AX
 14931	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
 14932
      // Long copy of BX bytes from (CX) to (AX). DX = AX + length becomes the
      // new write pointer afterwards.
 14933memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
 14934	LEAQ (AX)(SI*1), DX
 14935	MOVL SI, BX
 14936
 14937	// genMemMoveLong
      	// The first and last 32 source bytes are held in X0-X3 and stored last
      	// with unaligned moves. The loops in between store with MOVOA at
      	// AX + R8 - 32, where R8 starts at 64 - (AX & 31), so those
      	// destinations are 32-byte aligned.
 14938	MOVOU (CX), X0
 14939	MOVOU 16(CX), X1
 14940	MOVOU -32(CX)(BX*1), X2
 14941	MOVOU -16(CX)(BX*1), X3
 14942	MOVQ  BX, DI
 14943	SHRQ  $0x05, DI
 14944	MOVQ  AX, SI
 14945	ANDL  $0x0000001f, SI
 14946	MOVQ  $0x00000040, R8
 14947	SUBQ  SI, R8
      	// NOTE(review): DECQ leaves CF untouched, so the JA here and the JNA
      	// below combine ZF from DECQ with CF from the preceding SUBQ/ADDQ.
      	// Confirm against the generator before reordering anything here.
 14948	DECQ  DI
 14949	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 14950	LEAQ  -32(CX)(R8*1), SI
 14951	LEAQ  -32(AX)(R8*1), R9
 14952
 14953emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
 14954	MOVOU (SI), X4
 14955	MOVOU 16(SI), X5
 14956	MOVOA X4, (R9)
 14957	MOVOA X5, 16(R9)
 14958	ADDQ  $0x20, R9
 14959	ADDQ  $0x20, SI
 14960	ADDQ  $0x20, R8
 14961	DECQ  DI
 14962	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back
 14963
      // 32 bytes per iteration while BX >= R8 (checked after R8 += 32).
 14964emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
 14965	MOVOU -32(CX)(R8*1), X4
 14966	MOVOU -16(CX)(R8*1), X5
 14967	MOVOA X4, -32(AX)(R8*1)
 14968	MOVOA X5, -16(AX)(R8*1)
 14969	ADDQ  $0x20, R8
 14970	CMPQ  BX, R8
 14971	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 14972	MOVOU X0, (AX)
 14973	MOVOU X1, 16(AX)
 14974	MOVOU X2, -32(AX)(BX*1)
 14975	MOVOU X3, -16(AX)(BX*1)
 14976	MOVQ  DX, AX
 14977
      // Result: number of bytes written = AX - dst_base.
 14978emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
 14979	MOVQ dst_base+0(FP), CX
 14980	SUBQ CX, AX
 14981	MOVQ AX, ret+48(FP)
 14982	RET
 14983
 14984// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
 14985// Requires: BMI, SSE2
 14986TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56
 14987	MOVQ dst_base+0(FP), AX
 14988	MOVQ $0x00001200, CX
 14989	LEAQ 24(SP), DX
 14990	PXOR X0, X0
 14991
 14992zero_loop_encodeSnappyBetterBlockAsm:
 14993	MOVOU X0, (DX)
 14994	MOVOU X0, 16(DX)
 14995	MOVOU X0, 32(DX)
 14996	MOVOU X0, 48(DX)
 14997	MOVOU X0, 64(DX)
 14998	MOVOU X0, 80(DX)
 14999	MOVOU X0, 96(DX)
 15000	MOVOU X0, 112(DX)
 15001	ADDQ  $0x80, DX
 15002	DECQ  CX
 15003	JNZ   zero_loop_encodeSnappyBetterBlockAsm
 15004	MOVL  $0x00000000, 12(SP)
 15005	MOVQ  src_len+32(FP), CX
 15006	LEAQ  -9(CX), DX
 15007	LEAQ  -8(CX), BX
 15008	MOVL  BX, 8(SP)
 15009	SHRQ  $0x05, CX
 15010	SUBL  CX, DX
 15011	LEAQ  (AX)(DX*1), DX
 15012	MOVQ  DX, (SP)
 15013	MOVL  $0x00000001, CX
 15014	MOVL  $0x00000000, 16(SP)
 15015	MOVQ  src_base+24(FP), DX
 15016
 15017search_loop_encodeSnappyBetterBlockAsm:
 15018	MOVL CX, BX
 15019	SUBL 12(SP), BX
 15020	SHRL $0x07, BX
 15021	CMPL BX, $0x63
 15022	JBE  check_maxskip_ok_encodeSnappyBetterBlockAsm
 15023	LEAL 100(CX), BX
 15024	JMP  check_maxskip_cont_encodeSnappyBetterBlockAsm
 15025
 15026check_maxskip_ok_encodeSnappyBetterBlockAsm:
 15027	LEAL 1(CX)(BX*1), BX
 15028
 15029check_maxskip_cont_encodeSnappyBetterBlockAsm:
 15030	CMPL  BX, 8(SP)
 15031	JAE   emit_remainder_encodeSnappyBetterBlockAsm
 15032	MOVQ  (DX)(CX*1), SI
 15033	MOVL  BX, 20(SP)
 15034	MOVQ  $0x00cf1bbcdcbfa563, R8
 15035	MOVQ  $0x9e3779b1, BX
 15036	MOVQ  SI, R9
 15037	MOVQ  SI, R10
 15038	SHLQ  $0x08, R9
 15039	IMULQ R8, R9
 15040	SHRQ  $0x2f, R9
 15041	SHLQ  $0x20, R10
 15042	IMULQ BX, R10
 15043	SHRQ  $0x32, R10
 15044	MOVL  24(SP)(R9*4), BX
 15045	MOVL  524312(SP)(R10*4), DI
 15046	MOVL  CX, 24(SP)(R9*4)
 15047	MOVL  CX, 524312(SP)(R10*4)
 15048	MOVQ  (DX)(BX*1), R9
 15049	MOVQ  (DX)(DI*1), R10
 15050	CMPQ  R9, SI
 15051	JEQ   candidate_match_encodeSnappyBetterBlockAsm
 15052	CMPQ  R10, SI
 15053	JNE   no_short_found_encodeSnappyBetterBlockAsm
 15054	MOVL  DI, BX
 15055	JMP   candidate_match_encodeSnappyBetterBlockAsm
 15056
 15057no_short_found_encodeSnappyBetterBlockAsm:
 15058	CMPL R9, SI
 15059	JEQ  candidate_match_encodeSnappyBetterBlockAsm
 15060	CMPL R10, SI
 15061	JEQ  candidateS_match_encodeSnappyBetterBlockAsm
 15062	MOVL 20(SP), CX
 15063	JMP  search_loop_encodeSnappyBetterBlockAsm
 15064
 15065candidateS_match_encodeSnappyBetterBlockAsm:
 15066	SHRQ  $0x08, SI
 15067	MOVQ  SI, R9
 15068	SHLQ  $0x08, R9
 15069	IMULQ R8, R9
 15070	SHRQ  $0x2f, R9
 15071	MOVL  24(SP)(R9*4), BX
 15072	INCL  CX
 15073	MOVL  CX, 24(SP)(R9*4)
 15074	CMPL  (DX)(BX*1), SI
 15075	JEQ   candidate_match_encodeSnappyBetterBlockAsm
 15076	DECL  CX
 15077	MOVL  DI, BX
 15078
 15079candidate_match_encodeSnappyBetterBlockAsm:
 15080	MOVL  12(SP), SI
 15081	TESTL BX, BX
 15082	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm
 15083
 15084match_extend_back_loop_encodeSnappyBetterBlockAsm:
 15085	CMPL CX, SI
 15086	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm
 15087	MOVB -1(DX)(BX*1), DI
 15088	MOVB -1(DX)(CX*1), R8
 15089	CMPB DI, R8
 15090	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm
 15091	LEAL -1(CX), CX
 15092	DECL BX
 15093	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm
 15094	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm
 15095
 15096match_extend_back_end_encodeSnappyBetterBlockAsm:
 15097	MOVL CX, SI
 15098	SUBL 12(SP), SI
 15099	LEAQ 5(AX)(SI*1), SI
 15100	CMPQ SI, (SP)
 15101	JB   match_dst_size_check_encodeSnappyBetterBlockAsm
 15102	MOVQ $0x00000000, ret+48(FP)
 15103	RET
 15104
 15105match_dst_size_check_encodeSnappyBetterBlockAsm:
 15106	MOVL CX, SI
 15107	ADDL $0x04, CX
 15108	ADDL $0x04, BX
 15109	MOVQ src_len+32(FP), DI
 15110	SUBL CX, DI
 15111	LEAQ (DX)(CX*1), R8
 15112	LEAQ (DX)(BX*1), R9
 15113
 15114	// matchLen
 15115	XORL R11, R11
 15116
 15117matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm:
 15118	CMPL DI, $0x10
 15119	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm
 15120	MOVQ (R8)(R11*1), R10
 15121	MOVQ 8(R8)(R11*1), R12
 15122	XORQ (R9)(R11*1), R10
 15123	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
 15124	XORQ 8(R9)(R11*1), R12
 15125	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm
 15126	LEAL -16(DI), DI
 15127	LEAL 16(R11), R11
 15128	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm
 15129
 15130matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm:
 15131#ifdef GOAMD64_v3
 15132	TZCNTQ R12, R12
 15133
 15134#else
 15135	BSFQ R12, R12
 15136
 15137#endif
 15138	SARQ $0x03, R12
 15139	LEAL 8(R11)(R12*1), R11
 15140	JMP  match_nolit_end_encodeSnappyBetterBlockAsm
 15141
 15142matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm:
 15143	CMPL DI, $0x08
 15144	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
 15145	MOVQ (R8)(R11*1), R10
 15146	XORQ (R9)(R11*1), R10
 15147	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
 15148	LEAL -8(DI), DI
 15149	LEAL 8(R11), R11
 15150	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
 15151
 15152matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm:
 15153#ifdef GOAMD64_v3
 15154	TZCNTQ R10, R10
 15155
 15156#else
 15157	BSFQ R10, R10
 15158
 15159#endif
 15160	SARQ $0x03, R10
 15161	LEAL (R11)(R10*1), R11
 15162	JMP  match_nolit_end_encodeSnappyBetterBlockAsm
 15163
 15164matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm:
 15165	CMPL DI, $0x04
 15166	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
 15167	MOVL (R8)(R11*1), R10
 15168	CMPL (R9)(R11*1), R10
 15169	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
 15170	LEAL -4(DI), DI
 15171	LEAL 4(R11), R11
 15172
 15173matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm:
 15174	CMPL DI, $0x01
 15175	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
 15176	JB   match_nolit_end_encodeSnappyBetterBlockAsm
 15177	MOVW (R8)(R11*1), R10
 15178	CMPW (R9)(R11*1), R10
 15179	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
 15180	LEAL 2(R11), R11
 15181	SUBL $0x02, DI
 15182	JZ   match_nolit_end_encodeSnappyBetterBlockAsm
 15183
 15184matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm:
 15185	MOVB (R8)(R11*1), R10
 15186	CMPB (R9)(R11*1), R10
 15187	JNE  match_nolit_end_encodeSnappyBetterBlockAsm
 15188	LEAL 1(R11), R11
 15189
 15190match_nolit_end_encodeSnappyBetterBlockAsm:
 15191	MOVL CX, DI
 15192	SUBL BX, DI
 15193
 15194	// Check if repeat
 15195	CMPL R11, $0x01
 15196	JA   match_length_ok_encodeSnappyBetterBlockAsm
 15197	CMPL DI, $0x0000ffff
 15198	JBE  match_length_ok_encodeSnappyBetterBlockAsm
 15199	MOVL 20(SP), CX
 15200	INCL CX
 15201	JMP  search_loop_encodeSnappyBetterBlockAsm
 15202
 15203match_length_ok_encodeSnappyBetterBlockAsm:
 15204	MOVL DI, 16(SP)
 15205	MOVL 12(SP), BX
 15206	CMPL BX, SI
 15207	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
 15208	MOVL SI, R8
 15209	MOVL SI, 12(SP)
 15210	LEAQ (DX)(BX*1), R9
 15211	SUBL BX, R8
 15212	LEAL -1(R8), BX
 15213	CMPL BX, $0x3c
 15214	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm
 15215	CMPL BX, $0x00000100
 15216	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm
 15217	CMPL BX, $0x00010000
 15218	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm
 15219	CMPL BX, $0x01000000
 15220	JB   four_bytes_match_emit_encodeSnappyBetterBlockAsm
 15221	MOVB $0xfc, (AX)
 15222	MOVL BX, 1(AX)
 15223	ADDQ $0x05, AX
 15224	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 15225
 15226four_bytes_match_emit_encodeSnappyBetterBlockAsm:
 15227	MOVL BX, R10
 15228	SHRL $0x10, R10
 15229	MOVB $0xf8, (AX)
 15230	MOVW BX, 1(AX)
 15231	MOVB R10, 3(AX)
 15232	ADDQ $0x04, AX
 15233	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 15234
 15235three_bytes_match_emit_encodeSnappyBetterBlockAsm:
 15236	MOVB $0xf4, (AX)
 15237	MOVW BX, 1(AX)
 15238	ADDQ $0x03, AX
 15239	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 15240
 15241two_bytes_match_emit_encodeSnappyBetterBlockAsm:
 15242	MOVB $0xf0, (AX)
 15243	MOVB BL, 1(AX)
 15244	ADDQ $0x02, AX
 15245	CMPL BX, $0x40
 15246	JB   memmove_match_emit_encodeSnappyBetterBlockAsm
 15247	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 15248
 15249one_byte_match_emit_encodeSnappyBetterBlockAsm:
 15250	SHLB $0x02, BL
 15251	MOVB BL, (AX)
 15252	ADDQ $0x01, AX
 15253
 15254memmove_match_emit_encodeSnappyBetterBlockAsm:
 15255	LEAQ (AX)(R8*1), BX
 15256
 15257	// genMemMoveShort
 15258	CMPQ R8, $0x08
 15259	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
 15260	CMPQ R8, $0x10
 15261	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
 15262	CMPQ R8, $0x20
 15263	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
 15264	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
 15265
 15266emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8:
 15267	MOVQ (R9), R10
 15268	MOVQ R10, (AX)
 15269	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
 15270
 15271emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16:
 15272	MOVQ (R9), R10
 15273	MOVQ -8(R9)(R8*1), R9
 15274	MOVQ R10, (AX)
 15275	MOVQ R9, -8(AX)(R8*1)
 15276	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
 15277
 15278emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32:
 15279	MOVOU (R9), X0
 15280	MOVOU -16(R9)(R8*1), X1
 15281	MOVOU X0, (AX)
 15282	MOVOU X1, -16(AX)(R8*1)
 15283	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
 15284
 15285emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64:
 15286	MOVOU (R9), X0
 15287	MOVOU 16(R9), X1
 15288	MOVOU -32(R9)(R8*1), X2
 15289	MOVOU -16(R9)(R8*1), X3
 15290	MOVOU X0, (AX)
 15291	MOVOU X1, 16(AX)
 15292	MOVOU X2, -32(AX)(R8*1)
 15293	MOVOU X3, -16(AX)(R8*1)
 15294
 15295memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
 15296	MOVQ BX, AX
 15297	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
 15298
 15299memmove_long_match_emit_encodeSnappyBetterBlockAsm:
 15300	LEAQ (AX)(R8*1), BX
 15301
 15302	// genMemMoveLong
 15303	MOVOU (R9), X0
 15304	MOVOU 16(R9), X1
 15305	MOVOU -32(R9)(R8*1), X2
 15306	MOVOU -16(R9)(R8*1), X3
 15307	MOVQ  R8, R12
 15308	SHRQ  $0x05, R12
 15309	MOVQ  AX, R10
 15310	ANDL  $0x0000001f, R10
 15311	MOVQ  $0x00000040, R13
 15312	SUBQ  R10, R13
 15313	DECQ  R12
 15314	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
 15315	LEAQ  -32(R9)(R13*1), R10
 15316	LEAQ  -32(AX)(R13*1), R14
 15317
 15318emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back:
 15319	MOVOU (R10), X4
 15320	MOVOU 16(R10), X5
 15321	MOVOA X4, (R14)
 15322	MOVOA X5, 16(R14)
 15323	ADDQ  $0x20, R14
 15324	ADDQ  $0x20, R10
 15325	ADDQ  $0x20, R13
 15326	DECQ  R12
 15327	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
 15328
 15329emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
 15330	MOVOU -32(R9)(R13*1), X4
 15331	MOVOU -16(R9)(R13*1), X5
 15332	MOVOA X4, -32(AX)(R13*1)
 15333	MOVOA X5, -16(AX)(R13*1)
 15334	ADDQ  $0x20, R13
 15335	CMPQ  R8, R13
 15336	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
 15337	MOVOU X0, (AX)
 15338	MOVOU X1, 16(AX)
 15339	MOVOU X2, -32(AX)(R8*1)
 15340	MOVOU X3, -16(AX)(R8*1)
 15341	MOVQ  BX, AX
 15342
 15343emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
 15344	ADDL R11, CX
 15345	ADDL $0x04, R11
 15346	MOVL CX, 12(SP)
 15347
 15348	// emitCopy
 15349	CMPL DI, $0x00010000
 15350	JB   two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
 15351
 15352four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm:
 15353	CMPL R11, $0x40
 15354	JBE  four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
 15355	MOVB $0xff, (AX)
 15356	MOVL DI, 1(AX)
 15357	LEAL -64(R11), R11
 15358	ADDQ $0x05, AX
 15359	CMPL R11, $0x04
 15360	JB   four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
 15361	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
 15362
 15363four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
 15364	TESTL R11, R11
 15365	JZ    match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
 15366	XORL  BX, BX
 15367	LEAL  -1(BX)(R11*4), R11
 15368	MOVB  R11, (AX)
 15369	MOVL  DI, 1(AX)
 15370	ADDQ  $0x05, AX
 15371	JMP   match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
 15372
 15373two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm:
 15374	CMPL R11, $0x40
 15375	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
 15376	MOVB $0xee, (AX)
 15377	MOVW DI, 1(AX)
 15378	LEAL -60(R11), R11
 15379	ADDQ $0x03, AX
 15380	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
 15381
 15382two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
 15383	MOVL R11, BX
 15384	SHLL $0x02, BX
 15385	CMPL R11, $0x0c
 15386	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
 15387	CMPL DI, $0x00000800
 15388	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
 15389	LEAL -15(BX), BX
 15390	MOVB DI, 1(AX)
 15391	SHRL $0x08, DI
 15392	SHLL $0x05, DI
 15393	ORL  DI, BX
 15394	MOVB BL, (AX)
 15395	ADDQ $0x02, AX
 15396	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
 15397
 15398emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
 15399	LEAL -2(BX), BX
 15400	MOVB BL, (AX)
 15401	MOVW DI, 1(AX)
 15402	ADDQ $0x03, AX
 15403
 15404match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
 15405	CMPL CX, 8(SP)
 15406	JAE  emit_remainder_encodeSnappyBetterBlockAsm
 15407	CMPQ AX, (SP)
 15408	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm
 15409	MOVQ $0x00000000, ret+48(FP)
 15410	RET
 15411
 15412match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
 15413	MOVQ  $0x00cf1bbcdcbfa563, BX
 15414	MOVQ  $0x9e3779b1, DI
 15415	LEAQ  1(SI), SI
 15416	LEAQ  -2(CX), R8
 15417	MOVQ  (DX)(SI*1), R9
 15418	MOVQ  1(DX)(SI*1), R10
 15419	MOVQ  (DX)(R8*1), R11
 15420	MOVQ  1(DX)(R8*1), R12
 15421	SHLQ  $0x08, R9
 15422	IMULQ BX, R9
 15423	SHRQ  $0x2f, R9
 15424	SHLQ  $0x20, R10
 15425	IMULQ DI, R10
 15426	SHRQ  $0x32, R10
 15427	SHLQ  $0x08, R11
 15428	IMULQ BX, R11
 15429	SHRQ  $0x2f, R11
 15430	SHLQ  $0x20, R12
 15431	IMULQ DI, R12
 15432	SHRQ  $0x32, R12
 15433	LEAQ  1(SI), DI
 15434	LEAQ  1(R8), R13
 15435	MOVL  SI, 24(SP)(R9*4)
 15436	MOVL  R8, 24(SP)(R11*4)
 15437	MOVL  DI, 524312(SP)(R10*4)
 15438	MOVL  R13, 524312(SP)(R12*4)
 15439	LEAQ  1(R8)(SI*1), DI
 15440	SHRQ  $0x01, DI
 15441	ADDQ  $0x01, SI
 15442	SUBQ  $0x01, R8
 15443
 15444index_loop_encodeSnappyBetterBlockAsm:
 15445	CMPQ  DI, R8
 15446	JAE   search_loop_encodeSnappyBetterBlockAsm
 15447	MOVQ  (DX)(SI*1), R9
 15448	MOVQ  (DX)(DI*1), R10
 15449	SHLQ  $0x08, R9
 15450	IMULQ BX, R9
 15451	SHRQ  $0x2f, R9
 15452	SHLQ  $0x08, R10
 15453	IMULQ BX, R10
 15454	SHRQ  $0x2f, R10
 15455	MOVL  SI, 24(SP)(R9*4)
 15456	MOVL  DI, 24(SP)(R10*4)
 15457	ADDQ  $0x02, SI
 15458	ADDQ  $0x02, DI
 15459	JMP   index_loop_encodeSnappyBetterBlockAsm
 15460
 15461emit_remainder_encodeSnappyBetterBlockAsm:
 15462	MOVQ src_len+32(FP), CX
 15463	SUBL 12(SP), CX
 15464	LEAQ 5(AX)(CX*1), CX
 15465	CMPQ CX, (SP)
 15466	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm
 15467	MOVQ $0x00000000, ret+48(FP)
 15468	RET
 15469
 15470emit_remainder_ok_encodeSnappyBetterBlockAsm:
 15471	MOVQ src_len+32(FP), CX
 15472	MOVL 12(SP), BX
 15473	CMPL BX, CX
 15474	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
 15475	MOVL CX, SI
 15476	MOVL CX, 12(SP)
 15477	LEAQ (DX)(BX*1), CX
 15478	SUBL BX, SI
 15479	LEAL -1(SI), DX
 15480	CMPL DX, $0x3c
 15481	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm
 15482	CMPL DX, $0x00000100
 15483	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm
 15484	CMPL DX, $0x00010000
 15485	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
 15486	CMPL DX, $0x01000000
 15487	JB   four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
 15488	MOVB $0xfc, (AX)
 15489	MOVL DX, 1(AX)
 15490	ADDQ $0x05, AX
 15491	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 15492
 15493four_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
 15494	MOVL DX, BX
 15495	SHRL $0x10, BX
 15496	MOVB $0xf8, (AX)
 15497	MOVW DX, 1(AX)
 15498	MOVB BL, 3(AX)
 15499	ADDQ $0x04, AX
 15500	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 15501
 15502three_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
 15503	MOVB $0xf4, (AX)
 15504	MOVW DX, 1(AX)
 15505	ADDQ $0x03, AX
 15506	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 15507
 15508two_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
 15509	MOVB $0xf0, (AX)
 15510	MOVB DL, 1(AX)
 15511	ADDQ $0x02, AX
 15512	CMPL DX, $0x40
 15513	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm
 15514	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 15515
 15516one_byte_emit_remainder_encodeSnappyBetterBlockAsm:
 15517	SHLB $0x02, DL
 15518	MOVB DL, (AX)
 15519	ADDQ $0x01, AX
 15520
 15521memmove_emit_remainder_encodeSnappyBetterBlockAsm:
 15522	LEAQ (AX)(SI*1), DX
 15523	MOVL SI, BX
 15524
 15525	// genMemMoveShort
 15526	CMPQ BX, $0x03
 15527	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2
 15528	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3
 15529	CMPQ BX, $0x08
 15530	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7
 15531	CMPQ BX, $0x10
 15532	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16
 15533	CMPQ BX, $0x20
 15534	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32
 15535	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
 15536
 15537emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2:
 15538	MOVB (CX), SI
 15539	MOVB -1(CX)(BX*1), CL
 15540	MOVB SI, (AX)
 15541	MOVB CL, -1(AX)(BX*1)
 15542	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 15543
 15544emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3:
 15545	MOVW (CX), SI
 15546	MOVB 2(CX), CL
 15547	MOVW SI, (AX)
 15548	MOVB CL, 2(AX)
 15549	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 15550
 15551emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7:
 15552	MOVL (CX), SI
 15553	MOVL -4(CX)(BX*1), CX
 15554	MOVL SI, (AX)
 15555	MOVL CX, -4(AX)(BX*1)
 15556	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 15557
 15558emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16:
 15559	MOVQ (CX), SI
 15560	MOVQ -8(CX)(BX*1), CX
 15561	MOVQ SI, (AX)
 15562	MOVQ CX, -8(AX)(BX*1)
 15563	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 15564
 15565emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32:
 15566	MOVOU (CX), X0
 15567	MOVOU -16(CX)(BX*1), X1
 15568	MOVOU X0, (AX)
 15569	MOVOU X1, -16(AX)(BX*1)
 15570	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 15571
 15572emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64:
 15573	MOVOU (CX), X0
 15574	MOVOU 16(CX), X1
 15575	MOVOU -32(CX)(BX*1), X2
 15576	MOVOU -16(CX)(BX*1), X3
 15577	MOVOU X0, (AX)
 15578	MOVOU X1, 16(AX)
 15579	MOVOU X2, -32(AX)(BX*1)
 15580	MOVOU X3, -16(AX)(BX*1)
 15581
 15582memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
 15583	MOVQ DX, AX
 15584	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
 15585
 15586memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
 15587	LEAQ (AX)(SI*1), DX
 15588	MOVL SI, BX
 15589
 15590	// genMemMoveLong
 15591	MOVOU (CX), X0
 15592	MOVOU 16(CX), X1
 15593	MOVOU -32(CX)(BX*1), X2
 15594	MOVOU -16(CX)(BX*1), X3
 15595	MOVQ  BX, DI
 15596	SHRQ  $0x05, DI
 15597	MOVQ  AX, SI
 15598	ANDL  $0x0000001f, SI
 15599	MOVQ  $0x00000040, R8
 15600	SUBQ  SI, R8
 15601	DECQ  DI
 15602	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
 15603	LEAQ  -32(CX)(R8*1), SI
 15604	LEAQ  -32(AX)(R8*1), R9
 15605
 15606emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back:
 15607	MOVOU (SI), X4
 15608	MOVOU 16(SI), X5
 15609	MOVOA X4, (R9)
 15610	MOVOA X5, 16(R9)
 15611	ADDQ  $0x20, R9
 15612	ADDQ  $0x20, SI
 15613	ADDQ  $0x20, R8
 15614	DECQ  DI
 15615	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
 15616
 15617emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
 15618	MOVOU -32(CX)(R8*1), X4
 15619	MOVOU -16(CX)(R8*1), X5
 15620	MOVOA X4, -32(AX)(R8*1)
 15621	MOVOA X5, -16(AX)(R8*1)
 15622	ADDQ  $0x20, R8
 15623	CMPQ  BX, R8
 15624	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
 15625	MOVOU X0, (AX)
 15626	MOVOU X1, 16(AX)
 15627	MOVOU X2, -32(AX)(BX*1)
 15628	MOVOU X3, -16(AX)(BX*1)
 15629	MOVQ  DX, AX
 15630
 15631emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
 15632	MOVQ dst_base+0(FP), CX
 15633	SUBQ CX, AX
 15634	MOVQ AX, ret+48(FP)
 15635	RET
 15636
 15637// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
 15638// Requires: BMI, SSE2
 15639TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56
 15640	MOVQ dst_base+0(FP), AX
 15641	MOVQ $0x00000a00, CX
 15642	LEAQ 24(SP), DX
 15643	PXOR X0, X0
 15644
 15645zero_loop_encodeSnappyBetterBlockAsm64K:
 15646	MOVOU X0, (DX)
 15647	MOVOU X0, 16(DX)
 15648	MOVOU X0, 32(DX)
 15649	MOVOU X0, 48(DX)
 15650	MOVOU X0, 64(DX)
 15651	MOVOU X0, 80(DX)
 15652	MOVOU X0, 96(DX)
 15653	MOVOU X0, 112(DX)
 15654	ADDQ  $0x80, DX
 15655	DECQ  CX
 15656	JNZ   zero_loop_encodeSnappyBetterBlockAsm64K
 15657	MOVL  $0x00000000, 12(SP)
 15658	MOVQ  src_len+32(FP), CX
 15659	LEAQ  -9(CX), DX
 15660	LEAQ  -8(CX), BX
 15661	MOVL  BX, 8(SP)
 15662	SHRQ  $0x05, CX
 15663	SUBL  CX, DX
 15664	LEAQ  (AX)(DX*1), DX
 15665	MOVQ  DX, (SP)
 15666	MOVL  $0x00000001, CX
 15667	MOVL  $0x00000000, 16(SP)
 15668	MOVQ  src_base+24(FP), DX
 15669
 15670search_loop_encodeSnappyBetterBlockAsm64K:
 15671	MOVL  CX, BX
 15672	SUBL  12(SP), BX
 15673	SHRL  $0x07, BX
 15674	LEAL  1(CX)(BX*1), BX
 15675	CMPL  BX, 8(SP)
 15676	JAE   emit_remainder_encodeSnappyBetterBlockAsm64K
 15677	MOVQ  (DX)(CX*1), SI
 15678	MOVL  BX, 20(SP)
 15679	MOVQ  $0x00cf1bbcdcbfa563, R8
 15680	MOVQ  $0x9e3779b1, BX
 15681	MOVQ  SI, R9
 15682	MOVQ  SI, R10
 15683	SHLQ  $0x08, R9
 15684	IMULQ R8, R9
 15685	SHRQ  $0x30, R9
 15686	SHLQ  $0x20, R10
 15687	IMULQ BX, R10
 15688	SHRQ  $0x32, R10
 15689	MOVL  24(SP)(R9*4), BX
 15690	MOVL  262168(SP)(R10*4), DI
 15691	MOVL  CX, 24(SP)(R9*4)
 15692	MOVL  CX, 262168(SP)(R10*4)
 15693	MOVQ  (DX)(BX*1), R9
 15694	MOVQ  (DX)(DI*1), R10
 15695	CMPQ  R9, SI
 15696	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K
 15697	CMPQ  R10, SI
 15698	JNE   no_short_found_encodeSnappyBetterBlockAsm64K
 15699	MOVL  DI, BX
 15700	JMP   candidate_match_encodeSnappyBetterBlockAsm64K
 15701
 15702no_short_found_encodeSnappyBetterBlockAsm64K:
 15703	CMPL R9, SI
 15704	JEQ  candidate_match_encodeSnappyBetterBlockAsm64K
 15705	CMPL R10, SI
 15706	JEQ  candidateS_match_encodeSnappyBetterBlockAsm64K
 15707	MOVL 20(SP), CX
 15708	JMP  search_loop_encodeSnappyBetterBlockAsm64K
 15709
 15710candidateS_match_encodeSnappyBetterBlockAsm64K:
 15711	SHRQ  $0x08, SI
 15712	MOVQ  SI, R9
 15713	SHLQ  $0x08, R9
 15714	IMULQ R8, R9
 15715	SHRQ  $0x30, R9
 15716	MOVL  24(SP)(R9*4), BX
 15717	INCL  CX
 15718	MOVL  CX, 24(SP)(R9*4)
 15719	CMPL  (DX)(BX*1), SI
 15720	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K
 15721	DECL  CX
 15722	MOVL  DI, BX
 15723
 15724candidate_match_encodeSnappyBetterBlockAsm64K:
 15725	MOVL  12(SP), SI
 15726	TESTL BX, BX
 15727	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm64K
 15728
 15729match_extend_back_loop_encodeSnappyBetterBlockAsm64K:
 15730	CMPL CX, SI
 15731	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm64K
 15732	MOVB -1(DX)(BX*1), DI
 15733	MOVB -1(DX)(CX*1), R8
 15734	CMPB DI, R8
 15735	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm64K
 15736	LEAL -1(CX), CX
 15737	DECL BX
 15738	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm64K
 15739	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm64K
 15740
 15741match_extend_back_end_encodeSnappyBetterBlockAsm64K:
 15742	MOVL CX, SI
 15743	SUBL 12(SP), SI
 15744	LEAQ 3(AX)(SI*1), SI
 15745	CMPQ SI, (SP)
 15746	JB   match_dst_size_check_encodeSnappyBetterBlockAsm64K
 15747	MOVQ $0x00000000, ret+48(FP)
 15748	RET
 15749
 15750match_dst_size_check_encodeSnappyBetterBlockAsm64K:
 15751	MOVL CX, SI
 15752	ADDL $0x04, CX
 15753	ADDL $0x04, BX
 15754	MOVQ src_len+32(FP), DI
 15755	SUBL CX, DI
 15756	LEAQ (DX)(CX*1), R8
 15757	LEAQ (DX)(BX*1), R9
 15758
 15759	// matchLen
 15760	XORL R11, R11
 15761
 15762matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K:
 15763	CMPL DI, $0x10
 15764	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K
 15765	MOVQ (R8)(R11*1), R10
 15766	MOVQ 8(R8)(R11*1), R12
 15767	XORQ (R9)(R11*1), R10
 15768	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
 15769	XORQ 8(R9)(R11*1), R12
 15770	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K
 15771	LEAL -16(DI), DI
 15772	LEAL 16(R11), R11
 15773	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K
 15774
 15775matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K:
 15776#ifdef GOAMD64_v3
 15777	TZCNTQ R12, R12
 15778
 15779#else
 15780	BSFQ R12, R12
 15781
 15782#endif
 15783	SARQ $0x03, R12
 15784	LEAL 8(R11)(R12*1), R11
 15785	JMP  match_nolit_end_encodeSnappyBetterBlockAsm64K
 15786
 15787matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K:
 15788	CMPL DI, $0x08
 15789	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
 15790	MOVQ (R8)(R11*1), R10
 15791	XORQ (R9)(R11*1), R10
 15792	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
 15793	LEAL -8(DI), DI
 15794	LEAL 8(R11), R11
 15795	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
 15796
 15797matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K:
 15798#ifdef GOAMD64_v3
 15799	TZCNTQ R10, R10
 15800
 15801#else
 15802	BSFQ R10, R10
 15803
 15804#endif
 15805	SARQ $0x03, R10
 15806	LEAL (R11)(R10*1), R11
 15807	JMP  match_nolit_end_encodeSnappyBetterBlockAsm64K
 15808
 15809matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K:
 15810	CMPL DI, $0x04
 15811	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
 15812	MOVL (R8)(R11*1), R10
 15813	CMPL (R9)(R11*1), R10
 15814	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
 15815	LEAL -4(DI), DI
 15816	LEAL 4(R11), R11
 15817
 15818matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K:
 15819	CMPL DI, $0x01
 15820	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
 15821	JB   match_nolit_end_encodeSnappyBetterBlockAsm64K
 15822	MOVW (R8)(R11*1), R10
 15823	CMPW (R9)(R11*1), R10
 15824	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
 15825	LEAL 2(R11), R11
 15826	SUBL $0x02, DI
 15827	JZ   match_nolit_end_encodeSnappyBetterBlockAsm64K
 15828
 15829matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K:
 15830	MOVB (R8)(R11*1), R10
 15831	CMPB (R9)(R11*1), R10
 15832	JNE  match_nolit_end_encodeSnappyBetterBlockAsm64K
 15833	LEAL 1(R11), R11
 15834
 15835match_nolit_end_encodeSnappyBetterBlockAsm64K:
 15836	MOVL CX, DI
 15837	SUBL BX, DI
 15838
 15839	// Check if repeat
 15840	MOVL DI, 16(SP)
 15841	MOVL 12(SP), BX
 15842	CMPL BX, SI
 15843	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
 15844	MOVL SI, R8
 15845	MOVL SI, 12(SP)
 15846	LEAQ (DX)(BX*1), R9
 15847	SUBL BX, R8
 15848	LEAL -1(R8), BX
 15849	CMPL BX, $0x3c
 15850	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm64K
 15851	CMPL BX, $0x00000100
 15852	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
 15853	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm64K
 15854
 15855three_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
 15856	MOVB $0xf4, (AX)
 15857	MOVW BX, 1(AX)
 15858	ADDQ $0x03, AX
 15859	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
 15860
 15861two_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
 15862	MOVB $0xf0, (AX)
 15863	MOVB BL, 1(AX)
 15864	ADDQ $0x02, AX
 15865	CMPL BX, $0x40
 15866	JB   memmove_match_emit_encodeSnappyBetterBlockAsm64K
 15867	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
 15868
 15869one_byte_match_emit_encodeSnappyBetterBlockAsm64K:
 15870	SHLB $0x02, BL
 15871	MOVB BL, (AX)
 15872	ADDQ $0x01, AX
 15873
 15874memmove_match_emit_encodeSnappyBetterBlockAsm64K:
 15875	LEAQ (AX)(R8*1), BX
 15876
 15877	// genMemMoveShort
 15878	CMPQ R8, $0x08
 15879	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
 15880	CMPQ R8, $0x10
 15881	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
 15882	CMPQ R8, $0x20
 15883	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
 15884	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
 15885
 15886emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8:
 15887	MOVQ (R9), R10
 15888	MOVQ R10, (AX)
 15889	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
 15890
 15891emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
 15892	MOVQ (R9), R10
 15893	MOVQ -8(R9)(R8*1), R9
 15894	MOVQ R10, (AX)
 15895	MOVQ R9, -8(AX)(R8*1)
 15896	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
 15897
 15898emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
 15899	MOVOU (R9), X0
 15900	MOVOU -16(R9)(R8*1), X1
 15901	MOVOU X0, (AX)
 15902	MOVOU X1, -16(AX)(R8*1)
 15903	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
 15904
 15905emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
 15906	MOVOU (R9), X0
 15907	MOVOU 16(R9), X1
 15908	MOVOU -32(R9)(R8*1), X2
 15909	MOVOU -16(R9)(R8*1), X3
 15910	MOVOU X0, (AX)
 15911	MOVOU X1, 16(AX)
 15912	MOVOU X2, -32(AX)(R8*1)
 15913	MOVOU X3, -16(AX)(R8*1)
 15914
 15915memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
 15916	MOVQ BX, AX
 15917	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
 15918
 15919memmove_long_match_emit_encodeSnappyBetterBlockAsm64K:
 15920	LEAQ (AX)(R8*1), BX
 15921
 15922	// genMemMoveLong
 15923	MOVOU (R9), X0
 15924	MOVOU 16(R9), X1
 15925	MOVOU -32(R9)(R8*1), X2
 15926	MOVOU -16(R9)(R8*1), X3
 15927	MOVQ  R8, R12
 15928	SHRQ  $0x05, R12
 15929	MOVQ  AX, R10
 15930	ANDL  $0x0000001f, R10
 15931	MOVQ  $0x00000040, R13
 15932	SUBQ  R10, R13
 15933	DECQ  R12
 15934	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
 15935	LEAQ  -32(R9)(R13*1), R10
 15936	LEAQ  -32(AX)(R13*1), R14
 15937
 15938emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
 15939	MOVOU (R10), X4
 15940	MOVOU 16(R10), X5
 15941	MOVOA X4, (R14)
 15942	MOVOA X5, 16(R14)
 15943	ADDQ  $0x20, R14
 15944	ADDQ  $0x20, R10
 15945	ADDQ  $0x20, R13
 15946	DECQ  R12
 15947	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
 15948
 15949emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
 15950	MOVOU -32(R9)(R13*1), X4
 15951	MOVOU -16(R9)(R13*1), X5
 15952	MOVOA X4, -32(AX)(R13*1)
 15953	MOVOA X5, -16(AX)(R13*1)
 15954	ADDQ  $0x20, R13
 15955	CMPQ  R8, R13
 15956	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
 15957	MOVOU X0, (AX)
 15958	MOVOU X1, 16(AX)
 15959	MOVOU X2, -32(AX)(R8*1)
 15960	MOVOU X3, -16(AX)(R8*1)
 15961	MOVQ  BX, AX
 15962
 15963emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
 15964	ADDL R11, CX
 15965	ADDL $0x04, R11
 15966	MOVL CX, 12(SP)
 15967
 15968	// emitCopy
 15969two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K:
 15970	CMPL R11, $0x40
 15971	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
 15972	MOVB $0xee, (AX)
 15973	MOVW DI, 1(AX)
 15974	LEAL -60(R11), R11
 15975	ADDQ $0x03, AX
 15976	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
 15977
 15978two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K:
 15979	MOVL R11, BX
 15980	SHLL $0x02, BX
 15981	CMPL R11, $0x0c
 15982	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
 15983	CMPL DI, $0x00000800
 15984	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
 15985	LEAL -15(BX), BX
 15986	MOVB DI, 1(AX)
 15987	SHRL $0x08, DI
 15988	SHLL $0x05, DI
 15989	ORL  DI, BX
 15990	MOVB BL, (AX)
 15991	ADDQ $0x02, AX
 15992	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
 15993
 15994emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
 15995	LEAL -2(BX), BX
 15996	MOVB BL, (AX)
 15997	MOVW DI, 1(AX)
 15998	ADDQ $0x03, AX
 15999
 16000match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
 16001	CMPL CX, 8(SP)
 16002	JAE  emit_remainder_encodeSnappyBetterBlockAsm64K
 16003	CMPQ AX, (SP)
 16004	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
 16005	MOVQ $0x00000000, ret+48(FP)
 16006	RET
 16007
 16008match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
 16009	MOVQ  $0x00cf1bbcdcbfa563, BX
 16010	MOVQ  $0x9e3779b1, DI
 16011	LEAQ  1(SI), SI
 16012	LEAQ  -2(CX), R8
 16013	MOVQ  (DX)(SI*1), R9
 16014	MOVQ  1(DX)(SI*1), R10
 16015	MOVQ  (DX)(R8*1), R11
 16016	MOVQ  1(DX)(R8*1), R12
 16017	SHLQ  $0x08, R9
 16018	IMULQ BX, R9
 16019	SHRQ  $0x30, R9
 16020	SHLQ  $0x20, R10
 16021	IMULQ DI, R10
 16022	SHRQ  $0x32, R10
 16023	SHLQ  $0x08, R11
 16024	IMULQ BX, R11
 16025	SHRQ  $0x30, R11
 16026	SHLQ  $0x20, R12
 16027	IMULQ DI, R12
 16028	SHRQ  $0x32, R12
 16029	LEAQ  1(SI), DI
 16030	LEAQ  1(R8), R13
 16031	MOVL  SI, 24(SP)(R9*4)
 16032	MOVL  R8, 24(SP)(R11*4)
 16033	MOVL  DI, 262168(SP)(R10*4)
 16034	MOVL  R13, 262168(SP)(R12*4)
 16035	LEAQ  1(R8)(SI*1), DI
 16036	SHRQ  $0x01, DI
 16037	ADDQ  $0x01, SI
 16038	SUBQ  $0x01, R8
 16039
 16040index_loop_encodeSnappyBetterBlockAsm64K:
 16041	CMPQ  DI, R8
 16042	JAE   search_loop_encodeSnappyBetterBlockAsm64K
 16043	MOVQ  (DX)(SI*1), R9
 16044	MOVQ  (DX)(DI*1), R10
 16045	SHLQ  $0x08, R9
 16046	IMULQ BX, R9
 16047	SHRQ  $0x30, R9
 16048	SHLQ  $0x08, R10
 16049	IMULQ BX, R10
 16050	SHRQ  $0x30, R10
 16051	MOVL  SI, 24(SP)(R9*4)
 16052	MOVL  DI, 24(SP)(R10*4)
 16053	ADDQ  $0x02, SI
 16054	ADDQ  $0x02, DI
 16055	JMP   index_loop_encodeSnappyBetterBlockAsm64K
 16056
 16057emit_remainder_encodeSnappyBetterBlockAsm64K:
 16058	MOVQ src_len+32(FP), CX
 16059	SUBL 12(SP), CX
 16060	LEAQ 3(AX)(CX*1), CX
 16061	CMPQ CX, (SP)
 16062	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm64K
 16063	MOVQ $0x00000000, ret+48(FP)
 16064	RET
 16065
 16066emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
 16067	MOVQ src_len+32(FP), CX
 16068	MOVL 12(SP), BX
 16069	CMPL BX, CX
 16070	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
 16071	MOVL CX, SI
 16072	MOVL CX, 12(SP)
 16073	LEAQ (DX)(BX*1), CX
 16074	SUBL BX, SI
 16075	LEAL -1(SI), DX
 16076	CMPL DX, $0x3c
 16077	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
 16078	CMPL DX, $0x00000100
 16079	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
 16080	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
 16081
 16082three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
 16083	MOVB $0xf4, (AX)
 16084	MOVW DX, 1(AX)
 16085	ADDQ $0x03, AX
 16086	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
 16087
 16088two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
 16089	MOVB $0xf0, (AX)
 16090	MOVB DL, 1(AX)
 16091	ADDQ $0x02, AX
 16092	CMPL DX, $0x40
 16093	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm64K
 16094	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
 16095
 16096one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K:
 16097	SHLB $0x02, DL
 16098	MOVB DL, (AX)
 16099	ADDQ $0x01, AX
 16100
 16101memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
 16102	LEAQ (AX)(SI*1), DX
 16103	MOVL SI, BX
 16104
 16105	// genMemMoveShort
 16106	CMPQ BX, $0x03
 16107	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2
 16108	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3
 16109	CMPQ BX, $0x08
 16110	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7
 16111	CMPQ BX, $0x10
 16112	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
 16113	CMPQ BX, $0x20
 16114	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
 16115	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
 16116
 16117emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2:
 16118	MOVB (CX), SI
 16119	MOVB -1(CX)(BX*1), CL
 16120	MOVB SI, (AX)
 16121	MOVB CL, -1(AX)(BX*1)
 16122	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 16123
 16124emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3:
 16125	MOVW (CX), SI
 16126	MOVB 2(CX), CL
 16127	MOVW SI, (AX)
 16128	MOVB CL, 2(AX)
 16129	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 16130
 16131emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7:
 16132	MOVL (CX), SI
 16133	MOVL -4(CX)(BX*1), CX
 16134	MOVL SI, (AX)
 16135	MOVL CX, -4(AX)(BX*1)
 16136	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 16137
 16138emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
 16139	MOVQ (CX), SI
 16140	MOVQ -8(CX)(BX*1), CX
 16141	MOVQ SI, (AX)
 16142	MOVQ CX, -8(AX)(BX*1)
 16143	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 16144
 16145emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
 16146	MOVOU (CX), X0
 16147	MOVOU -16(CX)(BX*1), X1
 16148	MOVOU X0, (AX)
 16149	MOVOU X1, -16(AX)(BX*1)
 16150	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 16151
 16152emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
 16153	MOVOU (CX), X0
 16154	MOVOU 16(CX), X1
 16155	MOVOU -32(CX)(BX*1), X2
 16156	MOVOU -16(CX)(BX*1), X3
 16157	MOVOU X0, (AX)
 16158	MOVOU X1, 16(AX)
 16159	MOVOU X2, -32(AX)(BX*1)
 16160	MOVOU X3, -16(AX)(BX*1)
 16161
 16162memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
 16163	MOVQ DX, AX
 16164	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
 16165
 16166memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K:
 16167	LEAQ (AX)(SI*1), DX
 16168	MOVL SI, BX
 16169
 16170	// genMemMoveLong
 16171	MOVOU (CX), X0
 16172	MOVOU 16(CX), X1
 16173	MOVOU -32(CX)(BX*1), X2
 16174	MOVOU -16(CX)(BX*1), X3
 16175	MOVQ  BX, DI
 16176	SHRQ  $0x05, DI
 16177	MOVQ  AX, SI
 16178	ANDL  $0x0000001f, SI
 16179	MOVQ  $0x00000040, R8
 16180	SUBQ  SI, R8
 16181	DECQ  DI
 16182	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
 16183	LEAQ  -32(CX)(R8*1), SI
 16184	LEAQ  -32(AX)(R8*1), R9
 16185
 16186emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
 16187	MOVOU (SI), X4
 16188	MOVOU 16(SI), X5
 16189	MOVOA X4, (R9)
 16190	MOVOA X5, 16(R9)
 16191	ADDQ  $0x20, R9
 16192	ADDQ  $0x20, SI
 16193	ADDQ  $0x20, R8
 16194	DECQ  DI
 16195	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
 16196
 16197emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
 16198	MOVOU -32(CX)(R8*1), X4
 16199	MOVOU -16(CX)(R8*1), X5
 16200	MOVOA X4, -32(AX)(R8*1)
 16201	MOVOA X5, -16(AX)(R8*1)
 16202	ADDQ  $0x20, R8
 16203	CMPQ  BX, R8
 16204	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
 16205	MOVOU X0, (AX)
 16206	MOVOU X1, 16(AX)
 16207	MOVOU X2, -32(AX)(BX*1)
 16208	MOVOU X3, -16(AX)(BX*1)
 16209	MOVQ  DX, AX
 16210
 16211emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
 16212	MOVQ dst_base+0(FP), CX
 16213	SUBQ CX, AX
 16214	MOVQ AX, ret+48(FP)
 16215	RET
 16216
 16217// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
      //
      // Encodes src into dst as a sequence of literal and copy elements and
      // returns the number of bytes written (final dst pointer - dst_base).
      // Returns 0 if any of the three bounds checks against the dst limit fails.
      // NOTE(review): the tag bytes written below look like the Snappy block
      // format, as the function name suggests - confirm against the Go stub.
      //
      // Stack frame (81944 bytes):
      //   0(SP)      8 bytes      dst limit = dst_base + src_len - 9 - src_len/32
      //   8(SP)      4 bytes      search limit = src_len - 8
      //   12(SP)     4 bytes      src index of the first byte not yet emitted
      //   16(SP)     4 bytes      offset of the latest match (written, never read here)
      //   20(SP)     4 bytes      next search position
      //   24(SP)     65536 bytes  16384 x uint32 src positions, indexed by a 14-bit hash of 6 bytes
      //   65560(SP)  16384 bytes  4096 x uint32 src positions, indexed by a 12-bit hash of 4 bytes
      //
      // Registers kept across the main loop:
      //   AX = dst write pointer, CX = current src index, DX = src base pointer.
      // TZCNTQ is only assembled when GOAMD64_v3 is defined; BSFQ is used otherwise.
 16218// Requires: BMI, SSE2
 16219TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56
 16220	MOVQ dst_base+0(FP), AX // AX = dst write pointer
 16221	MOVQ $0x00000280, CX // 640 iterations x 128 bytes = 81920 bytes = both tables
 16222	LEAQ 24(SP), DX // start of the first table
 16223	PXOR X0, X0
 16224
 16225zero_loop_encodeSnappyBetterBlockAsm12B:
 16226	MOVOU X0, (DX) // clear 128 bytes of table memory per iteration
 16227	MOVOU X0, 16(DX)
 16228	MOVOU X0, 32(DX)
 16229	MOVOU X0, 48(DX)
 16230	MOVOU X0, 64(DX)
 16231	MOVOU X0, 80(DX)
 16232	MOVOU X0, 96(DX)
 16233	MOVOU X0, 112(DX)
 16234	ADDQ  $0x80, DX
 16235	DECQ  CX
 16236	JNZ   zero_loop_encodeSnappyBetterBlockAsm12B
 16237	MOVL  $0x00000000, 12(SP) // nothing emitted yet
 16238	MOVQ  src_len+32(FP), CX
 16239	LEAQ  -9(CX), DX
 16240	LEAQ  -8(CX), BX
 16241	MOVL  BX, 8(SP) // search limit = src_len - 8
 16242	SHRQ  $0x05, CX
 16243	SUBL  CX, DX // DX = src_len - 9 - src_len/32
 16244	LEAQ  (AX)(DX*1), DX
 16245	MOVQ  DX, (SP) // dst limit = dst_base + DX
 16246	MOVL  $0x00000001, CX // searching starts at src index 1
 16247	MOVL  $0x00000000, 16(SP)
 16248	MOVQ  src_base+24(FP), DX // DX = src base pointer from here on
 16249
 16250search_loop_encodeSnappyBetterBlockAsm12B:
 16251	MOVL  CX, BX
 16252	SUBL  12(SP), BX
 16253	SHRL  $0x06, BX
 16254	LEAL  1(CX)(BX*1), BX // next position = CX + 1 + (CX - 12(SP))/64
 16255	CMPL  BX, 8(SP)
 16256	JAE   emit_remainder_encodeSnappyBetterBlockAsm12B // next position reaches the search limit
 16257	MOVQ  (DX)(CX*1), SI // SI = 8 bytes at src[CX]
 16258	MOVL  BX, 20(SP) // remember the next position
 16259	MOVQ  $0x0000cf1bbcdcbf9b, R8 // multiplier of the 6-byte hash
 16260	MOVQ  $0x9e3779b1, BX // multiplier of the 4-byte hash
 16261	MOVQ  SI, R9
 16262	MOVQ  SI, R10
 16263	SHLQ  $0x10, R9 // discard the top 2 bytes: hash covers 6 bytes
 16264	IMULQ R8, R9
 16265	SHRQ  $0x32, R9 // keep the top 14 bits
 16266	SHLQ  $0x20, R10 // discard the top 4 bytes: hash covers 4 bytes
 16267	IMULQ BX, R10
 16268	SHRQ  $0x34, R10 // keep the top 12 bits
 16269	MOVL  24(SP)(R9*4), BX // BX = candidate from the 6-byte table
 16270	MOVL  65560(SP)(R10*4), DI // DI = candidate from the 4-byte table
 16271	MOVL  CX, 24(SP)(R9*4) // record CX in both tables
 16272	MOVL  CX, 65560(SP)(R10*4)
 16273	MOVQ  (DX)(BX*1), R9
 16274	MOVQ  (DX)(DI*1), R10
 16275	CMPQ  R9, SI // all 8 bytes equal at the 6-byte-table candidate?
 16276	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B
 16277	CMPQ  R10, SI // all 8 bytes equal at the 4-byte-table candidate?
 16278	JNE   no_short_found_encodeSnappyBetterBlockAsm12B
 16279	MOVL  DI, BX
 16280	JMP   candidate_match_encodeSnappyBetterBlockAsm12B
 16281
 16282no_short_found_encodeSnappyBetterBlockAsm12B:
 16283	CMPL R9, SI // fall back to comparing only the first 4 bytes
 16284	JEQ  candidate_match_encodeSnappyBetterBlockAsm12B
 16285	CMPL R10, SI
 16286	JEQ  candidateS_match_encodeSnappyBetterBlockAsm12B
 16287	MOVL 20(SP), CX // no match: continue at the saved next position
 16288	JMP  search_loop_encodeSnappyBetterBlockAsm12B
 16289
 16290candidateS_match_encodeSnappyBetterBlockAsm12B:
 16291	SHRQ  $0x08, SI // SI = bytes starting at src[CX+1]
 16292	MOVQ  SI, R9
 16293	SHLQ  $0x10, R9
 16294	IMULQ R8, R9
 16295	SHRQ  $0x32, R9
 16296	MOVL  24(SP)(R9*4), BX // 6-byte-table candidate for position CX+1
 16297	INCL  CX
 16298	MOVL  CX, 24(SP)(R9*4)
 16299	CMPL  (DX)(BX*1), SI // first 4 bytes equal at that candidate?
 16300	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B
 16301	DECL  CX // no: return to CX and use the 4-byte-table candidate
 16302	MOVL  DI, BX
 16303
 16304candidate_match_encodeSnappyBetterBlockAsm12B:
 16305	MOVL  12(SP), SI
 16306	TESTL BX, BX
 16307	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm12B // candidate at index 0: nothing before it
 16308
 16309match_extend_back_loop_encodeSnappyBetterBlockAsm12B:
 16310	CMPL CX, SI
 16311	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm12B // do not extend into bytes already emitted
 16312	MOVB -1(DX)(BX*1), DI
 16313	MOVB -1(DX)(CX*1), R8
 16314	CMPB DI, R8
 16315	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm12B
 16316	LEAL -1(CX), CX // preceding bytes are equal: move both positions back by one
 16317	DECL BX
 16318	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm12B
 16319	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm12B
 16320
 16321match_extend_back_end_encodeSnappyBetterBlockAsm12B:
 16322	MOVL CX, SI
 16323	SUBL 12(SP), SI // SI = number of pending literal bytes
 16324	LEAQ 3(AX)(SI*1), SI
 16325	CMPQ SI, (SP)
 16326	JB   match_dst_size_check_encodeSnappyBetterBlockAsm12B
 16327	MOVQ $0x00000000, ret+48(FP) // AX + 3 + literal length reaches the dst limit: return 0
 16328	RET
 16329
 16330match_dst_size_check_encodeSnappyBetterBlockAsm12B:
 16331	MOVL CX, SI // SI = match start in src
 16332	ADDL $0x04, CX // advance both sides by 4 before measuring the rest
 16333	ADDL $0x04, BX
 16334	MOVQ src_len+32(FP), DI
 16335	SUBL CX, DI // DI = bytes left in src from CX
 16336	LEAQ (DX)(CX*1), R8
 16337	LEAQ (DX)(BX*1), R9
 16338
 16339	// matchLen
 16340	XORL R11, R11 // R11 = number of equal bytes found at R8/R9 so far
 16341
 16342matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B:
 16343	CMPL DI, $0x10
 16344	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B
 16345	MOVQ (R8)(R11*1), R10
 16346	MOVQ 8(R8)(R11*1), R12
 16347	XORQ (R9)(R11*1), R10 // non-zero result marks a difference in the first 8 bytes
 16348	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
 16349	XORQ 8(R9)(R11*1), R12
 16350	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B
 16351	LEAL -16(DI), DI
 16352	LEAL 16(R11), R11
 16353	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B
 16354
 16355matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B:
 16356#ifdef GOAMD64_v3
 16357	TZCNTQ R12, R12
 16358
 16359#else
 16360	BSFQ R12, R12
 16361
 16362#endif
 16363	SARQ $0x03, R12 // lowest set bit index -> index of the first differing byte
 16364	LEAL 8(R11)(R12*1), R11 // plus the 8 bytes of the first word, which were equal
 16365	JMP  match_nolit_end_encodeSnappyBetterBlockAsm12B
 16366
 16367matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B:
 16368	CMPL DI, $0x08
 16369	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
 16370	MOVQ (R8)(R11*1), R10
 16371	XORQ (R9)(R11*1), R10
 16372	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
 16373	LEAL -8(DI), DI
 16374	LEAL 8(R11), R11
 16375	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
 16376
 16377matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B:
 16378#ifdef GOAMD64_v3
 16379	TZCNTQ R10, R10
 16380
 16381#else
 16382	BSFQ R10, R10
 16383
 16384#endif
 16385	SARQ $0x03, R10 // lowest set bit index -> index of the first differing byte
 16386	LEAL (R11)(R10*1), R11
 16387	JMP  match_nolit_end_encodeSnappyBetterBlockAsm12B
 16388
 16389matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B:
 16390	CMPL DI, $0x04
 16391	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
 16392	MOVL (R8)(R11*1), R10
 16393	CMPL (R9)(R11*1), R10
 16394	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
 16395	LEAL -4(DI), DI
 16396	LEAL 4(R11), R11
 16397
 16398matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B:
 16399	CMPL DI, $0x01
 16400	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B // exactly one byte left
 16401	JB   match_nolit_end_encodeSnappyBetterBlockAsm12B // no bytes left
 16402	MOVW (R8)(R11*1), R10
 16403	CMPW (R9)(R11*1), R10
 16404	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
 16405	LEAL 2(R11), R11
 16406	SUBL $0x02, DI
 16407	JZ   match_nolit_end_encodeSnappyBetterBlockAsm12B
 16408
 16409matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B:
 16410	MOVB (R8)(R11*1), R10
 16411	CMPB (R9)(R11*1), R10
 16412	JNE  match_nolit_end_encodeSnappyBetterBlockAsm12B
 16413	LEAL 1(R11), R11
 16414
 16415match_nolit_end_encodeSnappyBetterBlockAsm12B:
 16416	MOVL CX, DI
 16417	SUBL BX, DI // DI = match offset (CX - BX)
 16418
 16419	// Check if repeat
 16420	MOVL DI, 16(SP) // only stores the offset; this function never compares it with a previous one
 16421	MOVL 12(SP), BX
 16422	CMPL BX, SI
 16423	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B // no literal bytes pending before the match
 16424	MOVL SI, R8
 16425	MOVL SI, 12(SP)
 16426	LEAQ (DX)(BX*1), R9 // R9 = first pending literal byte
 16427	SUBL BX, R8 // R8 = literal length
 16428	LEAL -1(R8), BX // BX = length - 1, the value encoded in the tag
 16429	CMPL BX, $0x3c
 16430	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm12B
 16431	CMPL BX, $0x00000100
 16432	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm12B
 16433	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm12B // never taken (same flags as the JB above); execution falls through to the same label
 16434
 16435three_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
 16436	MOVB $0xf4, (AX) // tag 0xf4 followed by length-1 as 16 bits
 16437	MOVW BX, 1(AX)
 16438	ADDQ $0x03, AX
 16439	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
 16440
 16441two_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
 16442	MOVB $0xf0, (AX) // tag 0xf0 followed by length-1 as 8 bits
 16443	MOVB BL, 1(AX)
 16444	ADDQ $0x02, AX
 16445	CMPL BX, $0x40
 16446	JB   memmove_match_emit_encodeSnappyBetterBlockAsm12B // length <= 64: short copy
 16447	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
 16448
 16449one_byte_match_emit_encodeSnappyBetterBlockAsm12B:
 16450	SHLB $0x02, BL // single tag byte = (length-1) << 2
 16451	MOVB BL, (AX)
 16452	ADDQ $0x01, AX
 16453
 16454memmove_match_emit_encodeSnappyBetterBlockAsm12B:
 16455	LEAQ (AX)(R8*1), BX // BX = dst pointer after the literal bytes
 16456
 16457	// genMemMoveShort
 16458	CMPQ R8, $0x08
 16459	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8
 16460	CMPQ R8, $0x10
 16461	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
 16462	CMPQ R8, $0x20
 16463	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
 16464	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
 16465
 16466emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8:
 16467	MOVQ (R9), R10 // always moves 8 bytes, even when R8 < 8; AX is set from BX afterwards
 16468	MOVQ R10, (AX)
 16469	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
 16470
 16471emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
 16472	MOVQ (R9), R10 // first 8 and last 8 bytes; the two moves overlap when R8 < 16
 16473	MOVQ -8(R9)(R8*1), R9
 16474	MOVQ R10, (AX)
 16475	MOVQ R9, -8(AX)(R8*1)
 16476	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
 16477
 16478emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
 16479	MOVOU (R9), X0 // first 16 and last 16 bytes
 16480	MOVOU -16(R9)(R8*1), X1
 16481	MOVOU X0, (AX)
 16482	MOVOU X1, -16(AX)(R8*1)
 16483	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
 16484
 16485emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
 16486	MOVOU (R9), X0 // first 32 and last 32 bytes
 16487	MOVOU 16(R9), X1
 16488	MOVOU -32(R9)(R8*1), X2
 16489	MOVOU -16(R9)(R8*1), X3
 16490	MOVOU X0, (AX)
 16491	MOVOU X1, 16(AX)
 16492	MOVOU X2, -32(AX)(R8*1)
 16493	MOVOU X3, -16(AX)(R8*1)
 16494
 16495memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B:
 16496	MOVQ BX, AX // AX = dst pointer after the literal
 16497	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
 16498
 16499memmove_long_match_emit_encodeSnappyBetterBlockAsm12B:
 16500	LEAQ (AX)(R8*1), BX // BX = dst pointer after the literal bytes
 16501
 16502	// genMemMoveLong
 16503	MOVOU (R9), X0 // X0,X1 = first 32 source bytes; X2,X3 = last 32 source bytes
 16504	MOVOU 16(R9), X1
 16505	MOVOU -32(R9)(R8*1), X2
 16506	MOVOU -16(R9)(R8*1), X3
 16507	MOVQ  R8, R12
 16508	SHRQ  $0x05, R12 // R12 = length / 32
 16509	MOVQ  AX, R10
 16510	ANDL  $0x0000001f, R10 // R10 = AX mod 32
 16511	MOVQ  $0x00000040, R13
 16512	SUBQ  R10, R13 // R13 = 64 - (AX mod 32), so AX + R13 - 32 is a multiple of 32
 16513	DECQ  R12 // DECQ leaves CF as set by the SUBQ above (0, since R10 <= 31)
 16514	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 // taken when R12 is non-zero after the decrement
 16515	LEAQ  -32(R9)(R13*1), R10
 16516	LEAQ  -32(AX)(R13*1), R14
 16517
 16518emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
 16519	MOVOU (R10), X4 // NOTE(review): both jumps to memmove_long carry length >= 65, so this fall-through path looks unreachable here - confirm
 16520	MOVOU 16(R10), X5
 16521	MOVOA X4, (R14)
 16522	MOVOA X5, 16(R14)
 16523	ADDQ  $0x20, R14
 16524	ADDQ  $0x20, R10
 16525	ADDQ  $0x20, R13
 16526	DECQ  R12
 16527	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
 16528
 16529emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
 16530	MOVOU -32(R9)(R13*1), X4 // unaligned loads from the source
 16531	MOVOU -16(R9)(R13*1), X5
 16532	MOVOA X4, -32(AX)(R13*1) // aligned stores to the destination
 16533	MOVOA X5, -16(AX)(R13*1)
 16534	ADDQ  $0x20, R13
 16535	CMPQ  R8, R13
 16536	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 // repeat while R13 <= length
 16537	MOVOU X0, (AX) // finally store the saved first and last 32 bytes, unaligned
 16538	MOVOU X1, 16(AX)
 16539	MOVOU X2, -32(AX)(R8*1)
 16540	MOVOU X3, -16(AX)(R8*1)
 16541	MOVQ  BX, AX
 16542
 16543emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B:
 16544	ADDL R11, CX // CX = src index just past the match
 16545	ADDL $0x04, R11 // R11 = full match length, including the 4 bytes skipped before matchLen
 16546	MOVL CX, 12(SP) // everything before CX is now covered
 16547
 16548	// emitCopy
 16549two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B:
 16550	CMPL R11, $0x40
 16551	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B
 16552	MOVB $0xee, (AX) // length > 64: emit tag 0xee + 16-bit offset and account for 60 bytes
 16553	MOVW DI, 1(AX)
 16554	LEAL -60(R11), R11
 16555	ADDQ $0x03, AX
 16556	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B
 16557
 16558two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B:
 16559	MOVL R11, BX
 16560	SHLL $0x02, BX // BX = length << 2
 16561	CMPL R11, $0x0c
 16562	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B // length >= 12: 3-byte form
 16563	CMPL DI, $0x00000800
 16564	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B // offset >= 2048: 3-byte form
 16565	LEAL -15(BX), BX // BX = ((length-4) << 2) | 1
 16566	MOVB DI, 1(AX) // low 8 bits of the offset
 16567	SHRL $0x08, DI
 16568	SHLL $0x05, DI // offset bits 8 and up move to tag bits 5 and up
 16569	ORL  DI, BX
 16570	MOVB BL, (AX)
 16571	ADDQ $0x02, AX
 16572	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B
 16573
 16574emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B:
 16575	LEAL -2(BX), BX // BX = ((length-1) << 2) | 2
 16576	MOVB BL, (AX)
 16577	MOVW DI, 1(AX) // 16-bit offset
 16578	ADDQ $0x03, AX
 16579
 16580match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
 16581	CMPL CX, 8(SP)
 16582	JAE  emit_remainder_encodeSnappyBetterBlockAsm12B // at or past the search limit: emit the tail
 16583	CMPQ AX, (SP)
 16584	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B
 16585	MOVQ $0x00000000, ret+48(FP) // dst limit reached: return 0
 16586	RET
 16587
 16588match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B:
 16589	MOVQ  $0x0000cf1bbcdcbf9b, BX // multiplier of the 6-byte hash
 16590	MOVQ  $0x9e3779b1, DI // multiplier of the 4-byte hash
 16591	LEAQ  1(SI), SI // SI = match start + 1
 16592	LEAQ  -2(CX), R8 // R8 = CX - 2
 16593	MOVQ  (DX)(SI*1), R9
 16594	MOVQ  1(DX)(SI*1), R10
 16595	MOVQ  (DX)(R8*1), R11
 16596	MOVQ  1(DX)(R8*1), R12
 16597	SHLQ  $0x10, R9 // R9 = 6-byte hash at SI
 16598	IMULQ BX, R9
 16599	SHRQ  $0x32, R9
 16600	SHLQ  $0x20, R10 // R10 = 4-byte hash at SI+1
 16601	IMULQ DI, R10
 16602	SHRQ  $0x34, R10
 16603	SHLQ  $0x10, R11 // R11 = 6-byte hash at R8
 16604	IMULQ BX, R11
 16605	SHRQ  $0x32, R11
 16606	SHLQ  $0x20, R12 // R12 = 4-byte hash at R8+1
 16607	IMULQ DI, R12
 16608	SHRQ  $0x34, R12
 16609	LEAQ  1(SI), DI
 16610	LEAQ  1(R8), R13
 16611	MOVL  SI, 24(SP)(R9*4) // 6-byte table records positions SI and R8
 16612	MOVL  R8, 24(SP)(R11*4)
 16613	MOVL  DI, 65560(SP)(R10*4) // 4-byte table records positions SI+1 and R8+1
 16614	MOVL  R13, 65560(SP)(R12*4)
 16615	LEAQ  1(R8)(SI*1), DI
 16616	SHRQ  $0x01, DI // DI = (SI + R8 + 1) / 2
 16617	ADDQ  $0x01, SI
 16618	SUBQ  $0x01, R8
 16619
 16620index_loop_encodeSnappyBetterBlockAsm12B:
 16621	CMPQ  DI, R8
 16622	JAE   search_loop_encodeSnappyBetterBlockAsm12B // DI reached R8: resume searching at CX
 16623	MOVQ  (DX)(SI*1), R9
 16624	MOVQ  (DX)(DI*1), R10
 16625	SHLQ  $0x10, R9
 16626	IMULQ BX, R9
 16627	SHRQ  $0x32, R9
 16628	SHLQ  $0x10, R10
 16629	IMULQ BX, R10
 16630	SHRQ  $0x32, R10
 16631	MOVL  SI, 24(SP)(R9*4) // record positions SI and DI in the 6-byte table
 16632	MOVL  DI, 24(SP)(R10*4)
 16633	ADDQ  $0x02, SI // both cursors advance by 2 per iteration
 16634	ADDQ  $0x02, DI
 16635	JMP   index_loop_encodeSnappyBetterBlockAsm12B
 16636
 16637emit_remainder_encodeSnappyBetterBlockAsm12B:
 16638	MOVQ src_len+32(FP), CX
 16639	SUBL 12(SP), CX // CX = number of src bytes not yet emitted
 16640	LEAQ 3(AX)(CX*1), CX
 16641	CMPQ CX, (SP)
 16642	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm12B
 16643	MOVQ $0x00000000, ret+48(FP) // AX + 3 + remaining length reaches the dst limit: return 0
 16644	RET
 16645
 16646emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
 16647	MOVQ src_len+32(FP), CX
 16648	MOVL 12(SP), BX
 16649	CMPL BX, CX
 16650	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B // nothing left to emit
 16651	MOVL CX, SI
 16652	MOVL CX, 12(SP)
 16653	LEAQ (DX)(BX*1), CX // CX = pointer to the first remaining byte
 16654	SUBL BX, SI // SI = remaining length
 16655	LEAL -1(SI), DX // DX = length - 1; the src base in DX is not read again below
 16656	CMPL DX, $0x3c
 16657	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B
 16658	CMPL DX, $0x00000100
 16659	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
 16660	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B // never taken (same flags as the JB above); execution falls through to the same label
 16661
 16662three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
 16663	MOVB $0xf4, (AX) // tag 0xf4 followed by length-1 as 16 bits
 16664	MOVW DX, 1(AX)
 16665	ADDQ $0x03, AX
 16666	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
 16667
 16668two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
 16669	MOVB $0xf0, (AX) // tag 0xf0 followed by length-1 as 8 bits
 16670	MOVB DL, 1(AX)
 16671	ADDQ $0x02, AX
 16672	CMPL DX, $0x40
 16673	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm12B // length <= 64: short copy
 16674	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
 16675
 16676one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B:
 16677	SHLB $0x02, DL // single tag byte = (length-1) << 2
 16678	MOVB DL, (AX)
 16679	ADDQ $0x01, AX
 16680
 16681memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
 16682	LEAQ (AX)(SI*1), DX // DX = dst pointer after the copy
 16683	MOVL SI, BX // BX = length
 16684
 16685	// genMemMoveShort
 16686	CMPQ BX, $0x03 // unlike the match_emit copy above, every case here moves exactly BX bytes
 16687	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2
 16688	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3
 16689	CMPQ BX, $0x08
 16690	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7
 16691	CMPQ BX, $0x10
 16692	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
 16693	CMPQ BX, $0x20
 16694	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
 16695	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
 16696
 16697emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2:
 16698	MOVB (CX), SI // first and last byte (the same byte when BX == 1)
 16699	MOVB -1(CX)(BX*1), CL
 16700	MOVB SI, (AX)
 16701	MOVB CL, -1(AX)(BX*1)
 16702	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 16703
 16704emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3:
 16705	MOVW (CX), SI // 2 bytes + 1 byte
 16706	MOVB 2(CX), CL
 16707	MOVW SI, (AX)
 16708	MOVB CL, 2(AX)
 16709	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 16710
 16711emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7:
 16712	MOVL (CX), SI // first 4 and last 4 bytes, overlapping when BX < 8
 16713	MOVL -4(CX)(BX*1), CX
 16714	MOVL SI, (AX)
 16715	MOVL CX, -4(AX)(BX*1)
 16716	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 16717
 16718emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
 16719	MOVQ (CX), SI // first 8 and last 8 bytes, overlapping when BX < 16
 16720	MOVQ -8(CX)(BX*1), CX
 16721	MOVQ SI, (AX)
 16722	MOVQ CX, -8(AX)(BX*1)
 16723	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 16724
 16725emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
 16726	MOVOU (CX), X0 // first 16 and last 16 bytes
 16727	MOVOU -16(CX)(BX*1), X1
 16728	MOVOU X0, (AX)
 16729	MOVOU X1, -16(AX)(BX*1)
 16730	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 16731
 16732emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
 16733	MOVOU (CX), X0 // first 32 and last 32 bytes
 16734	MOVOU 16(CX), X1
 16735	MOVOU -32(CX)(BX*1), X2
 16736	MOVOU -16(CX)(BX*1), X3
 16737	MOVOU X0, (AX)
 16738	MOVOU X1, 16(AX)
 16739	MOVOU X2, -32(AX)(BX*1)
 16740	MOVOU X3, -16(AX)(BX*1)
 16741
 16742memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B:
 16743	MOVQ DX, AX // AX = dst pointer after the copied bytes
 16744	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
 16745
 16746memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B:
 16747	LEAQ (AX)(SI*1), DX // DX = dst pointer after the copy
 16748	MOVL SI, BX // BX = length
 16749
 16750	// genMemMoveLong
 16751	MOVOU (CX), X0 // X0,X1 = first 32 source bytes; X2,X3 = last 32 source bytes
 16752	MOVOU 16(CX), X1
 16753	MOVOU -32(CX)(BX*1), X2
 16754	MOVOU -16(CX)(BX*1), X3
 16755	MOVQ  BX, DI
 16756	SHRQ  $0x05, DI // DI = length / 32
 16757	MOVQ  AX, SI
 16758	ANDL  $0x0000001f, SI // SI = AX mod 32
 16759	MOVQ  $0x00000040, R8
 16760	SUBQ  SI, R8 // R8 = 64 - (AX mod 32), so AX + R8 - 32 is a multiple of 32
 16761	DECQ  DI // DECQ leaves CF as set by the SUBQ above (0, since SI <= 31)
 16762	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 // taken when DI is non-zero after the decrement
 16763	LEAQ  -32(CX)(R8*1), SI
 16764	LEAQ  -32(AX)(R8*1), R9
 16765
 16766emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
 16767	MOVOU (SI), X4 // NOTE(review): both jumps to memmove_long carry length >= 65, so this fall-through path looks unreachable here - confirm
 16768	MOVOU 16(SI), X5
 16769	MOVOA X4, (R9)
 16770	MOVOA X5, 16(R9)
 16771	ADDQ  $0x20, R9
 16772	ADDQ  $0x20, SI
 16773	ADDQ  $0x20, R8
 16774	DECQ  DI
 16775	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
 16776
 16777emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
 16778	MOVOU -32(CX)(R8*1), X4 // unaligned loads from the source
 16779	MOVOU -16(CX)(R8*1), X5
 16780	MOVOA X4, -32(AX)(R8*1) // aligned stores to the destination
 16781	MOVOA X5, -16(AX)(R8*1)
 16782	ADDQ  $0x20, R8
 16783	CMPQ  BX, R8
 16784	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 // repeat while R8 <= length
 16785	MOVOU X0, (AX) // finally store the saved first and last 32 bytes, unaligned
 16786	MOVOU X1, 16(AX)
 16787	MOVOU X2, -32(AX)(BX*1)
 16788	MOVOU X3, -16(AX)(BX*1)
 16789	MOVQ  DX, AX
 16790
 16791emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B:
 16792	MOVQ dst_base+0(FP), CX
 16793	SUBQ CX, AX
 16794	MOVQ AX, ret+48(FP) // return value = bytes written = AX - dst_base
 16795	RET
 16796
 16797// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
 16798// Requires: BMI, SSE2
 16799TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56
 16800	MOVQ dst_base+0(FP), AX
 16801	MOVQ $0x000000a0, CX
 16802	LEAQ 24(SP), DX
 16803	PXOR X0, X0
 16804
 16805zero_loop_encodeSnappyBetterBlockAsm10B:
 16806	MOVOU X0, (DX)
 16807	MOVOU X0, 16(DX)
 16808	MOVOU X0, 32(DX)
 16809	MOVOU X0, 48(DX)
 16810	MOVOU X0, 64(DX)
 16811	MOVOU X0, 80(DX)
 16812	MOVOU X0, 96(DX)
 16813	MOVOU X0, 112(DX)
 16814	ADDQ  $0x80, DX
 16815	DECQ  CX
 16816	JNZ   zero_loop_encodeSnappyBetterBlockAsm10B
 16817	MOVL  $0x00000000, 12(SP)
 16818	MOVQ  src_len+32(FP), CX
 16819	LEAQ  -9(CX), DX
 16820	LEAQ  -8(CX), BX
 16821	MOVL  BX, 8(SP)
 16822	SHRQ  $0x05, CX
 16823	SUBL  CX, DX
 16824	LEAQ  (AX)(DX*1), DX
 16825	MOVQ  DX, (SP)
 16826	MOVL  $0x00000001, CX
 16827	MOVL  $0x00000000, 16(SP)
 16828	MOVQ  src_base+24(FP), DX
 16829
 16830search_loop_encodeSnappyBetterBlockAsm10B:
 16831	MOVL  CX, BX
 16832	SUBL  12(SP), BX
 16833	SHRL  $0x05, BX
 16834	LEAL  1(CX)(BX*1), BX
 16835	CMPL  BX, 8(SP)
 16836	JAE   emit_remainder_encodeSnappyBetterBlockAsm10B
 16837	MOVQ  (DX)(CX*1), SI
 16838	MOVL  BX, 20(SP)
 16839	MOVQ  $0x0000cf1bbcdcbf9b, R8
 16840	MOVQ  $0x9e3779b1, BX
 16841	MOVQ  SI, R9
 16842	MOVQ  SI, R10
 16843	SHLQ  $0x10, R9
 16844	IMULQ R8, R9
 16845	SHRQ  $0x34, R9
 16846	SHLQ  $0x20, R10
 16847	IMULQ BX, R10
 16848	SHRQ  $0x36, R10
 16849	MOVL  24(SP)(R9*4), BX
 16850	MOVL  16408(SP)(R10*4), DI
 16851	MOVL  CX, 24(SP)(R9*4)
 16852	MOVL  CX, 16408(SP)(R10*4)
 16853	MOVQ  (DX)(BX*1), R9
 16854	MOVQ  (DX)(DI*1), R10
 16855	CMPQ  R9, SI
 16856	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B
 16857	CMPQ  R10, SI
 16858	JNE   no_short_found_encodeSnappyBetterBlockAsm10B
 16859	MOVL  DI, BX
 16860	JMP   candidate_match_encodeSnappyBetterBlockAsm10B
 16861
 16862no_short_found_encodeSnappyBetterBlockAsm10B:
 16863	CMPL R9, SI
 16864	JEQ  candidate_match_encodeSnappyBetterBlockAsm10B
 16865	CMPL R10, SI
 16866	JEQ  candidateS_match_encodeSnappyBetterBlockAsm10B
 16867	MOVL 20(SP), CX
 16868	JMP  search_loop_encodeSnappyBetterBlockAsm10B
 16869
 16870candidateS_match_encodeSnappyBetterBlockAsm10B:
 16871	SHRQ  $0x08, SI
 16872	MOVQ  SI, R9
 16873	SHLQ  $0x10, R9
 16874	IMULQ R8, R9
 16875	SHRQ  $0x34, R9
 16876	MOVL  24(SP)(R9*4), BX
 16877	INCL  CX
 16878	MOVL  CX, 24(SP)(R9*4)
 16879	CMPL  (DX)(BX*1), SI
 16880	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B
 16881	DECL  CX
 16882	MOVL  DI, BX
 16883
 16884candidate_match_encodeSnappyBetterBlockAsm10B:
 16885	MOVL  12(SP), SI
 16886	TESTL BX, BX
 16887	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm10B
 16888
 16889match_extend_back_loop_encodeSnappyBetterBlockAsm10B:
 16890	CMPL CX, SI
 16891	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm10B
 16892	MOVB -1(DX)(BX*1), DI
 16893	MOVB -1(DX)(CX*1), R8
 16894	CMPB DI, R8
 16895	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm10B
 16896	LEAL -1(CX), CX
 16897	DECL BX
 16898	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm10B
 16899	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm10B
 16900
 16901match_extend_back_end_encodeSnappyBetterBlockAsm10B:
 16902	MOVL CX, SI
 16903	SUBL 12(SP), SI
 16904	LEAQ 3(AX)(SI*1), SI
 16905	CMPQ SI, (SP)
 16906	JB   match_dst_size_check_encodeSnappyBetterBlockAsm10B
 16907	MOVQ $0x00000000, ret+48(FP)
 16908	RET
 16909
 16910match_dst_size_check_encodeSnappyBetterBlockAsm10B:
 16911	MOVL CX, SI
 16912	ADDL $0x04, CX
 16913	ADDL $0x04, BX
 16914	MOVQ src_len+32(FP), DI
 16915	SUBL CX, DI
 16916	LEAQ (DX)(CX*1), R8
 16917	LEAQ (DX)(BX*1), R9
 16918
 16919	// matchLen
 16920	XORL R11, R11
 16921
 16922matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B:
 16923	CMPL DI, $0x10
 16924	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B
 16925	MOVQ (R8)(R11*1), R10
 16926	MOVQ 8(R8)(R11*1), R12
 16927	XORQ (R9)(R11*1), R10
 16928	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
 16929	XORQ 8(R9)(R11*1), R12
 16930	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B
 16931	LEAL -16(DI), DI
 16932	LEAL 16(R11), R11
 16933	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B
 16934
 16935matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B:
 16936#ifdef GOAMD64_v3
 16937	TZCNTQ R12, R12
 16938
 16939#else
 16940	BSFQ R12, R12
 16941
 16942#endif
 16943	SARQ $0x03, R12
 16944	LEAL 8(R11)(R12*1), R11
 16945	JMP  match_nolit_end_encodeSnappyBetterBlockAsm10B
 16946
 16947matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B:
 16948	CMPL DI, $0x08
 16949	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
 16950	MOVQ (R8)(R11*1), R10
 16951	XORQ (R9)(R11*1), R10
 16952	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
 16953	LEAL -8(DI), DI
 16954	LEAL 8(R11), R11
 16955	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
 16956
 16957matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B:
 16958#ifdef GOAMD64_v3
 16959	TZCNTQ R10, R10
 16960
 16961#else
 16962	BSFQ R10, R10
 16963
 16964#endif
 16965	SARQ $0x03, R10
 16966	LEAL (R11)(R10*1), R11
 16967	JMP  match_nolit_end_encodeSnappyBetterBlockAsm10B
 16968
 16969matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B:
 16970	CMPL DI, $0x04
 16971	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
 16972	MOVL (R8)(R11*1), R10
 16973	CMPL (R9)(R11*1), R10
 16974	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
 16975	LEAL -4(DI), DI
 16976	LEAL 4(R11), R11
 16977
 16978matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B:
 16979	CMPL DI, $0x01
 16980	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
 16981	JB   match_nolit_end_encodeSnappyBetterBlockAsm10B
 16982	MOVW (R8)(R11*1), R10
 16983	CMPW (R9)(R11*1), R10
 16984	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
 16985	LEAL 2(R11), R11
 16986	SUBL $0x02, DI
 16987	JZ   match_nolit_end_encodeSnappyBetterBlockAsm10B
 16988
 16989matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B:
 16990	MOVB (R8)(R11*1), R10
 16991	CMPB (R9)(R11*1), R10
 16992	JNE  match_nolit_end_encodeSnappyBetterBlockAsm10B
 16993	LEAL 1(R11), R11
 16994
 16995match_nolit_end_encodeSnappyBetterBlockAsm10B:
 16996	MOVL CX, DI
 16997	SUBL BX, DI
 16998
 16999	// Check if repeat
 17000	MOVL DI, 16(SP)
 17001	MOVL 12(SP), BX
 17002	CMPL BX, SI
 17003	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
 17004	MOVL SI, R8
 17005	MOVL SI, 12(SP)
 17006	LEAQ (DX)(BX*1), R9
 17007	SUBL BX, R8
 17008	LEAL -1(R8), BX
 17009	CMPL BX, $0x3c
 17010	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm10B
 17011	CMPL BX, $0x00000100
 17012	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm10B
 17013	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm10B
 17014
 17015three_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
 17016	MOVB $0xf4, (AX)
 17017	MOVW BX, 1(AX)
 17018	ADDQ $0x03, AX
 17019	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
 17020
 17021two_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
 17022	MOVB $0xf0, (AX)
 17023	MOVB BL, 1(AX)
 17024	ADDQ $0x02, AX
 17025	CMPL BX, $0x40
 17026	JB   memmove_match_emit_encodeSnappyBetterBlockAsm10B
 17027	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
 17028
 17029one_byte_match_emit_encodeSnappyBetterBlockAsm10B:
 17030	SHLB $0x02, BL
 17031	MOVB BL, (AX)
 17032	ADDQ $0x01, AX
 17033
 17034memmove_match_emit_encodeSnappyBetterBlockAsm10B:
 17035	LEAQ (AX)(R8*1), BX
 17036
 17037	// genMemMoveShort
 17038	CMPQ R8, $0x08
 17039	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
 17040	CMPQ R8, $0x10
 17041	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
 17042	CMPQ R8, $0x20
 17043	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
 17044	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
 17045
 17046emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8:
 17047	MOVQ (R9), R10
 17048	MOVQ R10, (AX)
 17049	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
 17050
 17051emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
 17052	MOVQ (R9), R10
 17053	MOVQ -8(R9)(R8*1), R9
 17054	MOVQ R10, (AX)
 17055	MOVQ R9, -8(AX)(R8*1)
 17056	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
 17057
 17058emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
 17059	MOVOU (R9), X0
 17060	MOVOU -16(R9)(R8*1), X1
 17061	MOVOU X0, (AX)
 17062	MOVOU X1, -16(AX)(R8*1)
 17063	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
 17064
 17065emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
 17066	MOVOU (R9), X0
 17067	MOVOU 16(R9), X1
 17068	MOVOU -32(R9)(R8*1), X2
 17069	MOVOU -16(R9)(R8*1), X3
 17070	MOVOU X0, (AX)
 17071	MOVOU X1, 16(AX)
 17072	MOVOU X2, -32(AX)(R8*1)
 17073	MOVOU X3, -16(AX)(R8*1)
 17074
 17075memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
 17076	MOVQ BX, AX
 17077	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
 17078
 17079memmove_long_match_emit_encodeSnappyBetterBlockAsm10B:
 17080	LEAQ (AX)(R8*1), BX
 17081
 17082	// genMemMoveLong
 17083	MOVOU (R9), X0
 17084	MOVOU 16(R9), X1
 17085	MOVOU -32(R9)(R8*1), X2
 17086	MOVOU -16(R9)(R8*1), X3
 17087	MOVQ  R8, R12
 17088	SHRQ  $0x05, R12
 17089	MOVQ  AX, R10
 17090	ANDL  $0x0000001f, R10
 17091	MOVQ  $0x00000040, R13
 17092	SUBQ  R10, R13
 17093	DECQ  R12
 17094	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
 17095	LEAQ  -32(R9)(R13*1), R10
 17096	LEAQ  -32(AX)(R13*1), R14
 17097
 17098emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
 17099	MOVOU (R10), X4
 17100	MOVOU 16(R10), X5
 17101	MOVOA X4, (R14)
 17102	MOVOA X5, 16(R14)
 17103	ADDQ  $0x20, R14
 17104	ADDQ  $0x20, R10
 17105	ADDQ  $0x20, R13
 17106	DECQ  R12
 17107	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
 17108
 17109emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
 17110	MOVOU -32(R9)(R13*1), X4
 17111	MOVOU -16(R9)(R13*1), X5
 17112	MOVOA X4, -32(AX)(R13*1)
 17113	MOVOA X5, -16(AX)(R13*1)
 17114	ADDQ  $0x20, R13
 17115	CMPQ  R8, R13
 17116	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
 17117	MOVOU X0, (AX)
 17118	MOVOU X1, 16(AX)
 17119	MOVOU X2, -32(AX)(R8*1)
 17120	MOVOU X3, -16(AX)(R8*1)
 17121	MOVQ  BX, AX
 17122
 17123emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B:
 17124	ADDL R11, CX
 17125	ADDL $0x04, R11
 17126	MOVL CX, 12(SP)
 17127
 17128	// emitCopy
 17129two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B:
 17130	CMPL R11, $0x40
 17131	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B
 17132	MOVB $0xee, (AX)
 17133	MOVW DI, 1(AX)
 17134	LEAL -60(R11), R11
 17135	ADDQ $0x03, AX
 17136	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B
 17137
 17138two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B:
 17139	MOVL R11, BX
 17140	SHLL $0x02, BX
 17141	CMPL R11, $0x0c
 17142	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
 17143	CMPL DI, $0x00000800
 17144	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
 17145	LEAL -15(BX), BX
 17146	MOVB DI, 1(AX)
 17147	SHRL $0x08, DI
 17148	SHLL $0x05, DI
 17149	ORL  DI, BX
 17150	MOVB BL, (AX)
 17151	ADDQ $0x02, AX
 17152	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B
 17153
 17154emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B:
 17155	LEAL -2(BX), BX
 17156	MOVB BL, (AX)
 17157	MOVW DI, 1(AX)
 17158	ADDQ $0x03, AX
 17159
 17160match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
 17161	CMPL CX, 8(SP)
 17162	JAE  emit_remainder_encodeSnappyBetterBlockAsm10B
 17163	CMPQ AX, (SP)
 17164	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
 17165	MOVQ $0x00000000, ret+48(FP)
 17166	RET
 17167
 17168match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
 17169	MOVQ  $0x0000cf1bbcdcbf9b, BX
 17170	MOVQ  $0x9e3779b1, DI
 17171	LEAQ  1(SI), SI
 17172	LEAQ  -2(CX), R8
 17173	MOVQ  (DX)(SI*1), R9
 17174	MOVQ  1(DX)(SI*1), R10
 17175	MOVQ  (DX)(R8*1), R11
 17176	MOVQ  1(DX)(R8*1), R12
 17177	SHLQ  $0x10, R9
 17178	IMULQ BX, R9
 17179	SHRQ  $0x34, R9
 17180	SHLQ  $0x20, R10
 17181	IMULQ DI, R10
 17182	SHRQ  $0x36, R10
 17183	SHLQ  $0x10, R11
 17184	IMULQ BX, R11
 17185	SHRQ  $0x34, R11
 17186	SHLQ  $0x20, R12
 17187	IMULQ DI, R12
 17188	SHRQ  $0x36, R12
 17189	LEAQ  1(SI), DI
 17190	LEAQ  1(R8), R13
 17191	MOVL  SI, 24(SP)(R9*4)
 17192	MOVL  R8, 24(SP)(R11*4)
 17193	MOVL  DI, 16408(SP)(R10*4)
 17194	MOVL  R13, 16408(SP)(R12*4)
 17195	LEAQ  1(R8)(SI*1), DI
 17196	SHRQ  $0x01, DI
 17197	ADDQ  $0x01, SI
 17198	SUBQ  $0x01, R8
 17199
 17200index_loop_encodeSnappyBetterBlockAsm10B:
 17201	CMPQ  DI, R8
 17202	JAE   search_loop_encodeSnappyBetterBlockAsm10B
 17203	MOVQ  (DX)(SI*1), R9
 17204	MOVQ  (DX)(DI*1), R10
 17205	SHLQ  $0x10, R9
 17206	IMULQ BX, R9
 17207	SHRQ  $0x34, R9
 17208	SHLQ  $0x10, R10
 17209	IMULQ BX, R10
 17210	SHRQ  $0x34, R10
 17211	MOVL  SI, 24(SP)(R9*4)
 17212	MOVL  DI, 24(SP)(R10*4)
 17213	ADDQ  $0x02, SI
 17214	ADDQ  $0x02, DI
 17215	JMP   index_loop_encodeSnappyBetterBlockAsm10B
 17216
 17217emit_remainder_encodeSnappyBetterBlockAsm10B:
 17218	MOVQ src_len+32(FP), CX
 17219	SUBL 12(SP), CX
 17220	LEAQ 3(AX)(CX*1), CX
 17221	CMPQ CX, (SP)
 17222	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm10B
 17223	MOVQ $0x00000000, ret+48(FP)
 17224	RET
 17225
 17226emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
 17227	MOVQ src_len+32(FP), CX
 17228	MOVL 12(SP), BX
 17229	CMPL BX, CX
 17230	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
 17231	MOVL CX, SI
 17232	MOVL CX, 12(SP)
 17233	LEAQ (DX)(BX*1), CX
 17234	SUBL BX, SI
 17235	LEAL -1(SI), DX
 17236	CMPL DX, $0x3c
 17237	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B
 17238	CMPL DX, $0x00000100
 17239	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
 17240	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
 17241
 17242three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
 17243	MOVB $0xf4, (AX)
 17244	MOVW DX, 1(AX)
 17245	ADDQ $0x03, AX
 17246	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
 17247
 17248two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
 17249	MOVB $0xf0, (AX)
 17250	MOVB DL, 1(AX)
 17251	ADDQ $0x02, AX
 17252	CMPL DX, $0x40
 17253	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm10B
 17254	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
 17255
 17256one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B:
 17257	SHLB $0x02, DL
 17258	MOVB DL, (AX)
 17259	ADDQ $0x01, AX
 17260
 17261memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
 17262	LEAQ (AX)(SI*1), DX
 17263	MOVL SI, BX
 17264
 17265	// genMemMoveShort
 17266	CMPQ BX, $0x03
 17267	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2
 17268	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3
 17269	CMPQ BX, $0x08
 17270	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7
 17271	CMPQ BX, $0x10
 17272	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
 17273	CMPQ BX, $0x20
 17274	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
 17275	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
 17276
 17277emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2:
 17278	MOVB (CX), SI
 17279	MOVB -1(CX)(BX*1), CL
 17280	MOVB SI, (AX)
 17281	MOVB CL, -1(AX)(BX*1)
 17282	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 17283
 17284emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3:
 17285	MOVW (CX), SI
 17286	MOVB 2(CX), CL
 17287	MOVW SI, (AX)
 17288	MOVB CL, 2(AX)
 17289	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 17290
 17291emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7:
 17292	MOVL (CX), SI
 17293	MOVL -4(CX)(BX*1), CX
 17294	MOVL SI, (AX)
 17295	MOVL CX, -4(AX)(BX*1)
 17296	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 17297
 17298emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
 17299	MOVQ (CX), SI
 17300	MOVQ -8(CX)(BX*1), CX
 17301	MOVQ SI, (AX)
 17302	MOVQ CX, -8(AX)(BX*1)
 17303	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 17304
 17305emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
 17306	MOVOU (CX), X0
 17307	MOVOU -16(CX)(BX*1), X1
 17308	MOVOU X0, (AX)
 17309	MOVOU X1, -16(AX)(BX*1)
 17310	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 17311
 17312emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
 17313	MOVOU (CX), X0
 17314	MOVOU 16(CX), X1
 17315	MOVOU -32(CX)(BX*1), X2
 17316	MOVOU -16(CX)(BX*1), X3
 17317	MOVOU X0, (AX)
 17318	MOVOU X1, 16(AX)
 17319	MOVOU X2, -32(AX)(BX*1)
 17320	MOVOU X3, -16(AX)(BX*1)
 17321
 17322memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
 17323	MOVQ DX, AX
 17324	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
 17325
 17326memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B:
 17327	LEAQ (AX)(SI*1), DX
 17328	MOVL SI, BX
 17329
 17330	// genMemMoveLong
 17331	MOVOU (CX), X0
 17332	MOVOU 16(CX), X1
 17333	MOVOU -32(CX)(BX*1), X2
 17334	MOVOU -16(CX)(BX*1), X3
 17335	MOVQ  BX, DI
 17336	SHRQ  $0x05, DI
 17337	MOVQ  AX, SI
 17338	ANDL  $0x0000001f, SI
 17339	MOVQ  $0x00000040, R8
 17340	SUBQ  SI, R8
 17341	DECQ  DI
 17342	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
 17343	LEAQ  -32(CX)(R8*1), SI
 17344	LEAQ  -32(AX)(R8*1), R9
 17345
 17346emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
 17347	MOVOU (SI), X4
 17348	MOVOU 16(SI), X5
 17349	MOVOA X4, (R9)
 17350	MOVOA X5, 16(R9)
 17351	ADDQ  $0x20, R9
 17352	ADDQ  $0x20, SI
 17353	ADDQ  $0x20, R8
 17354	DECQ  DI
 17355	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
 17356
 17357emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
 17358	MOVOU -32(CX)(R8*1), X4
 17359	MOVOU -16(CX)(R8*1), X5
 17360	MOVOA X4, -32(AX)(R8*1)
 17361	MOVOA X5, -16(AX)(R8*1)
 17362	ADDQ  $0x20, R8
 17363	CMPQ  BX, R8
 17364	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
 17365	MOVOU X0, (AX)
 17366	MOVOU X1, 16(AX)
 17367	MOVOU X2, -32(AX)(BX*1)
 17368	MOVOU X3, -16(AX)(BX*1)
 17369	MOVQ  DX, AX
 17370
 17371emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B:
 17372	MOVQ dst_base+0(FP), CX
 17373	SUBQ CX, AX
 17374	MOVQ AX, ret+48(FP)
 17375	RET
 17376
  17377// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
  17378// Requires: BMI, SSE2
  17379TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56 // encodes src into dst; ret = bytes written, or 0 when the dst limit kept at (SP) would be reached
  17380	MOVQ dst_base+0(FP), AX // AX = dst write cursor for the whole function
  17381	MOVQ $0x00000028, CX // 0x28 iterations x 0x80 bytes = 5120 bytes to clear
  17382	LEAQ 24(SP), DX // tables start at 24(SP); 0(SP)..23(SP) hold scalar state
  17383	PXOR X0, X0
  17384	// Zero both hash tables (24(SP) up to 5144(SP)), 128 bytes per iteration.
  17385zero_loop_encodeSnappyBetterBlockAsm8B:
  17386	MOVOU X0, (DX)
  17387	MOVOU X0, 16(DX)
  17388	MOVOU X0, 32(DX)
  17389	MOVOU X0, 48(DX)
  17390	MOVOU X0, 64(DX)
  17391	MOVOU X0, 80(DX)
  17392	MOVOU X0, 96(DX)
  17393	MOVOU X0, 112(DX)
  17394	ADDQ  $0x80, DX
  17395	DECQ  CX
  17396	JNZ   zero_loop_encodeSnappyBetterBlockAsm8B
  17397	MOVL  $0x00000000, 12(SP) // 12(SP) = src position of the first byte not yet emitted
  17398	MOVQ  src_len+32(FP), CX
  17399	LEAQ  -9(CX), DX
  17400	LEAQ  -8(CX), BX
  17401	MOVL  BX, 8(SP) // 8(SP) = src_len - 8: searching stops at this position
  17402	SHRQ  $0x05, CX
  17403	SUBL  CX, DX // DX = src_len - 9 - src_len/32 (32-bit subtract)
  17404	LEAQ  (AX)(DX*1), DX
  17405	MOVQ  DX, (SP) // (SP) = dst limit pointer; output reaching it makes the function return 0
  17406	MOVL  $0x00000001, CX // CX = current src position; the search starts at 1
  17407	MOVL  $0x00000000, 16(SP) // 16(SP) = last match offset (written below, never read in this function)
  17408	MOVQ  src_base+24(FP), DX // DX = src base pointer
  17409	// Main search loop: hash the bytes at src[CX] and probe both tables for a candidate.
  17410search_loop_encodeSnappyBetterBlockAsm8B:
  17411	MOVL  CX, BX
  17412	SUBL  12(SP), BX
  17413	SHRL  $0x04, BX
  17414	LEAL  1(CX)(BX*1), BX // BX = CX + 1 + (CX - 12(SP))/16: position to try next if nothing matches
  17415	CMPL  BX, 8(SP)
  17416	JAE   emit_remainder_encodeSnappyBetterBlockAsm8B
  17417	MOVQ  (DX)(CX*1), SI // SI = 8 bytes loaded from src[CX]
  17418	MOVL  BX, 20(SP) // 20(SP) = saved next position
  17419	MOVQ  $0x0000cf1bbcdcbf9b, R8 // multiplier for the 6-byte hash
  17420	MOVQ  $0x9e3779b1, BX // multiplier for the 4-byte hash
  17421	MOVQ  SI, R9
  17422	MOVQ  SI, R10
  17423	SHLQ  $0x10, R9 // discard the top 2 bytes: hash covers the low 6 bytes
  17424	IMULQ R8, R9
  17425	SHRQ  $0x36, R9 // keep the top 10 bits: index into the table at 24(SP)
  17426	SHLQ  $0x20, R10 // discard the top 4 bytes: hash covers the low 4 bytes
  17427	IMULQ BX, R10
  17428	SHRQ  $0x38, R10 // keep the top 8 bits: index into the table at 4120(SP)
  17429	MOVL  24(SP)(R9*4), BX // BX = candidate position from the 6-byte-hash table
  17430	MOVL  4120(SP)(R10*4), DI // DI = candidate position from the 4-byte-hash table
  17431	MOVL  CX, 24(SP)(R9*4) // both tables now point at the current position
  17432	MOVL  CX, 4120(SP)(R10*4)
  17433	MOVQ  (DX)(BX*1), R9
  17434	MOVQ  (DX)(DI*1), R10
  17435	CMPQ  R9, SI // full 8-byte comparison against candidate BX
  17436	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B
  17437	CMPQ  R10, SI // full 8-byte comparison against candidate DI
  17438	JNE   no_short_found_encodeSnappyBetterBlockAsm8B
  17439	MOVL  DI, BX
  17440	JMP   candidate_match_encodeSnappyBetterBlockAsm8B
  17441	// No 8-byte match: fall back to 4-byte comparisons of the same two candidates.
  17442no_short_found_encodeSnappyBetterBlockAsm8B:
  17443	CMPL R9, SI
  17444	JEQ  candidate_match_encodeSnappyBetterBlockAsm8B
  17445	CMPL R10, SI
  17446	JEQ  candidateS_match_encodeSnappyBetterBlockAsm8B
  17447	MOVL 20(SP), CX // nothing matched: continue at the saved next position
  17448	JMP  search_loop_encodeSnappyBetterBlockAsm8B
  17449	// Candidate DI matched 4 bytes: first probe the 6-byte table for position CX+1.
  17450candidateS_match_encodeSnappyBetterBlockAsm8B:
  17451	SHRQ  $0x08, SI // SI now holds the bytes starting at src[CX+1]
  17452	MOVQ  SI, R9
  17453	SHLQ  $0x10, R9
  17454	IMULQ R8, R9
  17455	SHRQ  $0x36, R9
  17456	MOVL  24(SP)(R9*4), BX
  17457	INCL  CX
  17458	MOVL  CX, 24(SP)(R9*4)
  17459	CMPL  (DX)(BX*1), SI
  17460	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B
  17461	DECL  CX // probe missed: return to the original position and use candidate DI
  17462	MOVL  DI, BX
  17463	// Match found: CX = current position, BX = candidate position.
  17464candidate_match_encodeSnappyBetterBlockAsm8B:
  17465	MOVL  12(SP), SI
  17466	TESTL BX, BX
  17467	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm8B
  17468	// Extend the match backwards while CX > 12(SP), BX > 0 and the preceding bytes are equal.
  17469match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
  17470	CMPL CX, SI
  17471	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm8B
  17472	MOVB -1(DX)(BX*1), DI
  17473	MOVB -1(DX)(CX*1), R8
  17474	CMPB DI, R8
  17475	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm8B
  17476	LEAL -1(CX), CX
  17477	DECL BX
  17478	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm8B
  17479	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm8B
  17480	// Return 0 unless AX + pending literal length + 3 stays below the dst limit at (SP).
  17481match_extend_back_end_encodeSnappyBetterBlockAsm8B:
  17482	MOVL CX, SI
  17483	SUBL 12(SP), SI
  17484	LEAQ 3(AX)(SI*1), SI
  17485	CMPQ SI, (SP)
  17486	JB   match_dst_size_check_encodeSnappyBetterBlockAsm8B
  17487	MOVQ $0x00000000, ret+48(FP)
  17488	RET
  17489
  17490match_dst_size_check_encodeSnappyBetterBlockAsm8B:
  17491	MOVL CX, SI // SI = match start; everything in [12(SP), SI) is pending literal data
  17492	ADDL $0x04, CX // step both positions past the first 4 bytes, which already compared equal
  17493	ADDL $0x04, BX
  17494	MOVQ src_len+32(FP), DI
  17495	SUBL CX, DI // DI = number of src bytes left from CX
  17496	LEAQ (DX)(CX*1), R8
  17497	LEAQ (DX)(BX*1), R9
  17498
  17499	// matchLen: R11 = count of equal bytes at R8 and R9, bounded by DI
  17500	XORL R11, R11
  17501	// Compare 16 bytes per iteration while at least 16 remain.
  17502matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B:
  17503	CMPL DI, $0x10
  17504	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B
  17505	MOVQ (R8)(R11*1), R10
  17506	MOVQ 8(R8)(R11*1), R12
  17507	XORQ (R9)(R11*1), R10 // non-zero result marks the differing bits
  17508	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
  17509	XORQ 8(R9)(R11*1), R12
  17510	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B
  17511	LEAL -16(DI), DI
  17512	LEAL 16(R11), R11
  17513	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B
  17514	// Difference in the second 8 bytes: index of lowest set bit / 8 = number of equal bytes in that word.
  17515matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B:
  17516#ifdef GOAMD64_v3
  17517	TZCNTQ R12, R12
  17518
  17519#else
  17520	BSFQ R12, R12
  17521
  17522#endif
  17523	SARQ $0x03, R12
  17524	LEAL 8(R11)(R12*1), R11 // +8 for the first word, which matched completely
  17525	JMP  match_nolit_end_encodeSnappyBetterBlockAsm8B
  17526	// Fewer than 16 bytes left: try one 8-byte word.
  17527matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B:
  17528	CMPL DI, $0x08
  17529	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
  17530	MOVQ (R8)(R11*1), R10
  17531	XORQ (R9)(R11*1), R10
  17532	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
  17533	LEAL -8(DI), DI
  17534	LEAL 8(R11), R11
  17535	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
  17536	// Difference in the 8-byte word held in R10: add the number of equal low bytes.
  17537matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B:
  17538#ifdef GOAMD64_v3
  17539	TZCNTQ R10, R10
  17540
  17541#else
  17542	BSFQ R10, R10
  17543
  17544#endif
  17545	SARQ $0x03, R10
  17546	LEAL (R11)(R10*1), R11
  17547	JMP  match_nolit_end_encodeSnappyBetterBlockAsm8B
  17548	// Tail handling: 4 bytes, then 2, then 1.
  17549matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B:
  17550	CMPL DI, $0x04
  17551	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
  17552	MOVL (R8)(R11*1), R10
  17553	CMPL (R9)(R11*1), R10
  17554	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
  17555	LEAL -4(DI), DI
  17556	LEAL 4(R11), R11
  17557
  17558matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B:
  17559	CMPL DI, $0x01
  17560	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B // exactly one byte left
  17561	JB   match_nolit_end_encodeSnappyBetterBlockAsm8B // nothing left
  17562	MOVW (R8)(R11*1), R10
  17563	CMPW (R9)(R11*1), R10
  17564	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
  17565	LEAL 2(R11), R11
  17566	SUBL $0x02, DI
  17567	JZ   match_nolit_end_encodeSnappyBetterBlockAsm8B
  17568
  17569matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B:
  17570	MOVB (R8)(R11*1), R10
  17571	CMPB (R9)(R11*1), R10
  17572	JNE  match_nolit_end_encodeSnappyBetterBlockAsm8B
  17573	LEAL 1(R11), R11
  17574	// R11 = equal bytes beyond the first 4; CX and BX still point 4 bytes past the match starts.
  17575match_nolit_end_encodeSnappyBetterBlockAsm8B:
  17576	MOVL CX, DI
  17577	SUBL BX, DI // DI = match offset (current position - candidate position)
  17578
  17579	// Check if repeat (generator section name; the code below only records the offset in 16(SP))
  17580	MOVL DI, 16(SP)
  17581	MOVL 12(SP), BX
  17582	CMPL BX, SI
  17583	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B // no pending literals before the match
  17584	MOVL SI, R8
  17585	MOVL SI, 12(SP)
  17586	LEAQ (DX)(BX*1), R9 // R9 = address of the pending literal bytes
  17587	SUBL BX, R8 // R8 = literal length
  17588	LEAL -1(R8), BX // BX = length - 1, the value stored in the literal header
  17589	CMPL BX, $0x3c
  17590	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm8B
  17591	CMPL BX, $0x00000100
  17592	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm8B
  17593	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm8B // never taken (flags unchanged since the JB above); execution falls through to the same label
  17594	// 3-byte literal header: tag 0xf4 followed by 16-bit (length - 1).
  17595three_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
  17596	MOVB $0xf4, (AX)
  17597	MOVW BX, 1(AX)
  17598	ADDQ $0x03, AX
  17599	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
  17600	// 2-byte literal header: tag 0xf0 followed by 8-bit (length - 1).
  17601two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
  17602	MOVB $0xf0, (AX)
  17603	MOVB BL, 1(AX)
  17604	ADDQ $0x02, AX
  17605	CMPL BX, $0x40
  17606	JB   memmove_match_emit_encodeSnappyBetterBlockAsm8B // length <= 64: short copy
  17607	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
  17608	// 1-byte literal header: (length - 1) << 2.
  17609one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
  17610	SHLB $0x02, BL
  17611	MOVB BL, (AX)
  17612	ADDQ $0x01, AX
  17613	// Copy R8 literal bytes (at most 64 on this path) from R9 to AX.
  17614memmove_match_emit_encodeSnappyBetterBlockAsm8B:
  17615	LEAQ (AX)(R8*1), BX // BX = dst cursor after the literals
  17616
  17617	// genMemMoveShort: dispatch on length; ranges are copied as overlapping head + tail moves
  17618	CMPQ R8, $0x08
  17619	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
  17620	CMPQ R8, $0x10
  17621	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
  17622	CMPQ R8, $0x20
  17623	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
  17624	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
  17625	// Length <= 8: always moves a full 8 bytes, even when R8 is smaller.
  17626emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
  17627	MOVQ (R9), R10
  17628	MOVQ R10, (AX)
  17629	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
  17630
  17631emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
  17632	MOVQ (R9), R10 // first 8 bytes
  17633	MOVQ -8(R9)(R8*1), R9 // last 8 bytes (may overlap the first 8)
  17634	MOVQ R10, (AX)
  17635	MOVQ R9, -8(AX)(R8*1)
  17636	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
  17637
  17638emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
  17639	MOVOU (R9), X0 // first 16 bytes
  17640	MOVOU -16(R9)(R8*1), X1 // last 16 bytes
  17641	MOVOU X0, (AX)
  17642	MOVOU X1, -16(AX)(R8*1)
  17643	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
  17644
  17645emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
  17646	MOVOU (R9), X0 // first 32 bytes in X0:X1
  17647	MOVOU 16(R9), X1
  17648	MOVOU -32(R9)(R8*1), X2 // last 32 bytes in X2:X3
  17649	MOVOU -16(R9)(R8*1), X3
  17650	MOVOU X0, (AX)
  17651	MOVOU X1, 16(AX)
  17652	MOVOU X2, -32(AX)(R8*1)
  17653	MOVOU X3, -16(AX)(R8*1)
  17654
  17655memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
  17656	MOVQ BX, AX // advance the dst cursor past the literals
  17657	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
  17658	// Long literal copy: R8 bytes from R9 to AX.
  17659memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
  17660	LEAQ (AX)(R8*1), BX // BX = dst cursor after the literals
  17661
  17662	// genMemMoveLong: save unaligned head/tail, copy the middle in 32-byte chunks with aligned stores
  17663	MOVOU (R9), X0 // X0:X1 = first 32 source bytes
  17664	MOVOU 16(R9), X1
  17665	MOVOU -32(R9)(R8*1), X2 // X2:X3 = last 32 source bytes
  17666	MOVOU -16(R9)(R8*1), X3
  17667	MOVQ  R8, R12
  17668	SHRQ  $0x05, R12 // R12 = length / 32
  17669	MOVQ  AX, R10
  17670	ANDL  $0x0000001f, R10 // R10 = AX mod 32
  17671	MOVQ  $0x00000040, R13
  17672	SUBQ  R10, R13 // R13 = 64 - (AX mod 32), so AX + R13 - 32 is a multiple of 32
  17673	DECQ  R12
  17674	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 // taken unless the DECQ result is zero (CF is still clear from the SUBQ; DECQ leaves CF alone)
  17675	LEAQ  -32(R9)(R13*1), R10
  17676	LEAQ  -32(AX)(R13*1), R14
  17677	// NOTE(review): this loop repeats only when JNA's condition holds (DECQ result zero, or CF set by the ADDQ) - confirm intent against the generator.
  17678emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
  17679	MOVOU (R10), X4
  17680	MOVOU 16(R10), X5
  17681	MOVOA X4, (R14) // aligned store
  17682	MOVOA X5, 16(R14)
  17683	ADDQ  $0x20, R14
  17684	ADDQ  $0x20, R10
  17685	ADDQ  $0x20, R13
  17686	DECQ  R12
  17687	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
  17688	// Copy 32 bytes per iteration at offset R13 - 32 while R13 <= length.
  17689emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
  17690	MOVOU -32(R9)(R13*1), X4
  17691	MOVOU -16(R9)(R13*1), X5
  17692	MOVOA X4, -32(AX)(R13*1)
  17693	MOVOA X5, -16(AX)(R13*1)
  17694	ADDQ  $0x20, R13
  17695	CMPQ  R8, R13
  17696	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  17697	MOVOU X0, (AX) // finally write the saved head and tail with unaligned stores
  17698	MOVOU X1, 16(AX)
  17699	MOVOU X2, -32(AX)(R8*1)
  17700	MOVOU X3, -16(AX)(R8*1)
  17701	MOVQ  BX, AX
  17702	// Literals are written; now account for the match and emit it.
  17703emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
  17704	ADDL R11, CX // CX = end of the match
  17705	ADDL $0x04, R11 // R11 = total match length (4 + extension)
  17706	MOVL CX, 12(SP) // next pending literal starts after the match
  17707
  17708	// emitCopy: R11 = length, DI = offset
  17709two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
  17710	CMPL R11, $0x40
  17711	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B
  17712	MOVB $0xee, (AX) // length > 64: emit tag 0xee + 16-bit offset and consume 60 bytes of length
  17713	MOVW DI, 1(AX)
  17714	LEAL -60(R11), R11
  17715	ADDQ $0x03, AX
  17716	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B
  17717	// Remaining length <= 64: choose the 2-byte or the 3-byte form.
  17718two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
  17719	MOVL R11, BX
  17720	SHLL $0x02, BX // BX = length << 2
  17721	CMPL R11, $0x0c
  17722	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B // length >= 12 uses the 3-byte form
  17723	LEAL -15(BX), BX // 2-byte form: tag = (length << 2) - 15, then OR in the offset's high bits
  17724	MOVB DI, 1(AX) // low 8 bits of the offset
  17725	SHRL $0x08, DI
  17726	SHLL $0x05, DI // offset bits above the low byte go to tag bit 5 and up
  17727	ORL  DI, BX
  17728	MOVB BL, (AX)
  17729	ADDQ $0x02, AX
  17730	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B
  17731	// 3-byte form: tag = (length << 2) - 2, followed by the 16-bit offset.
  17732emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
  17733	LEAL -2(BX), BX
  17734	MOVB BL, (AX)
  17735	MOVW DI, 1(AX)
  17736	ADDQ $0x03, AX
  17737	// Stop searching near the end of src; return 0 if dst reached its limit.
  17738match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
  17739	CMPL CX, 8(SP)
  17740	JAE  emit_remainder_encodeSnappyBetterBlockAsm8B
  17741	CMPQ AX, (SP)
  17742	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
  17743	MOVQ $0x00000000, ret+48(FP)
  17744	RET
  17745	// Re-index positions inside the match just emitted (SI = match start, CX = match end).
  17746match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
  17747	MOVQ  $0x0000cf1bbcdcbf9b, BX // 6-byte hash multiplier
  17748	MOVQ  $0x9e3779b1, DI // 4-byte hash multiplier
  17749	LEAQ  1(SI), SI // SI = match start + 1
  17750	LEAQ  -2(CX), R8 // R8 = match end - 2
  17751	MOVQ  (DX)(SI*1), R9
  17752	MOVQ  1(DX)(SI*1), R10
  17753	MOVQ  (DX)(R8*1), R11
  17754	MOVQ  1(DX)(R8*1), R12
  17755	SHLQ  $0x10, R9
  17756	IMULQ BX, R9
  17757	SHRQ  $0x36, R9 // 10-bit index for position SI
  17758	SHLQ  $0x20, R10
  17759	IMULQ DI, R10
  17760	SHRQ  $0x38, R10 // 8-bit index for position SI + 1
  17761	SHLQ  $0x10, R11
  17762	IMULQ BX, R11
  17763	SHRQ  $0x36, R11 // 10-bit index for position R8
  17764	SHLQ  $0x20, R12
  17765	IMULQ DI, R12
  17766	SHRQ  $0x38, R12 // 8-bit index for position R8 + 1
  17767	LEAQ  1(SI), DI
  17768	LEAQ  1(R8), R13
  17769	MOVL  SI, 24(SP)(R9*4)
  17770	MOVL  R8, 24(SP)(R11*4)
  17771	MOVL  DI, 4120(SP)(R10*4)
  17772	MOVL  R13, 4120(SP)(R12*4)
  17773	LEAQ  1(R8)(SI*1), DI
  17774	SHRQ  $0x01, DI // DI = (SI + R8 + 1) / 2: second cursor starting near the middle
  17775	ADDQ  $0x01, SI
  17776	SUBQ  $0x01, R8
  17777	// Store SI and DI into the 6-byte-hash table, stepping both by 2, while DI < R8; then resume searching.
  17778index_loop_encodeSnappyBetterBlockAsm8B:
  17779	CMPQ  DI, R8
  17780	JAE   search_loop_encodeSnappyBetterBlockAsm8B
  17781	MOVQ  (DX)(SI*1), R9
  17782	MOVQ  (DX)(DI*1), R10
  17783	SHLQ  $0x10, R9
  17784	IMULQ BX, R9
  17785	SHRQ  $0x36, R9
  17786	SHLQ  $0x10, R10
  17787	IMULQ BX, R10
  17788	SHRQ  $0x36, R10
  17789	MOVL  SI, 24(SP)(R9*4)
  17790	MOVL  DI, 24(SP)(R10*4)
  17791	ADDQ  $0x02, SI
  17792	ADDQ  $0x02, DI
  17793	JMP   index_loop_encodeSnappyBetterBlockAsm8B
  17794	// Search finished: emit src[12(SP):src_len] as a final literal, or return 0 if it would reach the dst limit.
  17795emit_remainder_encodeSnappyBetterBlockAsm8B:
  17796	MOVQ src_len+32(FP), CX
  17797	SUBL 12(SP), CX // CX = remaining literal length
  17798	LEAQ 3(AX)(CX*1), CX
  17799	CMPQ CX, (SP)
  17800	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm8B
  17801	MOVQ $0x00000000, ret+48(FP)
  17802	RET
  17803
  17804emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
  17805	MOVQ src_len+32(FP), CX
  17806	MOVL 12(SP), BX
  17807	CMPL BX, CX
  17808	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B // nothing left to emit
  17809	MOVL CX, SI
  17810	MOVL CX, 12(SP)
  17811	LEAQ (DX)(BX*1), CX // CX = address of the remaining literal bytes
  17812	SUBL BX, SI // SI = literal length
  17813	LEAL -1(SI), DX // DX = length - 1 (the src base pointer in DX is no longer needed)
  17814	CMPL DX, $0x3c
  17815	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
  17816	CMPL DX, $0x00000100
  17817	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
  17818	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B // never taken (flags unchanged since the JB above); execution falls through to the same label
  17819	// 3-byte literal header: tag 0xf4 followed by 16-bit (length - 1).
  17820three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17821	MOVB $0xf4, (AX)
  17822	MOVW DX, 1(AX)
  17823	ADDQ $0x03, AX
  17824	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
  17825	// 2-byte literal header: tag 0xf0 followed by 8-bit (length - 1).
  17826two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17827	MOVB $0xf0, (AX)
  17828	MOVB DL, 1(AX)
  17829	ADDQ $0x02, AX
  17830	CMPL DX, $0x40
  17831	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm8B
  17832	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
  17833	// 1-byte literal header: (length - 1) << 2.
  17834one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17835	SHLB $0x02, DL
  17836	MOVB DL, (AX)
  17837	ADDQ $0x01, AX
  17838	// Copy SI literal bytes (at most 64 on this path) from CX to AX.
  17839memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17840	LEAQ (AX)(SI*1), DX // DX = dst cursor after the literals
  17841	MOVL SI, BX // BX = length
  17842
  17843	// genMemMoveShort: exact-length variant; every size class writes only bytes inside [AX, AX+BX)
  17844	CMPQ BX, $0x03
  17845	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2
  17846	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3
  17847	CMPQ BX, $0x08
  17848	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7
  17849	CMPQ BX, $0x10
  17850	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
  17851	CMPQ BX, $0x20
  17852	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
  17853	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
  17854
  17855emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2:
  17856	MOVB (CX), SI // first byte
  17857	MOVB -1(CX)(BX*1), CL // last byte (the same byte when BX = 1)
  17858	MOVB SI, (AX)
  17859	MOVB CL, -1(AX)(BX*1)
  17860	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17861
  17862emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3:
  17863	MOVW (CX), SI
  17864	MOVB 2(CX), CL
  17865	MOVW SI, (AX)
  17866	MOVB CL, 2(AX)
  17867	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17868
  17869emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7:
  17870	MOVL (CX), SI // first 4 bytes
  17871	MOVL -4(CX)(BX*1), CX // last 4 bytes (may overlap the first 4)
  17872	MOVL SI, (AX)
  17873	MOVL CX, -4(AX)(BX*1)
  17874	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17875
  17876emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
  17877	MOVQ (CX), SI // first 8 bytes
  17878	MOVQ -8(CX)(BX*1), CX // last 8 bytes
  17879	MOVQ SI, (AX)
  17880	MOVQ CX, -8(AX)(BX*1)
  17881	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17882
  17883emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
  17884	MOVOU (CX), X0 // first 16 bytes
  17885	MOVOU -16(CX)(BX*1), X1 // last 16 bytes
  17886	MOVOU X0, (AX)
  17887	MOVOU X1, -16(AX)(BX*1)
  17888	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  17889
  17890emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
  17891	MOVOU (CX), X0 // first 32 bytes in X0:X1
  17892	MOVOU 16(CX), X1
  17893	MOVOU -32(CX)(BX*1), X2 // last 32 bytes in X2:X3
  17894	MOVOU -16(CX)(BX*1), X3
  17895	MOVOU X0, (AX)
  17896	MOVOU X1, 16(AX)
  17897	MOVOU X2, -32(AX)(BX*1)
  17898	MOVOU X3, -16(AX)(BX*1)
  17899
  17900memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17901	MOVQ DX, AX // advance the dst cursor past the literals
  17902	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
  17903	// Long copy of the final literal: SI bytes from CX to AX.
  17904memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17905	LEAQ (AX)(SI*1), DX // DX = dst cursor after the literals
  17906	MOVL SI, BX // BX = length
  17907
  17908	// genMemMoveLong: save unaligned head/tail, copy the middle in 32-byte chunks with aligned stores
  17909	MOVOU (CX), X0 // X0:X1 = first 32 source bytes
  17910	MOVOU 16(CX), X1
  17911	MOVOU -32(CX)(BX*1), X2 // X2:X3 = last 32 source bytes
  17912	MOVOU -16(CX)(BX*1), X3
  17913	MOVQ  BX, DI
  17914	SHRQ  $0x05, DI // DI = length / 32
  17915	MOVQ  AX, SI
  17916	ANDL  $0x0000001f, SI // SI = AX mod 32
  17917	MOVQ  $0x00000040, R8
  17918	SUBQ  SI, R8 // R8 = 64 - (AX mod 32), so AX + R8 - 32 is a multiple of 32
  17919	DECQ  DI
  17920	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 // taken unless the DECQ result is zero (CF is still clear from the SUBQ; DECQ leaves CF alone)
  17921	LEAQ  -32(CX)(R8*1), SI
  17922	LEAQ  -32(AX)(R8*1), R9
  17923	// NOTE(review): this loop repeats only when JNA's condition holds (DECQ result zero, or CF set by the ADDQ) - confirm intent against the generator.
  17924emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
  17925	MOVOU (SI), X4
  17926	MOVOU 16(SI), X5
  17927	MOVOA X4, (R9) // aligned store
  17928	MOVOA X5, 16(R9)
  17929	ADDQ  $0x20, R9
  17930	ADDQ  $0x20, SI
  17931	ADDQ  $0x20, R8
  17932	DECQ  DI
  17933	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
  17934	// Copy 32 bytes per iteration at offset R8 - 32 while R8 <= length.
  17935emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
  17936	MOVOU -32(CX)(R8*1), X4
  17937	MOVOU -16(CX)(R8*1), X5
  17938	MOVOA X4, -32(AX)(R8*1)
  17939	MOVOA X5, -16(AX)(R8*1)
  17940	ADDQ  $0x20, R8
  17941	CMPQ  BX, R8
  17942	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  17943	MOVOU X0, (AX) // finally write the saved head and tail with unaligned stores
  17944	MOVOU X1, 16(AX)
  17945	MOVOU X2, -32(AX)(BX*1)
  17946	MOVOU X3, -16(AX)(BX*1)
  17947	MOVQ  DX, AX
  17948	// Return the number of bytes written: final dst cursor minus dst base.
  17949emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
  17950	MOVQ dst_base+0(FP), CX
  17951	SUBQ CX, AX
  17952	MOVQ AX, ret+48(FP)
  17953	RET
 17954
 17955// func calcBlockSize(src []byte) int
 17956// Requires: BMI, SSE2
 17957TEXT ·calcBlockSize(SB), $32792-32
 17958	XORQ AX, AX
 17959	MOVQ $0x00000100, CX
 17960	LEAQ 24(SP), DX
 17961	PXOR X0, X0
 17962
 17963zero_loop_calcBlockSize:
 17964	MOVOU X0, (DX)
 17965	MOVOU X0, 16(DX)
 17966	MOVOU X0, 32(DX)
 17967	MOVOU X0, 48(DX)
 17968	MOVOU X0, 64(DX)
 17969	MOVOU X0, 80(DX)
 17970	MOVOU X0, 96(DX)
 17971	MOVOU X0, 112(DX)
 17972	ADDQ  $0x80, DX
 17973	DECQ  CX
 17974	JNZ   zero_loop_calcBlockSize
 17975	MOVL  $0x00000000, 12(SP)
 17976	MOVQ  src_len+8(FP), CX
 17977	LEAQ  -9(CX), DX
 17978	LEAQ  -8(CX), BX
 17979	MOVL  BX, 8(SP)
 17980	SHRQ  $0x05, CX
 17981	SUBL  CX, DX
 17982	LEAQ  (AX)(DX*1), DX
 17983	MOVQ  DX, (SP)
 17984	MOVL  $0x00000001, CX
 17985	MOVL  CX, 16(SP)
 17986	MOVQ  src_base+0(FP), DX
 17987
 17988search_loop_calcBlockSize:
 17989	MOVL  CX, BX
 17990	SUBL  12(SP), BX
 17991	SHRL  $0x05, BX
 17992	LEAL  4(CX)(BX*1), BX
 17993	CMPL  BX, 8(SP)
 17994	JAE   emit_remainder_calcBlockSize
 17995	MOVQ  (DX)(CX*1), SI
 17996	MOVL  BX, 20(SP)
 17997	MOVQ  $0x0000cf1bbcdcbf9b, R8
 17998	MOVQ  SI, R9
 17999	MOVQ  SI, R10
 18000	SHRQ  $0x08, R10
 18001	SHLQ  $0x10, R9
 18002	IMULQ R8, R9
 18003	SHRQ  $0x33, R9
 18004	SHLQ  $0x10, R10
 18005	IMULQ R8, R10
 18006	SHRQ  $0x33, R10
 18007	MOVL  24(SP)(R9*4), BX
 18008	MOVL  24(SP)(R10*4), DI
 18009	MOVL  CX, 24(SP)(R9*4)
 18010	LEAL  1(CX), R9
 18011	MOVL  R9, 24(SP)(R10*4)
 18012	MOVQ  SI, R9
 18013	SHRQ  $0x10, R9
 18014	SHLQ  $0x10, R9
 18015	IMULQ R8, R9
 18016	SHRQ  $0x33, R9
 18017	MOVL  CX, R8
 18018	SUBL  16(SP), R8
 18019	MOVL  1(DX)(R8*1), R10
 18020	MOVQ  SI, R8
 18021	SHRQ  $0x08, R8
 18022	CMPL  R8, R10
 18023	JNE   no_repeat_found_calcBlockSize
 18024	LEAL  1(CX), SI
 18025	MOVL  12(SP), BX
 18026	MOVL  SI, DI
 18027	SUBL  16(SP), DI
 18028	JZ    repeat_extend_back_end_calcBlockSize
 18029
 18030repeat_extend_back_loop_calcBlockSize:
 18031	CMPL SI, BX
 18032	JBE  repeat_extend_back_end_calcBlockSize
 18033	MOVB -1(DX)(DI*1), R8
 18034	MOVB -1(DX)(SI*1), R9
 18035	CMPB R8, R9
 18036	JNE  repeat_extend_back_end_calcBlockSize
 18037	LEAL -1(SI), SI
 18038	DECL DI
 18039	JNZ  repeat_extend_back_loop_calcBlockSize
 18040
 18041repeat_extend_back_end_calcBlockSize:
 18042	MOVL SI, BX
 18043	SUBL 12(SP), BX
 18044	LEAQ 5(AX)(BX*1), BX
 18045	CMPQ BX, (SP)
 18046	JB   repeat_dst_size_check_calcBlockSize
 18047	MOVQ $0x00000000, ret+24(FP)
 18048	RET
 18049
 18050repeat_dst_size_check_calcBlockSize:
 18051	MOVL 12(SP), BX
 18052	CMPL BX, SI
 18053	JEQ  emit_literal_done_repeat_emit_calcBlockSize
 18054	MOVL SI, DI
 18055	MOVL SI, 12(SP)
 18056	LEAQ (DX)(BX*1), R8
 18057	SUBL BX, DI
 18058	LEAL -1(DI), BX
 18059	CMPL BX, $0x3c
 18060	JB   one_byte_repeat_emit_calcBlockSize
 18061	CMPL BX, $0x00000100
 18062	JB   two_bytes_repeat_emit_calcBlockSize
 18063	CMPL BX, $0x00010000
 18064	JB   three_bytes_repeat_emit_calcBlockSize
 18065	CMPL BX, $0x01000000
 18066	JB   four_bytes_repeat_emit_calcBlockSize
 18067	ADDQ $0x05, AX
 18068	JMP  memmove_long_repeat_emit_calcBlockSize
 18069
 18070four_bytes_repeat_emit_calcBlockSize:
 18071	ADDQ $0x04, AX
 18072	JMP  memmove_long_repeat_emit_calcBlockSize
 18073
 18074three_bytes_repeat_emit_calcBlockSize:
 18075	ADDQ $0x03, AX
 18076	JMP  memmove_long_repeat_emit_calcBlockSize
 18077
 18078two_bytes_repeat_emit_calcBlockSize:
 18079	ADDQ $0x02, AX
 18080	CMPL BX, $0x40
 18081	JB   memmove_repeat_emit_calcBlockSize
 18082	JMP  memmove_long_repeat_emit_calcBlockSize
 18083
 18084one_byte_repeat_emit_calcBlockSize:
 18085	ADDQ $0x01, AX
 18086
 18087memmove_repeat_emit_calcBlockSize:
 18088	LEAQ (AX)(DI*1), AX
 18089	JMP  emit_literal_done_repeat_emit_calcBlockSize
 18090
 18091memmove_long_repeat_emit_calcBlockSize:
 18092	LEAQ (AX)(DI*1), AX
 18093
 18094emit_literal_done_repeat_emit_calcBlockSize:
 18095	ADDL $0x05, CX
 18096	MOVL CX, BX
 18097	SUBL 16(SP), BX
 18098	MOVQ src_len+8(FP), DI
 18099	SUBL CX, DI
 18100	LEAQ (DX)(CX*1), R8
 18101	LEAQ (DX)(BX*1), BX
 18102
 18103	// matchLen
 18104	XORL R10, R10
 18105
 18106matchlen_loopback_16_repeat_extend_calcBlockSize:
 18107	CMPL DI, $0x10
 18108	JB   matchlen_match8_repeat_extend_calcBlockSize
 18109	MOVQ (R8)(R10*1), R9
 18110	MOVQ 8(R8)(R10*1), R11
 18111	XORQ (BX)(R10*1), R9
 18112	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSize
 18113	XORQ 8(BX)(R10*1), R11
 18114	JNZ  matchlen_bsf_16repeat_extend_calcBlockSize
 18115	LEAL -16(DI), DI
 18116	LEAL 16(R10), R10
 18117	JMP  matchlen_loopback_16_repeat_extend_calcBlockSize
 18118
 18119matchlen_bsf_16repeat_extend_calcBlockSize:
 18120#ifdef GOAMD64_v3
 18121	TZCNTQ R11, R11
 18122
 18123#else
 18124	BSFQ R11, R11
 18125
 18126#endif
 18127	SARQ $0x03, R11
 18128	LEAL 8(R10)(R11*1), R10
 18129	JMP  repeat_extend_forward_end_calcBlockSize
 18130
 18131matchlen_match8_repeat_extend_calcBlockSize:
 18132	CMPL DI, $0x08
 18133	JB   matchlen_match4_repeat_extend_calcBlockSize
 18134	MOVQ (R8)(R10*1), R9
 18135	XORQ (BX)(R10*1), R9
 18136	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSize
 18137	LEAL -8(DI), DI
 18138	LEAL 8(R10), R10
 18139	JMP  matchlen_match4_repeat_extend_calcBlockSize
 18140
 18141matchlen_bsf_8_repeat_extend_calcBlockSize:
 18142#ifdef GOAMD64_v3
 18143	TZCNTQ R9, R9
 18144
 18145#else
 18146	BSFQ R9, R9
 18147
 18148#endif
 18149	SARQ $0x03, R9
 18150	LEAL (R10)(R9*1), R10
 18151	JMP  repeat_extend_forward_end_calcBlockSize
 18152
 18153matchlen_match4_repeat_extend_calcBlockSize:
 18154	CMPL DI, $0x04
 18155	JB   matchlen_match2_repeat_extend_calcBlockSize
 18156	MOVL (R8)(R10*1), R9
 18157	CMPL (BX)(R10*1), R9
 18158	JNE  matchlen_match2_repeat_extend_calcBlockSize
 18159	LEAL -4(DI), DI
 18160	LEAL 4(R10), R10
 18161
 18162matchlen_match2_repeat_extend_calcBlockSize:
 18163	CMPL DI, $0x01
 18164	JE   matchlen_match1_repeat_extend_calcBlockSize
 18165	JB   repeat_extend_forward_end_calcBlockSize
 18166	MOVW (R8)(R10*1), R9
 18167	CMPW (BX)(R10*1), R9
 18168	JNE  matchlen_match1_repeat_extend_calcBlockSize
 18169	LEAL 2(R10), R10
 18170	SUBL $0x02, DI
 18171	JZ   repeat_extend_forward_end_calcBlockSize
 18172
 18173matchlen_match1_repeat_extend_calcBlockSize:
 18174	MOVB (R8)(R10*1), R9
 18175	CMPB (BX)(R10*1), R9
 18176	JNE  repeat_extend_forward_end_calcBlockSize
 18177	LEAL 1(R10), R10
 18178
 18179repeat_extend_forward_end_calcBlockSize:
 18180	ADDL R10, CX
 18181	MOVL CX, BX
 18182	SUBL SI, BX
 18183	MOVL 16(SP), SI
 18184
 18185	// emitCopy
 18186	CMPL SI, $0x00010000
 18187	JB   two_byte_offset_repeat_as_copy_calcBlockSize
 18188
 18189four_bytes_loop_back_repeat_as_copy_calcBlockSize:
 18190	CMPL BX, $0x40
 18191	JBE  four_bytes_remain_repeat_as_copy_calcBlockSize
 18192	LEAL -64(BX), BX
 18193	ADDQ $0x05, AX
 18194	CMPL BX, $0x04
 18195	JB   four_bytes_remain_repeat_as_copy_calcBlockSize
 18196	JMP  four_bytes_loop_back_repeat_as_copy_calcBlockSize
 18197
 18198four_bytes_remain_repeat_as_copy_calcBlockSize:
 18199	TESTL BX, BX
 18200	JZ    repeat_end_emit_calcBlockSize
 18201	XORL  BX, BX
 18202	ADDQ  $0x05, AX
 18203	JMP   repeat_end_emit_calcBlockSize
 18204
 18205two_byte_offset_repeat_as_copy_calcBlockSize:
 18206	CMPL BX, $0x40
 18207	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSize
 18208	LEAL -60(BX), BX
 18209	ADDQ $0x03, AX
 18210	JMP  two_byte_offset_repeat_as_copy_calcBlockSize
 18211
 18212two_byte_offset_short_repeat_as_copy_calcBlockSize:
 18213	MOVL BX, DI
 18214	SHLL $0x02, DI
 18215	CMPL BX, $0x0c
 18216	JAE  emit_copy_three_repeat_as_copy_calcBlockSize
 18217	CMPL SI, $0x00000800
 18218	JAE  emit_copy_three_repeat_as_copy_calcBlockSize
 18219	ADDQ $0x02, AX
 18220	JMP  repeat_end_emit_calcBlockSize
 18221
 18222emit_copy_three_repeat_as_copy_calcBlockSize:
 18223	ADDQ $0x03, AX
 18224
 18225repeat_end_emit_calcBlockSize:
 18226	MOVL CX, 12(SP)
 18227	JMP  search_loop_calcBlockSize
 18228
 18229no_repeat_found_calcBlockSize:
 18230	CMPL (DX)(BX*1), SI
 18231	JEQ  candidate_match_calcBlockSize
 18232	SHRQ $0x08, SI
 18233	MOVL 24(SP)(R9*4), BX
 18234	LEAL 2(CX), R8
 18235	CMPL (DX)(DI*1), SI
 18236	JEQ  candidate2_match_calcBlockSize
 18237	MOVL R8, 24(SP)(R9*4)
 18238	SHRQ $0x08, SI
 18239	CMPL (DX)(BX*1), SI
 18240	JEQ  candidate3_match_calcBlockSize
 18241	MOVL 20(SP), CX
 18242	JMP  search_loop_calcBlockSize
 18243
 18244candidate3_match_calcBlockSize:
 18245	ADDL $0x02, CX
 18246	JMP  candidate_match_calcBlockSize
 18247
 18248candidate2_match_calcBlockSize:
 18249	MOVL R8, 24(SP)(R9*4)
 18250	INCL CX
 18251	MOVL DI, BX
 18252
 18253candidate_match_calcBlockSize:
 18254	MOVL  12(SP), SI
 18255	TESTL BX, BX
 18256	JZ    match_extend_back_end_calcBlockSize
 18257
 18258match_extend_back_loop_calcBlockSize:
 18259	CMPL CX, SI
 18260	JBE  match_extend_back_end_calcBlockSize
 18261	MOVB -1(DX)(BX*1), DI
 18262	MOVB -1(DX)(CX*1), R8
 18263	CMPB DI, R8
 18264	JNE  match_extend_back_end_calcBlockSize
 18265	LEAL -1(CX), CX
 18266	DECL BX
 18267	JZ   match_extend_back_end_calcBlockSize
 18268	JMP  match_extend_back_loop_calcBlockSize
 18269
 18270match_extend_back_end_calcBlockSize:
 18271	MOVL CX, SI
 18272	SUBL 12(SP), SI
 18273	LEAQ 5(AX)(SI*1), SI
 18274	CMPQ SI, (SP)
 18275	JB   match_dst_size_check_calcBlockSize
 18276	MOVQ $0x00000000, ret+24(FP)
 18277	RET
 18278
 18279match_dst_size_check_calcBlockSize:
 18280	MOVL CX, SI
 18281	MOVL 12(SP), DI
 18282	CMPL DI, SI
 18283	JEQ  emit_literal_done_match_emit_calcBlockSize
 18284	MOVL SI, R8
 18285	MOVL SI, 12(SP)
 18286	LEAQ (DX)(DI*1), SI
 18287	SUBL DI, R8
 18288	LEAL -1(R8), SI
 18289	CMPL SI, $0x3c
 18290	JB   one_byte_match_emit_calcBlockSize
 18291	CMPL SI, $0x00000100
 18292	JB   two_bytes_match_emit_calcBlockSize
 18293	CMPL SI, $0x00010000
 18294	JB   three_bytes_match_emit_calcBlockSize
 18295	CMPL SI, $0x01000000
 18296	JB   four_bytes_match_emit_calcBlockSize
 18297	ADDQ $0x05, AX
 18298	JMP  memmove_long_match_emit_calcBlockSize
 18299
 18300four_bytes_match_emit_calcBlockSize:
 18301	ADDQ $0x04, AX
 18302	JMP  memmove_long_match_emit_calcBlockSize
 18303
 18304three_bytes_match_emit_calcBlockSize:
 18305	ADDQ $0x03, AX
 18306	JMP  memmove_long_match_emit_calcBlockSize
 18307
 18308two_bytes_match_emit_calcBlockSize:
 18309	ADDQ $0x02, AX
 18310	CMPL SI, $0x40
 18311	JB   memmove_match_emit_calcBlockSize
 18312	JMP  memmove_long_match_emit_calcBlockSize
 18313
 18314one_byte_match_emit_calcBlockSize:
 18315	ADDQ $0x01, AX
 18316
 18317memmove_match_emit_calcBlockSize:
 18318	LEAQ (AX)(R8*1), AX
 18319	JMP  emit_literal_done_match_emit_calcBlockSize
 18320
 18321memmove_long_match_emit_calcBlockSize:
 18322	LEAQ (AX)(R8*1), AX
 18323
 18324emit_literal_done_match_emit_calcBlockSize:
 18325match_nolit_loop_calcBlockSize:
 18326	MOVL CX, SI
 18327	SUBL BX, SI
 18328	MOVL SI, 16(SP)
 18329	ADDL $0x04, CX
 18330	ADDL $0x04, BX
 18331	MOVQ src_len+8(FP), SI
 18332	SUBL CX, SI
 18333	LEAQ (DX)(CX*1), DI
 18334	LEAQ (DX)(BX*1), BX
 18335
 18336	// matchLen
 18337	XORL R9, R9
 18338
 18339matchlen_loopback_16_match_nolit_calcBlockSize:
 18340	CMPL SI, $0x10
 18341	JB   matchlen_match8_match_nolit_calcBlockSize
 18342	MOVQ (DI)(R9*1), R8
 18343	MOVQ 8(DI)(R9*1), R10
 18344	XORQ (BX)(R9*1), R8
 18345	JNZ  matchlen_bsf_8_match_nolit_calcBlockSize
 18346	XORQ 8(BX)(R9*1), R10
 18347	JNZ  matchlen_bsf_16match_nolit_calcBlockSize
 18348	LEAL -16(SI), SI
 18349	LEAL 16(R9), R9
 18350	JMP  matchlen_loopback_16_match_nolit_calcBlockSize
 18351
 18352matchlen_bsf_16match_nolit_calcBlockSize:
 18353#ifdef GOAMD64_v3
 18354	TZCNTQ R10, R10
 18355
 18356#else
 18357	BSFQ R10, R10
 18358
 18359#endif
 18360	SARQ $0x03, R10
 18361	LEAL 8(R9)(R10*1), R9
 18362	JMP  match_nolit_end_calcBlockSize
 18363
 18364matchlen_match8_match_nolit_calcBlockSize:
 18365	CMPL SI, $0x08
 18366	JB   matchlen_match4_match_nolit_calcBlockSize
 18367	MOVQ (DI)(R9*1), R8
 18368	XORQ (BX)(R9*1), R8
 18369	JNZ  matchlen_bsf_8_match_nolit_calcBlockSize
 18370	LEAL -8(SI), SI
 18371	LEAL 8(R9), R9
 18372	JMP  matchlen_match4_match_nolit_calcBlockSize
 18373
 18374matchlen_bsf_8_match_nolit_calcBlockSize:
 18375#ifdef GOAMD64_v3
 18376	TZCNTQ R8, R8
 18377
 18378#else
 18379	BSFQ R8, R8
 18380
 18381#endif
 18382	SARQ $0x03, R8
 18383	LEAL (R9)(R8*1), R9
 18384	JMP  match_nolit_end_calcBlockSize
 18385
 18386matchlen_match4_match_nolit_calcBlockSize:
 18387	CMPL SI, $0x04
 18388	JB   matchlen_match2_match_nolit_calcBlockSize
 18389	MOVL (DI)(R9*1), R8
 18390	CMPL (BX)(R9*1), R8
 18391	JNE  matchlen_match2_match_nolit_calcBlockSize
 18392	LEAL -4(SI), SI
 18393	LEAL 4(R9), R9
 18394
 18395matchlen_match2_match_nolit_calcBlockSize:
 18396	CMPL SI, $0x01
 18397	JE   matchlen_match1_match_nolit_calcBlockSize
 18398	JB   match_nolit_end_calcBlockSize
 18399	MOVW (DI)(R9*1), R8
 18400	CMPW (BX)(R9*1), R8
 18401	JNE  matchlen_match1_match_nolit_calcBlockSize
 18402	LEAL 2(R9), R9
 18403	SUBL $0x02, SI
 18404	JZ   match_nolit_end_calcBlockSize
 18405
 18406matchlen_match1_match_nolit_calcBlockSize:
 18407	MOVB (DI)(R9*1), R8
 18408	CMPB (BX)(R9*1), R8
 18409	JNE  match_nolit_end_calcBlockSize
 18410	LEAL 1(R9), R9
 18411
 18412match_nolit_end_calcBlockSize:
 18413	ADDL R9, CX
 18414	MOVL 16(SP), BX
 18415	ADDL $0x04, R9
 18416	MOVL CX, 12(SP)
 18417
 18418	// emitCopy
 18419	CMPL BX, $0x00010000
 18420	JB   two_byte_offset_match_nolit_calcBlockSize
 18421
 18422four_bytes_loop_back_match_nolit_calcBlockSize:
 18423	CMPL R9, $0x40
 18424	JBE  four_bytes_remain_match_nolit_calcBlockSize
 18425	LEAL -64(R9), R9
 18426	ADDQ $0x05, AX
 18427	CMPL R9, $0x04
 18428	JB   four_bytes_remain_match_nolit_calcBlockSize
 18429	JMP  four_bytes_loop_back_match_nolit_calcBlockSize
 18430
 18431four_bytes_remain_match_nolit_calcBlockSize:
 18432	TESTL R9, R9
 18433	JZ    match_nolit_emitcopy_end_calcBlockSize
 18434	XORL  BX, BX
 18435	ADDQ  $0x05, AX
 18436	JMP   match_nolit_emitcopy_end_calcBlockSize
 18437
 18438two_byte_offset_match_nolit_calcBlockSize:
 18439	CMPL R9, $0x40
 18440	JBE  two_byte_offset_short_match_nolit_calcBlockSize
 18441	LEAL -60(R9), R9
 18442	ADDQ $0x03, AX
 18443	JMP  two_byte_offset_match_nolit_calcBlockSize
 18444
 18445two_byte_offset_short_match_nolit_calcBlockSize:
 18446	MOVL R9, SI
 18447	SHLL $0x02, SI
 18448	CMPL R9, $0x0c
 18449	JAE  emit_copy_three_match_nolit_calcBlockSize
 18450	CMPL BX, $0x00000800
 18451	JAE  emit_copy_three_match_nolit_calcBlockSize
 18452	ADDQ $0x02, AX
 18453	JMP  match_nolit_emitcopy_end_calcBlockSize
 18454
 18455emit_copy_three_match_nolit_calcBlockSize:
 18456	ADDQ $0x03, AX
 18457
 18458match_nolit_emitcopy_end_calcBlockSize:
 18459	CMPL CX, 8(SP)
 18460	JAE  emit_remainder_calcBlockSize
 18461	MOVQ -2(DX)(CX*1), SI
 18462	CMPQ AX, (SP)
 18463	JB   match_nolit_dst_ok_calcBlockSize
 18464	MOVQ $0x00000000, ret+24(FP)
 18465	RET
 18466
 18467match_nolit_dst_ok_calcBlockSize:
 18468	MOVQ  $0x0000cf1bbcdcbf9b, R8
 18469	MOVQ  SI, DI
 18470	SHRQ  $0x10, SI
 18471	MOVQ  SI, BX
 18472	SHLQ  $0x10, DI
 18473	IMULQ R8, DI
 18474	SHRQ  $0x33, DI
 18475	SHLQ  $0x10, BX
 18476	IMULQ R8, BX
 18477	SHRQ  $0x33, BX
 18478	LEAL  -2(CX), R8
 18479	LEAQ  24(SP)(BX*4), R9
 18480	MOVL  (R9), BX
 18481	MOVL  R8, 24(SP)(DI*4)
 18482	MOVL  CX, (R9)
 18483	CMPL  (DX)(BX*1), SI
 18484	JEQ   match_nolit_loop_calcBlockSize
 18485	INCL  CX
 18486	JMP   search_loop_calcBlockSize
 18487
 18488emit_remainder_calcBlockSize:
 18489	MOVQ src_len+8(FP), CX
 18490	SUBL 12(SP), CX
 18491	LEAQ 5(AX)(CX*1), CX
 18492	CMPQ CX, (SP)
 18493	JB   emit_remainder_ok_calcBlockSize
 18494	MOVQ $0x00000000, ret+24(FP)
 18495	RET
 18496
 18497emit_remainder_ok_calcBlockSize:
 18498	MOVQ src_len+8(FP), CX
 18499	MOVL 12(SP), BX
 18500	CMPL BX, CX
 18501	JEQ  emit_literal_done_emit_remainder_calcBlockSize
 18502	MOVL CX, SI
 18503	MOVL CX, 12(SP)
 18504	LEAQ (DX)(BX*1), CX
 18505	SUBL BX, SI
 18506	LEAL -1(SI), CX
 18507	CMPL CX, $0x3c
 18508	JB   one_byte_emit_remainder_calcBlockSize
 18509	CMPL CX, $0x00000100
 18510	JB   two_bytes_emit_remainder_calcBlockSize
 18511	CMPL CX, $0x00010000
 18512	JB   three_bytes_emit_remainder_calcBlockSize
 18513	CMPL CX, $0x01000000
 18514	JB   four_bytes_emit_remainder_calcBlockSize
 18515	ADDQ $0x05, AX
 18516	JMP  memmove_long_emit_remainder_calcBlockSize
 18517
 18518four_bytes_emit_remainder_calcBlockSize:
 18519	ADDQ $0x04, AX
 18520	JMP  memmove_long_emit_remainder_calcBlockSize
 18521
 18522three_bytes_emit_remainder_calcBlockSize:
 18523	ADDQ $0x03, AX
 18524	JMP  memmove_long_emit_remainder_calcBlockSize
 18525
 18526two_bytes_emit_remainder_calcBlockSize:
 18527	ADDQ $0x02, AX
 18528	CMPL CX, $0x40
 18529	JB   memmove_emit_remainder_calcBlockSize
 18530	JMP  memmove_long_emit_remainder_calcBlockSize
 18531
 18532one_byte_emit_remainder_calcBlockSize:
 18533	ADDQ $0x01, AX
 18534
 18535memmove_emit_remainder_calcBlockSize:
 18536	LEAQ (AX)(SI*1), AX
 18537	JMP  emit_literal_done_emit_remainder_calcBlockSize
 18538
 18539memmove_long_emit_remainder_calcBlockSize:
 18540	LEAQ (AX)(SI*1), AX
 18541
 18542emit_literal_done_emit_remainder_calcBlockSize:
 18543	MOVQ AX, ret+24(FP)
 18544	RET
 18545
 18546// func calcBlockSizeSmall(src []byte) int
 18547// Requires: BMI, SSE2
      //
      // Runs a hash-table match search over src without writing any output.
      // AX starts at 0 and is only ever advanced by byte counts (1-3 bytes of
      // header plus the run length for each literal run, 2-3 bytes per copy);
      // the final AX is stored to ret. NOTE(review): presumably these counts
      // mirror what the matching encoder would emit - confirm against the generator.
      // ret is set to 0 whenever the running count reaches the limit kept in (SP).
      //
      // Frame layout:
      //   (SP)     limit for AX: src_len - 9 - src_len/32
      //   8(SP)    src_len - 8, upper bound for search positions
      //   12(SP)   position where the not-yet-counted literal run starts
      //   16(SP)   current repeat offset (initialised to 1)
      //   20(SP)   next search position, computed at the top of the search loop
      //   24(SP)   2048-byte table: 512 32-bit positions indexed by a 9-bit hash
      //
      // Long-lived registers: AX = running byte count, CX = current position,
      // DX = src base pointer (after the table has been cleared).
 18548TEXT ·calcBlockSizeSmall(SB), $2072-32
 18549	XORQ AX, AX // AX = running byte count, starts at 0
 18550	MOVQ $0x00000010, CX // 16 iterations x 128 bytes clear the 2048-byte table
 18551	LEAQ 24(SP), DX // DX = first byte of the table
 18552	PXOR X0, X0
 18553
 18554zero_loop_calcBlockSizeSmall:
 18555	MOVOU X0, (DX)
 18556	MOVOU X0, 16(DX)
 18557	MOVOU X0, 32(DX)
 18558	MOVOU X0, 48(DX)
 18559	MOVOU X0, 64(DX)
 18560	MOVOU X0, 80(DX)
 18561	MOVOU X0, 96(DX)
 18562	MOVOU X0, 112(DX)
 18563	ADDQ  $0x80, DX
 18564	DECQ  CX
 18565	JNZ   zero_loop_calcBlockSizeSmall
 18566	MOVL  $0x00000000, 12(SP) // no literals pending: run starts at position 0
 18567	MOVQ  src_len+8(FP), CX
 18568	LEAQ  -9(CX), DX
 18569	LEAQ  -8(CX), BX
 18570	MOVL  BX, 8(SP) // 8(SP) = src_len - 8
 18571	SHRQ  $0x05, CX
 18572	SUBL  CX, DX // DX = src_len - 9 - src_len/32 (32-bit arithmetic)
 18573	LEAQ  (AX)(DX*1), DX // AX is still 0 here
 18574	MOVQ  DX, (SP) // (SP) = limit that AX must stay below
 18575	MOVL  $0x00000001, CX // search starts at position 1
 18576	MOVL  CX, 16(SP) // initial repeat offset = 1
 18577	MOVQ  src_base+0(FP), DX // DX = src base pointer from here on
 18578
 18579search_loop_calcBlockSizeSmall:
 18580	MOVL  CX, BX
 18581	SUBL  12(SP), BX
 18582	SHRL  $0x04, BX
 18583	LEAL  4(CX)(BX*1), BX // BX = CX + 4 + (CX - 12(SP))/16: step grows with the pending literal run
 18584	CMPL  BX, 8(SP)
 18585	JAE   emit_remainder_calcBlockSizeSmall // next position would pass src_len - 8: stop searching
 18586	MOVQ  (DX)(CX*1), SI // SI = 8 bytes at the current position
 18587	MOVL  BX, 20(SP) // remember the next position for the no-match path
 18588	MOVQ  $0x9e3779b1, R8 // hash multiplier
 18589	MOVQ  SI, R9
 18590	MOVQ  SI, R10
 18591	SHRQ  $0x08, R10
 18592	SHLQ  $0x20, R9 // keep only the low 4 bytes of the value
 18593	IMULQ R8, R9
 18594	SHRQ  $0x37, R9 // top 9 bits -> table index for position CX
 18595	SHLQ  $0x20, R10
 18596	IMULQ R8, R10
 18597	SHRQ  $0x37, R10 // table index for position CX+1
 18598	MOVL  24(SP)(R9*4), BX // BX = candidate for CX
 18599	MOVL  24(SP)(R10*4), DI // DI = candidate for CX+1
 18600	MOVL  CX, 24(SP)(R9*4)
 18601	LEAL  1(CX), R9
 18602	MOVL  R9, 24(SP)(R10*4)
 18603	MOVQ  SI, R9
 18604	SHRQ  $0x10, R9
 18605	SHLQ  $0x20, R9
 18606	IMULQ R8, R9
 18607	SHRQ  $0x37, R9 // R9 = table index for position CX+2 (used on the no-repeat path)
 18608	MOVL  CX, R8
 18609	SUBL  16(SP), R8
 18610	MOVL  1(DX)(R8*1), R10 // 4 bytes at CX + 1 - repeat offset
 18611	MOVQ  SI, R8
 18612	SHRQ  $0x08, R8 // low 32 bits = the 4 bytes at CX + 1
 18613	CMPL  R8, R10
 18614	JNE   no_repeat_found_calcBlockSizeSmall
 18615	LEAL  1(CX), SI // SI = start of the repeat match
 18616	MOVL  12(SP), BX
 18617	MOVL  SI, DI
 18618	SUBL  16(SP), DI // DI = position the repeat match refers back to
 18619	JZ    repeat_extend_back_end_calcBlockSizeSmall // nothing before position 0 to extend into
 18620
      // Extend the repeat match backwards while the preceding bytes agree,
      // stopping at 12(SP) (held in BX) or when DI reaches 0.
 18621repeat_extend_back_loop_calcBlockSizeSmall:
 18622	CMPL SI, BX
 18623	JBE  repeat_extend_back_end_calcBlockSizeSmall
 18624	MOVB -1(DX)(DI*1), R8
 18625	MOVB -1(DX)(SI*1), R9
 18626	CMPB R8, R9
 18627	JNE  repeat_extend_back_end_calcBlockSizeSmall
 18628	LEAL -1(SI), SI
 18629	DECL DI
 18630	JNZ  repeat_extend_back_loop_calcBlockSizeSmall
 18631
 18632repeat_extend_back_end_calcBlockSizeSmall:
 18633	MOVL SI, BX
 18634	SUBL 12(SP), BX // pending literal length
 18635	LEAQ 3(AX)(BX*1), BX // count so far + literals + 3 bytes
 18636	CMPQ BX, (SP)
 18637	JB   repeat_dst_size_check_calcBlockSizeSmall
 18638	MOVQ $0x00000000, ret+24(FP) // limit reached: return 0
 18639	RET
 18640
      // Account for the literal run [12(SP), SI): header size depends on length-1.
 18641repeat_dst_size_check_calcBlockSizeSmall:
 18642	MOVL 12(SP), BX
 18643	CMPL BX, SI
 18644	JEQ  emit_literal_done_repeat_emit_calcBlockSizeSmall // empty run: nothing to count
 18645	MOVL SI, DI
 18646	MOVL SI, 12(SP)
 18647	LEAQ (DX)(BX*1), R8 // NOTE(review): R8 is overwritten before being read on every path from here
 18648	SUBL BX, DI // DI = literal length
 18649	LEAL -1(DI), BX // BX = length - 1 selects the header size
 18650	CMPL BX, $0x3c
 18651	JB   one_byte_repeat_emit_calcBlockSizeSmall
 18652	CMPL BX, $0x00000100
 18653	JB   two_bytes_repeat_emit_calcBlockSizeSmall
 18654	JB   three_bytes_repeat_emit_calcBlockSizeSmall // same flags as the JB above, so never taken; falls through to the same label
 18655
 18656three_bytes_repeat_emit_calcBlockSizeSmall:
 18657	ADDQ $0x03, AX
 18658	JMP  memmove_long_repeat_emit_calcBlockSizeSmall
 18659
 18660two_bytes_repeat_emit_calcBlockSizeSmall:
 18661	ADDQ $0x02, AX
 18662	CMPL BX, $0x40
 18663	JB   memmove_repeat_emit_calcBlockSizeSmall
 18664	JMP  memmove_long_repeat_emit_calcBlockSizeSmall // both targets just add DI to AX
 18665
 18666one_byte_repeat_emit_calcBlockSizeSmall:
 18667	ADDQ $0x01, AX
 18668
 18669memmove_repeat_emit_calcBlockSizeSmall:
 18670	LEAQ (AX)(DI*1), AX // count the literal bytes themselves
 18671	JMP  emit_literal_done_repeat_emit_calcBlockSizeSmall
 18672
 18673memmove_long_repeat_emit_calcBlockSizeSmall:
 18674	LEAQ (AX)(DI*1), AX // count the literal bytes themselves
 18675
 18676emit_literal_done_repeat_emit_calcBlockSizeSmall:
 18677	ADDL $0x05, CX // skip the 4 bytes already compared at CX+1
 18678	MOVL CX, BX
 18679	SUBL 16(SP), BX
 18680	MOVQ src_len+8(FP), DI
 18681	SUBL CX, DI // DI = bytes left in src from CX
 18682	LEAQ (DX)(CX*1), R8 // R8 = &src[CX]
 18683	LEAQ (DX)(BX*1), BX // BX = &src[CX - repeat offset]
 18684
 18685	// matchLen
 18686	XORL R10, R10 // R10 = number of matching bytes found so far
 18687
 18688matchlen_loopback_16_repeat_extend_calcBlockSizeSmall:
 18689	CMPL DI, $0x10
 18690	JB   matchlen_match8_repeat_extend_calcBlockSizeSmall
 18691	MOVQ (R8)(R10*1), R9
 18692	MOVQ 8(R8)(R10*1), R11
 18693	XORQ (BX)(R10*1), R9 // nonzero => a difference within the first 8 bytes
 18694	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
 18695	XORQ 8(BX)(R10*1), R11
 18696	JNZ  matchlen_bsf_16repeat_extend_calcBlockSizeSmall
 18697	LEAL -16(DI), DI
 18698	LEAL 16(R10), R10
 18699	JMP  matchlen_loopback_16_repeat_extend_calcBlockSizeSmall
 18700
 18701matchlen_bsf_16repeat_extend_calcBlockSizeSmall:
 18702#ifdef GOAMD64_v3
 18703	TZCNTQ R11, R11 // index of the lowest set (first differing) bit
 18704
 18705#else
 18706	BSFQ R11, R11 // index of the lowest set (first differing) bit
 18707
 18708#endif
 18709	SARQ $0x03, R11 // bit index -> byte index
 18710	LEAL 8(R10)(R11*1), R10
 18711	JMP  repeat_extend_forward_end_calcBlockSizeSmall
 18712
 18713matchlen_match8_repeat_extend_calcBlockSizeSmall:
 18714	CMPL DI, $0x08
 18715	JB   matchlen_match4_repeat_extend_calcBlockSizeSmall
 18716	MOVQ (R8)(R10*1), R9
 18717	XORQ (BX)(R10*1), R9
 18718	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
 18719	LEAL -8(DI), DI
 18720	LEAL 8(R10), R10
 18721	JMP  matchlen_match4_repeat_extend_calcBlockSizeSmall
 18722
 18723matchlen_bsf_8_repeat_extend_calcBlockSizeSmall:
 18724#ifdef GOAMD64_v3
 18725	TZCNTQ R9, R9
 18726
 18727#else
 18728	BSFQ R9, R9
 18729
 18730#endif
 18731	SARQ $0x03, R9 // bit index -> byte index
 18732	LEAL (R10)(R9*1), R10
 18733	JMP  repeat_extend_forward_end_calcBlockSizeSmall
 18734
 18735matchlen_match4_repeat_extend_calcBlockSizeSmall:
 18736	CMPL DI, $0x04
 18737	JB   matchlen_match2_repeat_extend_calcBlockSizeSmall
 18738	MOVL (R8)(R10*1), R9
 18739	CMPL (BX)(R10*1), R9
 18740	JNE  matchlen_match2_repeat_extend_calcBlockSizeSmall
 18741	LEAL -4(DI), DI
 18742	LEAL 4(R10), R10
 18743
 18744matchlen_match2_repeat_extend_calcBlockSizeSmall:
 18745	CMPL DI, $0x01
 18746	JE   matchlen_match1_repeat_extend_calcBlockSizeSmall // exactly one byte left
 18747	JB   repeat_extend_forward_end_calcBlockSizeSmall // nothing left
 18748	MOVW (R8)(R10*1), R9
 18749	CMPW (BX)(R10*1), R9
 18750	JNE  matchlen_match1_repeat_extend_calcBlockSizeSmall
 18751	LEAL 2(R10), R10
 18752	SUBL $0x02, DI
 18753	JZ   repeat_extend_forward_end_calcBlockSizeSmall
 18754
 18755matchlen_match1_repeat_extend_calcBlockSizeSmall:
 18756	MOVB (R8)(R10*1), R9
 18757	CMPB (BX)(R10*1), R9
 18758	JNE  repeat_extend_forward_end_calcBlockSizeSmall
 18759	LEAL 1(R10), R10
 18760
 18761repeat_extend_forward_end_calcBlockSizeSmall:
 18762	ADDL R10, CX // CX = end of the match
 18763	MOVL CX, BX
 18764	SUBL SI, BX // BX = total match length
 18765	MOVL 16(SP), SI
 18766
 18767	// emitCopy
      // While the length exceeds 64: count 3 bytes and consume 60 of the length.
 18768two_byte_offset_repeat_as_copy_calcBlockSizeSmall:
 18769	CMPL BX, $0x40
 18770	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall
 18771	LEAL -60(BX), BX
 18772	ADDQ $0x03, AX
 18773	JMP  two_byte_offset_repeat_as_copy_calcBlockSizeSmall
 18774
 18775two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall:
 18776	MOVL BX, SI
 18777	SHLL $0x02, SI // NOTE(review): SI is not read again before being overwritten - looks vestigial
 18778	CMPL BX, $0x0c
 18779	JAE  emit_copy_three_repeat_as_copy_calcBlockSizeSmall // length >= 12 counts 3 bytes
 18780	ADDQ $0x02, AX // shorter remainder counts 2 bytes
 18781	JMP  repeat_end_emit_calcBlockSizeSmall
 18782
 18783emit_copy_three_repeat_as_copy_calcBlockSizeSmall:
 18784	ADDQ $0x03, AX
 18785
 18786repeat_end_emit_calcBlockSizeSmall:
 18787	MOVL CX, 12(SP) // everything up to CX is now accounted for
 18788	JMP  search_loop_calcBlockSizeSmall
 18789
      // No repeat: test the table candidates for CX, CX+1 and CX+2 in turn.
 18790no_repeat_found_calcBlockSizeSmall:
 18791	CMPL (DX)(BX*1), SI // candidate for CX vs the 4 bytes at CX
 18792	JEQ  candidate_match_calcBlockSizeSmall
 18793	SHRQ $0x08, SI
 18794	MOVL 24(SP)(R9*4), BX // BX = candidate for CX+2
 18795	LEAL 2(CX), R8
 18796	CMPL (DX)(DI*1), SI // candidate for CX+1 vs the 4 bytes at CX+1
 18797	JEQ  candidate2_match_calcBlockSizeSmall
 18798	MOVL R8, 24(SP)(R9*4) // record CX+2 in the table
 18799	SHRQ $0x08, SI
 18800	CMPL (DX)(BX*1), SI // candidate for CX+2 vs the 4 bytes at CX+2
 18801	JEQ  candidate3_match_calcBlockSizeSmall
 18802	MOVL 20(SP), CX // no match: advance to the position saved at the loop top
 18803	JMP  search_loop_calcBlockSizeSmall
 18804
 18805candidate3_match_calcBlockSizeSmall:
 18806	ADDL $0x02, CX
 18807	JMP  candidate_match_calcBlockSizeSmall
 18808
 18809candidate2_match_calcBlockSizeSmall:
 18810	MOVL R8, 24(SP)(R9*4) // record CX+2 in the table on this path as well
 18811	INCL CX
 18812	MOVL DI, BX
 18813
      // Here CX = match position and BX = candidate position.
 18814candidate_match_calcBlockSizeSmall:
 18815	MOVL  12(SP), SI
 18816	TESTL BX, BX
 18817	JZ    match_extend_back_end_calcBlockSizeSmall // candidate at position 0: cannot extend backwards
 18818
 18819match_extend_back_loop_calcBlockSizeSmall:
 18820	CMPL CX, SI
 18821	JBE  match_extend_back_end_calcBlockSizeSmall // do not extend past the start of the pending literals
 18822	MOVB -1(DX)(BX*1), DI
 18823	MOVB -1(DX)(CX*1), R8
 18824	CMPB DI, R8
 18825	JNE  match_extend_back_end_calcBlockSizeSmall
 18826	LEAL -1(CX), CX
 18827	DECL BX
 18828	JZ   match_extend_back_end_calcBlockSizeSmall
 18829	JMP  match_extend_back_loop_calcBlockSizeSmall
 18830
 18831match_extend_back_end_calcBlockSizeSmall:
 18832	MOVL CX, SI
 18833	SUBL 12(SP), SI // pending literal length
 18834	LEAQ 3(AX)(SI*1), SI // count so far + literals + 3 bytes
 18835	CMPQ SI, (SP)
 18836	JB   match_dst_size_check_calcBlockSizeSmall
 18837	MOVQ $0x00000000, ret+24(FP) // limit reached: return 0
 18838	RET
 18839
      // Account for the literal run [12(SP), CX), as in the repeat path above.
 18840match_dst_size_check_calcBlockSizeSmall:
 18841	MOVL CX, SI
 18842	MOVL 12(SP), DI
 18843	CMPL DI, SI
 18844	JEQ  emit_literal_done_match_emit_calcBlockSizeSmall // empty run: nothing to count
 18845	MOVL SI, R8
 18846	MOVL SI, 12(SP)
 18847	LEAQ (DX)(DI*1), SI // overwritten by the LEAL two lines below
 18848	SUBL DI, R8 // R8 = literal length
 18849	LEAL -1(R8), SI // SI = length - 1 selects the header size
 18850	CMPL SI, $0x3c
 18851	JB   one_byte_match_emit_calcBlockSizeSmall
 18852	CMPL SI, $0x00000100
 18853	JB   two_bytes_match_emit_calcBlockSizeSmall
 18854	JB   three_bytes_match_emit_calcBlockSizeSmall // same flags as the JB above, so never taken; falls through to the same label
 18855
 18856three_bytes_match_emit_calcBlockSizeSmall:
 18857	ADDQ $0x03, AX
 18858	JMP  memmove_long_match_emit_calcBlockSizeSmall
 18859
 18860two_bytes_match_emit_calcBlockSizeSmall:
 18861	ADDQ $0x02, AX
 18862	CMPL SI, $0x40
 18863	JB   memmove_match_emit_calcBlockSizeSmall
 18864	JMP  memmove_long_match_emit_calcBlockSizeSmall // both targets just add R8 to AX
 18865
 18866one_byte_match_emit_calcBlockSizeSmall:
 18867	ADDQ $0x01, AX
 18868
 18869memmove_match_emit_calcBlockSizeSmall:
 18870	LEAQ (AX)(R8*1), AX // count the literal bytes themselves
 18871	JMP  emit_literal_done_match_emit_calcBlockSizeSmall
 18872
 18873memmove_long_match_emit_calcBlockSizeSmall:
 18874	LEAQ (AX)(R8*1), AX // count the literal bytes themselves
 18875
 18876emit_literal_done_match_emit_calcBlockSizeSmall:
 18877match_nolit_loop_calcBlockSizeSmall:
 18878	MOVL CX, SI
 18879	SUBL BX, SI
 18880	MOVL SI, 16(SP) // new repeat offset = match position - candidate position
 18881	ADDL $0x04, CX // the first 4 bytes are already known to match
 18882	ADDL $0x04, BX
 18883	MOVQ src_len+8(FP), SI
 18884	SUBL CX, SI // SI = bytes left in src from CX
 18885	LEAQ (DX)(CX*1), DI // DI = &src[CX]
 18886	LEAQ (DX)(BX*1), BX // BX = &src[candidate + 4]
 18887
 18888	// matchLen
 18889	XORL R9, R9 // R9 = number of additional matching bytes
 18890
 18891matchlen_loopback_16_match_nolit_calcBlockSizeSmall:
 18892	CMPL SI, $0x10
 18893	JB   matchlen_match8_match_nolit_calcBlockSizeSmall
 18894	MOVQ (DI)(R9*1), R8
 18895	MOVQ 8(DI)(R9*1), R10
 18896	XORQ (BX)(R9*1), R8
 18897	JNZ  matchlen_bsf_8_match_nolit_calcBlockSizeSmall
 18898	XORQ 8(BX)(R9*1), R10
 18899	JNZ  matchlen_bsf_16match_nolit_calcBlockSizeSmall
 18900	LEAL -16(SI), SI
 18901	LEAL 16(R9), R9
 18902	JMP  matchlen_loopback_16_match_nolit_calcBlockSizeSmall
 18903
 18904matchlen_bsf_16match_nolit_calcBlockSizeSmall:
 18905#ifdef GOAMD64_v3
 18906	TZCNTQ R10, R10
 18907
 18908#else
 18909	BSFQ R10, R10
 18910
 18911#endif
 18912	SARQ $0x03, R10 // bit index -> byte index
 18913	LEAL 8(R9)(R10*1), R9
 18914	JMP  match_nolit_end_calcBlockSizeSmall
 18915
 18916matchlen_match8_match_nolit_calcBlockSizeSmall:
 18917	CMPL SI, $0x08
 18918	JB   matchlen_match4_match_nolit_calcBlockSizeSmall
 18919	MOVQ (DI)(R9*1), R8
 18920	XORQ (BX)(R9*1), R8
 18921	JNZ  matchlen_bsf_8_match_nolit_calcBlockSizeSmall
 18922	LEAL -8(SI), SI
 18923	LEAL 8(R9), R9
 18924	JMP  matchlen_match4_match_nolit_calcBlockSizeSmall
 18925
 18926matchlen_bsf_8_match_nolit_calcBlockSizeSmall:
 18927#ifdef GOAMD64_v3
 18928	TZCNTQ R8, R8
 18929
 18930#else
 18931	BSFQ R8, R8
 18932
 18933#endif
 18934	SARQ $0x03, R8 // bit index -> byte index
 18935	LEAL (R9)(R8*1), R9
 18936	JMP  match_nolit_end_calcBlockSizeSmall
 18937
 18938matchlen_match4_match_nolit_calcBlockSizeSmall:
 18939	CMPL SI, $0x04
 18940	JB   matchlen_match2_match_nolit_calcBlockSizeSmall
 18941	MOVL (DI)(R9*1), R8
 18942	CMPL (BX)(R9*1), R8
 18943	JNE  matchlen_match2_match_nolit_calcBlockSizeSmall
 18944	LEAL -4(SI), SI
 18945	LEAL 4(R9), R9
 18946
 18947matchlen_match2_match_nolit_calcBlockSizeSmall:
 18948	CMPL SI, $0x01
 18949	JE   matchlen_match1_match_nolit_calcBlockSizeSmall
 18950	JB   match_nolit_end_calcBlockSizeSmall
 18951	MOVW (DI)(R9*1), R8
 18952	CMPW (BX)(R9*1), R8
 18953	JNE  matchlen_match1_match_nolit_calcBlockSizeSmall
 18954	LEAL 2(R9), R9
 18955	SUBL $0x02, SI
 18956	JZ   match_nolit_end_calcBlockSizeSmall
 18957
 18958matchlen_match1_match_nolit_calcBlockSizeSmall:
 18959	MOVB (DI)(R9*1), R8
 18960	CMPB (BX)(R9*1), R8
 18961	JNE  match_nolit_end_calcBlockSizeSmall
 18962	LEAL 1(R9), R9
 18963
 18964match_nolit_end_calcBlockSizeSmall:
 18965	ADDL R9, CX // CX = end of the match
 18966	MOVL 16(SP), BX
 18967	ADDL $0x04, R9 // R9 = full match length including the first 4 bytes
 18968	MOVL CX, 12(SP) // everything up to CX is now accounted for
 18969
 18970	// emitCopy
      // While the length exceeds 64: count 3 bytes and consume 60 of the length.
 18971two_byte_offset_match_nolit_calcBlockSizeSmall:
 18972	CMPL R9, $0x40
 18973	JBE  two_byte_offset_short_match_nolit_calcBlockSizeSmall
 18974	LEAL -60(R9), R9
 18975	ADDQ $0x03, AX
 18976	JMP  two_byte_offset_match_nolit_calcBlockSizeSmall
 18977
 18978two_byte_offset_short_match_nolit_calcBlockSizeSmall:
 18979	MOVL R9, BX
 18980	SHLL $0x02, BX // NOTE(review): BX is not read again before being overwritten - looks vestigial
 18981	CMPL R9, $0x0c
 18982	JAE  emit_copy_three_match_nolit_calcBlockSizeSmall // length >= 12 counts 3 bytes
 18983	ADDQ $0x02, AX // shorter remainder counts 2 bytes
 18984	JMP  match_nolit_emitcopy_end_calcBlockSizeSmall
 18985
 18986emit_copy_three_match_nolit_calcBlockSizeSmall:
 18987	ADDQ $0x03, AX
 18988
 18989match_nolit_emitcopy_end_calcBlockSizeSmall:
 18990	CMPL CX, 8(SP)
 18991	JAE  emit_remainder_calcBlockSizeSmall // too close to the end of src to keep searching
 18992	MOVQ -2(DX)(CX*1), SI // 8 bytes starting at CX-2
 18993	CMPQ AX, (SP)
 18994	JB   match_nolit_dst_ok_calcBlockSizeSmall
 18995	MOVQ $0x00000000, ret+24(FP) // limit reached: return 0
 18996	RET
 18997
      // Record positions CX-2 and CX in the table, then check whether the previous
      // entry for CX's hash matches immediately.
 18998match_nolit_dst_ok_calcBlockSizeSmall:
 18999	MOVQ  $0x9e3779b1, R8
 19000	MOVQ  SI, DI
 19001	SHRQ  $0x10, SI // low 32 bits of SI = the 4 bytes at CX
 19002	MOVQ  SI, BX
 19003	SHLQ  $0x20, DI
 19004	IMULQ R8, DI
 19005	SHRQ  $0x37, DI // DI = table index for position CX-2
 19006	SHLQ  $0x20, BX
 19007	IMULQ R8, BX
 19008	SHRQ  $0x37, BX // BX = table index for position CX
 19009	LEAL  -2(CX), R8
 19010	LEAQ  24(SP)(BX*4), R9
 19011	MOVL  (R9), BX // BX = previous table entry for CX's hash
 19012	MOVL  R8, 24(SP)(DI*4)
 19013	MOVL  CX, (R9)
 19014	CMPL  (DX)(BX*1), SI
 19015	JEQ   match_nolit_loop_calcBlockSizeSmall // immediate match at CX with no literals in between
 19016	INCL  CX
 19017	JMP   search_loop_calcBlockSizeSmall
 19018
 19019emit_remainder_calcBlockSizeSmall:
 19020	MOVQ src_len+8(FP), CX
 19021	SUBL 12(SP), CX // trailing literal length
 19022	LEAQ 3(AX)(CX*1), CX
 19023	CMPQ CX, (SP)
 19024	JB   emit_remainder_ok_calcBlockSizeSmall
 19025	MOVQ $0x00000000, ret+24(FP) // limit reached: return 0
 19026	RET
 19027
      // Account for the trailing literal run [12(SP), src_len).
 19028emit_remainder_ok_calcBlockSizeSmall:
 19029	MOVQ src_len+8(FP), CX
 19030	MOVL 12(SP), BX
 19031	CMPL BX, CX
 19032	JEQ  emit_literal_done_emit_remainder_calcBlockSizeSmall // nothing left to count
 19033	MOVL CX, SI
 19034	MOVL CX, 12(SP)
 19035	LEAQ (DX)(BX*1), CX // overwritten by the LEAL two lines below
 19036	SUBL BX, SI // SI = literal length
 19037	LEAL -1(SI), CX // CX = length - 1 selects the header size
 19038	CMPL CX, $0x3c
 19039	JB   one_byte_emit_remainder_calcBlockSizeSmall
 19040	CMPL CX, $0x00000100
 19041	JB   two_bytes_emit_remainder_calcBlockSizeSmall
 19042	JB   three_bytes_emit_remainder_calcBlockSizeSmall // same flags as the JB above, so never taken; falls through to the same label
 19043
 19044three_bytes_emit_remainder_calcBlockSizeSmall:
 19045	ADDQ $0x03, AX
 19046	JMP  memmove_long_emit_remainder_calcBlockSizeSmall
 19047
 19048two_bytes_emit_remainder_calcBlockSizeSmall:
 19049	ADDQ $0x02, AX
 19050	CMPL CX, $0x40
 19051	JB   memmove_emit_remainder_calcBlockSizeSmall
 19052	JMP  memmove_long_emit_remainder_calcBlockSizeSmall // both targets just add SI to AX
 19053
 19054one_byte_emit_remainder_calcBlockSizeSmall:
 19055	ADDQ $0x01, AX
 19056
 19057memmove_emit_remainder_calcBlockSizeSmall:
 19058	LEAQ (AX)(SI*1), AX // count the literal bytes themselves
 19059	JMP  emit_literal_done_emit_remainder_calcBlockSizeSmall
 19060
 19061memmove_long_emit_remainder_calcBlockSizeSmall:
 19062	LEAQ (AX)(SI*1), AX // count the literal bytes themselves
 19063
 19064emit_literal_done_emit_remainder_calcBlockSizeSmall:
 19065	MOVQ AX, ret+24(FP) // return the accumulated byte count
 19066	RET
 19067
 19068// func emitLiteral(dst []byte, lit []byte) int
 19069// Requires: SSE2
      //
      // Writes a length header for lit at dst, copies the bytes of lit right after
      // it, and returns header size + len(lit). Returns 0 without touching dst when
      // lit is empty. No check against the length of dst is made here, so the
      // caller has to supply a large enough destination.
      //
      // Header layout, with n = len(lit) - 1 taken as a 32-bit value:
      //   n < 60        1 byte : n<<2
      //   n < 1<<8      2 bytes: 0xf0, n
      //   n < 1<<16     3 bytes: 0xf4, n as 16 bits
      //   n < 1<<24     4 bytes: 0xf8, n as 24 bits (16-bit store + 1 byte)
      //   otherwise     5 bytes: 0xfc, n as 32 bits
      //
      // Registers: AX = write pointer, CX = lit pointer, DX = len(lit),
      // BX = return value, SI = n.
 19070TEXT ·emitLiteral(SB), NOSPLIT, $0-56
 19071	MOVQ  lit_len+32(FP), DX
 19072	MOVQ  dst_base+0(FP), AX
 19073	MOVQ  lit_base+24(FP), CX
 19074	TESTQ DX, DX
 19075	JZ    emit_literal_end_standalone_skip // empty literal: return 0
 19076	MOVL  DX, BX // BX starts as len(lit); the header size is added below
 19077	LEAL  -1(DX), SI // SI = n = len(lit) - 1
 19078	CMPL  SI, $0x3c
 19079	JB    one_byte_standalone
 19080	CMPL  SI, $0x00000100
 19081	JB    two_bytes_standalone
 19082	CMPL  SI, $0x00010000
 19083	JB    three_bytes_standalone
 19084	CMPL  SI, $0x01000000
 19085	JB    four_bytes_standalone
 19086	MOVB  $0xfc, (AX) // 5-byte header: tag + 32-bit n
 19087	MOVL  SI, 1(AX)
 19088	ADDQ  $0x05, BX
 19089	ADDQ  $0x05, AX
 19090	JMP   memmove_long_standalone
 19091
 19092four_bytes_standalone:
 19093	MOVL SI, DI
 19094	SHRL $0x10, DI // DI = bits 16-23 of n
 19095	MOVB $0xf8, (AX) // 4-byte header: tag + 24-bit n
 19096	MOVW SI, 1(AX)
 19097	MOVB DI, 3(AX)
 19098	ADDQ $0x04, BX
 19099	ADDQ $0x04, AX
 19100	JMP  memmove_long_standalone
 19101
 19102three_bytes_standalone:
 19103	MOVB $0xf4, (AX) // 3-byte header: tag + 16-bit n
 19104	MOVW SI, 1(AX)
 19105	ADDQ $0x03, BX
 19106	ADDQ $0x03, AX
 19107	JMP  memmove_long_standalone
 19108
 19109two_bytes_standalone:
 19110	MOVB $0xf0, (AX) // 2-byte header: tag + 8-bit n
 19111	MOVB SI, 1(AX)
 19112	ADDQ $0x02, BX
 19113	ADDQ $0x02, AX
 19114	CMPL SI, $0x40
 19115	JB   memmove_standalone // n < 64, i.e. len(lit) <= 64: use the short copy
 19116	JMP  memmove_long_standalone
 19117
 19118one_byte_standalone:
 19119	SHLB $0x02, SI // 1-byte header: n<<2
 19120	MOVB SI, (AX)
 19121	ADDQ $0x01, BX
 19122	ADDQ $0x01, AX
 19123
      // Short copy, reached only with 1 <= len(lit) <= 64. Each case loads the head
      // and the tail of the range (possibly overlapping) before storing either.
 19124memmove_standalone:
 19125	// genMemMoveShort
 19126	CMPQ DX, $0x03
 19127	JB   emit_lit_memmove_standalone_memmove_move_1or2
 19128	JE   emit_lit_memmove_standalone_memmove_move_3
 19129	CMPQ DX, $0x08
 19130	JB   emit_lit_memmove_standalone_memmove_move_4through7
 19131	CMPQ DX, $0x10
 19132	JBE  emit_lit_memmove_standalone_memmove_move_8through16
 19133	CMPQ DX, $0x20
 19134	JBE  emit_lit_memmove_standalone_memmove_move_17through32
 19135	JMP  emit_lit_memmove_standalone_memmove_move_33through64
 19136
 19137emit_lit_memmove_standalone_memmove_move_1or2:
 19138	MOVB (CX), SI // first byte
 19139	MOVB -1(CX)(DX*1), CL // last byte (the same byte when len == 1)
 19140	MOVB SI, (AX)
 19141	MOVB CL, -1(AX)(DX*1)
 19142	JMP  emit_literal_end_standalone
 19143
 19144emit_lit_memmove_standalone_memmove_move_3:
 19145	MOVW (CX), SI
 19146	MOVB 2(CX), CL
 19147	MOVW SI, (AX)
 19148	MOVB CL, 2(AX)
 19149	JMP  emit_literal_end_standalone
 19150
 19151emit_lit_memmove_standalone_memmove_move_4through7:
 19152	MOVL (CX), SI // first 4 bytes
 19153	MOVL -4(CX)(DX*1), CX // last 4 bytes
 19154	MOVL SI, (AX)
 19155	MOVL CX, -4(AX)(DX*1)
 19156	JMP  emit_literal_end_standalone
 19157
 19158emit_lit_memmove_standalone_memmove_move_8through16:
 19159	MOVQ (CX), SI // first 8 bytes
 19160	MOVQ -8(CX)(DX*1), CX // last 8 bytes
 19161	MOVQ SI, (AX)
 19162	MOVQ CX, -8(AX)(DX*1)
 19163	JMP  emit_literal_end_standalone
 19164
 19165emit_lit_memmove_standalone_memmove_move_17through32:
 19166	MOVOU (CX), X0 // first 16 bytes
 19167	MOVOU -16(CX)(DX*1), X1 // last 16 bytes
 19168	MOVOU X0, (AX)
 19169	MOVOU X1, -16(AX)(DX*1)
 19170	JMP   emit_literal_end_standalone
 19171
 19172emit_lit_memmove_standalone_memmove_move_33through64:
 19173	MOVOU (CX), X0 // first 32 bytes in X0, X1
 19174	MOVOU 16(CX), X1
 19175	MOVOU -32(CX)(DX*1), X2 // last 32 bytes in X2, X3
 19176	MOVOU -16(CX)(DX*1), X3
 19177	MOVOU X0, (AX)
 19178	MOVOU X1, 16(AX)
 19179	MOVOU X2, -32(AX)(DX*1)
 19180	MOVOU X3, -16(AX)(DX*1)
 19181	JMP   emit_literal_end_standalone
 19182	JMP emit_literal_end_standalone // unreachable: directly follows an unconditional JMP
 19183
      // Long copy, reached only with len(lit) >= 65. The first and last 32 bytes are
      // loaded up front and stored last; the loop in between copies 32 bytes per
      // iteration using MOVOA stores to dst.
 19184memmove_long_standalone:
 19185	// genMemMoveLong
 19186	MOVOU (CX), X0
 19187	MOVOU 16(CX), X1
 19188	MOVOU -32(CX)(DX*1), X2
 19189	MOVOU -16(CX)(DX*1), X3
 19190	MOVQ  DX, DI
 19191	SHRQ  $0x05, DI // DI = len / 32
 19192	MOVQ  AX, SI
 19193	ANDL  $0x0000001f, SI // SI = dst & 31
 19194	MOVQ  $0x00000040, R8
 19195	SUBQ  SI, R8 // R8 = 64 - (dst & 31), so AX + R8 - 32 is a multiple of 32
 19196	DECQ  DI
 19197	JA    emit_lit_memmove_long_standalonelarge_forward_sse_loop_32 // NOTE(review): DEC leaves CF as set by the SUBQ above; with len >= 65, DI-1 != 0, so this looks always taken - confirm
 19198	LEAQ  -32(CX)(R8*1), SI
 19199	LEAQ  -32(AX)(R8*1), R9
 19200
 19201emit_lit_memmove_long_standalonelarge_big_loop_back:
 19202	MOVOU (SI), X4
 19203	MOVOU 16(SI), X5
 19204	MOVOA X4, (R9)
 19205	MOVOA X5, 16(R9)
 19206	ADDQ  $0x20, R9
 19207	ADDQ  $0x20, SI
 19208	ADDQ  $0x20, R8
 19209	DECQ  DI
 19210	JNA   emit_lit_memmove_long_standalonelarge_big_loop_back // NOTE(review): JNA after DECQ also tests CF left by the ADDQ above - confirm intended loop condition
 19211
 19212emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
 19213	MOVOU -32(CX)(R8*1), X4
 19214	MOVOU -16(CX)(R8*1), X5
 19215	MOVOA X4, -32(AX)(R8*1)
 19216	MOVOA X5, -16(AX)(R8*1)
 19217	ADDQ  $0x20, R8
 19218	CMPQ  DX, R8
 19219	JAE   emit_lit_memmove_long_standalonelarge_forward_sse_loop_32 // continue while R8 <= len
 19220	MOVOU X0, (AX) // finally store the saved head ...
 19221	MOVOU X1, 16(AX)
 19222	MOVOU X2, -32(AX)(DX*1) // ... and the saved tail
 19223	MOVOU X3, -16(AX)(DX*1)
 19224	JMP   emit_literal_end_standalone
 19225	JMP emit_literal_end_standalone // unreachable: directly follows an unconditional JMP
 19226
 19227emit_literal_end_standalone_skip:
 19228	XORQ BX, BX // empty literal: nothing written
 19229
 19230emit_literal_end_standalone:
 19231	MOVQ BX, ret+48(FP) // return header size + len(lit), or 0
 19232	RET
 19233
 19234// func emitRepeat(dst []byte, offset int, length int) int
      // Writes one or more repeat ops covering `length` bytes starting at dst_base
      // and returns the number of bytes written.
      // Registers: AX = write cursor, BX = bytes written so far (the result),
      // CX = offset, DX = remaining length, SI = scratch.
      // Only dst_base is loaded; dst_len is never read, so nothing in this routine
      // checks that dst has room for the output.
 19235TEXT ·emitRepeat(SB), NOSPLIT, $0-48
 19236	XORQ BX, BX // BX = 0 bytes written
 19237	MOVQ dst_base+0(FP), AX
 19238	MOVQ offset+24(FP), CX
 19239	MOVQ length+32(FP), DX
 19240
 19241	// emitRepeat
 19242emit_repeat_again_standalone:
 19243	MOVL DX, SI // SI = length before the -4 bias
 19244	LEAL -4(DX), DX // DX = length - 4 (all size checks below use the biased value)
 19245	CMPL SI, $0x08
 19246	JBE  repeat_two_standalone // length <= 8: 2-byte form, no offset bits
 19247	CMPL SI, $0x0c
 19248	JAE  cant_repeat_two_offset_standalone // length >= 12: too long for the offset-carrying 2-byte form
 19249	CMPL CX, $0x00000800
 19250	JB   repeat_two_offset_standalone // 9 <= length <= 11 and offset < 2048
 19251
 19252cant_repeat_two_offset_standalone:
 19253	CMPL DX, $0x00000104
 19254	JB   repeat_three_standalone // biased length < 260: 3-byte form
 19255	CMPL DX, $0x00010100
 19256	JB   repeat_four_standalone // biased length < 65792: 4-byte form
 19257	CMPL DX, $0x0100ffff
 19258	JB   repeat_five_standalone // biased length < 0x0100ffff: single 5-byte form
      	// Too long for one op: emit a 5-byte op with its 24-bit field fixed at
      	// 0xfffffb (bytes 1d 00 fb ff ff) and loop with what is left.
 19259	LEAL -16842747(DX), DX // DX -= 0x0100fffb
 19260	MOVL $0xfffb001d, (AX)
 19261	MOVB $0xff, 4(AX)
 19262	ADDQ $0x05, AX
 19263	ADDQ $0x05, BX
 19264	JMP  emit_repeat_again_standalone
 19265
 19266repeat_five_standalone:
 19267	LEAL -65536(DX), DX // 24-bit field holds biased length - 65536
 19268	MOVL DX, CX // CX (offset) is no longer needed; reuse it as scratch
 19269	MOVW $0x001d, (AX) // bytes 0x1d, 0x00
 19270	MOVW DX, 2(AX) // field bits 0..15
 19271	SARL $0x10, CX
 19272	MOVB CL, 4(AX) // field bits 16..23
 19273	ADDQ $0x05, BX
 19274	ADDQ $0x05, AX
 19275	JMP  gen_emit_repeat_end
 19276
 19277repeat_four_standalone:
 19278	LEAL -256(DX), DX // 16-bit field holds biased length - 256
 19279	MOVW $0x0019, (AX) // bytes 0x19, 0x00
 19280	MOVW DX, 2(AX)
 19281	ADDQ $0x04, BX
 19282	ADDQ $0x04, AX
 19283	JMP  gen_emit_repeat_end
 19284
 19285repeat_three_standalone:
 19286	LEAL -4(DX), DX // 8-bit field holds biased length - 4
 19287	MOVW $0x0015, (AX) // bytes 0x15, 0x00
 19288	MOVB DL, 2(AX)
 19289	ADDQ $0x03, BX
 19290	ADDQ $0x03, AX
 19291	JMP  gen_emit_repeat_end
 19292
 19293repeat_two_standalone:
 19294	SHLL $0x02, DX
 19295	ORL  $0x01, DX // DX = (length-4)<<2 | 1
 19296	MOVW DX, (AX) // 16-bit store of DX: both output bytes come from DX
 19297	ADDQ $0x02, BX
 19298	ADDQ $0x02, AX
 19299	JMP  gen_emit_repeat_end
 19300
 19301repeat_two_offset_standalone:
 19302	XORQ SI, SI
 19303	LEAL 1(SI)(DX*4), DX // DX = (length-4)<<2 | 1 (SI is zero)
 19304	MOVB CL, 1(AX) // second byte = low 8 bits of offset
 19305	SARL $0x08, CX
 19306	SHLL $0x05, CX // offset bits 8..10 move to bits 5..7 of the first byte
 19307	ORL  CX, DX
 19308	MOVB DL, (AX)
 19309	ADDQ $0x02, BX
 19310	ADDQ $0x02, AX
 19311
 19312gen_emit_repeat_end:
 19313	MOVQ BX, ret+40(FP) // return total bytes written
 19314	RET
 19315
 19316// func emitCopy(dst []byte, offset int, length int) int
      // Writes a copy op for (offset, length) at dst_base, using the inlined repeat
      // encoder for whatever length does not fit in the first op, and returns the
      // number of bytes written.
      // Registers: AX = write cursor, BX = bytes written so far (the result),
      // CX = offset, DX = remaining length, SI/DI = scratch.
      // Only dst_base is loaded; dst_len is never read, so nothing in this routine
      // checks that dst has room for the output.
 19317TEXT ·emitCopy(SB), NOSPLIT, $0-48
 19318	XORQ BX, BX // BX = 0 bytes written
 19319	MOVQ dst_base+0(FP), AX
 19320	MOVQ offset+24(FP), CX
 19321	MOVQ length+32(FP), DX
 19322
 19323	// emitCopy
 19324	CMPL CX, $0x00010000
 19325	JB   two_byte_offset_standalone // offset < 65536: forms with a 1- or 2-byte offset
 19326	CMPL DX, $0x40
 19327	JBE  four_bytes_remain_standalone // length <= 64 fits one 5-byte op
      	// length > 64: emit a 5-byte op with tag 0xff followed by the 32-bit offset,
      	// accounting for 64 bytes of length.
 19328	MOVB $0xff, (AX)
 19329	MOVL CX, 1(AX)
 19330	LEAL -64(DX), DX
 19331	ADDQ $0x05, BX
 19332	ADDQ $0x05, AX
 19333	CMPL DX, $0x04
 19334	JB   four_bytes_remain_standalone // fewer than 4 bytes left: not sent to the repeat encoder
 19335
 19336	// emitRepeat
      	// Inlined repeat encoder for the remaining length (DX >= 4 here).
 19337emit_repeat_again_standalone_emit_copy:
 19338	MOVL DX, SI // SI = remaining length before the -4 bias
 19339	LEAL -4(DX), DX // DX = remaining length - 4
 19340	CMPL SI, $0x08
 19341	JBE  repeat_two_standalone_emit_copy // length <= 8: 2-byte form
 19342	CMPL SI, $0x0c
 19343	JAE  cant_repeat_two_offset_standalone_emit_copy
 19344	CMPL CX, $0x00000800
 19345	JB   repeat_two_offset_standalone_emit_copy // unreachable on this path in practice: CX >= 65536 here
 19346
 19347cant_repeat_two_offset_standalone_emit_copy:
 19348	CMPL DX, $0x00000104
 19349	JB   repeat_three_standalone_emit_copy
 19350	CMPL DX, $0x00010100
 19351	JB   repeat_four_standalone_emit_copy
 19352	CMPL DX, $0x0100ffff
 19353	JB   repeat_five_standalone_emit_copy
 19354	LEAL -16842747(DX), DX // DX -= 0x0100fffb, then emit bytes 1d 00 fb ff ff and loop
 19355	MOVL $0xfffb001d, (AX)
 19356	MOVB $0xff, 4(AX)
 19357	ADDQ $0x05, AX
 19358	ADDQ $0x05, BX
 19359	JMP  emit_repeat_again_standalone_emit_copy
 19360
 19361repeat_five_standalone_emit_copy:
 19362	LEAL -65536(DX), DX
 19363	MOVL DX, CX // offset in CX is no longer needed; reuse as scratch
 19364	MOVW $0x001d, (AX)
 19365	MOVW DX, 2(AX)
 19366	SARL $0x10, CX
 19367	MOVB CL, 4(AX)
 19368	ADDQ $0x05, BX
 19369	ADDQ $0x05, AX
 19370	JMP  gen_emit_copy_end
 19371
 19372repeat_four_standalone_emit_copy:
 19373	LEAL -256(DX), DX
 19374	MOVW $0x0019, (AX)
 19375	MOVW DX, 2(AX)
 19376	ADDQ $0x04, BX
 19377	ADDQ $0x04, AX
 19378	JMP  gen_emit_copy_end
 19379
 19380repeat_three_standalone_emit_copy:
 19381	LEAL -4(DX), DX
 19382	MOVW $0x0015, (AX)
 19383	MOVB DL, 2(AX)
 19384	ADDQ $0x03, BX
 19385	ADDQ $0x03, AX
 19386	JMP  gen_emit_copy_end
 19387
 19388repeat_two_standalone_emit_copy:
 19389	SHLL $0x02, DX
 19390	ORL  $0x01, DX // DX = (length-4)<<2 | 1
 19391	MOVW DX, (AX)
 19392	ADDQ $0x02, BX
 19393	ADDQ $0x02, AX
 19394	JMP  gen_emit_copy_end
 19395
 19396repeat_two_offset_standalone_emit_copy:
 19397	XORQ SI, SI
 19398	LEAL 1(SI)(DX*4), DX
 19399	MOVB CL, 1(AX)
 19400	SARL $0x08, CX
 19401	SHLL $0x05, CX
 19402	ORL  CX, DX
 19403	MOVB DL, (AX)
 19404	ADDQ $0x02, BX
 19405	ADDQ $0x02, AX
 19406	JMP  gen_emit_copy_end
 19407
 19408four_bytes_remain_standalone:
 19409	TESTL DX, DX
 19410	JZ    gen_emit_copy_end // nothing left to encode
 19411	XORL  SI, SI
 19412	LEAL  -1(SI)(DX*4), DX // tag = 4*length - 1, i.e. (length-1)<<2 | 3
 19413	MOVB  DL, (AX)
 19414	MOVL  CX, 1(AX) // 32-bit offset follows the tag
 19415	ADDQ  $0x05, BX
 19416	ADDQ  $0x05, AX
 19417	JMP   gen_emit_copy_end
 19418
 19419two_byte_offset_standalone:
 19420	CMPL DX, $0x40
 19421	JBE  two_byte_offset_short_standalone // length <= 64: a single op is enough
 19422	CMPL CX, $0x00000800
 19423	JAE  long_offset_short_standalone // offset >= 2048: start with a 3-byte op
      	// offset < 2048 and length > 64: emit a 2-byte op accounting for 8 bytes of
      	// length, then hand the rest to the repeat encoder.
 19424	MOVL $0x00000001, SI
 19425	LEAL 16(SI), SI // SI = 17 = (8-4)<<2 | 1
 19426	MOVB CL, 1(AX) // second byte = low 8 bits of offset
 19427	MOVL CX, DI
 19428	SHRL $0x08, DI
 19429	SHLL $0x05, DI // offset bits 8..10 -> bits 5..7 of the first byte
 19430	ORL  DI, SI
 19431	MOVB SI, (AX)
 19432	ADDQ $0x02, BX
 19433	ADDQ $0x02, AX
 19434	SUBL $0x08, DX
 19435
 19436	// emitRepeat
      	// The remaining length is > 56 here, so neither 2-byte repeat form can
      	// apply; apply the -4 bias and enter the encoder past those checks.
 19437	LEAL -4(DX), DX
 19438	JMP  cant_repeat_two_offset_standalone_emit_copy_short_2b
 19439
 19440emit_repeat_again_standalone_emit_copy_short_2b:
 19441	MOVL DX, SI
 19442	LEAL -4(DX), DX
 19443	CMPL SI, $0x08
 19444	JBE  repeat_two_standalone_emit_copy_short_2b
 19445	CMPL SI, $0x0c
 19446	JAE  cant_repeat_two_offset_standalone_emit_copy_short_2b
 19447	CMPL CX, $0x00000800
 19448	JB   repeat_two_offset_standalone_emit_copy_short_2b
 19449
 19450cant_repeat_two_offset_standalone_emit_copy_short_2b:
 19451	CMPL DX, $0x00000104
 19452	JB   repeat_three_standalone_emit_copy_short_2b
 19453	CMPL DX, $0x00010100
 19454	JB   repeat_four_standalone_emit_copy_short_2b
 19455	CMPL DX, $0x0100ffff
 19456	JB   repeat_five_standalone_emit_copy_short_2b
 19457	LEAL -16842747(DX), DX // DX -= 0x0100fffb, then emit bytes 1d 00 fb ff ff and loop
 19458	MOVL $0xfffb001d, (AX)
 19459	MOVB $0xff, 4(AX)
 19460	ADDQ $0x05, AX
 19461	ADDQ $0x05, BX
 19462	JMP  emit_repeat_again_standalone_emit_copy_short_2b
 19463
 19464repeat_five_standalone_emit_copy_short_2b:
 19465	LEAL -65536(DX), DX
 19466	MOVL DX, CX
 19467	MOVW $0x001d, (AX)
 19468	MOVW DX, 2(AX)
 19469	SARL $0x10, CX
 19470	MOVB CL, 4(AX)
 19471	ADDQ $0x05, BX
 19472	ADDQ $0x05, AX
 19473	JMP  gen_emit_copy_end
 19474
 19475repeat_four_standalone_emit_copy_short_2b:
 19476	LEAL -256(DX), DX
 19477	MOVW $0x0019, (AX)
 19478	MOVW DX, 2(AX)
 19479	ADDQ $0x04, BX
 19480	ADDQ $0x04, AX
 19481	JMP  gen_emit_copy_end
 19482
 19483repeat_three_standalone_emit_copy_short_2b:
 19484	LEAL -4(DX), DX
 19485	MOVW $0x0015, (AX)
 19486	MOVB DL, 2(AX)
 19487	ADDQ $0x03, BX
 19488	ADDQ $0x03, AX
 19489	JMP  gen_emit_copy_end
 19490
 19491repeat_two_standalone_emit_copy_short_2b:
 19492	SHLL $0x02, DX
 19493	ORL  $0x01, DX
 19494	MOVW DX, (AX)
 19495	ADDQ $0x02, BX
 19496	ADDQ $0x02, AX
 19497	JMP  gen_emit_copy_end
 19498
 19499repeat_two_offset_standalone_emit_copy_short_2b:
 19500	XORQ SI, SI
 19501	LEAL 1(SI)(DX*4), DX
 19502	MOVB CL, 1(AX)
 19503	SARL $0x08, CX
 19504	SHLL $0x05, CX
 19505	ORL  CX, DX
 19506	MOVB DL, (AX)
 19507	ADDQ $0x02, BX
 19508	ADDQ $0x02, AX
 19509	JMP  gen_emit_copy_end
 19510
 19511long_offset_short_standalone:
      	// 2048 <= offset < 65536 and length > 64: emit a 3-byte op (tag 0xee plus
      	// the 16-bit offset) accounting for 60 bytes, then encode the rest as repeats.
 19512	MOVB $0xee, (AX)
 19513	MOVW CX, 1(AX)
 19514	LEAL -60(DX), DX
 19515	ADDQ $0x03, AX
 19516	ADDQ $0x03, BX
 19517
 19518	// emitRepeat
 19519emit_repeat_again_standalone_emit_copy_short:
 19520	MOVL DX, SI // SI = remaining length before the -4 bias
 19521	LEAL -4(DX), DX
 19522	CMPL SI, $0x08
 19523	JBE  repeat_two_standalone_emit_copy_short
 19524	CMPL SI, $0x0c
 19525	JAE  cant_repeat_two_offset_standalone_emit_copy_short
 19526	CMPL CX, $0x00000800
 19527	JB   repeat_two_offset_standalone_emit_copy_short
 19528
 19529cant_repeat_two_offset_standalone_emit_copy_short:
 19530	CMPL DX, $0x00000104
 19531	JB   repeat_three_standalone_emit_copy_short
 19532	CMPL DX, $0x00010100
 19533	JB   repeat_four_standalone_emit_copy_short
 19534	CMPL DX, $0x0100ffff
 19535	JB   repeat_five_standalone_emit_copy_short
 19536	LEAL -16842747(DX), DX // DX -= 0x0100fffb, then emit bytes 1d 00 fb ff ff and loop
 19537	MOVL $0xfffb001d, (AX)
 19538	MOVB $0xff, 4(AX)
 19539	ADDQ $0x05, AX
 19540	ADDQ $0x05, BX
 19541	JMP  emit_repeat_again_standalone_emit_copy_short
 19542
 19543repeat_five_standalone_emit_copy_short:
 19544	LEAL -65536(DX), DX
 19545	MOVL DX, CX
 19546	MOVW $0x001d, (AX)
 19547	MOVW DX, 2(AX)
 19548	SARL $0x10, CX
 19549	MOVB CL, 4(AX)
 19550	ADDQ $0x05, BX
 19551	ADDQ $0x05, AX
 19552	JMP  gen_emit_copy_end
 19553
 19554repeat_four_standalone_emit_copy_short:
 19555	LEAL -256(DX), DX
 19556	MOVW $0x0019, (AX)
 19557	MOVW DX, 2(AX)
 19558	ADDQ $0x04, BX
 19559	ADDQ $0x04, AX
 19560	JMP  gen_emit_copy_end
 19561
 19562repeat_three_standalone_emit_copy_short:
 19563	LEAL -4(DX), DX
 19564	MOVW $0x0015, (AX)
 19565	MOVB DL, 2(AX)
 19566	ADDQ $0x03, BX
 19567	ADDQ $0x03, AX
 19568	JMP  gen_emit_copy_end
 19569
 19570repeat_two_standalone_emit_copy_short:
 19571	SHLL $0x02, DX
 19572	ORL  $0x01, DX
 19573	MOVW DX, (AX)
 19574	ADDQ $0x02, BX
 19575	ADDQ $0x02, AX
 19576	JMP  gen_emit_copy_end
 19577
 19578repeat_two_offset_standalone_emit_copy_short:
 19579	XORQ SI, SI
 19580	LEAL 1(SI)(DX*4), DX
 19581	MOVB CL, 1(AX)
 19582	SARL $0x08, CX
 19583	SHLL $0x05, CX
 19584	ORL  CX, DX
 19585	MOVB DL, (AX)
 19586	ADDQ $0x02, BX
 19587	ADDQ $0x02, AX
 19588	JMP  gen_emit_copy_end
 19589
 19590two_byte_offset_short_standalone:
 19591	MOVL DX, SI
 19592	SHLL $0x02, SI // SI = length*4, base for either tag below
 19593	CMPL DX, $0x0c
 19594	JAE  emit_copy_three_standalone // length >= 12 needs the 3-byte form
 19595	CMPL CX, $0x00000800
 19596	JAE  emit_copy_three_standalone // offset >= 2048 needs the 3-byte form
 19597	LEAL -15(SI), SI // SI = 4*length - 15, i.e. (length-4)<<2 | 1
 19598	MOVB CL, 1(AX) // second byte = low 8 bits of offset
 19599	SHRL $0x08, CX
 19600	SHLL $0x05, CX // offset bits 8..10 -> bits 5..7 of the first byte
 19601	ORL  CX, SI
 19602	MOVB SI, (AX)
 19603	ADDQ $0x02, BX
 19604	ADDQ $0x02, AX
 19605	JMP  gen_emit_copy_end
 19606
 19607emit_copy_three_standalone:
 19608	LEAL -2(SI), SI // SI = 4*length - 2, i.e. (length-1)<<2 | 2
 19609	MOVB SI, (AX)
 19610	MOVW CX, 1(AX) // 16-bit offset follows the tag
 19611	ADDQ $0x03, BX
 19612	ADDQ $0x03, AX
 19613
 19614gen_emit_copy_end:
 19615	MOVQ BX, ret+40(FP) // return total bytes written
 19616	RET
 19617
 19618// func emitCopyNoRepeat(dst []byte, offset int, length int) int
      // Writes copy ops for (offset, length) at dst_base without using the repeat
      // encoder: long lengths are split into several copy ops in a loop. Returns
      // the number of bytes written.
      // Registers: AX = write cursor, BX = bytes written so far (the result),
      // CX = offset, DX = remaining length, SI = scratch.
      // Only dst_base is loaded; dst_len is never read, so nothing in this routine
      // checks that dst has room for the output.
 19619TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
 19620	XORQ BX, BX // BX = 0 bytes written
 19621	MOVQ dst_base+0(FP), AX
 19622	MOVQ offset+24(FP), CX
 19623	MOVQ length+32(FP), DX
 19624
 19625	// emitCopy
 19626	CMPL CX, $0x00010000
 19627	JB   two_byte_offset_standalone_snappy // offset < 65536: forms with a 1- or 2-byte offset
 19628
 19629four_bytes_loop_back_standalone_snappy:
 19630	CMPL DX, $0x40
 19631	JBE  four_bytes_remain_standalone_snappy // length <= 64 fits one 5-byte op
 19632	MOVB $0xff, (AX) // 5-byte op accounting for 64 bytes: tag 0xff + 32-bit offset
 19633	MOVL CX, 1(AX)
 19634	LEAL -64(DX), DX
 19635	ADDQ $0x05, BX
 19636	ADDQ $0x05, AX
 19637	CMPL DX, $0x04
 19638	JB   four_bytes_remain_standalone_snappy // 0..3 bytes left: finish with one last op (or none)
 19639	JMP  four_bytes_loop_back_standalone_snappy
 19640
 19641four_bytes_remain_standalone_snappy:
 19642	TESTL DX, DX
 19643	JZ    gen_emit_copy_end_snappy // nothing left to encode
 19644	XORL  SI, SI
 19645	LEAL  -1(SI)(DX*4), DX // tag = 4*length - 1, i.e. (length-1)<<2 | 3
 19646	MOVB  DL, (AX)
 19647	MOVL  CX, 1(AX) // 32-bit offset follows the tag
 19648	ADDQ  $0x05, BX
 19649	ADDQ  $0x05, AX
 19650	JMP   gen_emit_copy_end_snappy
 19651
 19652two_byte_offset_standalone_snappy:
 19653	CMPL DX, $0x40
 19654	JBE  two_byte_offset_short_standalone_snappy // length <= 64: one final op
 19655	MOVB $0xee, (AX) // 3-byte op accounting for 60 bytes: tag 0xee + 16-bit offset
 19656	MOVW CX, 1(AX)
 19657	LEAL -60(DX), DX
 19658	ADDQ $0x03, AX
 19659	ADDQ $0x03, BX
 19660	JMP  two_byte_offset_standalone_snappy // loop until length <= 64
 19661
 19662two_byte_offset_short_standalone_snappy:
 19663	MOVL DX, SI
 19664	SHLL $0x02, SI // SI = length*4, base for either tag below
 19665	CMPL DX, $0x0c
 19666	JAE  emit_copy_three_standalone_snappy // length >= 12 needs the 3-byte form
 19667	CMPL CX, $0x00000800
 19668	JAE  emit_copy_three_standalone_snappy // offset >= 2048 needs the 3-byte form
 19669	LEAL -15(SI), SI // SI = 4*length - 15, i.e. (length-4)<<2 | 1
 19670	MOVB CL, 1(AX) // second byte = low 8 bits of offset
 19671	SHRL $0x08, CX
 19672	SHLL $0x05, CX // offset bits 8..10 -> bits 5..7 of the first byte
 19673	ORL  CX, SI
 19674	MOVB SI, (AX)
 19675	ADDQ $0x02, BX
 19676	ADDQ $0x02, AX
 19677	JMP  gen_emit_copy_end_snappy
 19678
 19679emit_copy_three_standalone_snappy:
 19680	LEAL -2(SI), SI // SI = 4*length - 2, i.e. (length-1)<<2 | 2
 19681	MOVB SI, (AX)
 19682	MOVW CX, 1(AX) // 16-bit offset follows the tag
 19683	ADDQ $0x03, BX
 19684	ADDQ $0x03, AX
 19685
 19686gen_emit_copy_end_snappy:
 19687	MOVQ BX, ret+40(FP) // return total bytes written
 19688	RET
 19689
 19690// func matchLen(a []byte, b []byte) int
 19691// Requires: BMI
      // Returns the number of leading bytes that are equal in a and b, examining
      // at most a_len bytes.
      // Registers: AX = a base, CX = b base, DX = bytes of a left to compare,
      // SI = matched byte count (the result), BX/DI = scratch.
      // Only a_len is read, and it is compared with 32-bit CMPL; b_len is never
      // loaded. NOTE(review): this presumably relies on callers passing
      // len(b) >= len(a) — confirm against the Go call sites.
 19692TEXT ·matchLen(SB), NOSPLIT, $0-56
 19693	MOVQ a_base+0(FP), AX
 19694	MOVQ b_base+24(FP), CX
 19695	MOVQ a_len+8(FP), DX
 19696
 19697	// matchLen
 19698	XORL SI, SI // SI = 0 bytes matched
 19699
 19700matchlen_loopback_16_standalone:
 19701	CMPL DX, $0x10
 19702	JB   matchlen_match8_standalone // fewer than 16 bytes left
 19703	MOVQ (AX)(SI*1), BX // load both 8-byte words of a up front
 19704	MOVQ 8(AX)(SI*1), DI
 19705	XORQ (CX)(SI*1), BX // nonzero bits mark differing bytes in the first word
 19706	JNZ  matchlen_bsf_8_standalone
 19707	XORQ 8(CX)(SI*1), DI
 19708	JNZ  matchlen_bsf_16standalone
 19709	LEAL -16(DX), DX // all 16 bytes matched: advance and loop
 19710	LEAL 16(SI), SI
 19711	JMP  matchlen_loopback_16_standalone
 19712
 19713matchlen_bsf_16standalone:
      	// DI is nonzero here (reached via JNZ); find its lowest set bit.
 19714#ifdef GOAMD64_v3
 19715	TZCNTQ DI, DI
 19716
 19717#else
 19718	BSFQ DI, DI
 19719
 19720#endif
 19721	SARQ $0x03, DI // bit index -> index of first differing byte
 19722	LEAL 8(SI)(DI*1), SI // first word (8 bytes) matched, plus DI bytes of the second
 19723	JMP  gen_match_len_end
 19724
 19725matchlen_match8_standalone:
 19726	CMPL DX, $0x08
 19727	JB   matchlen_match4_standalone // fewer than 8 bytes left
 19728	MOVQ (AX)(SI*1), BX
 19729	XORQ (CX)(SI*1), BX
 19730	JNZ  matchlen_bsf_8_standalone
 19731	LEAL -8(DX), DX // 8 bytes matched
 19732	LEAL 8(SI), SI
 19733	JMP  matchlen_match4_standalone
 19734
 19735matchlen_bsf_8_standalone:
      	// BX is nonzero here (reached via JNZ); find its lowest set bit.
 19736#ifdef GOAMD64_v3
 19737	TZCNTQ BX, BX
 19738
 19739#else
 19740	BSFQ BX, BX
 19741
 19742#endif
 19743	SARQ $0x03, BX // bit index -> index of first differing byte
 19744	LEAL (SI)(BX*1), SI
 19745	JMP  gen_match_len_end
 19746
 19747matchlen_match4_standalone:
 19748	CMPL DX, $0x04
 19749	JB   matchlen_match2_standalone
 19750	MOVL (AX)(SI*1), BX
 19751	CMPL (CX)(SI*1), BX
 19752	JNE  matchlen_match2_standalone // mismatch within these 4 bytes: narrow down with 2- and 1-byte checks
 19753	LEAL -4(DX), DX // 4 bytes matched
 19754	LEAL 4(SI), SI
 19755
 19756matchlen_match2_standalone:
 19757	CMPL DX, $0x01
 19758	JE   matchlen_match1_standalone // exactly one byte left
 19759	JB   gen_match_len_end // nothing left (flags still from the CMPL above)
 19760	MOVW (AX)(SI*1), BX
 19761	CMPW (CX)(SI*1), BX
 19762	JNE  matchlen_match1_standalone // the first of the two bytes may still match
 19763	LEAL 2(SI), SI // 2 bytes matched (LEAL leaves flags untouched)
 19764	SUBL $0x02, DX
 19765	JZ   gen_match_len_end // a is exhausted
 19766
 19767matchlen_match1_standalone:
 19768	MOVB (AX)(SI*1), BL
 19769	CMPB (CX)(SI*1), BL
 19770	JNE  gen_match_len_end
 19771	LEAL 1(SI), SI // final byte matched
 19772
 19773gen_match_len_end:
 19774	MOVQ SI, ret+48(FP) // return matched byte count
 19775	RET
 19776
 19777// func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
 19778// Requires: SSE2
      // Walks the LZ4 block in src sequence by sequence and re-emits each literal
      // run and match into dst using the literal / copy / repeat encoders inlined
      // below.
      // Results: on success, uncompressed = total literal + match bytes described
      // by src, dstUsed = bytes written to dst. On error uncompressed is -1
      // (lz4_s2_corrupt) or -2 (lz4_s2_dstfull) and dstUsed is not written.
      // Registers: AX = dst write cursor, CX = dst limit (dst_base+dst_len-10),
      // DX = src read cursor, BX = src end, SI = uncompressed byte count,
      // DI = offset of the previous match (0 until the first match),
      // R9 = literal length, then match offset, R10 = match length,
      // R8, R11-R15, X0-X5 = scratch.
 19779TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64
 19780	XORQ SI, SI // SI = 0 uncompressed bytes so far
 19781	MOVQ dst_base+0(FP), AX
 19782	MOVQ dst_len+8(FP), CX
 19783	MOVQ src_base+24(FP), DX
 19784	MOVQ src_len+32(FP), BX
 19785	LEAQ (DX)(BX*1), BX // BX = src end
 19786	LEAQ -10(AX)(CX*1), CX // CX = dst end - 10: writes stop once AX reaches this
 19787	XORQ DI, DI // no previous offset yet (a zero offset is rejected below, so 0 never matches)
 19788
 19789lz4_s2_loop:
 19790	CMPQ    DX, BX
 19791	JAE     lz4_s2_corrupt // ran out of input at a sequence boundary
 19792	CMPQ    AX, CX
 19793	JAE     lz4_s2_dstfull
 19794	MOVBQZX (DX), R8 // token byte
 19795	MOVQ    R8, R9
 19796	MOVQ    R8, R10
 19797	SHRQ    $0x04, R9 // R9 = high nibble: literal length
 19798	ANDQ    $0x0f, R10 // R10 = low nibble: match length (before the +4 below)
 19799	CMPQ    R8, $0xf0
 19800	JB      lz4_s2_ll_end // high nibble < 15: no length extension bytes
 19801
 19802lz4_s2_ll_loop:
 19803	INCQ    DX
 19804	CMPQ    DX, BX
 19805	JAE     lz4_s2_corrupt
 19806	MOVBQZX (DX), R8
 19807	ADDQ    R8, R9 // accumulate extension byte into the literal length
 19808	CMPQ    R8, $0xff
 19809	JEQ     lz4_s2_ll_loop // 0xff means another extension byte follows
 19810
 19811lz4_s2_ll_end:
 19812	LEAQ  (DX)(R9*1), R8 // R8 = position of the last literal byte (DX is still on the last length byte)
 19813	ADDQ  $0x04, R10 // match length = nibble + 4
 19814	CMPQ  R8, BX
 19815	JAE   lz4_s2_corrupt // literals would run past the end of src
 19816	INCQ  DX // DX -> first literal byte
 19817	INCQ  R8 // R8 -> first byte after the literals
 19818	TESTQ R9, R9
 19819	JZ    lz4_s2_lits_done // no literals in this sequence
 19820	LEAQ  (AX)(R9*1), R11
 19821	CMPQ  R11, CX
 19822	JAE   lz4_s2_dstfull // literals alone would reach the dst limit
 19823	ADDQ  R9, SI
      	// Emit the literal header; its size depends on R11 = length - 1.
 19824	LEAL  -1(R9), R11
 19825	CMPL  R11, $0x3c
 19826	JB    one_byte_lz4_s2 // length-1 < 60: stored in the tag byte itself
 19827	CMPL  R11, $0x00000100
 19828	JB    two_bytes_lz4_s2
 19829	CMPL  R11, $0x00010000
 19830	JB    three_bytes_lz4_s2
 19831	CMPL  R11, $0x01000000
 19832	JB    four_bytes_lz4_s2
 19833	MOVB  $0xfc, (AX) // 5-byte header: tag 0xfc + 32-bit (length-1)
 19834	MOVL  R11, 1(AX)
 19835	ADDQ  $0x05, AX
 19836	JMP   memmove_long_lz4_s2
 19837
 19838four_bytes_lz4_s2:
 19839	MOVL R11, R12
 19840	SHRL $0x10, R12
 19841	MOVB $0xf8, (AX) // 4-byte header: tag 0xf8 + 24-bit (length-1)
 19842	MOVW R11, 1(AX)
 19843	MOVB R12, 3(AX)
 19844	ADDQ $0x04, AX
 19845	JMP  memmove_long_lz4_s2
 19846
 19847three_bytes_lz4_s2:
 19848	MOVB $0xf4, (AX) // 3-byte header: tag 0xf4 + 16-bit (length-1)
 19849	MOVW R11, 1(AX)
 19850	ADDQ $0x03, AX
 19851	JMP  memmove_long_lz4_s2
 19852
 19853two_bytes_lz4_s2:
 19854	MOVB $0xf0, (AX) // 2-byte header: tag 0xf0 + 8-bit (length-1)
 19855	MOVB R11, 1(AX)
 19856	ADDQ $0x02, AX
 19857	CMPL R11, $0x40
 19858	JB   memmove_lz4_s2 // length <= 64: short copy
 19859	JMP  memmove_long_lz4_s2
 19860
 19861one_byte_lz4_s2:
 19862	SHLB $0x02, R11 // tag = (length-1)<<2
 19863	MOVB R11, (AX)
 19864	ADDQ $0x01, AX
 19865
 19866memmove_lz4_s2:
 19867	LEAQ (AX)(R9*1), R11 // R11 = dst position after the literals
 19868
 19869	// genMemMoveShort
      	// R9 is 1..64 here. Each variant copies the head and tail of the run with
      	// possibly overlapping loads/stores.
 19870	CMPQ R9, $0x08
 19871	JBE  emit_lit_memmove_lz4_s2_memmove_move_8
 19872	CMPQ R9, $0x10
 19873	JBE  emit_lit_memmove_lz4_s2_memmove_move_8through16
 19874	CMPQ R9, $0x20
 19875	JBE  emit_lit_memmove_lz4_s2_memmove_move_17through32
 19876	JMP  emit_lit_memmove_lz4_s2_memmove_move_33through64
 19877
 19878emit_lit_memmove_lz4_s2_memmove_move_8:
      	// Always moves a full 8 bytes even when R9 < 8; AX is then set to R11, so
      	// only R9 of them count. NOTE(review): the 8-byte load can read past the
      	// literal run — presumably safe because of slack in src; confirm with caller.
 19879	MOVQ (DX), R12
 19880	MOVQ R12, (AX)
 19881	JMP  memmove_end_copy_lz4_s2
 19882
 19883emit_lit_memmove_lz4_s2_memmove_move_8through16:
 19884	MOVQ (DX), R12
 19885	MOVQ -8(DX)(R9*1), DX // DX is clobbered with data; it is reloaded from R8 at lz4_s2_lits_emit_done
 19886	MOVQ R12, (AX)
 19887	MOVQ DX, -8(AX)(R9*1)
 19888	JMP  memmove_end_copy_lz4_s2
 19889
 19890emit_lit_memmove_lz4_s2_memmove_move_17through32:
 19891	MOVOU (DX), X0
 19892	MOVOU -16(DX)(R9*1), X1
 19893	MOVOU X0, (AX)
 19894	MOVOU X1, -16(AX)(R9*1)
 19895	JMP   memmove_end_copy_lz4_s2
 19896
 19897emit_lit_memmove_lz4_s2_memmove_move_33through64:
 19898	MOVOU (DX), X0
 19899	MOVOU 16(DX), X1
 19900	MOVOU -32(DX)(R9*1), X2
 19901	MOVOU -16(DX)(R9*1), X3
 19902	MOVOU X0, (AX)
 19903	MOVOU X1, 16(AX)
 19904	MOVOU X2, -32(AX)(R9*1)
 19905	MOVOU X3, -16(AX)(R9*1)
 19906
 19907memmove_end_copy_lz4_s2:
 19908	MOVQ R11, AX // advance the dst cursor past the literals
 19909	JMP  lz4_s2_lits_emit_done
 19910
 19911memmove_long_lz4_s2:
 19912	LEAQ (AX)(R9*1), R11 // R11 = dst position after the literals
 19913
 19914	// genMemMoveLong
      	// The first and last 32 bytes are loaded into X0-X3 up front and stored
      	// last; the middle is copied in 32-byte steps using aligned stores (MOVOA).
 19915	MOVOU (DX), X0
 19916	MOVOU 16(DX), X1
 19917	MOVOU -32(DX)(R9*1), X2
 19918	MOVOU -16(DX)(R9*1), X3
 19919	MOVQ  R9, R13
 19920	SHRQ  $0x05, R13 // R13 = length / 32
 19921	MOVQ  AX, R12
 19922	ANDL  $0x0000001f, R12 // R12 = AX mod 32
 19923	MOVQ  $0x00000040, R14
 19924	SUBQ  R12, R14 // R14 = 64 - (AX mod 32), so AX+R14-32 is 32-byte aligned
 19925	DECQ  R13
 19926	JA    emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32
 19927	LEAQ  -32(DX)(R14*1), R12 // R12 = src pointer matching the aligned dst pointer in R15
 19928	LEAQ  -32(AX)(R14*1), R15
 19929
 19930emit_lit_memmove_long_lz4_s2large_big_loop_back:
 19931	MOVOU (R12), X4
 19932	MOVOU 16(R12), X5
 19933	MOVOA X4, (R15)
 19934	MOVOA X5, 16(R15)
 19935	ADDQ  $0x20, R15
 19936	ADDQ  $0x20, R12
 19937	ADDQ  $0x20, R14
 19938	DECQ  R13
 19939	JNA   emit_lit_memmove_long_lz4_s2large_big_loop_back
 19940
 19941emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32:
 19942	MOVOU -32(DX)(R14*1), X4
 19943	MOVOU -16(DX)(R14*1), X5
 19944	MOVOA X4, -32(AX)(R14*1)
 19945	MOVOA X5, -16(AX)(R14*1)
 19946	ADDQ  $0x20, R14
 19947	CMPQ  R9, R14
 19948	JAE   emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32 // continue while R14 <= length
 19949	MOVOU X0, (AX) // finally store the preloaded first 32 bytes ...
 19950	MOVOU X1, 16(AX)
 19951	MOVOU X2, -32(AX)(R9*1) // ... and the preloaded last 32 bytes
 19952	MOVOU X3, -16(AX)(R9*1)
 19953	MOVQ  R11, AX // advance the dst cursor past the literals
 19954
 19955lz4_s2_lits_emit_done:
 19956	MOVQ R8, DX // src cursor -> first byte after the literals
 19957
 19958lz4_s2_lits_done:
 19959	CMPQ DX, BX
 19960	JNE  lz4_s2_match
      	// src is exhausted right after the literals: accepted only if the match
      	// nibble was 0 (R10 == 0 + 4).
 19961	CMPQ R10, $0x04
 19962	JEQ  lz4_s2_done
 19963	JMP  lz4_s2_corrupt
 19964
 19965lz4_s2_match:
 19966	LEAQ    2(DX), R8
 19967	CMPQ    R8, BX
 19968	JAE     lz4_s2_corrupt // need the 2 offset bytes plus at least one more input byte
 19969	MOVWQZX (DX), R9 // R9 = 16-bit match offset
 19970	MOVQ    R8, DX
 19971	TESTQ   R9, R9
 19972	JZ      lz4_s2_corrupt // offset 0 is invalid
 19973	CMPQ    R9, SI
 19974	JA      lz4_s2_corrupt // offset reaches before the start of the output
 19975	CMPQ    R10, $0x13
 19976	JNE     lz4_s2_ml_done // nibble < 15: no length extension bytes
 19977
 19978lz4_s2_ml_loop:
 19979	MOVBQZX (DX), R8
 19980	INCQ    DX
 19981	ADDQ    R8, R10 // accumulate extension byte into the match length
 19982	CMPQ    DX, BX
 19983	JAE     lz4_s2_corrupt
 19984	CMPQ    R8, $0xff
 19985	JEQ     lz4_s2_ml_loop // 0xff means another extension byte follows
 19986
 19987lz4_s2_ml_done:
 19988	ADDQ R10, SI
 19989	CMPQ R9, DI
 19990	JNE  lz4_s2_docopy // offset differs from the previous match: emit a copy
 19991
 19992	// emitRepeat
      	// Same offset as the previous match: encode as a repeat.
      	// R10 = length, R9 = offset, AX = write cursor. Every terminal case jumps
      	// back to lz4_s2_loop for the next sequence.
 19993emit_repeat_again_lz4_s2:
 19994	MOVL R10, R8 // R8 = length before the -4 bias
 19995	LEAL -4(R10), R10 // R10 = length - 4
 19996	CMPL R8, $0x08
 19997	JBE  repeat_two_lz4_s2 // length <= 8: 2-byte form without offset bits
 19998	CMPL R8, $0x0c
 19999	JAE  cant_repeat_two_offset_lz4_s2
 20000	CMPL R9, $0x00000800
 20001	JB   repeat_two_offset_lz4_s2 // 9 <= length <= 11 and offset < 2048
 20002
 20003cant_repeat_two_offset_lz4_s2:
 20004	CMPL R10, $0x00000104
 20005	JB   repeat_three_lz4_s2
 20006	CMPL R10, $0x00010100
 20007	JB   repeat_four_lz4_s2
 20008	CMPL R10, $0x0100ffff
 20009	JB   repeat_five_lz4_s2
 20010	LEAL -16842747(R10), R10 // R10 -= 0x0100fffb, then emit bytes 1d 00 fb ff ff and loop
 20011	MOVL $0xfffb001d, (AX)
 20012	MOVB $0xff, 4(AX)
 20013	ADDQ $0x05, AX
 20014	JMP  emit_repeat_again_lz4_s2
 20015
 20016repeat_five_lz4_s2:
 20017	LEAL -65536(R10), R10
 20018	MOVL R10, R9 // offset in R9 is no longer needed; reuse as scratch
 20019	MOVW $0x001d, (AX)
 20020	MOVW R10, 2(AX)
 20021	SARL $0x10, R9
 20022	MOVB R9, 4(AX)
 20023	ADDQ $0x05, AX
 20024	JMP  lz4_s2_loop
 20025
 20026repeat_four_lz4_s2:
 20027	LEAL -256(R10), R10
 20028	MOVW $0x0019, (AX)
 20029	MOVW R10, 2(AX)
 20030	ADDQ $0x04, AX
 20031	JMP  lz4_s2_loop
 20032
 20033repeat_three_lz4_s2:
 20034	LEAL -4(R10), R10
 20035	MOVW $0x0015, (AX)
 20036	MOVB R10, 2(AX)
 20037	ADDQ $0x03, AX
 20038	JMP  lz4_s2_loop
 20039
 20040repeat_two_lz4_s2:
 20041	SHLL $0x02, R10
 20042	ORL  $0x01, R10 // R10 = (length-4)<<2 | 1
 20043	MOVW R10, (AX)
 20044	ADDQ $0x02, AX
 20045	JMP  lz4_s2_loop
 20046
 20047repeat_two_offset_lz4_s2:
 20048	XORQ R8, R8
 20049	LEAL 1(R8)(R10*4), R10 // R10 = (length-4)<<2 | 1 (R8 is zero)
 20050	MOVB R9, 1(AX) // second byte = low 8 bits of offset
 20051	SARL $0x08, R9
 20052	SHLL $0x05, R9 // offset bits 8..10 -> bits 5..7 of the first byte
 20053	ORL  R9, R10
 20054	MOVB R10, (AX)
 20055	ADDQ $0x02, AX
 20056	JMP  lz4_s2_loop
 20057
 20058lz4_s2_docopy:
 20059	MOVQ R9, DI // remember this offset for repeat detection on the next match
 20060
 20061	// emitCopy
      	// R9 was loaded with MOVWQZX, so it is at most 65535 and only the 1- and
      	// 2-byte-offset copy forms appear here.
 20062	CMPL R10, $0x40
 20063	JBE  two_byte_offset_short_lz4_s2 // length <= 64: a single op is enough
 20064	CMPL R9, $0x00000800
 20065	JAE  long_offset_short_lz4_s2 // offset >= 2048: start with a 3-byte op
      	// offset < 2048 and length > 64: 2-byte op accounting for 8 bytes, rest as repeats.
 20066	MOVL $0x00000001, R8
 20067	LEAL 16(R8), R8 // R8 = 17 = (8-4)<<2 | 1
 20068	MOVB R9, 1(AX)
 20069	MOVL R9, R11
 20070	SHRL $0x08, R11
 20071	SHLL $0x05, R11
 20072	ORL  R11, R8
 20073	MOVB R8, (AX)
 20074	ADDQ $0x02, AX
 20075	SUBL $0x08, R10
 20076
 20077	// emitRepeat
      	// The remaining length is > 56 here, so neither 2-byte repeat form can
      	// apply; apply the -4 bias and enter the encoder past those checks.
 20078	LEAL -4(R10), R10
 20079	JMP  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
 20080
 20081emit_repeat_again_lz4_s2_emit_copy_short_2b:
 20082	MOVL R10, R8
 20083	LEAL -4(R10), R10
 20084	CMPL R8, $0x08
 20085	JBE  repeat_two_lz4_s2_emit_copy_short_2b
 20086	CMPL R8, $0x0c
 20087	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
 20088	CMPL R9, $0x00000800
 20089	JB   repeat_two_offset_lz4_s2_emit_copy_short_2b
 20090
 20091cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
 20092	CMPL R10, $0x00000104
 20093	JB   repeat_three_lz4_s2_emit_copy_short_2b
 20094	CMPL R10, $0x00010100
 20095	JB   repeat_four_lz4_s2_emit_copy_short_2b
 20096	CMPL R10, $0x0100ffff
 20097	JB   repeat_five_lz4_s2_emit_copy_short_2b
 20098	LEAL -16842747(R10), R10 // R10 -= 0x0100fffb, then emit bytes 1d 00 fb ff ff and loop
 20099	MOVL $0xfffb001d, (AX)
 20100	MOVB $0xff, 4(AX)
 20101	ADDQ $0x05, AX
 20102	JMP  emit_repeat_again_lz4_s2_emit_copy_short_2b
 20103
 20104repeat_five_lz4_s2_emit_copy_short_2b:
 20105	LEAL -65536(R10), R10
 20106	MOVL R10, R9
 20107	MOVW $0x001d, (AX)
 20108	MOVW R10, 2(AX)
 20109	SARL $0x10, R9
 20110	MOVB R9, 4(AX)
 20111	ADDQ $0x05, AX
 20112	JMP  lz4_s2_loop
 20113
 20114repeat_four_lz4_s2_emit_copy_short_2b:
 20115	LEAL -256(R10), R10
 20116	MOVW $0x0019, (AX)
 20117	MOVW R10, 2(AX)
 20118	ADDQ $0x04, AX
 20119	JMP  lz4_s2_loop
 20120
 20121repeat_three_lz4_s2_emit_copy_short_2b:
 20122	LEAL -4(R10), R10
 20123	MOVW $0x0015, (AX)
 20124	MOVB R10, 2(AX)
 20125	ADDQ $0x03, AX
 20126	JMP  lz4_s2_loop
 20127
 20128repeat_two_lz4_s2_emit_copy_short_2b:
 20129	SHLL $0x02, R10
 20130	ORL  $0x01, R10
 20131	MOVW R10, (AX)
 20132	ADDQ $0x02, AX
 20133	JMP  lz4_s2_loop
 20134
 20135repeat_two_offset_lz4_s2_emit_copy_short_2b:
 20136	XORQ R8, R8
 20137	LEAL 1(R8)(R10*4), R10
 20138	MOVB R9, 1(AX)
 20139	SARL $0x08, R9
 20140	SHLL $0x05, R9
 20141	ORL  R9, R10
 20142	MOVB R10, (AX)
 20143	ADDQ $0x02, AX
 20144	JMP  lz4_s2_loop
 20145
 20146long_offset_short_lz4_s2:
      	// offset >= 2048 and length > 64: 3-byte op (tag 0xee + 16-bit offset)
      	// accounting for 60 bytes, rest as repeats.
 20147	MOVB $0xee, (AX)
 20148	MOVW R9, 1(AX)
 20149	LEAL -60(R10), R10
 20150	ADDQ $0x03, AX
 20151
 20152	// emitRepeat
 20153emit_repeat_again_lz4_s2_emit_copy_short:
 20154	MOVL R10, R8
 20155	LEAL -4(R10), R10
 20156	CMPL R8, $0x08
 20157	JBE  repeat_two_lz4_s2_emit_copy_short
 20158	CMPL R8, $0x0c
 20159	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short
 20160	CMPL R9, $0x00000800
 20161	JB   repeat_two_offset_lz4_s2_emit_copy_short
 20162
 20163cant_repeat_two_offset_lz4_s2_emit_copy_short:
 20164	CMPL R10, $0x00000104
 20165	JB   repeat_three_lz4_s2_emit_copy_short
 20166	CMPL R10, $0x00010100
 20167	JB   repeat_four_lz4_s2_emit_copy_short
 20168	CMPL R10, $0x0100ffff
 20169	JB   repeat_five_lz4_s2_emit_copy_short
 20170	LEAL -16842747(R10), R10 // R10 -= 0x0100fffb, then emit bytes 1d 00 fb ff ff and loop
 20171	MOVL $0xfffb001d, (AX)
 20172	MOVB $0xff, 4(AX)
 20173	ADDQ $0x05, AX
 20174	JMP  emit_repeat_again_lz4_s2_emit_copy_short
 20175
 20176repeat_five_lz4_s2_emit_copy_short:
 20177	LEAL -65536(R10), R10
 20178	MOVL R10, R9
 20179	MOVW $0x001d, (AX)
 20180	MOVW R10, 2(AX)
 20181	SARL $0x10, R9
 20182	MOVB R9, 4(AX)
 20183	ADDQ $0x05, AX
 20184	JMP  lz4_s2_loop
 20185
 20186repeat_four_lz4_s2_emit_copy_short:
 20187	LEAL -256(R10), R10
 20188	MOVW $0x0019, (AX)
 20189	MOVW R10, 2(AX)
 20190	ADDQ $0x04, AX
 20191	JMP  lz4_s2_loop
 20192
 20193repeat_three_lz4_s2_emit_copy_short:
 20194	LEAL -4(R10), R10
 20195	MOVW $0x0015, (AX)
 20196	MOVB R10, 2(AX)
 20197	ADDQ $0x03, AX
 20198	JMP  lz4_s2_loop
 20199
 20200repeat_two_lz4_s2_emit_copy_short:
 20201	SHLL $0x02, R10
 20202	ORL  $0x01, R10
 20203	MOVW R10, (AX)
 20204	ADDQ $0x02, AX
 20205	JMP  lz4_s2_loop
 20206
 20207repeat_two_offset_lz4_s2_emit_copy_short:
 20208	XORQ R8, R8
 20209	LEAL 1(R8)(R10*4), R10
 20210	MOVB R9, 1(AX)
 20211	SARL $0x08, R9
 20212	SHLL $0x05, R9
 20213	ORL  R9, R10
 20214	MOVB R10, (AX)
 20215	ADDQ $0x02, AX
 20216	JMP  lz4_s2_loop
 20217
 20218two_byte_offset_short_lz4_s2:
 20219	MOVL R10, R8
 20220	SHLL $0x02, R8 // R8 = length*4, base for either tag below
 20221	CMPL R10, $0x0c
 20222	JAE  emit_copy_three_lz4_s2 // length >= 12 needs the 3-byte form
 20223	CMPL R9, $0x00000800
 20224	JAE  emit_copy_three_lz4_s2 // offset >= 2048 needs the 3-byte form
 20225	LEAL -15(R8), R8 // R8 = 4*length - 15, i.e. (length-4)<<2 | 1
 20226	MOVB R9, 1(AX)
 20227	SHRL $0x08, R9
 20228	SHLL $0x05, R9
 20229	ORL  R9, R8
 20230	MOVB R8, (AX)
 20231	ADDQ $0x02, AX
 20232	JMP  lz4_s2_loop
 20233
 20234emit_copy_three_lz4_s2:
 20235	LEAL -2(R8), R8 // R8 = 4*length - 2, i.e. (length-1)<<2 | 2
 20236	MOVB R8, (AX)
 20237	MOVW R9, 1(AX) // 16-bit offset follows the tag
 20238	ADDQ $0x03, AX
 20239	JMP  lz4_s2_loop
 20240
 20241lz4_s2_done:
 20242	MOVQ dst_base+0(FP), CX
 20243	SUBQ CX, AX // AX = bytes written = cursor - dst_base
 20244	MOVQ SI, uncompressed+48(FP)
 20245	MOVQ AX, dstUsed+56(FP)
 20246	RET
 20247
 20248lz4_s2_corrupt:
      	// Error exit: uncompressed = -1.
      	// NOTE(review): dstUsed+56(FP) is not written on this path.
 20249	XORQ AX, AX
 20250	LEAQ -1(AX), SI
 20251	MOVQ SI, uncompressed+48(FP)
 20252	RET
 20253
 20254lz4_s2_dstfull:
      	// Error exit: uncompressed = -2.
      	// NOTE(review): dstUsed+56(FP) is not written on this path.
 20255	XORQ AX, AX
 20256	LEAQ -2(AX), SI
 20257	MOVQ SI, uncompressed+48(FP)
 20258	RET
 20259
 20260// func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
 20261// Requires: SSE2
 20262TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64
 20263	XORQ SI, SI
 20264	MOVQ dst_base+0(FP), AX
 20265	MOVQ dst_len+8(FP), CX
 20266	MOVQ src_base+24(FP), DX
 20267	MOVQ src_len+32(FP), BX
 20268	LEAQ (DX)(BX*1), BX
 20269	LEAQ -10(AX)(CX*1), CX
 20270	XORQ DI, DI
 20271
 20272lz4s_s2_loop:
 20273	CMPQ    DX, BX
 20274	JAE     lz4s_s2_corrupt
 20275	CMPQ    AX, CX
 20276	JAE     lz4s_s2_dstfull
 20277	MOVBQZX (DX), R8
 20278	MOVQ    R8, R9
 20279	MOVQ    R8, R10
 20280	SHRQ    $0x04, R9
 20281	ANDQ    $0x0f, R10
 20282	CMPQ    R8, $0xf0
 20283	JB      lz4s_s2_ll_end
 20284
 20285lz4s_s2_ll_loop:
 20286	INCQ    DX
 20287	CMPQ    DX, BX
 20288	JAE     lz4s_s2_corrupt
 20289	MOVBQZX (DX), R8
 20290	ADDQ    R8, R9
 20291	CMPQ    R8, $0xff
 20292	JEQ     lz4s_s2_ll_loop
 20293
 20294lz4s_s2_ll_end:
 20295	LEAQ  (DX)(R9*1), R8
 20296	ADDQ  $0x03, R10
 20297	CMPQ  R8, BX
 20298	JAE   lz4s_s2_corrupt
 20299	INCQ  DX
 20300	INCQ  R8
 20301	TESTQ R9, R9
 20302	JZ    lz4s_s2_lits_done
 20303	LEAQ  (AX)(R9*1), R11
 20304	CMPQ  R11, CX
 20305	JAE   lz4s_s2_dstfull
 20306	ADDQ  R9, SI
 20307	LEAL  -1(R9), R11
 20308	CMPL  R11, $0x3c
 20309	JB    one_byte_lz4s_s2
 20310	CMPL  R11, $0x00000100
 20311	JB    two_bytes_lz4s_s2
 20312	CMPL  R11, $0x00010000
 20313	JB    three_bytes_lz4s_s2
 20314	CMPL  R11, $0x01000000
 20315	JB    four_bytes_lz4s_s2
 20316	MOVB  $0xfc, (AX)
 20317	MOVL  R11, 1(AX)
 20318	ADDQ  $0x05, AX
 20319	JMP   memmove_long_lz4s_s2
 20320
 20321four_bytes_lz4s_s2:
 20322	MOVL R11, R12
 20323	SHRL $0x10, R12
 20324	MOVB $0xf8, (AX)
 20325	MOVW R11, 1(AX)
 20326	MOVB R12, 3(AX)
 20327	ADDQ $0x04, AX
 20328	JMP  memmove_long_lz4s_s2
 20329
 20330three_bytes_lz4s_s2:
 20331	MOVB $0xf4, (AX)
 20332	MOVW R11, 1(AX)
 20333	ADDQ $0x03, AX
 20334	JMP  memmove_long_lz4s_s2
 20335
 20336two_bytes_lz4s_s2:
 20337	MOVB $0xf0, (AX)
 20338	MOVB R11, 1(AX)
 20339	ADDQ $0x02, AX
 20340	CMPL R11, $0x40
 20341	JB   memmove_lz4s_s2
 20342	JMP  memmove_long_lz4s_s2
 20343
 20344one_byte_lz4s_s2:
 20345	SHLB $0x02, R11
 20346	MOVB R11, (AX)
 20347	ADDQ $0x01, AX
 20348
 20349memmove_lz4s_s2:
 20350	LEAQ (AX)(R9*1), R11
 20351
 20352	// genMemMoveShort
 20353	CMPQ R9, $0x08
 20354	JBE  emit_lit_memmove_lz4s_s2_memmove_move_8
 20355	CMPQ R9, $0x10
 20356	JBE  emit_lit_memmove_lz4s_s2_memmove_move_8through16
 20357	CMPQ R9, $0x20
 20358	JBE  emit_lit_memmove_lz4s_s2_memmove_move_17through32
 20359	JMP  emit_lit_memmove_lz4s_s2_memmove_move_33through64
 20360
 20361emit_lit_memmove_lz4s_s2_memmove_move_8:
 20362	MOVQ (DX), R12
 20363	MOVQ R12, (AX)
 20364	JMP  memmove_end_copy_lz4s_s2
 20365
 20366emit_lit_memmove_lz4s_s2_memmove_move_8through16:
 20367	MOVQ (DX), R12
 20368	MOVQ -8(DX)(R9*1), DX
 20369	MOVQ R12, (AX)
 20370	MOVQ DX, -8(AX)(R9*1)
 20371	JMP  memmove_end_copy_lz4s_s2
 20372
 20373emit_lit_memmove_lz4s_s2_memmove_move_17through32:
 20374	MOVOU (DX), X0
 20375	MOVOU -16(DX)(R9*1), X1
 20376	MOVOU X0, (AX)
 20377	MOVOU X1, -16(AX)(R9*1)
 20378	JMP   memmove_end_copy_lz4s_s2
 20379
 20380emit_lit_memmove_lz4s_s2_memmove_move_33through64:
 20381	MOVOU (DX), X0
 20382	MOVOU 16(DX), X1
 20383	MOVOU -32(DX)(R9*1), X2
 20384	MOVOU -16(DX)(R9*1), X3
 20385	MOVOU X0, (AX)
 20386	MOVOU X1, 16(AX)
 20387	MOVOU X2, -32(AX)(R9*1)
 20388	MOVOU X3, -16(AX)(R9*1)
 20389
 20390memmove_end_copy_lz4s_s2:
 20391	MOVQ R11, AX
 20392	JMP  lz4s_s2_lits_emit_done
 20393
 20394memmove_long_lz4s_s2:
 20395	LEAQ (AX)(R9*1), R11
 20396
 20397	// genMemMoveLong
 20398	MOVOU (DX), X0
 20399	MOVOU 16(DX), X1
 20400	MOVOU -32(DX)(R9*1), X2
 20401	MOVOU -16(DX)(R9*1), X3
 20402	MOVQ  R9, R13
 20403	SHRQ  $0x05, R13
 20404	MOVQ  AX, R12
 20405	ANDL  $0x0000001f, R12
 20406	MOVQ  $0x00000040, R14
 20407	SUBQ  R12, R14
 20408	DECQ  R13
 20409	JA    emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32
 20410	LEAQ  -32(DX)(R14*1), R12
 20411	LEAQ  -32(AX)(R14*1), R15
 20412
 20413emit_lit_memmove_long_lz4s_s2large_big_loop_back:
 20414	MOVOU (R12), X4
 20415	MOVOU 16(R12), X5
 20416	MOVOA X4, (R15)
 20417	MOVOA X5, 16(R15)
 20418	ADDQ  $0x20, R15
 20419	ADDQ  $0x20, R12
 20420	ADDQ  $0x20, R14
 20421	DECQ  R13
 20422	JNA   emit_lit_memmove_long_lz4s_s2large_big_loop_back
 20423
 20424emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32:
 20425	MOVOU -32(DX)(R14*1), X4
 20426	MOVOU -16(DX)(R14*1), X5
 20427	MOVOA X4, -32(AX)(R14*1)
 20428	MOVOA X5, -16(AX)(R14*1)
 20429	ADDQ  $0x20, R14
 20430	CMPQ  R9, R14
 20431	JAE   emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32
 20432	MOVOU X0, (AX)
 20433	MOVOU X1, 16(AX)
 20434	MOVOU X2, -32(AX)(R9*1)
 20435	MOVOU X3, -16(AX)(R9*1)
 20436	MOVQ  R11, AX
 20437
 20438lz4s_s2_lits_emit_done:
 20439	MOVQ R8, DX
 20440
 20441lz4s_s2_lits_done:
 20442	CMPQ DX, BX
 20443	JNE  lz4s_s2_match
 20444	CMPQ R10, $0x03
 20445	JEQ  lz4s_s2_done
 20446	JMP  lz4s_s2_corrupt
 20447
 20448lz4s_s2_match:
 20449	CMPQ    R10, $0x03
 20450	JEQ     lz4s_s2_loop
 20451	LEAQ    2(DX), R8
 20452	CMPQ    R8, BX
 20453	JAE     lz4s_s2_corrupt
 20454	MOVWQZX (DX), R9
 20455	MOVQ    R8, DX
 20456	TESTQ   R9, R9
 20457	JZ      lz4s_s2_corrupt
 20458	CMPQ    R9, SI
 20459	JA      lz4s_s2_corrupt
 20460	CMPQ    R10, $0x12
 20461	JNE     lz4s_s2_ml_done
 20462
 20463lz4s_s2_ml_loop:
 20464	MOVBQZX (DX), R8
 20465	INCQ    DX
 20466	ADDQ    R8, R10
 20467	CMPQ    DX, BX
 20468	JAE     lz4s_s2_corrupt
 20469	CMPQ    R8, $0xff
 20470	JEQ     lz4s_s2_ml_loop
 20471
 20472lz4s_s2_ml_done:
 20473	ADDQ R10, SI
 20474	CMPQ R9, DI
 20475	JNE  lz4s_s2_docopy
 20476
 20477	// emitRepeat
 20478emit_repeat_again_lz4_s2:
 20479	MOVL R10, R8
 20480	LEAL -4(R10), R10
 20481	CMPL R8, $0x08
 20482	JBE  repeat_two_lz4_s2
 20483	CMPL R8, $0x0c
 20484	JAE  cant_repeat_two_offset_lz4_s2
 20485	CMPL R9, $0x00000800
 20486	JB   repeat_two_offset_lz4_s2
 20487
 20488cant_repeat_two_offset_lz4_s2:
 20489	CMPL R10, $0x00000104
 20490	JB   repeat_three_lz4_s2
 20491	CMPL R10, $0x00010100
 20492	JB   repeat_four_lz4_s2
 20493	CMPL R10, $0x0100ffff
 20494	JB   repeat_five_lz4_s2
 20495	LEAL -16842747(R10), R10
 20496	MOVL $0xfffb001d, (AX)
 20497	MOVB $0xff, 4(AX)
 20498	ADDQ $0x05, AX
 20499	JMP  emit_repeat_again_lz4_s2
 20500
 20501repeat_five_lz4_s2:
 20502	LEAL -65536(R10), R10
 20503	MOVL R10, R9
 20504	MOVW $0x001d, (AX)
 20505	MOVW R10, 2(AX)
 20506	SARL $0x10, R9
 20507	MOVB R9, 4(AX)
 20508	ADDQ $0x05, AX
 20509	JMP  lz4s_s2_loop
 20510
 20511repeat_four_lz4_s2:
 20512	LEAL -256(R10), R10
 20513	MOVW $0x0019, (AX)
 20514	MOVW R10, 2(AX)
 20515	ADDQ $0x04, AX
 20516	JMP  lz4s_s2_loop
 20517
 20518repeat_three_lz4_s2:
 20519	LEAL -4(R10), R10
 20520	MOVW $0x0015, (AX)
 20521	MOVB R10, 2(AX)
 20522	ADDQ $0x03, AX
 20523	JMP  lz4s_s2_loop
 20524
 20525repeat_two_lz4_s2:
 20526	SHLL $0x02, R10
 20527	ORL  $0x01, R10
 20528	MOVW R10, (AX)
 20529	ADDQ $0x02, AX
 20530	JMP  lz4s_s2_loop
 20531
 20532repeat_two_offset_lz4_s2:
 20533	XORQ R8, R8
 20534	LEAL 1(R8)(R10*4), R10
 20535	MOVB R9, 1(AX)
 20536	SARL $0x08, R9
 20537	SHLL $0x05, R9
 20538	ORL  R9, R10
 20539	MOVB R10, (AX)
 20540	ADDQ $0x02, AX
 20541	JMP  lz4s_s2_loop
 20542
 20543lz4s_s2_docopy:
 20544	MOVQ R9, DI
 20545
 20546	// emitCopy
 20547	CMPL R10, $0x40
 20548	JBE  two_byte_offset_short_lz4_s2
 20549	CMPL R9, $0x00000800
 20550	JAE  long_offset_short_lz4_s2
 20551	MOVL $0x00000001, R8
 20552	LEAL 16(R8), R8
 20553	MOVB R9, 1(AX)
 20554	MOVL R9, R11
 20555	SHRL $0x08, R11
 20556	SHLL $0x05, R11
 20557	ORL  R11, R8
 20558	MOVB R8, (AX)
 20559	ADDQ $0x02, AX
 20560	SUBL $0x08, R10
 20561
 20562	// emitRepeat
 20563	LEAL -4(R10), R10
 20564	JMP  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
 20565
 20566emit_repeat_again_lz4_s2_emit_copy_short_2b:
 20567	MOVL R10, R8
 20568	LEAL -4(R10), R10
 20569	CMPL R8, $0x08
 20570	JBE  repeat_two_lz4_s2_emit_copy_short_2b
 20571	CMPL R8, $0x0c
 20572	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
 20573	CMPL R9, $0x00000800
 20574	JB   repeat_two_offset_lz4_s2_emit_copy_short_2b
 20575
 20576cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
 20577	CMPL R10, $0x00000104
 20578	JB   repeat_three_lz4_s2_emit_copy_short_2b
 20579	CMPL R10, $0x00010100
 20580	JB   repeat_four_lz4_s2_emit_copy_short_2b
 20581	CMPL R10, $0x0100ffff
 20582	JB   repeat_five_lz4_s2_emit_copy_short_2b
 20583	LEAL -16842747(R10), R10
 20584	MOVL $0xfffb001d, (AX)
 20585	MOVB $0xff, 4(AX)
 20586	ADDQ $0x05, AX
 20587	JMP  emit_repeat_again_lz4_s2_emit_copy_short_2b
 20588
 20589repeat_five_lz4_s2_emit_copy_short_2b:
 20590	LEAL -65536(R10), R10
 20591	MOVL R10, R9
 20592	MOVW $0x001d, (AX)
 20593	MOVW R10, 2(AX)
 20594	SARL $0x10, R9
 20595	MOVB R9, 4(AX)
 20596	ADDQ $0x05, AX
 20597	JMP  lz4s_s2_loop
 20598
 20599repeat_four_lz4_s2_emit_copy_short_2b:
 20600	LEAL -256(R10), R10
 20601	MOVW $0x0019, (AX)
 20602	MOVW R10, 2(AX)
 20603	ADDQ $0x04, AX
 20604	JMP  lz4s_s2_loop
 20605
 20606repeat_three_lz4_s2_emit_copy_short_2b:
 20607	LEAL -4(R10), R10
 20608	MOVW $0x0015, (AX)
 20609	MOVB R10, 2(AX)
 20610	ADDQ $0x03, AX
 20611	JMP  lz4s_s2_loop
 20612
 20613repeat_two_lz4_s2_emit_copy_short_2b:
 20614	SHLL $0x02, R10
 20615	ORL  $0x01, R10
 20616	MOVW R10, (AX)
 20617	ADDQ $0x02, AX
 20618	JMP  lz4s_s2_loop
 20619
 20620repeat_two_offset_lz4_s2_emit_copy_short_2b:
 20621	XORQ R8, R8
 20622	LEAL 1(R8)(R10*4), R10
 20623	MOVB R9, 1(AX)
 20624	SARL $0x08, R9
 20625	SHLL $0x05, R9
 20626	ORL  R9, R10
 20627	MOVB R10, (AX)
 20628	ADDQ $0x02, AX
 20629	JMP  lz4s_s2_loop
 20630
 20631long_offset_short_lz4_s2:
 20632	MOVB $0xee, (AX)
 20633	MOVW R9, 1(AX)
 20634	LEAL -60(R10), R10
 20635	ADDQ $0x03, AX
 20636
 20637	// emitRepeat
 20638emit_repeat_again_lz4_s2_emit_copy_short:
 20639	MOVL R10, R8
 20640	LEAL -4(R10), R10
 20641	CMPL R8, $0x08
 20642	JBE  repeat_two_lz4_s2_emit_copy_short
 20643	CMPL R8, $0x0c
 20644	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short
 20645	CMPL R9, $0x00000800
 20646	JB   repeat_two_offset_lz4_s2_emit_copy_short
 20647
 20648cant_repeat_two_offset_lz4_s2_emit_copy_short:
 20649	CMPL R10, $0x00000104
 20650	JB   repeat_three_lz4_s2_emit_copy_short
 20651	CMPL R10, $0x00010100
 20652	JB   repeat_four_lz4_s2_emit_copy_short
 20653	CMPL R10, $0x0100ffff
 20654	JB   repeat_five_lz4_s2_emit_copy_short
 20655	LEAL -16842747(R10), R10
 20656	MOVL $0xfffb001d, (AX)
 20657	MOVB $0xff, 4(AX)
 20658	ADDQ $0x05, AX
 20659	JMP  emit_repeat_again_lz4_s2_emit_copy_short
 20660
 20661repeat_five_lz4_s2_emit_copy_short:
 20662	LEAL -65536(R10), R10
 20663	MOVL R10, R9
 20664	MOVW $0x001d, (AX)
 20665	MOVW R10, 2(AX)
 20666	SARL $0x10, R9
 20667	MOVB R9, 4(AX)
 20668	ADDQ $0x05, AX
 20669	JMP  lz4s_s2_loop
 20670
 20671repeat_four_lz4_s2_emit_copy_short:
 20672	LEAL -256(R10), R10
 20673	MOVW $0x0019, (AX)
 20674	MOVW R10, 2(AX)
 20675	ADDQ $0x04, AX
 20676	JMP  lz4s_s2_loop
 20677
 20678repeat_three_lz4_s2_emit_copy_short:
 20679	LEAL -4(R10), R10
 20680	MOVW $0x0015, (AX)
 20681	MOVB R10, 2(AX)
 20682	ADDQ $0x03, AX
 20683	JMP  lz4s_s2_loop
 20684
 20685repeat_two_lz4_s2_emit_copy_short:
 20686	SHLL $0x02, R10
 20687	ORL  $0x01, R10
 20688	MOVW R10, (AX)
 20689	ADDQ $0x02, AX
 20690	JMP  lz4s_s2_loop
 20691
 20692repeat_two_offset_lz4_s2_emit_copy_short:
 20693	XORQ R8, R8
 20694	LEAL 1(R8)(R10*4), R10
 20695	MOVB R9, 1(AX)
 20696	SARL $0x08, R9
 20697	SHLL $0x05, R9
 20698	ORL  R9, R10
 20699	MOVB R10, (AX)
 20700	ADDQ $0x02, AX
 20701	JMP  lz4s_s2_loop
 20702
 20703two_byte_offset_short_lz4_s2:
 20704	MOVL R10, R8
 20705	SHLL $0x02, R8
 20706	CMPL R10, $0x0c
 20707	JAE  emit_copy_three_lz4_s2
 20708	CMPL R9, $0x00000800
 20709	JAE  emit_copy_three_lz4_s2
 20710	LEAL -15(R8), R8
 20711	MOVB R9, 1(AX)
 20712	SHRL $0x08, R9
 20713	SHLL $0x05, R9
 20714	ORL  R9, R8
 20715	MOVB R8, (AX)
 20716	ADDQ $0x02, AX
 20717	JMP  lz4s_s2_loop
 20718
 20719emit_copy_three_lz4_s2:
 20720	LEAL -2(R8), R8
 20721	MOVB R8, (AX)
 20722	MOVW R9, 1(AX)
 20723	ADDQ $0x03, AX
 20724	JMP  lz4s_s2_loop
 20725
 20726lz4s_s2_done:
 20727	MOVQ dst_base+0(FP), CX
 20728	SUBQ CX, AX
 20729	MOVQ SI, uncompressed+48(FP)
 20730	MOVQ AX, dstUsed+56(FP)
 20731	RET
 20732
 20733lz4s_s2_corrupt:
 20734	XORQ AX, AX
 20735	LEAQ -1(AX), SI
 20736	MOVQ SI, uncompressed+48(FP)
 20737	RET
 20738
 20739lz4s_s2_dstfull:
 20740	XORQ AX, AX
 20741	LEAQ -2(AX), SI
 20742	MOVQ SI, uncompressed+48(FP)
 20743	RET
 20744
 20745// func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
 20746// Requires: SSE2
//
// Walks the LZ4 block in src one sequence at a time (token byte, optional
// literal run, optional match) and re-emits every literal run and match into
// dst using the literal/copy tag bytes written below.
//
// Results:
//   success:   uncompressed = sum of all literal and match lengths seen,
//              dstUsed      = number of bytes written to dst.
//   corrupt:   uncompressed = -1 (dstUsed is not stored on this path).
//   dst full:  uncompressed = -2 (dstUsed is not stored on this path).
//
// Register roles:
//   AX  dst write pointer              CX  dst limit (dst_base + dst_len - 10)
//   DX  src read pointer               BX  src end (src_base + src_len)
//   SI  running uncompressed length    DI  token / scratch / end of literal run
//   R8  literal length, later the match offset
//   R9  match length
//   R10-R14, X0-X5  scratch
 20747TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64
 20748	XORQ SI, SI // uncompressed length = 0
 20749	MOVQ dst_base+0(FP), AX
 20750	MOVQ dst_len+8(FP), CX
 20751	MOVQ src_base+24(FP), DX
 20752	MOVQ src_len+32(FP), BX
 20753	LEAQ (DX)(BX*1), BX // BX = src end
 20754	LEAQ -10(AX)(CX*1), CX // CX = dst end minus a 10-byte margin
 20755
 20756lz4_snappy_loop: // start of one sequence
 20757	CMPQ    DX, BX
 20758	JAE     lz4_snappy_corrupt // src exhausted where a token was expected
 20759	CMPQ    AX, CX
 20760	JAE     lz4_snappy_dstfull // dst write pointer reached the limit
 20761	MOVBQZX (DX), DI // DI = token byte
 20762	MOVQ    DI, R8
 20763	MOVQ    DI, R9
 20764	SHRQ    $0x04, R8 // R8 = high nibble: literal length
 20765	ANDQ    $0x0f, R9 // R9 = low nibble: match length code
 20766	CMPQ    DI, $0xf0
 20767	JB      lz4_snappy_ll_end // high nibble < 15: no length extension bytes
 20768
 20769lz4_snappy_ll_loop: // add extension bytes to R8 while the byte read is 0xff
 20770	INCQ    DX
 20771	CMPQ    DX, BX
 20772	JAE     lz4_snappy_corrupt
 20773	MOVBQZX (DX), DI
 20774	ADDQ    DI, R8
 20775	CMPQ    DI, $0xff
 20776	JEQ     lz4_snappy_ll_loop
 20777
 20778lz4_snappy_ll_end:
 20779	LEAQ  (DX)(R8*1), DI // DI = address of last length byte + literal length
 20780	ADDQ  $0x04, R9 // match length = code + 4
 20781	CMPQ  DI, BX
 20782	JAE   lz4_snappy_corrupt // literal run would reach src end
 20783	INCQ  DX // DX = first literal byte
 20784	INCQ  DI // DI = first byte after the literal run
 20785	TESTQ R8, R8
 20786	JZ    lz4_snappy_lits_done // no literals in this sequence
 20787	LEAQ  (AX)(R8*1), R10
 20788	CMPQ  R10, CX
 20789	JAE   lz4_snappy_dstfull // literals would reach the dst limit
 20790	ADDQ  R8, SI // account for the literals
 20791	LEAL  -1(R8), R10 // R10 = literal length - 1, the value stored in the tag
 20792	CMPL  R10, $0x3c
 20793	JB    one_byte_lz4_snappy // < 60: length fits in the tag byte itself
 20794	CMPL  R10, $0x00000100
 20795	JB    two_bytes_lz4_snappy
 20796	CMPL  R10, $0x00010000
 20797	JB    three_bytes_lz4_snappy
 20798	CMPL  R10, $0x01000000
 20799	JB    four_bytes_lz4_snappy
 20800	MOVB  $0xfc, (AX) // tag 0xfc followed by a 4-byte length-1
 20801	MOVL  R10, 1(AX)
 20802	ADDQ  $0x05, AX
 20803	JMP   memmove_long_lz4_snappy
 20804
 20805four_bytes_lz4_snappy: // tag 0xf8 followed by a 3-byte length-1
 20806	MOVL R10, R11
 20807	SHRL $0x10, R11
 20808	MOVB $0xf8, (AX)
 20809	MOVW R10, 1(AX) // low 16 bits
 20810	MOVB R11, 3(AX) // bits 16..23
 20811	ADDQ $0x04, AX
 20812	JMP  memmove_long_lz4_snappy
 20813
 20814three_bytes_lz4_snappy: // tag 0xf4 followed by a 2-byte length-1
 20815	MOVB $0xf4, (AX)
 20816	MOVW R10, 1(AX)
 20817	ADDQ $0x03, AX
 20818	JMP  memmove_long_lz4_snappy
 20819
 20820two_bytes_lz4_snappy: // tag 0xf0 followed by a 1-byte length-1
 20821	MOVB $0xf0, (AX)
 20822	MOVB R10, 1(AX)
 20823	ADDQ $0x02, AX
 20824	CMPL R10, $0x40
 20825	JB   memmove_lz4_snappy // length-1 < 64: the short copy handles it
 20826	JMP  memmove_long_lz4_snappy
 20827
 20828one_byte_lz4_snappy:
 20829	SHLB $0x02, R10 // tag = (length-1) << 2
 20830	MOVB R10, (AX)
 20831	ADDQ $0x01, AX
 20832
 20833memmove_lz4_snappy:
 20834	LEAQ (AX)(R8*1), R10 // R10 = dst pointer after the literals
 20835
 20836	// genMemMoveShort
 20837	CMPQ R8, $0x08
 20838	JBE  emit_lit_memmove_lz4_snappy_memmove_move_8
 20839	CMPQ R8, $0x10
 20840	JBE  emit_lit_memmove_lz4_snappy_memmove_move_8through16
 20841	CMPQ R8, $0x20
 20842	JBE  emit_lit_memmove_lz4_snappy_memmove_move_17through32
 20843	JMP  emit_lit_memmove_lz4_snappy_memmove_move_33through64
 20844
 20845emit_lit_memmove_lz4_snappy_memmove_move_8: // length <= 8: always moves a full 8 bytes; AX is set from R10 afterwards
 20846	MOVQ (DX), R11
 20847	MOVQ R11, (AX)
 20848	JMP  memmove_end_copy_lz4_snappy
 20849
 20850emit_lit_memmove_lz4_snappy_memmove_move_8through16: // first 8 and last 8 bytes (the two moves may overlap)
 20851	MOVQ (DX), R11
 20852	MOVQ -8(DX)(R8*1), DX // DX reused as scratch; it is reloaded from DI at lz4_snappy_lits_emit_done
 20853	MOVQ R11, (AX)
 20854	MOVQ DX, -8(AX)(R8*1)
 20855	JMP  memmove_end_copy_lz4_snappy
 20856
 20857emit_lit_memmove_lz4_snappy_memmove_move_17through32: // first 16 and last 16 bytes (may overlap)
 20858	MOVOU (DX), X0
 20859	MOVOU -16(DX)(R8*1), X1
 20860	MOVOU X0, (AX)
 20861	MOVOU X1, -16(AX)(R8*1)
 20862	JMP   memmove_end_copy_lz4_snappy
 20863
 20864emit_lit_memmove_lz4_snappy_memmove_move_33through64: // first 32 and last 32 bytes (may overlap)
 20865	MOVOU (DX), X0
 20866	MOVOU 16(DX), X1
 20867	MOVOU -32(DX)(R8*1), X2
 20868	MOVOU -16(DX)(R8*1), X3
 20869	MOVOU X0, (AX)
 20870	MOVOU X1, 16(AX)
 20871	MOVOU X2, -32(AX)(R8*1)
 20872	MOVOU X3, -16(AX)(R8*1)
 20873
 20874memmove_end_copy_lz4_snappy:
 20875	MOVQ R10, AX // advance dst pointer past the literals
 20876	JMP  lz4_snappy_lits_emit_done
 20877
 20878memmove_long_lz4_snappy:
 20879	LEAQ (AX)(R8*1), R10 // R10 = dst pointer after the literals
 20880
 20881	// genMemMoveLong
 20882	MOVOU (DX), X0 // X0-X3 = first 32 and last 32 source bytes; they are stored after the loop
 20883	MOVOU 16(DX), X1
 20884	MOVOU -32(DX)(R8*1), X2
 20885	MOVOU -16(DX)(R8*1), X3
 20886	MOVQ  R8, R12
 20887	SHRQ  $0x05, R12 // R12 = length / 32
 20888	MOVQ  AX, R11
 20889	ANDL  $0x0000001f, R11 // R11 = AX mod 32
 20890	MOVQ  $0x00000040, R13
 20891	SUBQ  R11, R13 // R13 = 64 - (AX mod 32), so AX+R13-32 is 32-byte aligned for the MOVOA stores
 20892	DECQ  R12
 20893	JA    emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32
 20894	LEAQ  -32(DX)(R13*1), R11
 20895	LEAQ  -32(AX)(R13*1), R14
 20896
 20897emit_lit_memmove_long_lz4_snappylarge_big_loop_back:
 20898	MOVOU (R11), X4
 20899	MOVOU 16(R11), X5
 20900	MOVOA X4, (R14)
 20901	MOVOA X5, 16(R14)
 20902	ADDQ  $0x20, R14
 20903	ADDQ  $0x20, R11
 20904	ADDQ  $0x20, R13
 20905	DECQ  R12
 20906	JNA   emit_lit_memmove_long_lz4_snappylarge_big_loop_back
 20907
 20908emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32: // 32 bytes per iteration at offset R13-32, while R8 >= R13
 20909	MOVOU -32(DX)(R13*1), X4
 20910	MOVOU -16(DX)(R13*1), X5
 20911	MOVOA X4, -32(AX)(R13*1)
 20912	MOVOA X5, -16(AX)(R13*1)
 20913	ADDQ  $0x20, R13
 20914	CMPQ  R8, R13
 20915	JAE   emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32
 20916	MOVOU X0, (AX) // unaligned head and tail stores from the values saved before the loop
 20917	MOVOU X1, 16(AX)
 20918	MOVOU X2, -32(AX)(R8*1)
 20919	MOVOU X3, -16(AX)(R8*1)
 20920	MOVQ  R10, AX // advance dst pointer past the literals
 20921
 20922lz4_snappy_lits_emit_done:
 20923	MOVQ DI, DX // src pointer = first byte after the literal run
 20924
 20925lz4_snappy_lits_done:
 20926	CMPQ DX, BX
 20927	JNE  lz4_snappy_match
 20928	CMPQ R9, $0x04 // src ends here: accepted only when the match code was 0
 20929	JEQ  lz4_snappy_done
 20930	JMP  lz4_snappy_corrupt
 20931
 20932lz4_snappy_match:
 20933	LEAQ    2(DX), DI
 20934	CMPQ    DI, BX
 20935	JAE     lz4_snappy_corrupt // corrupt unless DX+2 < src end
 20936	MOVWQZX (DX), R8 // R8 = 16-bit match offset
 20937	MOVQ    DI, DX
 20938	TESTQ   R8, R8
 20939	JZ      lz4_snappy_corrupt // offset 0 is rejected
 20940	CMPQ    R8, SI
 20941	JA      lz4_snappy_corrupt // offset larger than the uncompressed length so far
 20942	CMPQ    R9, $0x13
 20943	JNE     lz4_snappy_ml_done // code != 15: no length extension bytes
 20944
 20945lz4_snappy_ml_loop: // add extension bytes to R9 while the byte read is 0xff
 20946	MOVBQZX (DX), DI
 20947	INCQ    DX
 20948	ADDQ    DI, R9
 20949	CMPQ    DX, BX
 20950	JAE     lz4_snappy_corrupt
 20951	CMPQ    DI, $0xff
 20952	JEQ     lz4_snappy_ml_loop
 20953
 20954lz4_snappy_ml_done:
 20955	ADDQ R9, SI // account for the match
 20956
 20957	// emitCopy
 20958two_byte_offset_lz4_s2: // while length > 64, emit 60-byte copy ops
 20959	CMPL R9, $0x40
 20960	JBE  two_byte_offset_short_lz4_s2
 20961	MOVB $0xee, (AX) // tag 0xee followed by the 16-bit offset
 20962	MOVW R8, 1(AX)
 20963	LEAL -60(R9), R9 // 60 bytes of the match are covered by this op
 20964	ADDQ $0x03, AX
 20965	CMPQ AX, CX
 20966	JAE  lz4_snappy_loop // dst limit reached: back to the loop head, which re-runs the src/dst checks
 20967	JMP  two_byte_offset_lz4_s2
 20968
 20969two_byte_offset_short_lz4_s2:
 20970	MOVL R9, DI
 20971	SHLL $0x02, DI // DI = length << 2
 20972	CMPL R9, $0x0c
 20973	JAE  emit_copy_three_lz4_s2 // length >= 12 needs the 3-byte form
 20974	CMPL R8, $0x00000800
 20975	JAE  emit_copy_three_lz4_s2 // offset >= 2048 needs the 3-byte form
 20976	LEAL -15(DI), DI // tag = ((length-4) << 2) | 1
 20977	MOVB R8, 1(AX) // low 8 bits of the offset
 20978	SHRL $0x08, R8
 20979	SHLL $0x05, R8
 20980	ORL  R8, DI // offset bits 8..10 go into tag bits 5..7
 20981	MOVB DI, (AX)
 20982	ADDQ $0x02, AX
 20983	JMP  lz4_snappy_loop
 20984
 20985emit_copy_three_lz4_s2:
 20986	LEAL -2(DI), DI // tag = ((length-1) << 2) | 2
 20987	MOVB DI, (AX)
 20988	MOVW R8, 1(AX) // 16-bit offset
 20989	ADDQ $0x03, AX
 20990	JMP  lz4_snappy_loop
 20991
 20992lz4_snappy_done:
 20993	MOVQ dst_base+0(FP), CX
 20994	SUBQ CX, AX // AX = bytes written to dst
 20995	MOVQ SI, uncompressed+48(FP)
 20996	MOVQ AX, dstUsed+56(FP)
 20997	RET
 20998
 20999lz4_snappy_corrupt: // return uncompressed = -1
 21000	XORQ AX, AX
 21001	LEAQ -1(AX), SI
 21002	MOVQ SI, uncompressed+48(FP)
 21003	RET
 21004
 21005lz4_snappy_dstfull: // return uncompressed = -2
 21006	XORQ AX, AX
 21007	LEAQ -2(AX), SI
 21008	MOVQ SI, uncompressed+48(FP)
 21009	RET
 21010
 21011// func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
 21012// Requires: SSE2
//
// Walks the LZ4s block in src one sequence at a time (token byte, optional
// literal run, optional match) and re-emits every literal run and match into
// dst using the literal/copy tag bytes written below. In this variant the
// match length is the token's low nibble + 3, and a low nibble of 0 means the
// sequence carries no match at all.
//
// Results:
//   success:   uncompressed = sum of all literal and match lengths seen,
//              dstUsed      = number of bytes written to dst.
//   corrupt:   uncompressed = -1 (dstUsed is not stored on this path).
//   dst full:  uncompressed = -2 (dstUsed is not stored on this path).
//
// Register roles:
//   AX  dst write pointer              CX  dst limit (dst_base + dst_len - 10)
//   DX  src read pointer               BX  src end (src_base + src_len)
//   SI  running uncompressed length    DI  token / scratch / end of literal run
//   R8  literal length, later the match offset
//   R9  match length
//   R10-R14, X0-X5  scratch
 21013TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64
 21014	XORQ SI, SI // uncompressed length = 0
 21015	MOVQ dst_base+0(FP), AX
 21016	MOVQ dst_len+8(FP), CX
 21017	MOVQ src_base+24(FP), DX
 21018	MOVQ src_len+32(FP), BX
 21019	LEAQ (DX)(BX*1), BX // BX = src end
 21020	LEAQ -10(AX)(CX*1), CX // CX = dst end minus a 10-byte margin
 21021
 21022lz4s_snappy_loop: // start of one sequence
 21023	CMPQ    DX, BX
 21024	JAE     lz4s_snappy_corrupt // src exhausted where a token was expected
 21025	CMPQ    AX, CX
 21026	JAE     lz4s_snappy_dstfull // dst write pointer reached the limit
 21027	MOVBQZX (DX), DI // DI = token byte
 21028	MOVQ    DI, R8
 21029	MOVQ    DI, R9
 21030	SHRQ    $0x04, R8 // R8 = high nibble: literal length
 21031	ANDQ    $0x0f, R9 // R9 = low nibble: match length code
 21032	CMPQ    DI, $0xf0
 21033	JB      lz4s_snappy_ll_end // high nibble < 15: no length extension bytes
 21034
 21035lz4s_snappy_ll_loop: // add extension bytes to R8 while the byte read is 0xff
 21036	INCQ    DX
 21037	CMPQ    DX, BX
 21038	JAE     lz4s_snappy_corrupt
 21039	MOVBQZX (DX), DI
 21040	ADDQ    DI, R8
 21041	CMPQ    DI, $0xff
 21042	JEQ     lz4s_snappy_ll_loop
 21043
 21044lz4s_snappy_ll_end:
 21045	LEAQ  (DX)(R8*1), DI // DI = address of last length byte + literal length
 21046	ADDQ  $0x03, R9 // match length = code + 3
 21047	CMPQ  DI, BX
 21048	JAE   lz4s_snappy_corrupt // literal run would reach src end
 21049	INCQ  DX // DX = first literal byte
 21050	INCQ  DI // DI = first byte after the literal run
 21051	TESTQ R8, R8
 21052	JZ    lz4s_snappy_lits_done // no literals in this sequence
 21053	LEAQ  (AX)(R8*1), R10
 21054	CMPQ  R10, CX
 21055	JAE   lz4s_snappy_dstfull // literals would reach the dst limit
 21056	ADDQ  R8, SI // account for the literals
 21057	LEAL  -1(R8), R10 // R10 = literal length - 1, the value stored in the tag
 21058	CMPL  R10, $0x3c
 21059	JB    one_byte_lz4s_snappy // < 60: length fits in the tag byte itself
 21060	CMPL  R10, $0x00000100
 21061	JB    two_bytes_lz4s_snappy
 21062	CMPL  R10, $0x00010000
 21063	JB    three_bytes_lz4s_snappy
 21064	CMPL  R10, $0x01000000
 21065	JB    four_bytes_lz4s_snappy
 21066	MOVB  $0xfc, (AX) // tag 0xfc followed by a 4-byte length-1
 21067	MOVL  R10, 1(AX)
 21068	ADDQ  $0x05, AX
 21069	JMP   memmove_long_lz4s_snappy
 21070
 21071four_bytes_lz4s_snappy: // tag 0xf8 followed by a 3-byte length-1
 21072	MOVL R10, R11
 21073	SHRL $0x10, R11
 21074	MOVB $0xf8, (AX)
 21075	MOVW R10, 1(AX) // low 16 bits
 21076	MOVB R11, 3(AX) // bits 16..23
 21077	ADDQ $0x04, AX
 21078	JMP  memmove_long_lz4s_snappy
 21079
 21080three_bytes_lz4s_snappy: // tag 0xf4 followed by a 2-byte length-1
 21081	MOVB $0xf4, (AX)
 21082	MOVW R10, 1(AX)
 21083	ADDQ $0x03, AX
 21084	JMP  memmove_long_lz4s_snappy
 21085
 21086two_bytes_lz4s_snappy: // tag 0xf0 followed by a 1-byte length-1
 21087	MOVB $0xf0, (AX)
 21088	MOVB R10, 1(AX)
 21089	ADDQ $0x02, AX
 21090	CMPL R10, $0x40
 21091	JB   memmove_lz4s_snappy // length-1 < 64: the short copy handles it
 21092	JMP  memmove_long_lz4s_snappy
 21093
 21094one_byte_lz4s_snappy:
 21095	SHLB $0x02, R10 // tag = (length-1) << 2
 21096	MOVB R10, (AX)
 21097	ADDQ $0x01, AX
 21098
 21099memmove_lz4s_snappy:
 21100	LEAQ (AX)(R8*1), R10 // R10 = dst pointer after the literals
 21101
 21102	// genMemMoveShort
 21103	CMPQ R8, $0x08
 21104	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_8
 21105	CMPQ R8, $0x10
 21106	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_8through16
 21107	CMPQ R8, $0x20
 21108	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_17through32
 21109	JMP  emit_lit_memmove_lz4s_snappy_memmove_move_33through64
 21110
 21111emit_lit_memmove_lz4s_snappy_memmove_move_8: // length <= 8: always moves a full 8 bytes; AX is set from R10 afterwards
 21112	MOVQ (DX), R11
 21113	MOVQ R11, (AX)
 21114	JMP  memmove_end_copy_lz4s_snappy
 21115
 21116emit_lit_memmove_lz4s_snappy_memmove_move_8through16: // first 8 and last 8 bytes (the two moves may overlap)
 21117	MOVQ (DX), R11
 21118	MOVQ -8(DX)(R8*1), DX // DX reused as scratch; it is reloaded from DI at lz4s_snappy_lits_emit_done
 21119	MOVQ R11, (AX)
 21120	MOVQ DX, -8(AX)(R8*1)
 21121	JMP  memmove_end_copy_lz4s_snappy
 21122
 21123emit_lit_memmove_lz4s_snappy_memmove_move_17through32: // first 16 and last 16 bytes (may overlap)
 21124	MOVOU (DX), X0
 21125	MOVOU -16(DX)(R8*1), X1
 21126	MOVOU X0, (AX)
 21127	MOVOU X1, -16(AX)(R8*1)
 21128	JMP   memmove_end_copy_lz4s_snappy
 21129
 21130emit_lit_memmove_lz4s_snappy_memmove_move_33through64: // first 32 and last 32 bytes (may overlap)
 21131	MOVOU (DX), X0
 21132	MOVOU 16(DX), X1
 21133	MOVOU -32(DX)(R8*1), X2
 21134	MOVOU -16(DX)(R8*1), X3
 21135	MOVOU X0, (AX)
 21136	MOVOU X1, 16(AX)
 21137	MOVOU X2, -32(AX)(R8*1)
 21138	MOVOU X3, -16(AX)(R8*1)
 21139
 21140memmove_end_copy_lz4s_snappy:
 21141	MOVQ R10, AX // advance dst pointer past the literals
 21142	JMP  lz4s_snappy_lits_emit_done
 21143
 21144memmove_long_lz4s_snappy:
 21145	LEAQ (AX)(R8*1), R10 // R10 = dst pointer after the literals
 21146
 21147	// genMemMoveLong
 21148	MOVOU (DX), X0 // X0-X3 = first 32 and last 32 source bytes; they are stored after the loop
 21149	MOVOU 16(DX), X1
 21150	MOVOU -32(DX)(R8*1), X2
 21151	MOVOU -16(DX)(R8*1), X3
 21152	MOVQ  R8, R12
 21153	SHRQ  $0x05, R12 // R12 = length / 32
 21154	MOVQ  AX, R11
 21155	ANDL  $0x0000001f, R11 // R11 = AX mod 32
 21156	MOVQ  $0x00000040, R13
 21157	SUBQ  R11, R13 // R13 = 64 - (AX mod 32), so AX+R13-32 is 32-byte aligned for the MOVOA stores
 21158	DECQ  R12
 21159	JA    emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32
 21160	LEAQ  -32(DX)(R13*1), R11
 21161	LEAQ  -32(AX)(R13*1), R14
 21162
 21163emit_lit_memmove_long_lz4s_snappylarge_big_loop_back:
 21164	MOVOU (R11), X4
 21165	MOVOU 16(R11), X5
 21166	MOVOA X4, (R14)
 21167	MOVOA X5, 16(R14)
 21168	ADDQ  $0x20, R14
 21169	ADDQ  $0x20, R11
 21170	ADDQ  $0x20, R13
 21171	DECQ  R12
 21172	JNA   emit_lit_memmove_long_lz4s_snappylarge_big_loop_back
 21173
 21174emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32: // 32 bytes per iteration at offset R13-32, while R8 >= R13
 21175	MOVOU -32(DX)(R13*1), X4
 21176	MOVOU -16(DX)(R13*1), X5
 21177	MOVOA X4, -32(AX)(R13*1)
 21178	MOVOA X5, -16(AX)(R13*1)
 21179	ADDQ  $0x20, R13
 21180	CMPQ  R8, R13
 21181	JAE   emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32
 21182	MOVOU X0, (AX) // unaligned head and tail stores from the values saved before the loop
 21183	MOVOU X1, 16(AX)
 21184	MOVOU X2, -32(AX)(R8*1)
 21185	MOVOU X3, -16(AX)(R8*1)
 21186	MOVQ  R10, AX // advance dst pointer past the literals
 21187
 21188lz4s_snappy_lits_emit_done:
 21189	MOVQ DI, DX // src pointer = first byte after the literal run
 21190
 21191lz4s_snappy_lits_done:
 21192	CMPQ DX, BX
 21193	JNE  lz4s_snappy_match
 21194	CMPQ R9, $0x03 // src ends here: accepted only when the match code was 0
 21195	JEQ  lz4s_snappy_done
 21196	JMP  lz4s_snappy_corrupt
 21197
 21198lz4s_snappy_match:
 21199	CMPQ    R9, $0x03
 21200	JEQ     lz4s_snappy_loop // match code 0: this sequence has no match, start the next one
 21201	LEAQ    2(DX), DI
 21202	CMPQ    DI, BX
 21203	JAE     lz4s_snappy_corrupt // corrupt unless DX+2 < src end
 21204	MOVWQZX (DX), R8 // R8 = 16-bit match offset
 21205	MOVQ    DI, DX
 21206	TESTQ   R8, R8
 21207	JZ      lz4s_snappy_corrupt // offset 0 is rejected
 21208	CMPQ    R8, SI
 21209	JA      lz4s_snappy_corrupt // offset larger than the uncompressed length so far
 21210	CMPQ    R9, $0x12
 21211	JNE     lz4s_snappy_ml_done // code != 15: no length extension bytes
 21212
 21213lz4s_snappy_ml_loop: // add extension bytes to R9 while the byte read is 0xff
 21214	MOVBQZX (DX), DI
 21215	INCQ    DX
 21216	ADDQ    DI, R9
 21217	CMPQ    DX, BX
 21218	JAE     lz4s_snappy_corrupt
 21219	CMPQ    DI, $0xff
 21220	JEQ     lz4s_snappy_ml_loop
 21221
 21222lz4s_snappy_ml_done:
 21223	ADDQ R9, SI // account for the match
 21224
 21225	// emitCopy
 21226two_byte_offset_lz4_s2: // while length > 64, emit 60-byte copy ops
 21227	CMPL R9, $0x40
 21228	JBE  two_byte_offset_short_lz4_s2
 21229	MOVB $0xee, (AX) // tag 0xee followed by the 16-bit offset
 21230	MOVW R8, 1(AX)
 21231	LEAL -60(R9), R9 // 60 bytes of the match are covered by this op
 21232	ADDQ $0x03, AX
 21233	CMPQ AX, CX
 21234	JAE  lz4s_snappy_loop // dst limit reached: back to the loop head, which re-runs the src/dst checks
 21235	JMP  two_byte_offset_lz4_s2
 21236
 21237two_byte_offset_short_lz4_s2:
 21238	MOVL R9, DI
 21239	SHLL $0x02, DI // DI = length << 2
 21240	CMPL R9, $0x0c
 21241	JAE  emit_copy_three_lz4_s2 // length >= 12 needs the 3-byte form
 21242	CMPL R8, $0x00000800
 21243	JAE  emit_copy_three_lz4_s2 // offset >= 2048 needs the 3-byte form
 21244	LEAL -15(DI), DI // tag = ((length-4) << 2) | 1
 21245	MOVB R8, 1(AX) // low 8 bits of the offset
 21246	SHRL $0x08, R8
 21247	SHLL $0x05, R8
 21248	ORL  R8, DI // offset bits 8..10 go into tag bits 5..7
 21249	MOVB DI, (AX)
 21250	ADDQ $0x02, AX
 21251	JMP  lz4s_snappy_loop
 21252
 21253emit_copy_three_lz4_s2:
 21254	LEAL -2(DI), DI // tag = ((length-1) << 2) | 2
 21255	MOVB DI, (AX)
 21256	MOVW R8, 1(AX) // 16-bit offset
 21257	ADDQ $0x03, AX
 21258	JMP  lz4s_snappy_loop
 21259
 21260lz4s_snappy_done:
 21261	MOVQ dst_base+0(FP), CX
 21262	SUBQ CX, AX // AX = bytes written to dst
 21263	MOVQ SI, uncompressed+48(FP)
 21264	MOVQ AX, dstUsed+56(FP)
 21265	RET
 21266
 21267lz4s_snappy_corrupt: // return uncompressed = -1
 21268	XORQ AX, AX
 21269	LEAQ -1(AX), SI
 21270	MOVQ SI, uncompressed+48(FP)
 21271	RET
 21272
 21273lz4s_snappy_dstfull: // return uncompressed = -2
 21274	XORQ AX, AX
 21275	LEAQ -2(AX), SI
 21276	MOVQ SI, uncompressed+48(FP)
 21277	RET

View as plain text