mirror of https://go.googlesource.com/go
217 lines
5.5 KiB
ArmAsm
217 lines
5.5 KiB
ArmAsm
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
#include "go_asm.h"
|
|
#include "textflag.h"
|
|
|
|
// Index loads the two (pointer, length) pairs found at a_base+0(FP) and
// b_base+24(FP), takes the address of the result slot at ret+48(FP), and
// tail-branches to the shared search routine indexbody<>.
//
// Registers handed to indexbody<>:
//   R1=&s[0], R2=len(s), R3=&sep[0], R4=len(sep), R5=&ret
//
// Caller must confirm availability of vx facility before calling.
TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
	MOVD	a_base+0(FP), R1  // R1=&s[0]
	MOVD	a_len+8(FP), R2   // R2=len(s)
	MOVD	b_base+24(FP), R3 // R3=&sep[0]
	MOVD	b_len+32(FP), R4  // R4=len(sep)
	MOVD	$ret+48(FP), R5   // R5=&ret; indexbody<> stores the result through it
	BR	indexbody<>(SB)
|
|
|
|
// IndexString loads the two (pointer, length) pairs found at a_base+0(FP)
// and b_base+16(FP), takes the address of the result slot at ret+32(FP),
// and tail-branches to the shared search routine indexbody<>.
//
// Registers handed to indexbody<>:
//   R1=&s[0], R2=len(s), R3=&sep[0], R4=len(sep), R5=&ret
//
// Caller must confirm availability of vx facility before calling.
TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	a_base+0(FP), R1  // R1=&s[0]
	MOVD	a_len+8(FP), R2   // R2=len(s)
	MOVD	b_base+16(FP), R3 // R3=&sep[0]
	MOVD	b_len+24(FP), R4  // R4=len(sep)
	MOVD	$ret+32(FP), R5   // R5=&ret; indexbody<> stores the result through it
	BR	indexbody<>(SB)
|
|
|
|
// indexbody<> searches s for the first occurrence of sep and stores the
// byte offset of the match, or -1 when there is none, through R5.
//
// s: string we are searching
// sep: string to search for
//
// Inputs:
//   R1=&s[0],   R2=len(s)
//   R3=&sep[0], R4=len(sep)
//   R5=&ret (int)
//
// Register roles after the prologue:
//   R2  = &s[len(s)-len(sep)], the last address at which a match may start
//   R4  = len(sep)-1, used directly as a VLL byte index
//   R7  = address of the candidate currently being examined
//   R9  = R7+15, used to decide whether a full 16-candidate step fits
//   R0  = VLL byte index for the trailing partial vector (length-specific)
//   V0  = first min(len(sep), 16) bytes of sep
//
// Strategy by len(sep):
//   2, 3, 4   16 candidates per iteration (index2loop/index3loop/index4loop);
//             the remainder is finished by the index2to16 path.
//   5..16     one candidate per compare, unrolled 2x (index2to16loop).
//   17..64    one candidate per iteration using 2, 3 or 4 vector compares.
//
// Results that are not an index:
//   len(sep) > len(s) or len(sep) == 0 stores -1.
//   len(sep) > 64 is not implemented: that path stores through R0 instead
//   of producing a result, so callers must not pass separators that long.
//
// Caller must confirm availability of vx facility before calling.
TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBGT	R4, R2, notfound   // sep longer than s: no match possible
	ADD	R1, R2
	SUB	R4, R2             // R2=&s[len(s)-len(sep)] (last valid index)
	CMPBEQ	R4, $0, notfound
	SUB	$1, R4             // R4=len(sep)-1 for use as VLL index
	VLL	R4, (R3), V0       // contains first 16 bytes of sep
	MOVD	R1, R7             // R7=first candidate position

	// len(sep) == 2
index2plus:
	CMPBNE	R4, $1, index3plus
	MOVD	$15(R7), R9
	CMPBGE	R9, R2, index2to16 // too short for a 16-candidate step
	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1         // replicate the 2-byte sep into every halfword
index2loop:
	VL	0(R7), V2          // 16 bytes, even indices
	VL	1(R7), V4          // 16 bytes, odd indices
	VCEQH	V1, V2, V5         // compare even indices
	VCEQH	V1, V4, V6         // compare odd indices
	VSEL	V5, V6, V31, V7    // merge even and odd indices
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // fewer than 16 candidates left: finish one at a time
	BR	notfound

	// len(sep) == 3
index3plus:
	CMPBNE	R4, $2, index4plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16 // too short for a 16-candidate step
	MOVD	$1, R0             // VLL index 1 => 2 bytes
	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1         // first two bytes of sep in every halfword
	VREPB	$2, V0, V8         // third byte of sep in every byte
index3loop:
	VL	(R7), V2           // load 16-bytes into V2
	VLL	R0, 16(R7), V3     // load 2-bytes into V3
	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
	VCEQH	V1, V2, V5         // compare 2-byte even indices
	VCEQH	V1, V4, V6         // compare 2-byte odd indices
	VCEQB	V8, V9, V10        // compare last bytes
	VSEL	V5, V6, V31, V7    // merge even and odd indices
	VN	V7, V10, V7        // AND indices with last byte
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // fewer than 16 candidates left: finish one at a time
	BR	notfound

	// len(sep) == 4
index4plus:
	CMPBNE	R4, $3, index5plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16 // too short for a 16-candidate step
	MOVD	$2, R0             // VLL index 2 => 3 bytes
	VGBM	$0x8888, V29       // 0xff000000ff000000...
	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
	VONE	V16
	VREPF	$0, V0, V1         // replicate the 4-byte sep into every word
index4loop:
	VL	(R7), V2           // load 16-bytes into V2
	VLL	R0, 16(R7), V3     // load 3-bytes into V3
	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
	VCEQF	V1, V2, V5         // compare index 0, 4, ...
	VCEQF	V1, V4, V6         // compare index 1, 5, ...
	VCEQF	V1, V9, V11        // compare index 2, 6, ...
	VCEQF	V1, V10, V12       // compare index 3, 7, ...
	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
	VSEL	V13, V14, V31, V7  // final merge
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // fewer than 16 candidates left: finish one at a time
	BR	notfound

	// 5 <= len(sep) <= 16, and the remainder of the 2/3/4-byte searches.
	// Each candidate is loaded with VLL (R4+1 bytes) and compared against V0.
index5plus:
	CMPBGT	R4, $15, index17plus
index2to16:
	CMPBGT	R7, R2, notfound
	MOVD	$1(R7), R8
	CMPBGT	R8, R2, index2to16tail // only one candidate left
index2to16loop:
	// unrolled 2x
	VLL	R4, (R7), V1
	VLL	R4, 1(R7), V2
	VCEQGS	V0, V1, V3
	BEQ	found
	MOVD	$1(R7), R7
	VCEQGS	V0, V2, V4
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLT	R7, R2, index2to16loop // at least two candidates remain
	CMPBGT	R7, R2, notfound
index2to16tail:
	// exactly one candidate left (R7 == R2)
	VLL	R4, (R7), V1
	VCEQGS	V0, V1, V2
	BEQ	found
	BR	notfound

	// 17 <= len(sep) <= 32: V0 plus a partial vector V1.
index17plus:
	CMPBGT	R4, $31, index33plus
	SUB	$16, R4, R0        // R0=VLL index for the bytes beyond the first 16
	VLL	R0, 16(R3), V1
	VONE	V7
index17to32loop:
	VL	(R7), V2
	VLL	R0, 16(R7), V3
	VCEQG	V0, V2, V4
	VCEQG	V1, V3, V5
	VN	V4, V5, V6         // all-ones only if both chunks matched
	VCEQGS	V6, V7, V8
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index17to32loop
	BR	notfound

	// 33 <= len(sep) <= 48: V0, V1 plus a partial vector V2.
index33plus:
	CMPBGT	R4, $47, index49plus
	SUB	$32, R4, R0        // R0=VLL index for the bytes beyond the first 32
	VL	16(R3), V1
	VLL	R0, 32(R3), V2
	VONE	V11
index33to48loop:
	VL	(R7), V3
	VL	16(R7), V4
	VLL	R0, 32(R7), V5
	VCEQG	V0, V3, V6
	VCEQG	V1, V4, V7
	VCEQG	V2, V5, V8
	VN	V6, V7, V9
	VN	V8, V9, V10        // all-ones only if all three chunks matched
	VCEQGS	V10, V11, V12
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index33to48loop
	BR	notfound

	// 49 <= len(sep) <= 64: V0, V1, V2 plus a partial vector V3.
index49plus:
	CMPBGT	R4, $63, index65plus
	SUB	$48, R4, R0        // R0=VLL index for the bytes beyond the first 48
	VL	16(R3), V1
	VL	32(R3), V2
	VLL	R0, 48(R3), V3
	VONE	V15
index49to64loop:
	VL	(R7), V4
	VL	16(R7), V5
	VL	32(R7), V6
	VLL	R0, 48(R7), V7
	VCEQG	V0, V4, V8
	VCEQG	V1, V5, V9
	VCEQG	V2, V6, V10
	VCEQG	V3, V7, V11
	VN	V8, V9, V12
	VN	V10, V11, V13
	VN	V12, V13, V14      // all-ones only if all four chunks matched
	VCEQGS	V14, V15, V16
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index49to64loop
	// fall through: candidates exhausted
notfound:
	MOVD	$-1, (R5)
	RET

index65plus:
	// not implemented
	MOVD	$0, (R0)
	RET

foundV17: // index is in doubleword V17[0]
	VLGVG	$0, V17, R8
	ADD	R8, R7             // R7=address of the match within this 16-byte step
found:
	SUB	R1, R7             // convert match address to an offset into s
	MOVD	R7, (R5)
	RET
|