gotosocial/vendor/github.com/tmthrgd/go-hex/hex_encode_amd64.s

228 lines
3.8 KiB
ArmAsm

// Copyright 2016 Tom Thorogood. All rights reserved.
// Use of this source code is governed by a
// Modified BSD License license that can be found in
// the LICENSE file.
//
// Copyright 2005-2016, Wojciech Muła. All rights reserved.
// Use of this source code is governed by a
// Simplified BSD License license that can be found in
// the LICENSE file.
//
// This file is auto-generated - do not modify

//go:build amd64 && !gccgo && !appengine
// +build amd64,!gccgo,!appengine

#include "textflag.h"

// encodeMask is a read-only 16-byte constant with 0x0f in every byte.
// ANDing a vector with it keeps only the low nibble of each byte.
DATA encodeMask<>+0x00(SB)/8, $0x0f0f0f0f0f0f0f0f
DATA encodeMask<>+0x08(SB)/8, $0x0f0f0f0f0f0f0f0f
GLOBL encodeMask<>(SB),RODATA,$16

// encodeAVX writes two bytes to dst for every byte b of src:
// alpha[b>>4] followed by alpha[b&0x0f], where alpha is a 16-byte table.
// It uses VEX-encoded three-operand instructions (VPAND, VPUNPCKHBW,
// VPSHUFB) alongside legacy SSE instructions.
//
// Arguments, read through the FP pseudo-register:
//   dst+0(FP)     destination pointer; 2*len bytes are written  -> DI
//   src+8(FP)     source pointer; len bytes are read            -> SI
//   len+16(FP)    number of source bytes                        -> BX
//   alpha+24(FP)  pointer to the 16-byte lookup table           -> DX (table in X15)
//
// NOTE(review): the Go declaration is not in this file; presumably
// func encodeAVX(dst, src *byte, len uint64, alpha *byte) - confirm.
//
// A len of zero returns immediately without reading src or alpha and
// without writing dst.
//
// Registers written: BX, DX, SI, DI, X0, X1, X2, X3, X15 and the flags.
TEXT ·encodeAVX(SB),NOSPLIT,$0
	MOVQ dst+0(FP), DI
	MOVQ src+8(FP), SI
	MOVQ len+16(FP), BX
	MOVQ alpha+24(FP), DX

	// Empty input: nothing to encode. Without this check a zero length
	// would take the 1-byte tail path below, which reads one byte from
	// src and stores two bytes to dst.
	TESTQ BX, BX
	JZ ret

	MOVOU (DX), X15 // X15 = 16-byte lookup table
	CMPQ BX, $16
	JB tail

	// Main loop: 16 source bytes -> 32 output bytes per iteration.
	// It works from the end of the buffers towards the start: each pass
	// handles src[BX-16:BX] and dst[2*BX-32:2*BX], then lowers BX by 16.
	// SI and DI are not modified here, so whatever is left (BX < 16)
	// sits at the start of src/dst for the tail code.
bigloop:
	MOVOU -16(SI)(BX*1), X0
	VPAND encodeMask<>(SB), X0, X1 // X1 = low nibble of every byte
	PSRLW $4, X0
	PAND encodeMask<>(SB), X0 // X0 = high nibble of every byte
	VPUNPCKHBW X1, X0, X3 // X3 = hi,lo nibble pairs for source bytes 8..15
	PUNPCKLBW X1, X0 // X0 = hi,lo nibble pairs for source bytes 0..7
	VPSHUFB X0, X15, X1 // X1[i] = table[X0[i]]
	VPSHUFB X3, X15, X2 // X2[i] = table[X3[i]]
	MOVOU X2, -16(DI)(BX*2)
	MOVOU X1, -32(DI)(BX*2)
	SUBQ $16, BX
	JZ ret
	CMPQ BX, $16
	JAE bigloop

	// Tail: 1 <= BX <= 15 bytes remain, starting at SI / DI.
	// Dispatch on BX; 8 or more bytes are handled as one 8-byte chunk
	// and the code loops back here for the rest.
tail:
	CMPQ BX, $2
	JB tail_in_1
	JE tail_in_2
	CMPQ BX, $4
	JB tail_in_3
	JE tail_in_4
	CMPQ BX, $6
	JB tail_in_5
	JE tail_in_6
	CMPQ BX, $8
	JB tail_in_7

	// The tail_in_N labels load exactly N source bytes into the low
	// lanes of X0. The labels fall through into each other, so the
	// instruction order matters. Lanes that are not loaded keep stale
	// data; the matching tail_out_N path never stores their results.
tail_in_8:
	MOVQ (SI), X0
	JMP tail_conv
tail_in_7:
	PINSRB $6, 6(SI), X0
tail_in_6:
	PINSRB $5, 5(SI), X0
tail_in_5:
	PINSRB $4, 4(SI), X0
tail_in_4:
	PINSRD $0, (SI), X0 // bytes 0..3 in one load
	JMP tail_conv
tail_in_3:
	PINSRB $2, 2(SI), X0
tail_in_2:
	PINSRB $1, 1(SI), X0
tail_in_1:
	PINSRB $0, (SI), X0

	// Same nibble split and table lookup as the main loop, but only the
	// low 8 source bytes are converted (16 output bytes in X1).
tail_conv:
	VPAND encodeMask<>(SB), X0, X1
	PSRLW $4, X0
	PAND encodeMask<>(SB), X0
	PUNPCKLBW X1, X0
	VPSHUFB X0, X15, X1
	CMPQ BX, $2
	JB tail_out_1
	JE tail_out_2
	CMPQ BX, $4
	JB tail_out_3
	JE tail_out_4
	CMPQ BX, $6
	JB tail_out_5
	JE tail_out_6
	CMPQ BX, $8
	JB tail_out_7

	// The tail_out_N labels store exactly 2*N output bytes, again using
	// fall-through between labels.
tail_out_8:
	MOVOU X1, (DI)
	SUBQ $8, BX
	JZ ret
	// 1..7 bytes are still pending: step past the chunk just written.
	ADDQ $8, SI
	ADDQ $16, DI
	JMP tail
tail_out_7:
	PEXTRB $13, X1, 13(DI)
	PEXTRB $12, X1, 12(DI)
tail_out_6:
	PEXTRB $11, X1, 11(DI)
	PEXTRB $10, X1, 10(DI)
tail_out_5:
	PEXTRB $9, X1, 9(DI)
	PEXTRB $8, X1, 8(DI)
tail_out_4:
	MOVQ X1, (DI) // output bytes 0..7 in one store
	RET
tail_out_3:
	PEXTRB $5, X1, 5(DI)
	PEXTRB $4, X1, 4(DI)
tail_out_2:
	PEXTRB $3, X1, 3(DI)
	PEXTRB $2, X1, 2(DI)
tail_out_1:
	PEXTRB $1, X1, 1(DI)
	PEXTRB $0, X1, (DI)
ret:
	RET

// encodeSSE writes two bytes to dst for every byte b of src:
// alpha[b>>4] followed by alpha[b&0x0f], where alpha is a 16-byte table.
// It performs the same steps as encodeAVX using only legacy-encoded SSE
// instructions, so each destructive two-operand instruction is preceded
// by an explicit register copy.
//
// Arguments, read through the FP pseudo-register:
//   dst+0(FP)     destination pointer; 2*len bytes are written  -> DI
//   src+8(FP)     source pointer; len bytes are read            -> SI
//   len+16(FP)    number of source bytes                        -> BX
//   alpha+24(FP)  pointer to the 16-byte lookup table           -> DX (table in X15)
//
// NOTE(review): the Go declaration is not in this file; presumably
// func encodeSSE(dst, src *byte, len uint64, alpha *byte) - confirm.
//
// A len of zero returns immediately without reading src or alpha and
// without writing dst.
//
// Registers written: BX, DX, SI, DI, X0, X1, X2, X3, X15 and the flags.
TEXT ·encodeSSE(SB),NOSPLIT,$0
	MOVQ dst+0(FP), DI
	MOVQ src+8(FP), SI
	MOVQ len+16(FP), BX
	MOVQ alpha+24(FP), DX

	// Empty input: nothing to encode. Without this check a zero length
	// would take the 1-byte tail path below, which reads one byte from
	// src and stores two bytes to dst.
	TESTQ BX, BX
	JZ ret

	MOVOU (DX), X15 // X15 = 16-byte lookup table
	CMPQ BX, $16
	JB tail

	// Main loop: 16 source bytes -> 32 output bytes per iteration.
	// It works from the end of the buffers towards the start: each pass
	// handles src[BX-16:BX] and dst[2*BX-32:2*BX], then lowers BX by 16.
	// SI and DI are not modified here, so whatever is left (BX < 16)
	// sits at the start of src/dst for the tail code.
bigloop:
	MOVOU -16(SI)(BX*1), X0
	MOVOU X0, X1
	PAND encodeMask<>(SB), X1 // X1 = low nibble of every byte
	PSRLW $4, X0
	PAND encodeMask<>(SB), X0 // X0 = high nibble of every byte
	MOVOU X0, X3
	PUNPCKHBW X1, X3 // X3 = hi,lo nibble pairs for source bytes 8..15
	PUNPCKLBW X1, X0 // X0 = hi,lo nibble pairs for source bytes 0..7
	// PSHUFB overwrites the register holding the table, so look up in a
	// copy of X15 each time.
	MOVOU X15, X1
	PSHUFB X0, X1 // X1[i] = table[X0[i]]
	MOVOU X15, X2
	PSHUFB X3, X2 // X2[i] = table[X3[i]]
	MOVOU X2, -16(DI)(BX*2)
	MOVOU X1, -32(DI)(BX*2)
	SUBQ $16, BX
	JZ ret
	CMPQ BX, $16
	JAE bigloop

	// Tail: 1 <= BX <= 15 bytes remain, starting at SI / DI.
	// Dispatch on BX; 8 or more bytes are handled as one 8-byte chunk
	// and the code loops back here for the rest.
tail:
	CMPQ BX, $2
	JB tail_in_1
	JE tail_in_2
	CMPQ BX, $4
	JB tail_in_3
	JE tail_in_4
	CMPQ BX, $6
	JB tail_in_5
	JE tail_in_6
	CMPQ BX, $8
	JB tail_in_7

	// The tail_in_N labels load exactly N source bytes into the low
	// lanes of X0. The labels fall through into each other, so the
	// instruction order matters. Lanes that are not loaded keep stale
	// data; the matching tail_out_N path never stores their results.
tail_in_8:
	MOVQ (SI), X0
	JMP tail_conv
tail_in_7:
	PINSRB $6, 6(SI), X0
tail_in_6:
	PINSRB $5, 5(SI), X0
tail_in_5:
	PINSRB $4, 4(SI), X0
tail_in_4:
	PINSRD $0, (SI), X0 // bytes 0..3 in one load
	JMP tail_conv
tail_in_3:
	PINSRB $2, 2(SI), X0
tail_in_2:
	PINSRB $1, 1(SI), X0
tail_in_1:
	PINSRB $0, (SI), X0

	// Same nibble split and table lookup as the main loop, but only the
	// low 8 source bytes are converted (16 output bytes in X1).
tail_conv:
	MOVOU X0, X1
	PAND encodeMask<>(SB), X1
	PSRLW $4, X0
	PAND encodeMask<>(SB), X0
	PUNPCKLBW X1, X0
	MOVOU X15, X1
	PSHUFB X0, X1
	CMPQ BX, $2
	JB tail_out_1
	JE tail_out_2
	CMPQ BX, $4
	JB tail_out_3
	JE tail_out_4
	CMPQ BX, $6
	JB tail_out_5
	JE tail_out_6
	CMPQ BX, $8
	JB tail_out_7

	// The tail_out_N labels store exactly 2*N output bytes, again using
	// fall-through between labels.
tail_out_8:
	MOVOU X1, (DI)
	SUBQ $8, BX
	JZ ret
	// 1..7 bytes are still pending: step past the chunk just written.
	ADDQ $8, SI
	ADDQ $16, DI
	JMP tail
tail_out_7:
	PEXTRB $13, X1, 13(DI)
	PEXTRB $12, X1, 12(DI)
tail_out_6:
	PEXTRB $11, X1, 11(DI)
	PEXTRB $10, X1, 10(DI)
tail_out_5:
	PEXTRB $9, X1, 9(DI)
	PEXTRB $8, X1, 8(DI)
tail_out_4:
	MOVQ X1, (DI) // output bytes 0..7 in one store
	RET
tail_out_3:
	PEXTRB $5, X1, 5(DI)
	PEXTRB $4, X1, 4(DI)
tail_out_2:
	PEXTRB $3, X1, 3(DI)
	PEXTRB $2, X1, 2(DI)
tail_out_1:
	PEXTRB $1, X1, 1(DI)
	PEXTRB $0, X1, (DI)
ret:
	RET