// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// Minimax polynomial coefficients and other constants
DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00
|
|
DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00
|
|
DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00
|
|
DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00
|
|
DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00
|
|
DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00
|
|
DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625
|
|
DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00
|
|
DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.
|
|
GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72
|
|
|
|
// Index tables
|
|
DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
|
|
DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
|
|
DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
|
|
DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
|
|
DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
|
|
DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
|
|
DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
|
|
DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
|
|
DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
|
|
DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
|
|
GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80
|
|
|
|
DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
|
|
DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
|
|
DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
|
|
DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
|
|
DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
|
|
DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
|
|
DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
|
|
DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
|
|
DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
|
|
DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
|
|
DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
|
|
DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
|
|
DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
|
|
DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
|
|
DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
|
|
DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
|
|
GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128
|
|
|
|
DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
|
|
DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
|
|
DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
|
|
DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
|
|
DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
|
|
DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
|
|
DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
|
|
DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
|
|
DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
|
|
DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
|
|
DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
|
|
DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
|
|
DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
|
|
DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
|
|
DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
|
|
DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
|
|
GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128
|
|
|
|
// Cbrt returns the cube root of the argument.
|
|
//
|
|
// Special cases are:
|
|
// Cbrt(±0) = ±0
|
|
// Cbrt(±Inf) = ±Inf
|
|
// Cbrt(NaN) = NaN
|
|
// The algorithm used is minimax polynomial approximation
|
|
// with coefficients determined with a Remez exchange algorithm.
|
|
|
|
TEXT ·cbrtAsm(SB), NOSPLIT, $0-16
|
|
FMOVD x+0(FP), F0
|
|
MOVD $·cbrtrodataL9<>+0(SB), R9
|
|
LGDR F0, R2
|
|
WORD $0xC039000F //iilf %r3,1048575
|
|
BYTE $0xFF
|
|
BYTE $0xFF
|
|
SRAD $32, R2
|
|
WORD $0xB9170012 //llgtr %r1,%r2
|
|
MOVW R1, R6
|
|
MOVW R3, R7
|
|
CMPBLE R6, R7, L2
|
|
WORD $0xC0397FEF //iilf %r3,2146435071
|
|
BYTE $0xFF
|
|
BYTE $0xFF
|
|
MOVW R3, R7
|
|
CMPBLE R6, R7, L8
|
|
L1:
|
|
FMOVD F0, ret+8(FP)
|
|
RET
|
|
L3:
|
|
L2:
|
|
LTDBR F0, F0
|
|
BEQ L1
|
|
FMOVD F0, F2
|
|
WORD $0xED209040 //mdb %f2,.L10-.L9(%r9)
|
|
BYTE $0x00
|
|
BYTE $0x1C
|
|
MOVH $0x200, R4
|
|
LGDR F2, R2
|
|
SRAD $32, R2
|
|
L4:
|
|
RISBGZ $57, $62, $39, R2, R3
|
|
MOVD $·cbrttab12067<>+0(SB), R1
|
|
WORD $0x48131000 //lh %r1,0(%r3,%r1)
|
|
RISBGZ $57, $62, $45, R2, R3
|
|
MOVD $·cbrttab22068<>+0(SB), R5
|
|
RISBGNZ $60, $63, $48, R2, R2
|
|
WORD $0x4A135000 //ah %r1,0(%r3,%r5)
|
|
BYTE $0x18 //lr %r3,%r1
|
|
BYTE $0x31
|
|
MOVD $·cbrttab32069<>+0(SB), R1
|
|
FMOVD 56(R9), F1
|
|
FMOVD 48(R9), F5
|
|
WORD $0xEC23393B //rosbg %r2,%r3,57,59,4
|
|
BYTE $0x04
|
|
BYTE $0x56
|
|
WORD $0xE3121000 //llc %r1,0(%r2,%r1)
|
|
BYTE $0x00
|
|
BYTE $0x94
|
|
ADDW R3, R1
|
|
ADDW R4, R1
|
|
SLW $16, R1, R1
|
|
SLD $32, R1, R1
|
|
LDGR R1, F2
|
|
WFMDB V2, V2, V4
|
|
WFMDB V4, V0, V6
|
|
WFMSDB V4, V6, V2, V4
|
|
FMOVD 40(R9), F6
|
|
FMSUB F1, F4, F2
|
|
FMOVD 32(R9), F4
|
|
WFMDB V2, V2, V3
|
|
FMOVD 24(R9), F1
|
|
FMUL F3, F0
|
|
FMOVD 16(R9), F3
|
|
WFMADB V2, V0, V5, V2
|
|
FMOVD 8(R9), F5
|
|
FMADD F6, F2, F4
|
|
WFMADB V2, V1, V3, V1
|
|
WFMDB V2, V2, V6
|
|
FMOVD 0(R9), F3
|
|
WFMADB V4, V6, V1, V4
|
|
WFMADB V2, V5, V3, V2
|
|
FMADD F4, F6, F2
|
|
FMADD F2, F0, F0
|
|
FMOVD F0, ret+8(FP)
|
|
RET
|
|
L8:
|
|
MOVH $0x0, R4
|
|
BR L4
|