diff options
| author | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-29 08:29:26 -0800 |
|---|---|---|
| committer | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-29 11:37:03 -0800 |
| commit | 11c01de14c879ffc8dbac8ce32242a7552cbd4ad (patch) | |
| tree | 70ec1403eab4be667adb5ce5a4c50f5b9da87e07 /sysdeps | |
| parent | 146310177aa9f2c7d990ef856ed6e8bb94407f06 (diff) | |
| download | glibc-11c01de14c879ffc8dbac8ce32242a7552cbd4ad.tar.xz glibc-11c01de14c879ffc8dbac8ce32242a7552cbd4ad.zip | |
x86-64: Add vector asin/asinf implementation to libmvec
Implement vectorized asin/asinf for libmvec, with SSE, AVX, AVX2 and
AVX512 versions, as per the vector ABI. The patch also contains
accuracy and ABI tests for vector asin/asinf, with regenerated ulps.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Diffstat (limited to 'sysdeps')
48 files changed, 2177 insertions, 0 deletions
diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist index a93258db6f..ab03a07f92 100644 --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -47,18 +47,26 @@ GLIBC_2.22 _ZGVeN8v_sin F GLIBC_2.22 _ZGVeN8vv_pow F GLIBC_2.22 _ZGVeN8vvv_sincos F GLIBC_2.35 _ZGVbN2v_acos F +GLIBC_2.35 _ZGVbN2v_asin F GLIBC_2.35 _ZGVbN2v_atan F GLIBC_2.35 _ZGVbN4v_acosf F +GLIBC_2.35 _ZGVbN4v_asinf F GLIBC_2.35 _ZGVbN4v_atanf F GLIBC_2.35 _ZGVcN4v_acos F +GLIBC_2.35 _ZGVcN4v_asin F GLIBC_2.35 _ZGVcN4v_atan F GLIBC_2.35 _ZGVcN8v_acosf F +GLIBC_2.35 _ZGVcN8v_asinf F GLIBC_2.35 _ZGVcN8v_atanf F GLIBC_2.35 _ZGVdN4v_acos F +GLIBC_2.35 _ZGVdN4v_asin F GLIBC_2.35 _ZGVdN4v_atan F GLIBC_2.35 _ZGVdN8v_acosf F +GLIBC_2.35 _ZGVdN8v_asinf F GLIBC_2.35 _ZGVdN8v_atanf F GLIBC_2.35 _ZGVeN16v_acosf F +GLIBC_2.35 _ZGVeN16v_asinf F GLIBC_2.35 _ZGVeN16v_atanf F GLIBC_2.35 _ZGVeN8v_acos F +GLIBC_2.35 _ZGVeN8v_asin F GLIBC_2.35 _ZGVeN8v_atan F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index 1c0e5c5e35..73cb8849ff 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -66,6 +66,10 @@ # define __DECL_SIMD_atan __DECL_SIMD_x86_64 # undef __DECL_SIMD_atanf # define __DECL_SIMD_atanf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_asin +# define __DECL_SIMD_asin __DECL_SIMD_x86_64 +# undef __DECL_SIMD_asinf +# define __DECL_SIMD_asinf __DECL_SIMD_x86_64 # endif #endif diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h index ddcccb11d7..4552c2bdfa 100644 --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h @@ -32,6 +32,8 @@ !GCC$ builtin (acosf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (atan) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (atanf) attributes simd (notinbranch) if('x86_64') +!GCC$ 
builtin (asin) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (asinf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (cos) attributes simd (notinbranch) if('x32') !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') @@ -49,3 +51,5 @@ !GCC$ builtin (acosf) attributes simd (notinbranch) if('x32') !GCC$ builtin (atan) attributes simd (notinbranch) if('x32') !GCC$ builtin (atanf) attributes simd (notinbranch) if('x32') +!GCC$ builtin (asin) attributes simd (notinbranch) if('x32') +!GCC$ builtin (asinf) attributes simd (notinbranch) if('x32') diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig index dae0887f13..e0eae0b196 100644 --- a/sysdeps/x86_64/fpu/Makeconfig +++ b/sysdeps/x86_64/fpu/Makeconfig @@ -23,6 +23,7 @@ postclean-generated += libmvec.mk # Define for both math and mathvec directories. libmvec-funcs = \ acos \ + asin \ atan \ cos \ exp \ diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions index 424f6d526e..10baf869a5 100644 --- a/sysdeps/x86_64/fpu/Versions +++ b/sysdeps/x86_64/fpu/Versions @@ -15,8 +15,10 @@ libmvec { } GLIBC_2.35 { _ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos; + _ZGVbN2v_asin; _ZGVcN4v_asin; _ZGVdN4v_asin; _ZGVeN8v_asin; _ZGVbN2v_atan; _ZGVcN4v_atan; _ZGVdN4v_atan; _ZGVeN8v_atan; _ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf; + _ZGVbN4v_asinf; _ZGVcN8v_asinf; _ZGVdN8v_asinf; _ZGVeN16v_asinf; _ZGVbN4v_atanf; _ZGVcN8v_atanf; _ZGVdN8v_atanf; _ZGVeN16v_atanf; } } diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 2e64e59803..ea0f833381 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -93,6 +93,26 @@ float: 1 float128: 2 ldouble: 1 +Function: "asin_vlen16": +float: 1 + +Function: "asin_vlen2": +double: 1 + +Function: "asin_vlen4": +double: 1 +float: 1 + +Function: "asin_vlen4_avx2": +double: 1 + +Function: "asin_vlen8": +double: 1 +float: 1 + +Function: 
"asin_vlen8_avx2": +float: 1 + Function: "asinh": double: 2 float: 2 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S new file mode 100644 index 0000000000..57e1d41a7b --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized asin, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define _ZGVbN2v_asin _ZGVbN2v_asin_sse2 +#include "../svml_d_asin2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c new file mode 100644 index 0000000000..e46c3af81e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized asin, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_asin +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_asin, __GI__ZGVbN2v_asin, __redirect__ZGVbN2v_asin) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S new file mode 100644 index 0000000000..a6f7a41623 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S @@ -0,0 +1,288 @@ +/* Function asin vectorized with SSE4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. */ + +/* + * ALGORITHM DESCRIPTION: + * + * SelMask = (|x| >= 0.5) ? 1 : 0; + * R = SelMask ? sqrt(0.5 - 0.5*|x|) : |x| + * asin(x) = (SelMask ? 
(Pi/2 - 2*Poly(R)) : Poly(R))*(-1)^sign(x) + * + */ + +/* Offsets for data table __svml_dasin_data_internal + */ +#define AbsMask 0 +#define OneHalf 16 +#define SmallNorm 32 +#define One 48 +#define Two 64 +#define sqrt_coeff 80 +#define poly_coeff 144 +#define Pi2H 336 + +#include <sysdep.h> + + .text + .section .text.sse4,"ax",@progbits +ENTRY(_ZGVbN2v_asin_sse4) + subq $72, %rsp + cfi_def_cfa_offset(80) + movaps %xmm0, %xmm5 + movups __svml_dasin_data_internal(%rip), %xmm3 + movups OneHalf+__svml_dasin_data_internal(%rip), %xmm8 + +/* x = |arg| */ + movaps %xmm3, %xmm4 + andps %xmm5, %xmm4 + +/* Y = 0.5 - 0.5*x */ + movaps %xmm8, %xmm6 + mulpd %xmm4, %xmm6 + movaps %xmm8, %xmm14 + +/* x^2 */ + movaps %xmm4, %xmm2 + subpd %xmm6, %xmm14 + mulpd %xmm4, %xmm2 + +/* S ~ -2*sqrt(Y) */ + cvtpd2ps %xmm14, %xmm9 + minpd %xmm14, %xmm2 + movlhps %xmm9, %xmm9 + movaps %xmm14, %xmm15 + rsqrtps %xmm9, %xmm10 + cmpltpd SmallNorm+__svml_dasin_data_internal(%rip), %xmm15 + addpd %xmm14, %xmm14 + cvtps2pd %xmm10, %xmm11 + andnps %xmm11, %xmm15 + movaps %xmm4, %xmm1 + movaps %xmm15, %xmm12 + andnps %xmm5, %xmm3 + mulpd %xmm15, %xmm12 + mulpd %xmm14, %xmm15 + mulpd %xmm12, %xmm14 + cmpnltpd %xmm8, %xmm1 + subpd Two+__svml_dasin_data_internal(%rip), %xmm14 + +/* polynomial */ + movups poly_coeff+__svml_dasin_data_internal(%rip), %xmm6 + movaps %xmm2, %xmm12 + mulpd %xmm2, %xmm6 + mulpd %xmm2, %xmm12 + addpd poly_coeff+16+__svml_dasin_data_internal(%rip), %xmm6 + movups One+__svml_dasin_data_internal(%rip), %xmm7 + movaps %xmm12, %xmm8 |
