diff options
50 files changed, 1741 insertions, 1 deletions
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index 2ccdd1fc53..b4647ca918 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -109,4 +109,15 @@ #define __DECL_SIMD_acosf32x #define __DECL_SIMD_acosf64x #define __DECL_SIMD_acosf128x + +#define __DECL_SIMD_atan +#define __DECL_SIMD_atanf +#define __DECL_SIMD_atanl +#define __DECL_SIMD_atanf16 +#define __DECL_SIMD_atanf32 +#define __DECL_SIMD_atanf64 +#define __DECL_SIMD_atanf128 +#define __DECL_SIMD_atanf32x +#define __DECL_SIMD_atanf64x +#define __DECL_SIMD_atanf128x #endif diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 2cc6654208..3e27c21f21 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -54,7 +54,7 @@ __MATHCALL_VEC (acos,, (_Mdouble_ __x)); /* Arc sine of X. */ __MATHCALL (asin,, (_Mdouble_ __x)); /* Arc tangent of X. */ -__MATHCALL (atan,, (_Mdouble_ __x)); +__MATHCALL_VEC (atan,, (_Mdouble_ __x)); /* Arc tangent of Y/X. */ __MATHCALL (atan2,, (_Mdouble_ __y, _Mdouble_ __x)); diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist index b37b55777e..a93258db6f 100644 --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -47,10 +47,18 @@ GLIBC_2.22 _ZGVeN8v_sin F GLIBC_2.22 _ZGVeN8vv_pow F GLIBC_2.22 _ZGVeN8vvv_sincos F GLIBC_2.35 _ZGVbN2v_acos F +GLIBC_2.35 _ZGVbN2v_atan F GLIBC_2.35 _ZGVbN4v_acosf F +GLIBC_2.35 _ZGVbN4v_atanf F GLIBC_2.35 _ZGVcN4v_acos F +GLIBC_2.35 _ZGVcN4v_atan F GLIBC_2.35 _ZGVcN8v_acosf F +GLIBC_2.35 _ZGVcN8v_atanf F GLIBC_2.35 _ZGVdN4v_acos F +GLIBC_2.35 _ZGVdN4v_atan F GLIBC_2.35 _ZGVdN8v_acosf F +GLIBC_2.35 _ZGVdN8v_atanf F GLIBC_2.35 _ZGVeN16v_acosf F +GLIBC_2.35 _ZGVeN16v_atanf F GLIBC_2.35 _ZGVeN8v_acos F +GLIBC_2.35 _ZGVeN8v_atan F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index dabb74cbb9..1c0e5c5e35 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h 
+++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -62,6 +62,10 @@ # define __DECL_SIMD_acos __DECL_SIMD_x86_64 # undef __DECL_SIMD_acosf # define __DECL_SIMD_acosf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_atan +# define __DECL_SIMD_atan __DECL_SIMD_x86_64 +# undef __DECL_SIMD_atanf +# define __DECL_SIMD_atanf __DECL_SIMD_x86_64 # endif #endif diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h index 4bcbd1fbce..ddcccb11d7 100644 --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h @@ -30,6 +30,8 @@ !GCC$ builtin (powf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (acos) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (acosf) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (atan) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (atanf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (cos) attributes simd (notinbranch) if('x32') !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') @@ -45,3 +47,5 @@ !GCC$ builtin (powf) attributes simd (notinbranch) if('x32') !GCC$ builtin (acos) attributes simd (notinbranch) if('x32') !GCC$ builtin (acosf) attributes simd (notinbranch) if('x32') +!GCC$ builtin (atan) attributes simd (notinbranch) if('x32') +!GCC$ builtin (atanf) attributes simd (notinbranch) if('x32') diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig index 7acf1f306c..dae0887f13 100644 --- a/sysdeps/x86_64/fpu/Makeconfig +++ b/sysdeps/x86_64/fpu/Makeconfig @@ -23,6 +23,7 @@ postclean-generated += libmvec.mk # Define for both math and mathvec directories. 
libmvec-funcs = \ acos \ + atan \ cos \ exp \ log \ diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions index 2985fe7ca7..424f6d526e 100644 --- a/sysdeps/x86_64/fpu/Versions +++ b/sysdeps/x86_64/fpu/Versions @@ -15,6 +15,8 @@ libmvec { } GLIBC_2.35 { _ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos; + _ZGVbN2v_atan; _ZGVcN4v_atan; _ZGVdN4v_atan; _ZGVeN8v_atan; _ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf; + _ZGVbN4v_atanf; _ZGVcN8v_atanf; _ZGVdN8v_atanf; _ZGVeN16v_atanf; } } diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 6c12976c82..2e64e59803 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -164,6 +164,26 @@ float: 2 float128: 2 ldouble: 1 +Function: "atan_vlen16": +float: 1 + +Function: "atan_vlen2": +double: 1 + +Function: "atan_vlen4": +double: 1 +float: 1 + +Function: "atan_vlen4_avx2": +double: 1 + +Function: "atan_vlen8": +double: 1 +float: 1 + +Function: "atan_vlen8_avx2": +float: 1 + Function: "atanh": double: 2 float: 2 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S new file mode 100644 index 0000000000..115e5223aa --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized atan, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define _ZGVbN2v_atan _ZGVbN2v_atan_sse2 +#include "../svml_d_atan2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c new file mode 100644 index 0000000000..93f079ffcb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized atan, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_atan +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_atan, __GI__ZGVbN2v_atan, __redirect__ZGVbN2v_atan) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S new file mode 100644 index 0000000000..f0ad036b9e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S @@ -0,0 +1,245 @@ +/* Function atan vectorized with SSE4. 
+ Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. */ + +/* + * ALGORITHM DESCRIPTION: + * + * For 0.0 <= x <= 7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x) + * For 7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x) + * For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x) + * For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x) + * For 39.0/16.0 <= x <= inf : atan(x) = atan(inf) + atan(s), where s=-1.0/x + * Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0. 
+ * + */ + +/* Offsets for data table __svml_datan_data_internal_avx512 + */ +#define AbsMask 0 +#define Shifter 16 +#define MaxThreshold 32 +#define MOne 48 +#define One 64 +#define LargeX 80 +#define Zero 96 +#define Tbl_H 112 +#define Tbl_L 368 +#define dIndexMed 624 +#define Pi2 640 +#define Pi2_low 656 +#define coeff 672 + +#include <sysdep.h> + + .text + .section .text.sse4,"ax",@progbits +ENTRY(_ZGVbN2v_atan_sse4) + lea Tbl_H+128+__svml_datan_data_internal_avx512(%rip), %rcx + movups __svml_datan_data_internal_avx512(%rip), %xmm4 + movups Shifter+__svml_datan_data_internal_avx512(%rip), %xmm3 + andps %xmm0, %xmm4 + movaps %xmm3, %xmm12 + movaps %xmm4, %xmm5 + addpd %xmm4, %xmm12 + movaps %xmm12, %xmm7 + +/* + * table lookup sequence + * VPERMUTE not available + */ + movaps %xmm12, %xmm10 + subpd %xmm3, %xmm7 + subpd %xmm7, %xmm5 + mulpd %xmm4, %xmm7 + movups MaxThreshold+__svml_datan_data_internal_avx512(%rip), %xmm2 + psllq $3, %xmm10 + +/* saturate X range */ + movups LargeX+__svml_datan_data_internal_avx512(%rip), %xmm8 + pxor %xmm4, %xmm0 + cmplepd %xmm4, %xmm2 + addpd One+__svml_datan_data_internal_avx512(%rip), %xmm7 + minpd %xmm4, %xmm8 + movups MOne+__svml_datan_data_intern |
