diff options
| author | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-29 10:19:39 -0800 |
|---|---|---|
| committer | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-30 10:19:13 -0800 |
| commit | c21c7bc24ec58c516d1608695d9c5a86f2f2fc0c (patch) | |
| tree | 0aa72249cd873496766c00736e08bd13ddd23b9d | |
| parent | 8881cca8fb8d3a7ee89d174017dd27eded90366c (diff) | |
| download | glibc-c21c7bc24ec58c516d1608695d9c5a86f2f2fc0c.tar.xz glibc-c21c7bc24ec58c516d1608695d9c5a86f2f2fc0c.zip | |
x86-64: Add vector tan/tanf implementation to libmvec
Implement vectorized tan/tanf containing SSE, AVX, AVX2 and
AVX512 versions for libmvec as per vector ABI. It also contains
accuracy and ABI tests for vector tan/tanf with regenerated ulps.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
50 files changed, 21913 insertions, 1 deletions
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index e716664306..4b63a53742 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -318,4 +318,15 @@ #define __DECL_SIMD_erfcf32x #define __DECL_SIMD_erfcf64x #define __DECL_SIMD_erfcf128x + +#define __DECL_SIMD_tan +#define __DECL_SIMD_tanf +#define __DECL_SIMD_tanl +#define __DECL_SIMD_tanf16 +#define __DECL_SIMD_tanf32 +#define __DECL_SIMD_tanf64 +#define __DECL_SIMD_tanf128 +#define __DECL_SIMD_tanf32x +#define __DECL_SIMD_tanf64x +#define __DECL_SIMD_tanf128x #endif diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 24e28b5c4f..999ae4d703 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -63,7 +63,7 @@ __MATHCALL_VEC (cos,, (_Mdouble_ __x)); /* Sine of X. */ __MATHCALL_VEC (sin,, (_Mdouble_ __x)); /* Tangent of X. */ -__MATHCALL (tan,, (_Mdouble_ __x)); +__MATHCALL_VEC (tan,, (_Mdouble_ __x)); /* Hyperbolic functions. */ diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist index e9e98bab65..055530f3ab 100644 --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -63,6 +63,7 @@ GLIBC_2.35 _ZGVbN2v_log10 F GLIBC_2.35 _ZGVbN2v_log1p F GLIBC_2.35 _ZGVbN2v_log2 F GLIBC_2.35 _ZGVbN2v_sinh F +GLIBC_2.35 _ZGVbN2v_tan F GLIBC_2.35 _ZGVbN2v_tanh F GLIBC_2.35 _ZGVbN2vv_atan2 F GLIBC_2.35 _ZGVbN2vv_hypot F @@ -83,6 +84,7 @@ GLIBC_2.35 _ZGVbN4v_log10f F GLIBC_2.35 _ZGVbN4v_log1pf F GLIBC_2.35 _ZGVbN4v_log2f F GLIBC_2.35 _ZGVbN4v_sinhf F +GLIBC_2.35 _ZGVbN4v_tanf F GLIBC_2.35 _ZGVbN4v_tanhf F GLIBC_2.35 _ZGVbN4vv_atan2f F GLIBC_2.35 _ZGVbN4vv_hypotf F @@ -103,6 +105,7 @@ GLIBC_2.35 _ZGVcN4v_log10 F GLIBC_2.35 _ZGVcN4v_log1p F GLIBC_2.35 _ZGVcN4v_log2 F GLIBC_2.35 _ZGVcN4v_sinh F +GLIBC_2.35 _ZGVcN4v_tan F GLIBC_2.35 _ZGVcN4v_tanh F GLIBC_2.35 _ZGVcN4vv_atan2 F GLIBC_2.35 _ZGVcN4vv_hypot F @@ -123,6 +126,7 @@ GLIBC_2.35 _ZGVcN8v_log10f F GLIBC_2.35 _ZGVcN8v_log1pf F GLIBC_2.35 _ZGVcN8v_log2f F GLIBC_2.35 _ZGVcN8v_sinhf F +GLIBC_2.35 _ZGVcN8v_tanf F GLIBC_2.35 _ZGVcN8v_tanhf F GLIBC_2.35 _ZGVcN8vv_atan2f F GLIBC_2.35 _ZGVcN8vv_hypotf F @@ -143,6 +147,7 @@ GLIBC_2.35 _ZGVdN4v_log10 F GLIBC_2.35 _ZGVdN4v_log1p F GLIBC_2.35 _ZGVdN4v_log2 F GLIBC_2.35 _ZGVdN4v_sinh F +GLIBC_2.35 _ZGVdN4v_tan F GLIBC_2.35 _ZGVdN4v_tanh F GLIBC_2.35 _ZGVdN4vv_atan2 F GLIBC_2.35 _ZGVdN4vv_hypot F @@ -163,6 +168,7 @@ GLIBC_2.35 _ZGVdN8v_log10f F GLIBC_2.35 _ZGVdN8v_log1pf F GLIBC_2.35 _ZGVdN8v_log2f F GLIBC_2.35 _ZGVdN8v_sinhf F +GLIBC_2.35 _ZGVdN8v_tanf F GLIBC_2.35 _ZGVdN8v_tanhf F GLIBC_2.35 _ZGVdN8vv_atan2f F GLIBC_2.35 _ZGVdN8vv_hypotf F @@ -183,6 +189,7 @@ GLIBC_2.35 _ZGVeN16v_log10f F GLIBC_2.35 _ZGVeN16v_log1pf F GLIBC_2.35 _ZGVeN16v_log2f F GLIBC_2.35 _ZGVeN16v_sinhf F +GLIBC_2.35 _ZGVeN16v_tanf F GLIBC_2.35 _ZGVeN16v_tanhf F GLIBC_2.35 _ZGVeN16vv_atan2f F GLIBC_2.35 _ZGVeN16vv_hypotf F @@ -203,6 +210,7 @@ GLIBC_2.35 _ZGVeN8v_log10 F GLIBC_2.35 _ZGVeN8v_log1p F GLIBC_2.35 _ZGVeN8v_log2 F GLIBC_2.35 _ZGVeN8v_sinh F +GLIBC_2.35 _ZGVeN8v_tan F GLIBC_2.35 _ZGVeN8v_tanh F GLIBC_2.35 _ZGVeN8vv_atan2 F GLIBC_2.35 _ZGVeN8vv_hypot F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index 9a55e2e542..8de1d6aee9 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -138,6 +138,10 @@ # define __DECL_SIMD_erfc __DECL_SIMD_x86_64 # undef __DECL_SIMD_erfcf # define __DECL_SIMD_erfcf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_tan +# define __DECL_SIMD_tan __DECL_SIMD_x86_64 +# undef __DECL_SIMD_tanf +# define __DECL_SIMD_tanf __DECL_SIMD_x86_64 # endif #endif diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h index 818134dc75..d2735fd064 100644 --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h @@ -68,6 +68,8 @@ !GCC$ builtin (asinhf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (erfc) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (erfcf) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (tan) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (tanf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (cos) attributes simd (notinbranch) if('x32') !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') @@ -121,3 +123,5 @@ !GCC$ builtin (asinhf) attributes simd (notinbranch) if('x32') !GCC$ builtin (erfc) attributes simd (notinbranch) if('x32') !GCC$ builtin (erfcf) attributes simd (notinbranch) if('x32') +!GCC$ builtin (tan) attributes simd (notinbranch) if('x32') +!GCC$ builtin (tanf) attributes simd (notinbranch) if('x32') diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig index be1d6cbb92..7d76f7b55d 100644 --- a/sysdeps/x86_64/fpu/Makeconfig +++ b/sysdeps/x86_64/fpu/Makeconfig @@ -47,6 +47,7 @@ libmvec-funcs = \ sin \ sincos \ sinh \ + tan \ tanh \ # Define libmvec function for benchtests directory. diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions index b10ae69894..63d6b61d0b 100644 --- a/sysdeps/x86_64/fpu/Versions +++ b/sysdeps/x86_64/fpu/Versions @@ -31,6 +31,7 @@ libmvec { _ZGVbN2v_log1p; _ZGVcN4v_log1p; _ZGVdN4v_log1p; _ZGVeN8v_log1p; _ZGVbN2v_log2; _ZGVcN4v_log2; _ZGVdN4v_log2; _ZGVeN8v_log2; _ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh; + _ZGVbN2v_tan; _ZGVcN4v_tan; _ZGVdN4v_tan; _ZGVeN8v_tan; _ZGVbN2v_tanh; _ZGVcN4v_tanh; _ZGVdN4v_tanh; _ZGVeN8v_tanh; _ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2; _ZGVbN2vv_hypot; _ZGVcN4vv_hypot; _ZGVdN4vv_hypot; _ZGVeN8vv_hypot; @@ -51,6 +52,7 @@ libmvec { _ZGVbN4v_log1pf; _ZGVcN8v_log1pf; _ZGVdN8v_log1pf; _ZGVeN16v_log1pf; _ZGVbN4v_log2f; _ZGVcN8v_log2f; _ZGVdN8v_log2f; _ZGVeN16v_log2f; _ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf; + _ZGVbN4v_tanf; _ZGVcN8v_tanf; _ZGVdN8v_tanf; _ZGVeN16v_tanf; _ZGVbN4v_tanhf; _ZGVcN8v_tanhf; _ZGVdN8v_tanhf; _ZGVeN16v_tanhf; _ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f; _ZGVbN4vv_hypotf; _ZGVcN8vv_hypotf; _ZGVdN8vv_hypotf; _ZGVeN16vv_hypotf; diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index f3ee98358f..325e4b5b84 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -2080,6 +2080,26 @@ float: 1 float128: 1 ldouble: 2 +Function: "tan_vlen16": +float: 1 + +Function: "tan_vlen2": +double: 2 + +Function: "tan_vlen4": +double: 2 +float: 2 + +Function: "tan_vlen4_avx2": +double: 1 + +Function: "tan_vlen8": +double: 2 +float: 2 + +Function: "tan_vlen8_avx2": +float: 2 + Function: "tanh": double: 2 float: 2 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core-sse2.S new file mode 100644 index 0000000000..a8f5a390cd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized tan, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define _ZGVbN2v_tan _ZGVbN2v_tan_sse2 +#include "../svml_d_tan2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core.c new file mode 100644 index 0000000000..4c41025a23 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized tan, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_tan +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_tan, __GI__ZGVbN2v_tan, __redirect__ZGVbN2v_tan) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S new file mode 100644 index 0000000000..2c8bdb59d9 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S @@ -0,0 +1,6259 @@ +/* Function tan vect |
