x86-64: Add vector tan/tanf implementation to libmvec

Implement vectorized tan/tanf for libmvec, with SSE, AVX, AVX2 and AVX512 versions as per the vector ABI. The patch also adds accuracy and ABI tests for vector tan/tanf, with regenerated ulps.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
author: Sunil K Pandey <skpgkp2@gmail.com> 2021-12-29 10:19:39 -0800
committer: Sunil K Pandey <skpgkp2@gmail.com> 2021-12-30 10:19:13 -0800
commit: c21c7bc24ec58c516d1608695d9c5a86f2f2fc0c (patch)
tree: 0aa72249cd873496766c00736e08bd13ddd23b9d
parent: 8881cca8fb8d3a7ee89d174017dd27eded90366c (diff)
download: glibc-c21c7bc24ec58c516d1608695d9c5a86f2f2fc0c.tar.xz
glibc-c21c7bc24ec58c516d1608695d9c5a86f2f2fc0c.zip
50 files changed, 21913 insertions, 1 deletions
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h
index e716664306..4b63a53742 100644
--- a/bits/libm-simd-decl-stubs.h
+++ b/bits/libm-simd-decl-stubs.h
@@ -318,4 +318,15 @@
 #define __DECL_SIMD_erfcf32x
 #define __DECL_SIMD_erfcf64x
 #define __DECL_SIMD_erfcf128x
+
+#define __DECL_SIMD_tan
+#define __DECL_SIMD_tanf
+#define __DECL_SIMD_tanl
+#define __DECL_SIMD_tanf16
+#define __DECL_SIMD_tanf32
+#define __DECL_SIMD_tanf64
+#define __DECL_SIMD_tanf128
+#define __DECL_SIMD_tanf32x
+#define __DECL_SIMD_tanf64x
+#define __DECL_SIMD_tanf128x
 #endif
diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h
index 24e28b5c4f..999ae4d703 100644
--- a/math/bits/mathcalls.h
+++ b/math/bits/mathcalls.h
@@ -63,7 +63,7 @@ __MATHCALL_VEC (cos,, (_Mdouble_ __x));
 /* Sine of X.  */
 __MATHCALL_VEC (sin,, (_Mdouble_ __x));
 /* Tangent of X.  */
-__MATHCALL (tan,, (_Mdouble_ __x));
+__MATHCALL_VEC (tan,, (_Mdouble_ __x));
 
 /* Hyperbolic functions.  */
 
diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
index e9e98bab65..055530f3ab 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
@@ -63,6 +63,7 @@ GLIBC_2.35 _ZGVbN2v_log10 F
 GLIBC_2.35 _ZGVbN2v_log1p F
 GLIBC_2.35 _ZGVbN2v_log2 F
 GLIBC_2.35 _ZGVbN2v_sinh F
+GLIBC_2.35 _ZGVbN2v_tan F
 GLIBC_2.35 _ZGVbN2v_tanh F
 GLIBC_2.35 _ZGVbN2vv_atan2 F
 GLIBC_2.35 _ZGVbN2vv_hypot F
@@ -83,6 +84,7 @@ GLIBC_2.35 _ZGVbN4v_log10f F
 GLIBC_2.35 _ZGVbN4v_log1pf F
 GLIBC_2.35 _ZGVbN4v_log2f F
 GLIBC_2.35 _ZGVbN4v_sinhf F
+GLIBC_2.35 _ZGVbN4v_tanf F
 GLIBC_2.35 _ZGVbN4v_tanhf F
 GLIBC_2.35 _ZGVbN4vv_atan2f F
 GLIBC_2.35 _ZGVbN4vv_hypotf F
@@ -103,6 +105,7 @@ GLIBC_2.35 _ZGVcN4v_log10 F
 GLIBC_2.35 _ZGVcN4v_log1p F
 GLIBC_2.35 _ZGVcN4v_log2 F
 GLIBC_2.35 _ZGVcN4v_sinh F
+GLIBC_2.35 _ZGVcN4v_tan F
 GLIBC_2.35 _ZGVcN4v_tanh F
 GLIBC_2.35 _ZGVcN4vv_atan2 F
 GLIBC_2.35 _ZGVcN4vv_hypot F
@@ -123,6 +126,7 @@ GLIBC_2.35 _ZGVcN8v_log10f F
 GLIBC_2.35 _ZGVcN8v_log1pf F
 GLIBC_2.35 _ZGVcN8v_log2f F
 GLIBC_2.35 _ZGVcN8v_sinhf F
+GLIBC_2.35 _ZGVcN8v_tanf F
 GLIBC_2.35 _ZGVcN8v_tanhf F
 GLIBC_2.35 _ZGVcN8vv_atan2f F
 GLIBC_2.35 _ZGVcN8vv_hypotf F
@@ -143,6 +147,7 @@ GLIBC_2.35 _ZGVdN4v_log10 F
 GLIBC_2.35 _ZGVdN4v_log1p F
 GLIBC_2.35 _ZGVdN4v_log2 F
 GLIBC_2.35 _ZGVdN4v_sinh F
+GLIBC_2.35 _ZGVdN4v_tan F
 GLIBC_2.35 _ZGVdN4v_tanh F
 GLIBC_2.35 _ZGVdN4vv_atan2 F
 GLIBC_2.35 _ZGVdN4vv_hypot F
@@ -163,6 +168,7 @@ GLIBC_2.35 _ZGVdN8v_log10f F
 GLIBC_2.35 _ZGVdN8v_log1pf F
 GLIBC_2.35 _ZGVdN8v_log2f F
 GLIBC_2.35 _ZGVdN8v_sinhf F
+GLIBC_2.35 _ZGVdN8v_tanf F
 GLIBC_2.35 _ZGVdN8v_tanhf F
 GLIBC_2.35 _ZGVdN8vv_atan2f F
 GLIBC_2.35 _ZGVdN8vv_hypotf F
@@ -183,6 +189,7 @@ GLIBC_2.35 _ZGVeN16v_log10f F
 GLIBC_2.35 _ZGVeN16v_log1pf F
 GLIBC_2.35 _ZGVeN16v_log2f F
 GLIBC_2.35 _ZGVeN16v_sinhf F
+GLIBC_2.35 _ZGVeN16v_tanf F
 GLIBC_2.35 _ZGVeN16v_tanhf F
 GLIBC_2.35 _ZGVeN16vv_atan2f F
 GLIBC_2.35 _ZGVeN16vv_hypotf F
@@ -203,6 +210,7 @@ GLIBC_2.35 _ZGVeN8v_log10 F
 GLIBC_2.35 _ZGVeN8v_log1p F
 GLIBC_2.35 _ZGVeN8v_log2 F
 GLIBC_2.35 _ZGVeN8v_sinh F
+GLIBC_2.35 _ZGVeN8v_tan F
 GLIBC_2.35 _ZGVeN8v_tanh F
 GLIBC_2.35 _ZGVeN8vv_atan2 F
 GLIBC_2.35 _ZGVeN8vv_hypot F
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h
index 9a55e2e542..8de1d6aee9 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86/fpu/bits/math-vector.h
@@ -138,6 +138,10 @@
 #  define __DECL_SIMD_erfc __DECL_SIMD_x86_64
 #  undef __DECL_SIMD_erfcf
 #  define __DECL_SIMD_erfcf __DECL_SIMD_x86_64
+#  undef __DECL_SIMD_tan
+#  define __DECL_SIMD_tan __DECL_SIMD_x86_64
+#  undef __DECL_SIMD_tanf
+#  define __DECL_SIMD_tanf __DECL_SIMD_x86_64
 
 # endif
 #endif
diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
index 818134dc75..d2735fd064 100644
--- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h
+++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
@@ -68,6 +68,8 @@
 !GCC$ builtin (asinhf) attributes simd (notinbranch) if('x86_64')
 !GCC$ builtin (erfc) attributes simd (notinbranch) if('x86_64')
 !GCC$ builtin (erfcf) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (tan) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (tanf) attributes simd (notinbranch) if('x86_64')
 
 !GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@@ -121,3 +123,5 @@
 !GCC$ builtin (asinhf) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (erfc) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (erfcf) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (tan) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (tanf) attributes simd (notinbranch) if('x32')
diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig
index be1d6cbb92..7d76f7b55d 100644
--- a/sysdeps/x86_64/fpu/Makeconfig
+++ b/sysdeps/x86_64/fpu/Makeconfig
@@ -47,6 +47,7 @@ libmvec-funcs = \
   sin \
   sincos \
   sinh \
+  tan \
   tanh \
 
 # Define libmvec function for benchtests directory.
diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions
index b10ae69894..63d6b61d0b 100644
--- a/sysdeps/x86_64/fpu/Versions
+++ b/sysdeps/x86_64/fpu/Versions
@@ -31,6 +31,7 @@ libmvec {
     _ZGVbN2v_log1p; _ZGVcN4v_log1p; _ZGVdN4v_log1p; _ZGVeN8v_log1p;
     _ZGVbN2v_log2; _ZGVcN4v_log2; _ZGVdN4v_log2; _ZGVeN8v_log2;
     _ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh;
+    _ZGVbN2v_tan; _ZGVcN4v_tan; _ZGVdN4v_tan; _ZGVeN8v_tan;
     _ZGVbN2v_tanh; _ZGVcN4v_tanh; _ZGVdN4v_tanh; _ZGVeN8v_tanh;
     _ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2;
     _ZGVbN2vv_hypot; _ZGVcN4vv_hypot; _ZGVdN4vv_hypot; _ZGVeN8vv_hypot;
@@ -51,6 +52,7 @@ libmvec {
     _ZGVbN4v_log1pf; _ZGVcN8v_log1pf; _ZGVdN8v_log1pf; _ZGVeN16v_log1pf;
     _ZGVbN4v_log2f; _ZGVcN8v_log2f; _ZGVdN8v_log2f; _ZGVeN16v_log2f;
     _ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf;
+    _ZGVbN4v_tanf; _ZGVcN8v_tanf; _ZGVdN8v_tanf; _ZGVeN16v_tanf;
     _ZGVbN4v_tanhf; _ZGVcN8v_tanhf; _ZGVdN8v_tanhf; _ZGVeN16v_tanhf;
     _ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f;
     _ZGVbN4vv_hypotf; _ZGVcN8vv_hypotf; _ZGVdN8vv_hypotf; _ZGVeN16vv_hypotf;
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index f3ee98358f..325e4b5b84 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -2080,6 +2080,26 @@ float: 1
 float128: 1
 ldouble: 2
 
+Function: "tan_vlen16":
+float: 1
+
+Function: "tan_vlen2":
+double: 2
+
+Function: "tan_vlen4":
+double: 2
+float: 2
+
+Function: "tan_vlen4_avx2":
+double: 1
+
+Function: "tan_vlen8":
+double: 2
+float: 2
+
+Function: "tan_vlen8_avx2":
+float: 2
+
 Function: "tanh":
 double: 2
 float: 2
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core-sse2.S
new file mode 100644
index 0000000000..a8f5a390cd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized tan, vector length is 2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2v_tan _ZGVbN2v_tan_sse2
+#include "../svml_d_tan2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core.c
new file mode 100644
index 0000000000..4c41025a23
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized tan, vector length is 2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2v_tan
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_tan, __GI__ZGVbN2v_tan, __redirect__ZGVbN2v_tan)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S
new file mode 100644
index 0000000000..2c8bdb59d9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S
@@ -0,0 +1,6259 @@
+/* Function tan vect
author	Sunil K Pandey <skpgkp2@gmail.com>	2021-12-29 10:19:39 -0800
committer	Sunil K Pandey <skpgkp2@gmail.com>	2021-12-30 10:19:13 -0800
commit	c21c7bc24ec58c516d1608695d9c5a86f2f2fc0c (patch)
tree	0aa72249cd873496766c00736e08bd13ddd23b9d
parent	8881cca8fb8d3a7ee89d174017dd27eded90366c (diff)
download	glibc-c21c7bc24ec58c516d1608695d9c5a86f2f2fc0c.tar.xz glibc-c21c7bc24ec58c516d1608695d9c5a86f2f2fc0c.zip