about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bits/libm-simd-decl-stubs.h 11
-rw-r--r-- math/bits/mathcalls.h 2
-rw-r--r-- sysdeps/unix/sysv/linux/x86_64/libmvec.abilist 8
-rw-r--r-- sysdeps/x86/fpu/bits/math-vector.h 4
-rw-r--r-- sysdeps/x86/fpu/finclude/math-vector-fortran.h 4
-rw-r--r-- sysdeps/x86_64/fpu/Makeconfig 1
-rw-r--r-- sysdeps/x86_64/fpu/Versions 2
-rw-r--r-- sysdeps/x86_64/fpu/libm-test-ulps 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c 27
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S 245
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core-sse.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core.c 27
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_atan4_core_avx2.S 225
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core-avx2.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core.c 27
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_atan8_core_avx512.S 213
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core-avx2.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core.c 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S 174
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core-sse2.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core.c 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_atanf4_core_sse4.S 164
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core-sse.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core.c 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_atanf8_core_avx2.S 148
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_atan2_core.S 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_atan4_core.S 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_atan4_core_avx.S 25
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_atan8_core.S 25
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_atanf16_core.S 25
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_atanf4_core.S 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_atanf8_core.S 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_atanf8_core_avx.S 25
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-atan-avx.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-atan-avx2.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-atan-avx512f.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-atan.c 3
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx2.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-atanf-avx512f.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-atanf.c 3
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c 1
50 files changed, 1741 insertions, 1 deletions
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h
index 2ccdd1fc53..b4647ca918 100644
--- a/bits/libm-simd-decl-stubs.h
+++ b/bits/libm-simd-decl-stubs.h
@@ -109,4 +109,15 @@
#define __DECL_SIMD_acosf32x
#define __DECL_SIMD_acosf64x
#define __DECL_SIMD_acosf128x
+
+#define __DECL_SIMD_atan
+#define __DECL_SIMD_atanf
+#define __DECL_SIMD_atanl
+#define __DECL_SIMD_atanf16
+#define __DECL_SIMD_atanf32
+#define __DECL_SIMD_atanf64
+#define __DECL_SIMD_atanf128
+#define __DECL_SIMD_atanf32x
+#define __DECL_SIMD_atanf64x
+#define __DECL_SIMD_atanf128x
#endif
diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h
index 2cc6654208..3e27c21f21 100644
--- a/math/bits/mathcalls.h
+++ b/math/bits/mathcalls.h
@@ -54,7 +54,7 @@ __MATHCALL_VEC (acos,, (_Mdouble_ __x));
/* Arc sine of X. */
__MATHCALL (asin,, (_Mdouble_ __x));
/* Arc tangent of X. */
-__MATHCALL (atan,, (_Mdouble_ __x));
+__MATHCALL_VEC (atan,, (_Mdouble_ __x));
/* Arc tangent of Y/X. */
__MATHCALL (atan2,, (_Mdouble_ __y, _Mdouble_ __x));
diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
index b37b55777e..a93258db6f 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
@@ -47,10 +47,18 @@ GLIBC_2.22 _ZGVeN8v_sin F
GLIBC_2.22 _ZGVeN8vv_pow F
GLIBC_2.22 _ZGVeN8vvv_sincos F
GLIBC_2.35 _ZGVbN2v_acos F
+GLIBC_2.35 _ZGVbN2v_atan F
GLIBC_2.35 _ZGVbN4v_acosf F
+GLIBC_2.35 _ZGVbN4v_atanf F
GLIBC_2.35 _ZGVcN4v_acos F
+GLIBC_2.35 _ZGVcN4v_atan F
GLIBC_2.35 _ZGVcN8v_acosf F
+GLIBC_2.35 _ZGVcN8v_atanf F
GLIBC_2.35 _ZGVdN4v_acos F
+GLIBC_2.35 _ZGVdN4v_atan F
GLIBC_2.35 _ZGVdN8v_acosf F
+GLIBC_2.35 _ZGVdN8v_atanf F
GLIBC_2.35 _ZGVeN16v_acosf F
+GLIBC_2.35 _ZGVeN16v_atanf F
GLIBC_2.35 _ZGVeN8v_acos F
+GLIBC_2.35 _ZGVeN8v_atan F
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h
index dabb74cbb9..1c0e5c5e35 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86/fpu/bits/math-vector.h
@@ -62,6 +62,10 @@
# define __DECL_SIMD_acos __DECL_SIMD_x86_64
# undef __DECL_SIMD_acosf
# define __DECL_SIMD_acosf __DECL_SIMD_x86_64
+# undef __DECL_SIMD_atan
+# define __DECL_SIMD_atan __DECL_SIMD_x86_64
+# undef __DECL_SIMD_atanf
+# define __DECL_SIMD_atanf __DECL_SIMD_x86_64
# endif
#endif
diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
index 4bcbd1fbce..ddcccb11d7 100644
--- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h
+++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
@@ -30,6 +30,8 @@
!GCC$ builtin (powf) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (acos) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (acosf) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (atan) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (atanf) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
!GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@@ -45,3 +47,5 @@
!GCC$ builtin (powf) attributes simd (notinbranch) if('x32')
!GCC$ builtin (acos) attributes simd (notinbranch) if('x32')
!GCC$ builtin (acosf) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (atan) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (atanf) attributes simd (notinbranch) if('x32')
diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig
index 7acf1f306c..dae0887f13 100644
--- a/sysdeps/x86_64/fpu/Makeconfig
+++ b/sysdeps/x86_64/fpu/Makeconfig
@@ -23,6 +23,7 @@ postclean-generated += libmvec.mk
# Define for both math and mathvec directories.
libmvec-funcs = \
acos \
+ atan \
cos \
exp \
log \
diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions
index 2985fe7ca7..424f6d526e 100644
--- a/sysdeps/x86_64/fpu/Versions
+++ b/sysdeps/x86_64/fpu/Versions
@@ -15,6 +15,8 @@ libmvec {
}
GLIBC_2.35 {
_ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos;
+ _ZGVbN2v_atan; _ZGVcN4v_atan; _ZGVdN4v_atan; _ZGVeN8v_atan;
_ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf;
+ _ZGVbN4v_atanf; _ZGVcN8v_atanf; _ZGVdN8v_atanf; _ZGVeN16v_atanf;
}
}
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 6c12976c82..2e64e59803 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -164,6 +164,26 @@ float: 2
float128: 2
ldouble: 1
+Function: "atan_vlen16":
+float: 1
+
+Function: "atan_vlen2":
+double: 1
+
+Function: "atan_vlen4":
+double: 1
+float: 1
+
+Function: "atan_vlen4_avx2":
+double: 1
+
+Function: "atan_vlen8":
+double: 1
+float: 1
+
+Function: "atan_vlen8_avx2":
+float: 1
+
Function: "atanh":
double: 2
float: 2
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S
new file mode 100644
index 0000000000..115e5223aa
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized atan, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2v_atan _ZGVbN2v_atan_sse2
+#include "../svml_d_atan2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c
new file mode 100644
index 0000000000..93f079ffcb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized atan, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2v_atan
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_atan, __GI__ZGVbN2v_atan, __redirect__ZGVbN2v_atan)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S
new file mode 100644
index 0000000000..f0ad036b9e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan2_core_sse4.S
@@ -0,0 +1,245 @@
+/* Function atan vectorized with SSE4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ https://www.gnu.org/licenses/. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * For 0.0 <= x <= 7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x)
+ * For 7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x)
+ * For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x)
+ * For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x)
+ * For 39.0/16.0 <= x <= inf : atan(x) = atan(inf) + atan(s), where s=-1.0/x
+ * Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0.
+ *
+ */
+
+/* Offsets for data table __svml_datan_data_internal_avx512
+ */
+#define AbsMask 0
+#define Shifter 16
+#define MaxThreshold 32
+#define MOne 48
+#define One 64
+#define LargeX 80
+#define Zero 96
+#define Tbl_H 112
+#define Tbl_L 368
+#define dIndexMed 624
+#define Pi2 640
+#define Pi2_low 656
+#define coeff 672
+
+#include <sysdep.h>
+
+ .text
+ .section .text.sse4,"ax",@progbits
+ENTRY(_ZGVbN2v_atan_sse4)
+ lea Tbl_H+128+__svml_datan_data_internal_avx512(%rip), %rcx
+ movups __svml_datan_data_internal_avx512(%rip), %xmm4
+ movups Shifter+__svml_datan_data_internal_avx512(%rip), %xmm3
+ andps %xmm0, %xmm4
+ movaps %xmm3, %xmm12
+ movaps %xmm4, %xmm5
+ addpd %xmm4, %xmm12
+ movaps %xmm12, %xmm7
+
+/*
+ * table lookup sequence
+ * VPERMUTE not available
+ */
+ movaps %xmm12, %xmm10
+ subpd %xmm3, %xmm7
+ subpd %xmm7, %xmm5
+ mulpd %xmm4, %xmm7
+ movups MaxThreshold+__svml_datan_data_internal_avx512(%rip), %xmm2
+ psllq $3, %xmm10
+
+/* saturate X range */
+ movups LargeX+__svml_datan_data_internal_avx512(%rip), %xmm8
+ pxor %xmm4, %xmm0
+ cmplepd %xmm4, %xmm2
+ addpd One+__svml_datan_data_internal_avx512(%rip), %xmm7
+ minpd %xmm4, %xmm8
+ movups MOne+__svml_datan_data_intern