about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
author Sunil K Pandey <skpgkp2@gmail.com> 2021-12-29 08:29:26 -0800
committer Sunil K Pandey <skpgkp2@gmail.com> 2021-12-29 11:37:03 -0800
commit 11c01de14c879ffc8dbac8ce32242a7552cbd4ad (patch)
tree 70ec1403eab4be667adb5ce5a4c50f5b9da87e07 /sysdeps
parent 146310177aa9f2c7d990ef856ed6e8bb94407f06 (diff)
download glibc-11c01de14c879ffc8dbac8ce32242a7552cbd4ad.tar.xz
glibc-11c01de14c879ffc8dbac8ce32242a7552cbd4ad.zip
x86-64: Add vector asin/asinf implementation to libmvec
Implement vectorized asin/asinf containing SSE, AVX, AVX2 and AVX512 versions for libmvec as per vector ABI. It also contains accuracy and ABI tests for vector asin/asinf with regenerated ulps.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/unix/sysv/linux/x86_64/libmvec.abilist 8
-rw-r--r-- sysdeps/x86/fpu/bits/math-vector.h 4
-rw-r--r-- sysdeps/x86/fpu/finclude/math-vector-fortran.h 4
-rw-r--r-- sysdeps/x86_64/fpu/Makeconfig 1
-rw-r--r-- sysdeps/x86_64/fpu/Versions 2
-rw-r--r-- sysdeps/x86_64/fpu/libm-test-ulps 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c 27
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S 288
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_asin4_core-sse.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_asin4_core.c 27
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_asin4_core_avx2.S 273
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_asin8_core-avx2.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_asin8_core.c 27
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_asin8_core_avx512.S 295
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_asinf16_core-avx2.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_asinf16_core.c 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_asinf16_core_avx512.S 260
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_asinf4_core-sse2.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_asinf4_core.c 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_asinf4_core_sse4.S 252
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_asinf8_core-sse.S 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_asinf8_core.c 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_asinf8_core_avx2.S 249
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_asin2_core.S 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_asin4_core.S 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_asin4_core_avx.S 25
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_asin8_core.S 25
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_asinf16_core.S 25
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_asinf4_core.S 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_asinf8_core.S 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_asinf8_core_avx.S 25
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-asin-avx.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-asin-avx2.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-asin-avx512f.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-asin.c 3
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-asinf-avx.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-asinf-avx2.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-asinf-avx512f.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-asinf.c 3
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c 1
48 files changed, 2177 insertions, 0 deletions
diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
index a93258db6f..ab03a07f92 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
@@ -47,18 +47,26 @@ GLIBC_2.22 _ZGVeN8v_sin F
GLIBC_2.22 _ZGVeN8vv_pow F
GLIBC_2.22 _ZGVeN8vvv_sincos F
GLIBC_2.35 _ZGVbN2v_acos F
+GLIBC_2.35 _ZGVbN2v_asin F
GLIBC_2.35 _ZGVbN2v_atan F
GLIBC_2.35 _ZGVbN4v_acosf F
+GLIBC_2.35 _ZGVbN4v_asinf F
GLIBC_2.35 _ZGVbN4v_atanf F
GLIBC_2.35 _ZGVcN4v_acos F
+GLIBC_2.35 _ZGVcN4v_asin F
GLIBC_2.35 _ZGVcN4v_atan F
GLIBC_2.35 _ZGVcN8v_acosf F
+GLIBC_2.35 _ZGVcN8v_asinf F
GLIBC_2.35 _ZGVcN8v_atanf F
GLIBC_2.35 _ZGVdN4v_acos F
+GLIBC_2.35 _ZGVdN4v_asin F
GLIBC_2.35 _ZGVdN4v_atan F
GLIBC_2.35 _ZGVdN8v_acosf F
+GLIBC_2.35 _ZGVdN8v_asinf F
GLIBC_2.35 _ZGVdN8v_atanf F
GLIBC_2.35 _ZGVeN16v_acosf F
+GLIBC_2.35 _ZGVeN16v_asinf F
GLIBC_2.35 _ZGVeN16v_atanf F
GLIBC_2.35 _ZGVeN8v_acos F
+GLIBC_2.35 _ZGVeN8v_asin F
GLIBC_2.35 _ZGVeN8v_atan F
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h
index 1c0e5c5e35..73cb8849ff 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86/fpu/bits/math-vector.h
@@ -66,6 +66,10 @@
# define __DECL_SIMD_atan __DECL_SIMD_x86_64
# undef __DECL_SIMD_atanf
# define __DECL_SIMD_atanf __DECL_SIMD_x86_64
+# undef __DECL_SIMD_asin
+# define __DECL_SIMD_asin __DECL_SIMD_x86_64
+# undef __DECL_SIMD_asinf
+# define __DECL_SIMD_asinf __DECL_SIMD_x86_64
# endif
#endif
diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
index ddcccb11d7..4552c2bdfa 100644
--- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h
+++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
@@ -32,6 +32,8 @@
!GCC$ builtin (acosf) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (atan) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (atanf) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (asin) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (asinf) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
!GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@@ -49,3 +51,5 @@
!GCC$ builtin (acosf) attributes simd (notinbranch) if('x32')
!GCC$ builtin (atan) attributes simd (notinbranch) if('x32')
!GCC$ builtin (atanf) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (asin) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (asinf) attributes simd (notinbranch) if('x32')
diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig
index dae0887f13..e0eae0b196 100644
--- a/sysdeps/x86_64/fpu/Makeconfig
+++ b/sysdeps/x86_64/fpu/Makeconfig
@@ -23,6 +23,7 @@ postclean-generated += libmvec.mk
# Define for both math and mathvec directories.
libmvec-funcs = \
acos \
+ asin \
atan \
cos \
exp \
diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions
index 424f6d526e..10baf869a5 100644
--- a/sysdeps/x86_64/fpu/Versions
+++ b/sysdeps/x86_64/fpu/Versions
@@ -15,8 +15,10 @@ libmvec {
}
GLIBC_2.35 {
_ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos;
+ _ZGVbN2v_asin; _ZGVcN4v_asin; _ZGVdN4v_asin; _ZGVeN8v_asin;
_ZGVbN2v_atan; _ZGVcN4v_atan; _ZGVdN4v_atan; _ZGVeN8v_atan;
_ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf;
+ _ZGVbN4v_asinf; _ZGVcN8v_asinf; _ZGVdN8v_asinf; _ZGVeN16v_asinf;
_ZGVbN4v_atanf; _ZGVcN8v_atanf; _ZGVdN8v_atanf; _ZGVeN16v_atanf;
}
}
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 2e64e59803..ea0f833381 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -93,6 +93,26 @@ float: 1
float128: 2
ldouble: 1
+Function: "asin_vlen16":
+float: 1
+
+Function: "asin_vlen2":
+double: 1
+
+Function: "asin_vlen4":
+double: 1
+float: 1
+
+Function: "asin_vlen4_avx2":
+double: 1
+
+Function: "asin_vlen8":
+double: 1
+float: 1
+
+Function: "asin_vlen8_avx2":
+float: 1
+
Function: "asinh":
double: 2
float: 2
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S
new file mode 100644
index 0000000000..57e1d41a7b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized asin, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2v_asin _ZGVbN2v_asin_sse2
+#include "../svml_d_asin2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c
new file mode 100644
index 0000000000..e46c3af81e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized asin, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2v_asin
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_asin, __GI__ZGVbN2v_asin, __redirect__ZGVbN2v_asin)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S
new file mode 100644
index 0000000000..a6f7a41623
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S
@@ -0,0 +1,288 @@
+/* Function asin vectorized with SSE4.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ https://www.gnu.org/licenses/. */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ * SelMask = (|x| >= 0.5) ? 1 : 0;
+ * R = SelMask ? sqrt(0.5 - 0.5*|x|) : |x|
+ * asin(x) = (SelMask ? (Pi/2 - 2*Poly(R)) : Poly(R))*(-1)^sign(x)
+ *
+ */
+
+/* Offsets for data table __svml_dasin_data_internal
+ */
+#define AbsMask 0
+#define OneHalf 16
+#define SmallNorm 32
+#define One 48
+#define Two 64
+#define sqrt_coeff 80
+#define poly_coeff 144
+#define Pi2H 336
+
+#include <sysdep.h>
+
+ .text
+ .section .text.sse4,"ax",@progbits
+ENTRY(_ZGVbN2v_asin_sse4)
+ subq $72, %rsp
+ cfi_def_cfa_offset(80)
+ movaps %xmm0, %xmm5
+ movups __svml_dasin_data_internal(%rip), %xmm3
+ movups OneHalf+__svml_dasin_data_internal(%rip), %xmm8
+
+/* x = |arg| */
+ movaps %xmm3, %xmm4
+ andps %xmm5, %xmm4
+
+/* Y = 0.5 - 0.5*x */
+ movaps %xmm8, %xmm6
+ mulpd %xmm4, %xmm6
+ movaps %xmm8, %xmm14
+
+/* x^2 */
+ movaps %xmm4, %xmm2
+ subpd %xmm6, %xmm14
+ mulpd %xmm4, %xmm2
+
+/* S ~ -2*sqrt(Y) */
+ cvtpd2ps %xmm14, %xmm9
+ minpd %xmm14, %xmm2
+ movlhps %xmm9, %xmm9
+ movaps %xmm14, %xmm15
+ rsqrtps %xmm9, %xmm10
+ cmpltpd SmallNorm+__svml_dasin_data_internal(%rip), %xmm15
+ addpd %xmm14, %xmm14
+ cvtps2pd %xmm10, %xmm11
+ andnps %xmm11, %xmm15
+ movaps %xmm4, %xmm1
+ movaps %xmm15, %xmm12
+ andnps %xmm5, %xmm3
+ mulpd %xmm15, %xmm12
+ mulpd %xmm14, %xmm15
+ mulpd %xmm12, %xmm14
+ cmpnltpd %xmm8, %xmm1
+ subpd Two+__svml_dasin_data_internal(%rip), %xmm14
+
+/* polynomial */
+ movups poly_coeff+__svml_dasin_data_internal(%rip), %xmm6
+ movaps %xmm2, %xmm12
+ mulpd %xmm2, %xmm6
+ mulpd %xmm2, %xmm12
+ addpd poly_coeff+16+__svml_dasin_data_internal(%rip), %xmm6
+ movups One+__svml_dasin_data_internal(%rip), %xmm7
+ movaps %xmm12, %xmm8</