x86-64: Add vector asin/asinf implementation to libmvec

Implement vectorized asin/asinf with SSE, AVX, AVX2 and AVX512 versions for libmvec, as per the vector ABI. The patch also contains accuracy and ABI tests for vector asin/asinf, with regenerated ulps.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
author: Sunil K Pandey <skpgkp2@gmail.com> 2021-12-29 08:29:26 -0800
committer: Sunil K Pandey <skpgkp2@gmail.com> 2021-12-29 11:37:03 -0800
commit: 11c01de14c879ffc8dbac8ce32242a7552cbd4ad (patch)
tree: 70ec1403eab4be667adb5ce5a4c50f5b9da87e07 /sysdeps
parent: 146310177aa9f2c7d990ef856ed6e8bb94407f06 (diff)
download: glibc-11c01de14c879ffc8dbac8ce32242a7552cbd4ad.tar.xz
glibc-11c01de14c879ffc8dbac8ce32242a7552cbd4ad.zip
48 files changed, 2177 insertions, 0 deletions
diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
index a93258db6f..ab03a07f92 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
@@ -47,18 +47,26 @@ GLIBC_2.22 _ZGVeN8v_sin F
 GLIBC_2.22 _ZGVeN8vv_pow F
 GLIBC_2.22 _ZGVeN8vvv_sincos F
 GLIBC_2.35 _ZGVbN2v_acos F
+GLIBC_2.35 _ZGVbN2v_asin F
 GLIBC_2.35 _ZGVbN2v_atan F
 GLIBC_2.35 _ZGVbN4v_acosf F
+GLIBC_2.35 _ZGVbN4v_asinf F
 GLIBC_2.35 _ZGVbN4v_atanf F
 GLIBC_2.35 _ZGVcN4v_acos F
+GLIBC_2.35 _ZGVcN4v_asin F
 GLIBC_2.35 _ZGVcN4v_atan F
 GLIBC_2.35 _ZGVcN8v_acosf F
+GLIBC_2.35 _ZGVcN8v_asinf F
 GLIBC_2.35 _ZGVcN8v_atanf F
 GLIBC_2.35 _ZGVdN4v_acos F
+GLIBC_2.35 _ZGVdN4v_asin F
 GLIBC_2.35 _ZGVdN4v_atan F
 GLIBC_2.35 _ZGVdN8v_acosf F
+GLIBC_2.35 _ZGVdN8v_asinf F
 GLIBC_2.35 _ZGVdN8v_atanf F
 GLIBC_2.35 _ZGVeN16v_acosf F
+GLIBC_2.35 _ZGVeN16v_asinf F
 GLIBC_2.35 _ZGVeN16v_atanf F
 GLIBC_2.35 _ZGVeN8v_acos F
+GLIBC_2.35 _ZGVeN8v_asin F
 GLIBC_2.35 _ZGVeN8v_atan F
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h
index 1c0e5c5e35..73cb8849ff 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86/fpu/bits/math-vector.h
@@ -66,6 +66,10 @@
 #  define __DECL_SIMD_atan __DECL_SIMD_x86_64
 #  undef __DECL_SIMD_atanf
 #  define __DECL_SIMD_atanf __DECL_SIMD_x86_64
+#  undef __DECL_SIMD_asin
+#  define __DECL_SIMD_asin __DECL_SIMD_x86_64
+#  undef __DECL_SIMD_asinf
+#  define __DECL_SIMD_asinf __DECL_SIMD_x86_64
 
 # endif
 #endif
diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
index ddcccb11d7..4552c2bdfa 100644
--- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h
+++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
@@ -32,6 +32,8 @@
 !GCC$ builtin (acosf) attributes simd (notinbranch) if('x86_64')
 !GCC$ builtin (atan) attributes simd (notinbranch) if('x86_64')
 !GCC$ builtin (atanf) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (asin) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (asinf) attributes simd (notinbranch) if('x86_64')
 
 !GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@@ -49,3 +51,5 @@
 !GCC$ builtin (acosf) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (atan) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (atanf) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (asin) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (asinf) attributes simd (notinbranch) if('x32')
diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig
index dae0887f13..e0eae0b196 100644
--- a/sysdeps/x86_64/fpu/Makeconfig
+++ b/sysdeps/x86_64/fpu/Makeconfig
@@ -23,6 +23,7 @@ postclean-generated += libmvec.mk
 # Define for both math and mathvec directories.
 libmvec-funcs = \
   acos \
+  asin \
   atan \
   cos \
   exp \
diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions
index 424f6d526e..10baf869a5 100644
--- a/sysdeps/x86_64/fpu/Versions
+++ b/sysdeps/x86_64/fpu/Versions
@@ -15,8 +15,10 @@ libmvec {
   }
   GLIBC_2.35 {
     _ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos;
+    _ZGVbN2v_asin; _ZGVcN4v_asin; _ZGVdN4v_asin; _ZGVeN8v_asin;
     _ZGVbN2v_atan; _ZGVcN4v_atan; _ZGVdN4v_atan; _ZGVeN8v_atan;
     _ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf;
+    _ZGVbN4v_asinf; _ZGVcN8v_asinf; _ZGVdN8v_asinf; _ZGVeN16v_asinf;
     _ZGVbN4v_atanf; _ZGVcN8v_atanf; _ZGVdN8v_atanf; _ZGVeN16v_atanf;
   }
 }
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 2e64e59803..ea0f833381 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -93,6 +93,26 @@ float: 1
 float128: 2
 ldouble: 1
 
+Function: "asin_vlen16":
+float: 1
+
+Function: "asin_vlen2":
+double: 1
+
+Function: "asin_vlen4":
+double: 1
+float: 1
+
+Function: "asin_vlen4_avx2":
+double: 1
+
+Function: "asin_vlen8":
+double: 1
+float: 1
+
+Function: "asin_vlen8_avx2":
+float: 1
+
 Function: "asinh":
 double: 2
 float: 2
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S
new file mode 100644
index 0000000000..57e1d41a7b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized asin, vector length is 2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2v_asin _ZGVbN2v_asin_sse2
+#include "../svml_d_asin2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c
new file mode 100644
index 0000000000..e46c3af81e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized asin, vector length is 2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2v_asin
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_asin, __GI__ZGVbN2v_asin, __redirect__ZGVbN2v_asin)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S
new file mode 100644
index 0000000000..a6f7a41623
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_asin2_core_sse4.S
@@ -0,0 +1,288 @@
+/* Function asin vectorized with SSE4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ *      SelMask = (|x| >= 0.5) ? 1 : 0;
+ *      R = SelMask ? sqrt(0.5 - 0.5*|x|) : |x|
+ *      asin(x) = (SelMask ? (Pi/2 - 2*Poly(R)) : Poly(R))*(-1)^sign(x)
+ *
+ */
+
+/* Offsets for data table __svml_dasin_data_internal
+ */
+#define AbsMask                       	0
+#define OneHalf                       	16
+#define SmallNorm                     	32
+#define One                           	48
+#define Two                           	64
+#define sqrt_coeff                    	80
+#define poly_coeff                    	144
+#define Pi2H                          	336
+
+#include <sysdep.h>
+
+        .text
+	.section .text.sse4,"ax",@progbits
+ENTRY(_ZGVbN2v_asin_sse4)
+        subq      $72, %rsp
+        cfi_def_cfa_offset(80)
+        movaps    %xmm0, %xmm5
+        movups    __svml_dasin_data_internal(%rip), %xmm3
+        movups    OneHalf+__svml_dasin_data_internal(%rip), %xmm8
+
+/* x = |arg| */
+        movaps    %xmm3, %xmm4
+        andps     %xmm5, %xmm4
+
+/* Y = 0.5 - 0.5*x */
+        movaps    %xmm8, %xmm6
+        mulpd     %xmm4, %xmm6
+        movaps    %xmm8, %xmm14
+
+/* x^2 */
+        movaps    %xmm4, %xmm2
+        subpd     %xmm6, %xmm14
+        mulpd     %xmm4, %xmm2
+
+/* S ~ -2*sqrt(Y) */
+        cvtpd2ps  %xmm14, %xmm9
+        minpd     %xmm14, %xmm2
+        movlhps   %xmm9, %xmm9
+        movaps    %xmm14, %xmm15
+        rsqrtps   %xmm9, %xmm10
+        cmpltpd   SmallNorm+__svml_dasin_data_internal(%rip), %xmm15
+        addpd     %xmm14, %xmm14
+        cvtps2pd  %xmm10, %xmm11
+        andnps    %xmm11, %xmm15
+        movaps    %xmm4, %xmm1
+        movaps    %xmm15, %xmm12
+        andnps    %xmm5, %xmm3
+        mulpd     %xmm15, %xmm12
+        mulpd     %xmm14, %xmm15
+        mulpd     %xmm12, %xmm14
+        cmpnltpd  %xmm8, %xmm1
+        subpd     Two+__svml_dasin_data_internal(%rip), %xmm14
+
+/* polynomial */
+        movups    poly_coeff+__svml_dasin_data_internal(%rip), %xmm6
+        movaps    %xmm2, %xmm12
+        mulpd     %xmm2, %xmm6
+        mulpd     %xmm2, %xmm12
+        addpd     poly_coeff+16+__svml_dasin_data_internal(%rip), %xmm6
+        movups    One+__svml_dasin_data_internal(%rip), %xmm7
+        movaps    %xmm12, %xmm8
author	Sunil K Pandey <skpgkp2@gmail.com>	2021-12-29 08:29:26 -0800
committer	Sunil K Pandey <skpgkp2@gmail.com>	2021-12-29 11:37:03 -0800
commit	11c01de14c879ffc8dbac8ce32242a7552cbd4ad (patch)
tree	70ec1403eab4be667adb5ce5a4c50f5b9da87e07 /sysdeps
parent	146310177aa9f2c7d990ef856ed6e8bb94407f06 (diff)
download	glibc-11c01de14c879ffc8dbac8ce32242a7552cbd4ad.tar.xz glibc-11c01de14c879ffc8dbac8ce32242a7552cbd4ad.zip