about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sunil K Pandey <skpgkp2@gmail.com> 2021-12-29 10:13:20 -0800
committer Sunil K Pandey <skpgkp2@gmail.com> 2021-12-30 10:19:03 -0800
commit 8881cca8fb8d3a7ee89d174017dd27eded90366c (patch)
tree aa183021a734c54404062791e2582b17a5055f40
parent bc1e344dc1fb7f406c42e03a63dd3dbf426af9e7 (diff)
download glibc-8881cca8fb8d3a7ee89d174017dd27eded90366c.tar.xz
glibc-8881cca8fb8d3a7ee89d174017dd27eded90366c.zip
x86-64: Add vector erfc/erfcf implementation to libmvec
Implement vectorized erfc/erfcf containing SSE, AVX, AVX2 and AVX512 versions for libmvec as per vector ABI. It also contains accuracy and ABI tests for vector erfc/erfcf with regenerated ulps.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
-rw-r--r-- bits/libm-simd-decl-stubs.h | 11
-rw-r--r-- math/bits/mathcalls.h | 2
-rw-r--r-- sysdeps/unix/sysv/linux/x86_64/libmvec.abilist | 8
-rw-r--r-- sysdeps/x86/fpu/bits/math-vector.h | 4
-rw-r--r-- sysdeps/x86/fpu/finclude/math-vector-fortran.h | 4
-rw-r--r-- sysdeps/x86_64/fpu/Makeconfig | 1
-rw-r--r-- sysdeps/x86_64/fpu/Versions | 2
-rw-r--r-- sysdeps/x86_64/fpu/libm-test-ulps | 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core-sse2.S | 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core.c | 27
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core_sse4.S | 3853
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_erfc4_core-sse.S | 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_erfc4_core.c | 27
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_erfc4_core_avx2.S | 3857
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_erfc8_core-avx2.S | 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_erfc8_core.c | 27
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_erfc8_core_avx512.S | 3860
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_erfcf16_core-avx2.S | 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_erfcf16_core.c | 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_erfcf16_core_avx512.S | 932
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_erfcf4_core-sse2.S | 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_erfcf4_core.c | 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_erfcf4_core_sse4.S | 939
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_erfcf8_core-sse.S | 20
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_erfcf8_core.c | 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_s_erfcf8_core_avx2.S | 957
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_erfc2_core.S | 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_erfc4_core.S | 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_erfc4_core_avx.S | 25
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_erfc8_core.S | 25
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_erfcf16_core.S | 25
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_erfcf4_core.S | 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_erfcf8_core.S | 29
-rw-r--r-- sysdeps/x86_64/fpu/svml_s_erfcf8_core_avx.S | 25
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-erfc-avx.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-erfc-avx2.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-erfc-avx512f.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-libmvec-erfc.c | 3
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-erfcf-avx.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-erfcf-avx2.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-erfcf-avx512f.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-libmvec-erfcf.c | 3
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c | 1
-rw-r--r-- sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c | 1
50 files changed, 14970 insertions, 1 deletions
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h
index bcaddb7a0e..e716664306 100644
--- a/bits/libm-simd-decl-stubs.h
+++ b/bits/libm-simd-decl-stubs.h
@@ -307,4 +307,15 @@
#define __DECL_SIMD_asinhf32x
#define __DECL_SIMD_asinhf64x
#define __DECL_SIMD_asinhf128x
+
+#define __DECL_SIMD_erfc
+#define __DECL_SIMD_erfcf
+#define __DECL_SIMD_erfcl
+#define __DECL_SIMD_erfcf16
+#define __DECL_SIMD_erfcf32
+#define __DECL_SIMD_erfcf64
+#define __DECL_SIMD_erfcf128
+#define __DECL_SIMD_erfcf32x
+#define __DECL_SIMD_erfcf64x
+#define __DECL_SIMD_erfcf128x
#endif
diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h
index 40e055e579..24e28b5c4f 100644
--- a/math/bits/mathcalls.h
+++ b/math/bits/mathcalls.h
@@ -229,7 +229,7 @@ __MATHCALL (yn,, (int, _Mdouble_));
#if defined __USE_XOPEN || defined __USE_ISOC99
/* Error and gamma functions. */
__MATHCALL_VEC (erf,, (_Mdouble_));
-__MATHCALL (erfc,, (_Mdouble_));
+__MATHCALL_VEC (erfc,, (_Mdouble_));
__MATHCALL (lgamma,, (_Mdouble_));
#endif
diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
index df265d6a12..e9e98bab65 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
@@ -55,6 +55,7 @@ GLIBC_2.35 _ZGVbN2v_atanh F
GLIBC_2.35 _ZGVbN2v_cbrt F
GLIBC_2.35 _ZGVbN2v_cosh F
GLIBC_2.35 _ZGVbN2v_erf F
+GLIBC_2.35 _ZGVbN2v_erfc F
GLIBC_2.35 _ZGVbN2v_exp10 F
GLIBC_2.35 _ZGVbN2v_exp2 F
GLIBC_2.35 _ZGVbN2v_expm1 F
@@ -73,6 +74,7 @@ GLIBC_2.35 _ZGVbN4v_atanf F
GLIBC_2.35 _ZGVbN4v_atanhf F
GLIBC_2.35 _ZGVbN4v_cbrtf F
GLIBC_2.35 _ZGVbN4v_coshf F
+GLIBC_2.35 _ZGVbN4v_erfcf F
GLIBC_2.35 _ZGVbN4v_erff F
GLIBC_2.35 _ZGVbN4v_exp10f F
GLIBC_2.35 _ZGVbN4v_exp2f F
@@ -93,6 +95,7 @@ GLIBC_2.35 _ZGVcN4v_atanh F
GLIBC_2.35 _ZGVcN4v_cbrt F
GLIBC_2.35 _ZGVcN4v_cosh F
GLIBC_2.35 _ZGVcN4v_erf F
+GLIBC_2.35 _ZGVcN4v_erfc F
GLIBC_2.35 _ZGVcN4v_exp10 F
GLIBC_2.35 _ZGVcN4v_exp2 F
GLIBC_2.35 _ZGVcN4v_expm1 F
@@ -111,6 +114,7 @@ GLIBC_2.35 _ZGVcN8v_atanf F
GLIBC_2.35 _ZGVcN8v_atanhf F
GLIBC_2.35 _ZGVcN8v_cbrtf F
GLIBC_2.35 _ZGVcN8v_coshf F
+GLIBC_2.35 _ZGVcN8v_erfcf F
GLIBC_2.35 _ZGVcN8v_erff F
GLIBC_2.35 _ZGVcN8v_exp10f F
GLIBC_2.35 _ZGVcN8v_exp2f F
@@ -131,6 +135,7 @@ GLIBC_2.35 _ZGVdN4v_atanh F
GLIBC_2.35 _ZGVdN4v_cbrt F
GLIBC_2.35 _ZGVdN4v_cosh F
GLIBC_2.35 _ZGVdN4v_erf F
+GLIBC_2.35 _ZGVdN4v_erfc F
GLIBC_2.35 _ZGVdN4v_exp10 F
GLIBC_2.35 _ZGVdN4v_exp2 F
GLIBC_2.35 _ZGVdN4v_expm1 F
@@ -149,6 +154,7 @@ GLIBC_2.35 _ZGVdN8v_atanf F
GLIBC_2.35 _ZGVdN8v_atanhf F
GLIBC_2.35 _ZGVdN8v_cbrtf F
GLIBC_2.35 _ZGVdN8v_coshf F
+GLIBC_2.35 _ZGVdN8v_erfcf F
GLIBC_2.35 _ZGVdN8v_erff F
GLIBC_2.35 _ZGVdN8v_exp10f F
GLIBC_2.35 _ZGVdN8v_exp2f F
@@ -168,6 +174,7 @@ GLIBC_2.35 _ZGVeN16v_atanf F
GLIBC_2.35 _ZGVeN16v_atanhf F
GLIBC_2.35 _ZGVeN16v_cbrtf F
GLIBC_2.35 _ZGVeN16v_coshf F
+GLIBC_2.35 _ZGVeN16v_erfcf F
GLIBC_2.35 _ZGVeN16v_erff F
GLIBC_2.35 _ZGVeN16v_exp10f F
GLIBC_2.35 _ZGVeN16v_exp2f F
@@ -188,6 +195,7 @@ GLIBC_2.35 _ZGVeN8v_atanh F
GLIBC_2.35 _ZGVeN8v_cbrt F
GLIBC_2.35 _ZGVeN8v_cosh F
GLIBC_2.35 _ZGVeN8v_erf F
+GLIBC_2.35 _ZGVeN8v_erfc F
GLIBC_2.35 _ZGVeN8v_exp10 F
GLIBC_2.35 _ZGVeN8v_exp2 F
GLIBC_2.35 _ZGVeN8v_expm1 F
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h
index 71b7d660db..9a55e2e542 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86/fpu/bits/math-vector.h
@@ -134,6 +134,10 @@
# define __DECL_SIMD_asinh __DECL_SIMD_x86_64
# undef __DECL_SIMD_asinhf
# define __DECL_SIMD_asinhf __DECL_SIMD_x86_64
+# undef __DECL_SIMD_erfc
+# define __DECL_SIMD_erfc __DECL_SIMD_x86_64
+# undef __DECL_SIMD_erfcf
+# define __DECL_SIMD_erfcf __DECL_SIMD_x86_64
# endif
#endif
diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
index 4d3afdf753..818134dc75 100644
--- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h
+++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
@@ -66,6 +66,8 @@
!GCC$ builtin (tanhf) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (asinh) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (asinhf) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (erfc) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (erfcf) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
!GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@@ -117,3 +119,5 @@
!GCC$ builtin (tanhf) attributes simd (notinbranch) if('x32')
!GCC$ builtin (asinh) attributes simd (notinbranch) if('x32')
!GCC$ builtin (asinhf) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (erfc) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (erfcf) attributes simd (notinbranch) if('x32')
diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig
index 2ff33c7dd8..be1d6cbb92 100644
--- a/sysdeps/x86_64/fpu/Makeconfig
+++ b/sysdeps/x86_64/fpu/Makeconfig
@@ -33,6 +33,7 @@ libmvec-funcs = \
cos \
cosh \
erf \
+ erfc \
exp \
exp10 \
exp2 \
diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions
index e6ead13085..b10ae69894 100644
--- a/sysdeps/x86_64/fpu/Versions
+++ b/sysdeps/x86_64/fpu/Versions
@@ -23,6 +23,7 @@ libmvec {
_ZGVbN2v_cbrt; _ZGVcN4v_cbrt; _ZGVdN4v_cbrt; _ZGVeN8v_cbrt;
_ZGVbN2v_cosh; _ZGVcN4v_cosh; _ZGVdN4v_cosh; _ZGVeN8v_cosh;
_ZGVbN2v_erf; _ZGVcN4v_erf; _ZGVdN4v_erf; _ZGVeN8v_erf;
+ _ZGVbN2v_erfc; _ZGVcN4v_erfc; _ZGVdN4v_erfc; _ZGVeN8v_erfc;
_ZGVbN2v_exp10; _ZGVcN4v_exp10; _ZGVdN4v_exp10; _ZGVeN8v_exp10;
_ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2;
_ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1;
@@ -41,6 +42,7 @@ libmvec {
_ZGVbN4v_atanhf; _ZGVcN8v_atanhf; _ZGVdN8v_atanhf; _ZGVeN16v_atanhf;
_ZGVbN4v_cbrtf; _ZGVcN8v_cbrtf; _ZGVdN8v_cbrtf; _ZGVeN16v_cbrtf;
_ZGVbN4v_coshf; _ZGVcN8v_coshf; _ZGVdN8v_coshf; _ZGVeN16v_coshf;
+ _ZGVbN4v_erfcf; _ZGVcN8v_erfcf; _ZGVdN8v_erfcf; _ZGVeN16v_erfcf;
_ZGVbN4v_erff; _ZGVcN8v_erff; _ZGVdN8v_erff; _ZGVeN16v_erff;
_ZGVbN4v_exp10f; _ZGVcN8v_exp10f; _ZGVdN8v_exp10f; _ZGVeN16v_exp10f;
_ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f;
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 71e9fced02..f3ee98358f 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1359,6 +1359,26 @@ float: 6
float128: 5
ldouble: 5
+Function: "erfc_vlen16":
+float: 1
+
+Function: "erfc_vlen2":
+double: 1
+
+Function: "erfc_vlen4":
+double: 1
+float: 1
+
+Function: "erfc_vlen4_avx2":
+double: 1
+
+Function: "erfc_vlen8":
+double: 1
+float: 1
+
+Function: "erfc_vlen8_avx2":
+float: 1
+
Function: "exp":
double: 1
float: 1
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core-sse2.S
new file mode 100644
index 0000000000..31aea74264
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized erfc, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2v_erfc _ZGVbN2v_erfc_sse2
+#include "../svml_d_erfc2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core.c
new file mode 100644
index 0000000000..b457c5bc75
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized erfc, vector length is 2.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2v_erfc
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_erfc, __GI__ZGVbN2v_erfc, __redirect__ZGVbN2v_erfc)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core_sse4.S
new file mode 100644
index 0000000000..3fd172770a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core_sse4.S