diff options
| author | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-29 10:13:20 -0800 |
|---|---|---|
| committer | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-30 10:19:03 -0800 |
| commit | 8881cca8fb8d3a7ee89d174017dd27eded90366c (patch) | |
| tree | aa183021a734c54404062791e2582b17a5055f40 | |
| parent | bc1e344dc1fb7f406c42e03a63dd3dbf426af9e7 (diff) | |
| download | glibc-8881cca8fb8d3a7ee89d174017dd27eded90366c.tar.xz glibc-8881cca8fb8d3a7ee89d174017dd27eded90366c.zip | |
x86-64: Add vector erfc/erfcf implementation to libmvec
Implement vectorized erfc/erfcf containing SSE, AVX, AVX2 and
AVX512 versions for libmvec as per vector ABI. It also contains
accuracy and ABI tests for vector erfc/erfcf with regenerated ulps.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
50 files changed, 14970 insertions, 1 deletion
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index bcaddb7a0e..e716664306 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -307,4 +307,15 @@ #define __DECL_SIMD_asinhf32x #define __DECL_SIMD_asinhf64x #define __DECL_SIMD_asinhf128x + +#define __DECL_SIMD_erfc +#define __DECL_SIMD_erfcf +#define __DECL_SIMD_erfcl +#define __DECL_SIMD_erfcf16 +#define __DECL_SIMD_erfcf32 +#define __DECL_SIMD_erfcf64 +#define __DECL_SIMD_erfcf128 +#define __DECL_SIMD_erfcf32x +#define __DECL_SIMD_erfcf64x +#define __DECL_SIMD_erfcf128x #endif diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 40e055e579..24e28b5c4f 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -229,7 +229,7 @@ __MATHCALL (yn,, (int, _Mdouble_)); #if defined __USE_XOPEN || defined __USE_ISOC99 /* Error and gamma functions. */ __MATHCALL_VEC (erf,, (_Mdouble_)); -__MATHCALL (erfc,, (_Mdouble_)); +__MATHCALL_VEC (erfc,, (_Mdouble_)); __MATHCALL (lgamma,, (_Mdouble_)); #endif diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist index df265d6a12..e9e98bab65 100644 --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -55,6 +55,7 @@ GLIBC_2.35 _ZGVbN2v_atanh F GLIBC_2.35 _ZGVbN2v_cbrt F GLIBC_2.35 _ZGVbN2v_cosh F GLIBC_2.35 _ZGVbN2v_erf F +GLIBC_2.35 _ZGVbN2v_erfc F GLIBC_2.35 _ZGVbN2v_exp10 F GLIBC_2.35 _ZGVbN2v_exp2 F GLIBC_2.35 _ZGVbN2v_expm1 F @@ -73,6 +74,7 @@ GLIBC_2.35 _ZGVbN4v_atanf F GLIBC_2.35 _ZGVbN4v_atanhf F GLIBC_2.35 _ZGVbN4v_cbrtf F GLIBC_2.35 _ZGVbN4v_coshf F +GLIBC_2.35 _ZGVbN4v_erfcf F GLIBC_2.35 _ZGVbN4v_erff F GLIBC_2.35 _ZGVbN4v_exp10f F GLIBC_2.35 _ZGVbN4v_exp2f F @@ -93,6 +95,7 @@ GLIBC_2.35 _ZGVcN4v_atanh F GLIBC_2.35 _ZGVcN4v_cbrt F GLIBC_2.35 _ZGVcN4v_cosh F GLIBC_2.35 _ZGVcN4v_erf F +GLIBC_2.35 _ZGVcN4v_erfc F GLIBC_2.35 _ZGVcN4v_exp10 F GLIBC_2.35 _ZGVcN4v_exp2 F GLIBC_2.35 _ZGVcN4v_expm1 F @@ 
-111,6 +114,7 @@ GLIBC_2.35 _ZGVcN8v_atanf F GLIBC_2.35 _ZGVcN8v_atanhf F GLIBC_2.35 _ZGVcN8v_cbrtf F GLIBC_2.35 _ZGVcN8v_coshf F +GLIBC_2.35 _ZGVcN8v_erfcf F GLIBC_2.35 _ZGVcN8v_erff F GLIBC_2.35 _ZGVcN8v_exp10f F GLIBC_2.35 _ZGVcN8v_exp2f F @@ -131,6 +135,7 @@ GLIBC_2.35 _ZGVdN4v_atanh F GLIBC_2.35 _ZGVdN4v_cbrt F GLIBC_2.35 _ZGVdN4v_cosh F GLIBC_2.35 _ZGVdN4v_erf F +GLIBC_2.35 _ZGVdN4v_erfc F GLIBC_2.35 _ZGVdN4v_exp10 F GLIBC_2.35 _ZGVdN4v_exp2 F GLIBC_2.35 _ZGVdN4v_expm1 F @@ -149,6 +154,7 @@ GLIBC_2.35 _ZGVdN8v_atanf F GLIBC_2.35 _ZGVdN8v_atanhf F GLIBC_2.35 _ZGVdN8v_cbrtf F GLIBC_2.35 _ZGVdN8v_coshf F +GLIBC_2.35 _ZGVdN8v_erfcf F GLIBC_2.35 _ZGVdN8v_erff F GLIBC_2.35 _ZGVdN8v_exp10f F GLIBC_2.35 _ZGVdN8v_exp2f F @@ -168,6 +174,7 @@ GLIBC_2.35 _ZGVeN16v_atanf F GLIBC_2.35 _ZGVeN16v_atanhf F GLIBC_2.35 _ZGVeN16v_cbrtf F GLIBC_2.35 _ZGVeN16v_coshf F +GLIBC_2.35 _ZGVeN16v_erfcf F GLIBC_2.35 _ZGVeN16v_erff F GLIBC_2.35 _ZGVeN16v_exp10f F GLIBC_2.35 _ZGVeN16v_exp2f F @@ -188,6 +195,7 @@ GLIBC_2.35 _ZGVeN8v_atanh F GLIBC_2.35 _ZGVeN8v_cbrt F GLIBC_2.35 _ZGVeN8v_cosh F GLIBC_2.35 _ZGVeN8v_erf F +GLIBC_2.35 _ZGVeN8v_erfc F GLIBC_2.35 _ZGVeN8v_exp10 F GLIBC_2.35 _ZGVeN8v_exp2 F GLIBC_2.35 _ZGVeN8v_expm1 F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index 71b7d660db..9a55e2e542 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -134,6 +134,10 @@ # define __DECL_SIMD_asinh __DECL_SIMD_x86_64 # undef __DECL_SIMD_asinhf # define __DECL_SIMD_asinhf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_erfc +# define __DECL_SIMD_erfc __DECL_SIMD_x86_64 +# undef __DECL_SIMD_erfcf +# define __DECL_SIMD_erfcf __DECL_SIMD_x86_64 # endif #endif diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h index 4d3afdf753..818134dc75 100644 --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h @@ -66,6 +66,8 @@ 
!GCC$ builtin (tanhf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (asinh) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (asinhf) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (erfc) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (erfcf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (cos) attributes simd (notinbranch) if('x32') !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') @@ -117,3 +119,5 @@ !GCC$ builtin (tanhf) attributes simd (notinbranch) if('x32') !GCC$ builtin (asinh) attributes simd (notinbranch) if('x32') !GCC$ builtin (asinhf) attributes simd (notinbranch) if('x32') +!GCC$ builtin (erfc) attributes simd (notinbranch) if('x32') +!GCC$ builtin (erfcf) attributes simd (notinbranch) if('x32') diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig index 2ff33c7dd8..be1d6cbb92 100644 --- a/sysdeps/x86_64/fpu/Makeconfig +++ b/sysdeps/x86_64/fpu/Makeconfig @@ -33,6 +33,7 @@ libmvec-funcs = \ cos \ cosh \ erf \ + erfc \ exp \ exp10 \ exp2 \ diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions index e6ead13085..b10ae69894 100644 --- a/sysdeps/x86_64/fpu/Versions +++ b/sysdeps/x86_64/fpu/Versions @@ -23,6 +23,7 @@ libmvec { _ZGVbN2v_cbrt; _ZGVcN4v_cbrt; _ZGVdN4v_cbrt; _ZGVeN8v_cbrt; _ZGVbN2v_cosh; _ZGVcN4v_cosh; _ZGVdN4v_cosh; _ZGVeN8v_cosh; _ZGVbN2v_erf; _ZGVcN4v_erf; _ZGVdN4v_erf; _ZGVeN8v_erf; + _ZGVbN2v_erfc; _ZGVcN4v_erfc; _ZGVdN4v_erfc; _ZGVeN8v_erfc; _ZGVbN2v_exp10; _ZGVcN4v_exp10; _ZGVdN4v_exp10; _ZGVeN8v_exp10; _ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2; _ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1; @@ -41,6 +42,7 @@ libmvec { _ZGVbN4v_atanhf; _ZGVcN8v_atanhf; _ZGVdN8v_atanhf; _ZGVeN16v_atanhf; _ZGVbN4v_cbrtf; _ZGVcN8v_cbrtf; _ZGVdN8v_cbrtf; _ZGVeN16v_cbrtf; _ZGVbN4v_coshf; _ZGVcN8v_coshf; _ZGVdN8v_coshf; _ZGVeN16v_coshf; + _ZGVbN4v_erfcf; _ZGVcN8v_erfcf; _ZGVdN8v_erfcf; _ZGVeN16v_erfcf; _ZGVbN4v_erff; 
_ZGVcN8v_erff; _ZGVdN8v_erff; _ZGVeN16v_erff; _ZGVbN4v_exp10f; _ZGVcN8v_exp10f; _ZGVdN8v_exp10f; _ZGVeN16v_exp10f; _ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f; diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 71e9fced02..f3ee98358f 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1359,6 +1359,26 @@ float: 6 float128: 5 ldouble: 5 +Function: "erfc_vlen16": +float: 1 + +Function: "erfc_vlen2": +double: 1 + +Function: "erfc_vlen4": +double: 1 +float: 1 + +Function: "erfc_vlen4_avx2": +double: 1 + +Function: "erfc_vlen8": +double: 1 +float: 1 + +Function: "erfc_vlen8_avx2": +float: 1 + Function: "exp": double: 1 float: 1 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core-sse2.S new file mode 100644 index 0000000000..31aea74264 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized erfc, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define _ZGVbN2v_erfc _ZGVbN2v_erfc_sse2 +#include "../svml_d_erfc2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core.c new file mode 100644 index 0000000000..b457c5bc75 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized erfc, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_erfc +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_erfc, __GI__ZGVbN2v_erfc, __redirect__ZGVbN2v_erfc) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core_sse4.S new file mode 100644 index 0000000000..3fd172770a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc2_core_sse4.S |
