diff options
| author | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-29 08:59:16 -0800 |
|---|---|---|
| committer | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-29 11:37:49 -0800 |
| commit | 76ddc74e86f7bc36468736dd22c4c29e39cd62d8 (patch) | |
| tree | 41692fa99b3484c5445ef0ea2dfcad0f8990d800 | |
| parent | ef7ea9c1327be11c54462cb539ea8854979a21f3 (diff) | |
| download | glibc-76ddc74e86f7bc36468736dd22c4c29e39cd62d8.tar.xz glibc-76ddc74e86f7bc36468736dd22c4c29e39cd62d8.zip | |
x86-64: Add vector expm1/expm1f implementation to libmvec
Implement vectorized expm1/expm1f containing SSE, AVX, AVX2 and
AVX512 versions for libmvec as per vector ABI. It also contains
accuracy and ABI tests for vector expm1/expm1f with regenerated ulps.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
50 files changed, 2725 insertions, 1 deletion
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index 35c6ac57a8..28dc4a82c5 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -175,4 +175,15 @@ #define __DECL_SIMD_coshf32x #define __DECL_SIMD_coshf64x #define __DECL_SIMD_coshf128x + +#define __DECL_SIMD_expm1 +#define __DECL_SIMD_expm1f +#define __DECL_SIMD_expm1l +#define __DECL_SIMD_expm1f16 +#define __DECL_SIMD_expm1f32 +#define __DECL_SIMD_expm1f64 +#define __DECL_SIMD_expm1f128 +#define __DECL_SIMD_expm1f32x +#define __DECL_SIMD_expm1f64x +#define __DECL_SIMD_expm1f128x #endif diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 60a314f69e..c57adc8ace 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -116,7 +116,7 @@ __MATHCALL_VEC (exp10,, (_Mdouble_ __x)); #if defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99 /* Return exp(X) - 1. */ -__MATHCALL (expm1,, (_Mdouble_ __x)); +__MATHCALL_VEC (expm1,, (_Mdouble_ __x)); /* Return log(1 + X). */ __MATHCALL (log1p,, (_Mdouble_ __x)); diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist index 4907680143..c9d3213bd3 100644 --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -52,6 +52,7 @@ GLIBC_2.35 _ZGVbN2v_atan F GLIBC_2.35 _ZGVbN2v_cosh F GLIBC_2.35 _ZGVbN2v_exp10 F GLIBC_2.35 _ZGVbN2v_exp2 F +GLIBC_2.35 _ZGVbN2v_expm1 F GLIBC_2.35 _ZGVbN2vv_hypot F GLIBC_2.35 _ZGVbN4v_acosf F GLIBC_2.35 _ZGVbN4v_asinf F @@ -59,6 +60,7 @@ GLIBC_2.35 _ZGVbN4v_atanf F GLIBC_2.35 _ZGVbN4v_coshf F GLIBC_2.35 _ZGVbN4v_exp10f F GLIBC_2.35 _ZGVbN4v_exp2f F +GLIBC_2.35 _ZGVbN4v_expm1f F GLIBC_2.35 _ZGVbN4vv_hypotf F GLIBC_2.35 _ZGVcN4v_acos F GLIBC_2.35 _ZGVcN4v_asin F @@ -66,6 +68,7 @@ GLIBC_2.35 _ZGVcN4v_atan F GLIBC_2.35 _ZGVcN4v_cosh F GLIBC_2.35 _ZGVcN4v_exp10 F GLIBC_2.35 _ZGVcN4v_exp2 F +GLIBC_2.35 _ZGVcN4v_expm1 F GLIBC_2.35 _ZGVcN4vv_hypot F GLIBC_2.35 _ZGVcN8v_acosf F GLIBC_2.35 
_ZGVcN8v_asinf F @@ -73,6 +76,7 @@ GLIBC_2.35 _ZGVcN8v_atanf F GLIBC_2.35 _ZGVcN8v_coshf F GLIBC_2.35 _ZGVcN8v_exp10f F GLIBC_2.35 _ZGVcN8v_exp2f F +GLIBC_2.35 _ZGVcN8v_expm1f F GLIBC_2.35 _ZGVcN8vv_hypotf F GLIBC_2.35 _ZGVdN4v_acos F GLIBC_2.35 _ZGVdN4v_asin F @@ -80,6 +84,7 @@ GLIBC_2.35 _ZGVdN4v_atan F GLIBC_2.35 _ZGVdN4v_cosh F GLIBC_2.35 _ZGVdN4v_exp10 F GLIBC_2.35 _ZGVdN4v_exp2 F +GLIBC_2.35 _ZGVdN4v_expm1 F GLIBC_2.35 _ZGVdN4vv_hypot F GLIBC_2.35 _ZGVdN8v_acosf F GLIBC_2.35 _ZGVdN8v_asinf F @@ -87,6 +92,7 @@ GLIBC_2.35 _ZGVdN8v_atanf F GLIBC_2.35 _ZGVdN8v_coshf F GLIBC_2.35 _ZGVdN8v_exp10f F GLIBC_2.35 _ZGVdN8v_exp2f F +GLIBC_2.35 _ZGVdN8v_expm1f F GLIBC_2.35 _ZGVdN8vv_hypotf F GLIBC_2.35 _ZGVeN16v_acosf F GLIBC_2.35 _ZGVeN16v_asinf F @@ -94,6 +100,7 @@ GLIBC_2.35 _ZGVeN16v_atanf F GLIBC_2.35 _ZGVeN16v_coshf F GLIBC_2.35 _ZGVeN16v_exp10f F GLIBC_2.35 _ZGVeN16v_exp2f F +GLIBC_2.35 _ZGVeN16v_expm1f F GLIBC_2.35 _ZGVeN16vv_hypotf F GLIBC_2.35 _ZGVeN8v_acos F GLIBC_2.35 _ZGVeN8v_asin F @@ -101,4 +108,5 @@ GLIBC_2.35 _ZGVeN8v_atan F GLIBC_2.35 _ZGVeN8v_cosh F GLIBC_2.35 _ZGVeN8v_exp10 F GLIBC_2.35 _ZGVeN8v_exp2 F +GLIBC_2.35 _ZGVeN8v_expm1 F GLIBC_2.35 _ZGVeN8vv_hypot F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index 708e81b3d0..e2f98e176f 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -86,6 +86,10 @@ # define __DECL_SIMD_cosh __DECL_SIMD_x86_64 # undef __DECL_SIMD_coshf # define __DECL_SIMD_coshf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_expm1 +# define __DECL_SIMD_expm1 __DECL_SIMD_x86_64 +# undef __DECL_SIMD_expm1f +# define __DECL_SIMD_expm1f __DECL_SIMD_x86_64 # endif #endif diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h index 81d0238ebf..43233059f6 100644 --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h @@ -42,6 +42,8 @@ !GCC$ builtin (exp10f) attributes 
simd (notinbranch) if('x86_64') !GCC$ builtin (cosh) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (coshf) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (expm1) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (expm1f) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (cos) attributes simd (notinbranch) if('x32') !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') @@ -69,3 +71,5 @@ !GCC$ builtin (exp10f) attributes simd (notinbranch) if('x32') !GCC$ builtin (cosh) attributes simd (notinbranch) if('x32') !GCC$ builtin (coshf) attributes simd (notinbranch) if('x32') +!GCC$ builtin (expm1) attributes simd (notinbranch) if('x32') +!GCC$ builtin (expm1f) attributes simd (notinbranch) if('x32') diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig index 5bc2df134f..8de8214971 100644 --- a/sysdeps/x86_64/fpu/Makeconfig +++ b/sysdeps/x86_64/fpu/Makeconfig @@ -30,6 +30,7 @@ libmvec-funcs = \ exp \ exp10 \ exp2 \ + expm1 \ hypot \ log \ pow \ diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions index 53346d16a2..58debb2dbe 100644 --- a/sysdeps/x86_64/fpu/Versions +++ b/sysdeps/x86_64/fpu/Versions @@ -20,6 +20,7 @@ libmvec { _ZGVbN2v_cosh; _ZGVcN4v_cosh; _ZGVdN4v_cosh; _ZGVeN8v_cosh; _ZGVbN2v_exp10; _ZGVcN4v_exp10; _ZGVdN4v_exp10; _ZGVeN8v_exp10; _ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2; + _ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1; _ZGVbN2vv_hypot; _ZGVcN4vv_hypot; _ZGVdN4vv_hypot; _ZGVeN8vv_hypot; _ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf; _ZGVbN4v_asinf; _ZGVcN8v_asinf; _ZGVdN8v_asinf; _ZGVeN16v_asinf; @@ -27,6 +28,7 @@ libmvec { _ZGVbN4v_coshf; _ZGVcN8v_coshf; _ZGVdN8v_coshf; _ZGVeN16v_coshf; _ZGVbN4v_exp10f; _ZGVcN8v_exp10f; _ZGVdN8v_exp10f; _ZGVeN16v_exp10f; _ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f; + _ZGVbN4v_expm1f; _ZGVcN8v_expm1f; _ZGVdN8v_expm1f; _ZGVeN16v_expm1f; _ZGVbN4vv_hypotf; 
_ZGVcN8vv_hypotf; _ZGVdN8vv_hypotf; _ZGVeN16vv_hypotf; } } diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index ac70f15208..f05ece8c8a 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1395,6 +1395,26 @@ float: 1 float128: 3 ldouble: 4 +Function: "expm1_vlen16": +float: 1 + +Function: "expm1_vlen2": +double: 1 + +Function: "expm1_vlen4": +double: 1 +float: 1 + +Function: "expm1_vlen4_avx2": +double: 1 + +Function: "expm1_vlen8": +double: 1 +float: 1 + +Function: "expm1_vlen8_avx2": +float: 1 + Function: "gamma": double: 4 float: 7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_expm12_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_expm12_core-sse2.S new file mode 100644 index 0000000000..e8cb6faaca --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_expm12_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized expm1, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define _ZGVbN2v_expm1 _ZGVbN2v_expm1_sse2 +#include "../svml_d_expm12_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_expm12_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_expm12_core.c new file mode 100644 index 0000000000..9c794e932e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_expm12_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized expm1, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_expm1 +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_expm1, __GI__ZGVbN2v_expm1, __redirect__ZGVbN2v_expm1) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_expm12_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_expm12_core_sse4.S new file mode 100644 index 0000000000..db763e3856 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_expm12_core_sse4.S @@ -0,0 +1,421 @@ +/* Function expm1 vectorized with SSE4 |
