diff options
| author | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-29 08:41:18 -0800 |
|---|---|---|
| committer | Sunil K Pandey <skpgkp2@gmail.com> | 2021-12-29 11:37:29 -0800 |
| commit | 3fc9ccc20b6d0d5e4517d2e766f14ce780a228a5 (patch) | |
| tree | 21dfad9c847f54ca417c735e09b95f7f6023d690 | |
| parent | 37475ba88303929e85704693455c7294e50aba77 (diff) | |
| download | glibc-3fc9ccc20b6d0d5e4517d2e766f14ce780a228a5.tar.xz glibc-3fc9ccc20b6d0d5e4517d2e766f14ce780a228a5.zip | |
x86-64: Add vector exp2/exp2f implementation to libmvec
Implement vectorized exp2/exp2f containing SSE, AVX, AVX2 and
AVX512 versions for libmvec as per vector ABI. It also contains
accuracy and ABI tests for vector exp2/exp2f with regenerated ulps.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
50 files changed, 2293 insertions, 1 deletions
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index adf65f6bc2..36d6643eb9 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -142,4 +142,15 @@ #define __DECL_SIMD_hypotf32x #define __DECL_SIMD_hypotf64x #define __DECL_SIMD_hypotf128x + +#define __DECL_SIMD_exp2 +#define __DECL_SIMD_exp2f +#define __DECL_SIMD_exp2l +#define __DECL_SIMD_exp2f16 +#define __DECL_SIMD_exp2f32 +#define __DECL_SIMD_exp2f64 +#define __DECL_SIMD_exp2f128 +#define __DECL_SIMD_exp2f32x +#define __DECL_SIMD_exp2f64x +#define __DECL_SIMD_exp2f128x #endif diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 2ed820a0dc..645088cbf3 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -127,7 +127,7 @@ __MATHCALL (logb,, (_Mdouble_ __x)); #ifdef __USE_ISOC99 /* Compute base-2 exponential of X. */ -__MATHCALL (exp2,, (_Mdouble_ __x)); +__MATHCALL_VEC (exp2,, (_Mdouble_ __x)); /* Compute base-2 logarithm of X. */ __MATHCALL (log2,, (_Mdouble_ __x)); diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist index 12bb03245b..1717f2dee9 100644 --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -49,32 +49,40 @@ GLIBC_2.22 _ZGVeN8vvv_sincos F GLIBC_2.35 _ZGVbN2v_acos F GLIBC_2.35 _ZGVbN2v_asin F GLIBC_2.35 _ZGVbN2v_atan F +GLIBC_2.35 _ZGVbN2v_exp2 F GLIBC_2.35 _ZGVbN2vv_hypot F GLIBC_2.35 _ZGVbN4v_acosf F GLIBC_2.35 _ZGVbN4v_asinf F GLIBC_2.35 _ZGVbN4v_atanf F +GLIBC_2.35 _ZGVbN4v_exp2f F GLIBC_2.35 _ZGVbN4vv_hypotf F GLIBC_2.35 _ZGVcN4v_acos F GLIBC_2.35 _ZGVcN4v_asin F GLIBC_2.35 _ZGVcN4v_atan F +GLIBC_2.35 _ZGVcN4v_exp2 F GLIBC_2.35 _ZGVcN4vv_hypot F GLIBC_2.35 _ZGVcN8v_acosf F GLIBC_2.35 _ZGVcN8v_asinf F GLIBC_2.35 _ZGVcN8v_atanf F +GLIBC_2.35 _ZGVcN8v_exp2f F GLIBC_2.35 _ZGVcN8vv_hypotf F GLIBC_2.35 _ZGVdN4v_acos F GLIBC_2.35 _ZGVdN4v_asin F GLIBC_2.35 _ZGVdN4v_atan F +GLIBC_2.35 _ZGVdN4v_exp2 F GLIBC_2.35 _ZGVdN4vv_hypot F GLIBC_2.35 _ZGVdN8v_acosf F GLIBC_2.35 _ZGVdN8v_asinf F GLIBC_2.35 _ZGVdN8v_atanf F +GLIBC_2.35 _ZGVdN8v_exp2f F GLIBC_2.35 _ZGVdN8vv_hypotf F GLIBC_2.35 _ZGVeN16v_acosf F GLIBC_2.35 _ZGVeN16v_asinf F GLIBC_2.35 _ZGVeN16v_atanf F +GLIBC_2.35 _ZGVeN16v_exp2f F GLIBC_2.35 _ZGVeN16vv_hypotf F GLIBC_2.35 _ZGVeN8v_acos F GLIBC_2.35 _ZGVeN8v_asin F GLIBC_2.35 _ZGVeN8v_atan F +GLIBC_2.35 _ZGVeN8v_exp2 F GLIBC_2.35 _ZGVeN8vv_hypot F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index 437977c5fd..c7a972521b 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -74,6 +74,10 @@ # define __DECL_SIMD_hypot __DECL_SIMD_x86_64 # undef __DECL_SIMD_hypotf # define __DECL_SIMD_hypotf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_exp2 +# define __DECL_SIMD_exp2 __DECL_SIMD_x86_64 +# undef __DECL_SIMD_exp2f +# define __DECL_SIMD_exp2f __DECL_SIMD_x86_64 # endif #endif diff --git a/sysdeps/x86/fpu/finclude/math-vector-fortran.h b/sysdeps/x86/fpu/finclude/math-vector-fortran.h index cda31479a6..0994e6dfac 100644 --- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h +++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h @@ -36,6 +36,8 @@ !GCC$ builtin (asinf) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (hypot) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (hypotf) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (exp2) attributes simd (notinbranch) if('x86_64') +!GCC$ builtin (exp2f) attributes simd (notinbranch) if('x86_64') !GCC$ builtin (cos) attributes simd (notinbranch) if('x32') !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32') @@ -57,3 +59,5 @@ !GCC$ builtin (asinf) attributes simd (notinbranch) if('x32') !GCC$ builtin (hypot) attributes simd (notinbranch) if('x32') !GCC$ builtin (hypotf) attributes simd (notinbranch) if('x32') +!GCC$ builtin (exp2) attributes simd (notinbranch) if('x32') +!GCC$ builtin (exp2f) attributes simd (notinbranch) if('x32') diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig index 7769a02731..03b2364417 100644 --- a/sysdeps/x86_64/fpu/Makeconfig +++ b/sysdeps/x86_64/fpu/Makeconfig @@ -27,6 +27,7 @@ libmvec-funcs = \ atan \ cos \ exp \ + exp2 \ hypot \ log \ pow \ diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions index e359e5dc2c..12b7ad1830 100644 --- a/sysdeps/x86_64/fpu/Versions +++ b/sysdeps/x86_64/fpu/Versions @@ -17,10 +17,12 @@ libmvec { _ZGVbN2v_acos; _ZGVcN4v_acos; _ZGVdN4v_acos; _ZGVeN8v_acos; _ZGVbN2v_asin; _ZGVcN4v_asin; _ZGVdN4v_asin; _ZGVeN8v_asin; _ZGVbN2v_atan; _ZGVcN4v_atan; _ZGVdN4v_atan; _ZGVeN8v_atan; + _ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2; _ZGVbN2vv_hypot; _ZGVcN4vv_hypot; _ZGVdN4vv_hypot; _ZGVeN8vv_hypot; _ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf; _ZGVbN4v_asinf; _ZGVcN8v_asinf; _ZGVdN8v_asinf; _ZGVeN16v_asinf; _ZGVbN4v_atanf; _ZGVcN8v_atanf; _ZGVdN8v_atanf; _ZGVeN16v_atanf; + _ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f; _ZGVbN4vv_hypotf; _ZGVcN8vv_hypotf; _ZGVdN8vv_hypotf; _ZGVeN16vv_hypotf; } } diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index a7513ec94e..bc4479ad39 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1276,6 +1276,26 @@ float: 1 float128: 2 ldouble: 1 +Function: "exp2_vlen16": +float: 1 + +Function: "exp2_vlen2": +double: 1 + +Function: "exp2_vlen4": +double: 1 +float: 1 + +Function: "exp2_vlen4_avx2": +double: 1 + +Function: "exp2_vlen8": +double: 1 +float: 1 + +Function: "exp2_vlen8_avx2": +float: 1 + Function: "exp_downward": double: 1 float: 1 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp22_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp22_core-sse2.S new file mode 100644 index 0000000000..330260baaa --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp22_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized exp2, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define _ZGVbN2v_exp2 _ZGVbN2v_exp2_sse2 +#include "../svml_d_exp22_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp22_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp22_core.c new file mode 100644 index 0000000000..e0cf198030 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp22_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized exp2, vector length is 2. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_exp2 +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_exp2, __GI__ZGVbN2v_exp2, __redirect__ZGVbN2v_exp2) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp22_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp22_core_sse4.S new file mode 100644 index 0000000000..7388c242f6 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp22_core_sse4.S @@ -0,0 +1,325 @@ +/* Function exp2 vectorized with SSE4. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + https://www.gnu.org/licenses/. */ + +/* + * ALGORITHM DESCRIPTION: + * + * exp2(x) = 2^n * T[j] * (1 + P(y)) + * where + * x = m*(1/K) + y, y in [-1/K..1/K] + * m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2] + * |
