diff options
| author | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2024-10-25 15:21:54 -0300 |
|---|---|---|
| committer | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2024-11-01 11:27:40 -0300 |
| commit | c28f8d7f1943433b1673369d7432cec8abe9ca03 (patch) | |
| tree | 85bb4b49db6610d548e908246098091847c42514 | |
| parent | f338c7c5f526a86be2de7205d1e0876ff02e2087 (diff) | |
| download | glibc-c28f8d7f1943433b1673369d7432cec8abe9ca03.tar.xz glibc-c28f8d7f1943433b1673369d7432cec8abe9ca03.zip | |
x86_64: Add exp10m1f with FMA
The CORE-MATH exp10m1f implementation showed slightly worse latency
when using the x86_64 baseline ABI. This patch adds an ifunc variant
with similar performance for x86_64-v3.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: DJ Delorie <dj@redhat.com>
| -rw-r--r-- | sysdeps/ieee754/flt-32/s_exp10m1f.c | 2 | ||||
| -rw-r--r-- | sysdeps/x86_64/fpu/multiarch/Makefile | 2 | ||||
| -rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c | 4 | ||||
| -rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c | 33 |
4 files changed, 41 insertions, 0 deletions
diff --git a/sysdeps/ieee754/flt-32/s_exp10m1f.c b/sysdeps/ieee754/flt-32/s_exp10m1f.c index 60b8348819..04a068ee32 100644 --- a/sysdeps/ieee754/flt-32/s_exp10m1f.c +++ b/sysdeps/ieee754/flt-32/s_exp10m1f.c @@ -222,4 +222,6 @@ __exp10m1f (float x) return (s - 1.0) + w * c0; } } +#ifndef __exp10m1f libm_alias_float (__exp10m1, exp10m1) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index cbe09d49f4..dcff4df2f1 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -11,6 +11,7 @@ CFLAGS-s_log1p-fma.c = -mfma -mavx2 CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 CFLAGS-s_sincos-fma.c = -mfma -mavx2 +CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2 CFLAGS-e_exp2f-fma.c = -mfma -mavx2 CFLAGS-e_expf-fma.c = -mfma -mavx2 @@ -72,6 +73,7 @@ libm-sysdep_routines += \ s_ceilf-sse4_1 \ s_cosf-fma \ s_cosf-sse2 \ + s_exp10m1f-fma \ s_expm1-fma \ s_floor-sse4_1 \ s_floorf-sse4_1 \ diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c new file mode 100644 index 0000000000..3dda04e2dd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c @@ -0,0 +1,4 @@ +#define __exp10m1f __exp10m1f_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c new file mode 100644 index 0000000000..8040b7ed79 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c @@ -0,0 +1,33 @@ +/* Multiple versions of exp10m1. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include <libm-alias-float.h> + +extern float __redirect_exp10m1f (float); + +# define SYMBOL_NAME exp10m1f +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_exp10m1f, __exp10m1f, IFUNC_SELECTOR ()); +libm_alias_float (__exp10m1, exp10m1) + +# define __exp10m1f __exp10m1f_sse2 +#endif +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c> |
