diff options
| author | Joe Ramsay <Joe.Ramsay@arm.com> | 2023-10-05 17:10:48 +0100 |
|---|---|---|
| committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2023-10-23 15:00:44 +0100 |
| commit | f554334c05a95c6b4df532ddc88cd3e72dc7d04c (patch) | |
| tree | 1ee426aaf6fbc68b5e7cb27286c8396738df9bc4 /sysdeps | |
| parent | 2aa0974d2573441bffd596b07bff8698b1f2f18c (diff) | |
| download | glibc-f554334c05a95c6b4df532ddc88cd3e72dc7d04c.tar.xz glibc-f554334c05a95c6b4df532ddc88cd3e72dc7d04c.zip | |
aarch64: Add vector implementations of tan routines
This includes some utility headers for evaluating polynomials using
various schemes.
Diffstat (limited to 'sysdeps')
| -rw-r--r-- | sysdeps/aarch64/fpu/Makefile | 3 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/Versions | 6 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/bits/math-vector.h | 4 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/poly_advsimd_f32.h | 36 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/poly_advsimd_f64.h | 36 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/poly_generic.h | 285 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/poly_sve_f32.h | 38 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/poly_sve_f64.h | 38 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/poly_sve_generic.h | 313 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/tan_advsimd.c | 123 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/tan_sve.c | 104 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/tanf_advsimd.c | 129 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/tanf_sve.c | 118 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c | 1 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/test-double-sve-wrappers.c | 1 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c | 1 | ||||
| -rw-r--r-- | sysdeps/aarch64/fpu/test-float-sve-wrappers.c | 1 | ||||
| -rw-r--r-- | sysdeps/aarch64/libm-test-ulps | 8 | ||||
| -rw-r--r-- | sysdeps/unix/sysv/linux/aarch64/libmvec.abilist | 4 |
19 files changed, 1248 insertions, 1 deletions
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile index 04aa2e37ca..a1bbc9bcaa 100644 --- a/sysdeps/aarch64/fpu/Makefile +++ b/sysdeps/aarch64/fpu/Makefile @@ -1,7 +1,8 @@ libmvec-supported-funcs = cos \ exp \ log \ - sin + sin \ + tan float-advsimd-funcs = $(libmvec-supported-funcs) double-advsimd-funcs = $(libmvec-supported-funcs) diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions index c85c0f3efb..f0ca0940a9 100644 --- a/sysdeps/aarch64/fpu/Versions +++ b/sysdeps/aarch64/fpu/Versions @@ -17,4 +17,10 @@ libmvec { _ZGVsMxv_sin; _ZGVsMxv_sinf; } + GLIBC_2.39 { + _ZGVnN4v_tanf; + _ZGVnN2v_tan; + _ZGVsMxv_tanf; + _ZGVsMxv_tan; + } } diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h index 7c200599c1..6193213147 100644 --- a/sysdeps/aarch64/fpu/bits/math-vector.h +++ b/sysdeps/aarch64/fpu/bits/math-vector.h @@ -53,11 +53,13 @@ __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t); __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t); # undef __ADVSIMD_VEC_MATH_SUPPORTED #endif /* __ADVSIMD_VEC_MATH_SUPPORTED */ @@ -68,11 +70,13 @@ __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_expf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxv_tanf (__sv_f32_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_exp (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxv_tan (__sv_f64_t, __sv_bool_t); # undef __SVE_VEC_MATH_SUPPORTED #endif /* __SVE_VEC_MATH_SUPPORTED */ diff --git a/sysdeps/aarch64/fpu/poly_advsimd_f32.h b/sysdeps/aarch64/fpu/poly_advsimd_f32.h new file mode 100644 index 0000000000..9e2ad9ad94 --- /dev/null +++ b/sysdeps/aarch64/fpu/poly_advsimd_f32.h @@ -0,0 +1,36 @@ +/* Helpers for evaluating polynomials on single-precision AdvSIMD input, using + various schemes. + + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef AARCH64_FPU_POLY_ADVSIMD_F32_H +#define AARCH64_FPU_POLY_ADVSIMD_F32_H + +#include <arm_neon.h> + +/* Wrap AdvSIMD f32 helpers: evaluation of some scheme/order has form: + v_[scheme]_[order]_f32. */ +#define VTYPE float32x4_t +#define FMA(x, y, z) vfmaq_f32 (z, x, y) +#define VWRAP(f) v_##f##_f32 +#include "poly_generic.h" +#undef VWRAP +#undef FMA +#undef VTYPE + +#endif diff --git a/sysdeps/aarch64/fpu/poly_advsimd_f64.h b/sysdeps/aarch64/fpu/poly_advsimd_f64.h new file mode 100644 index 0000000000..955cfc08ce --- /dev/null +++ b/sysdeps/aarch64/fpu/poly_advsimd_f64.h @@ -0,0 +1,36 @@ +/* Helpers for evaluating polynomials on double-precision AdvSIMD input, using + various schemes. + + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef AARCH64_FPU_POLY_ADVSIMD_F64_H +#define AARCH64_FPU_POLY_ADVSIMD_F64_H + +#include <arm_neon.h> + +/* Wrap AdvSIMD f64 helpers: evaluation of some scheme/order has form: + v_[scheme]_[order]_f64. */ +#define VTYPE float64x2_t +#define FMA(x, y, z) vfmaq_f64 (z, x, y) +#define VWRAP(f) v_##f##_f64 +#include "poly_generic.h" +#undef VWRAP +#undef FMA +#undef VTYPE + +#endif diff --git a/sysdeps/aarch64/fpu/poly_generic.h b/sysdeps/aarch64/fpu/poly_generic.h new file mode 100644 index 0000000000..84f042182b --- /dev/null +++ b/sysdeps/aarch64/fpu/poly_generic.h @@ -0,0 +1,285 @@ +/* Generic helpers for evaluating polynomials with various schemes. + + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + + +#ifndef VTYPE +# error Cannot use poly_generic without defining VTYPE +#endif +#ifndef VWRAP +# error Cannot use poly_generic without defining VWRAP +#endif +#ifndef FMA +# error Cannot use poly_generic without defining FMA +#endif + +static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2, + const VTYPE *poly) +{ + /* At order 3, Estrin and Pairwise Horner are identical. */ + VTYPE p01 = FMA (poly[1], x, poly[0]); + VTYPE p23 = FMA (poly[3], x, poly[2]); + return FMA (p23, x2, p01); +} + +static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4, + const VTYPE *poly) +{ + VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); + return FMA (poly[4], x4, p03); +} +static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4, + const VTYPE *poly) +{ + VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); + VTYPE p45 = FMA (poly[5], x, poly[4]); + return FMA (p45, x4, p03); +} +static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4, + const VTYPE *poly) +{ + VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); + VTYPE p45 = FMA (poly[5], x, poly[4]); + VTYPE p46 = FMA (poly[6], x2, p45); + return FMA (p46, x4, p03); +} +static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4, + const VTYPE *poly) +{ + VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly); + VTYPE p47 = VWRAP (pairwise_poly_3) (x, x2, poly + 4); + return FMA (p47, x4, p03); +} +static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + const VTYPE *poly) +{ + return FMA (poly[8], x8, VWRAP (estrin_7) (x, x2, x4, poly)); +} +static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + const VTYPE *poly) +{ + VTYPE p89 = FMA (poly[9], x, poly[8]); + return FMA (p89, x8, VWRAP (estrin_7) (x, x2, x4, poly)); +} +static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + const VTYPE *poly) +{ + VTYPE p89 = FMA (poly[9], x, poly[8]); + VTYPE p8_10 = FMA (poly[10], x2, p89); + return FMA (p8_10, x8, VWRAP (estrin_7) (x, x2, x4, poly)); +} +static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + const VTYPE *poly) +{ + VTYPE p8_11 = VWRAP (pairwise_poly_3) (x, x2, poly + 8); + return FMA (p8_11, x8, VWRAP (estrin_7) (x, x2, x4, poly)); +} +static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + const VTYPE *poly) +{ + return FMA (VWRAP (estrin_4) (x, x2, x4, poly + 8), x8, + VWRAP (estrin_7) (x, x2, x4, poly)); +} +static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + const VTYPE *poly) +{ + return FMA (VWRAP (estrin_5) (x, x2, x4, poly + 8), x8, + VWRAP (estrin_7) (x, x2, x4, poly)); +} +static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + const VTYPE *poly) +{ + return FMA (VWRAP (estrin_6) (x, x2, x4, poly + 8), x8, + VWRAP (estrin_7) (x, x2, x4, poly)); +} +static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + const VTYPE *poly) +{ + return FMA (VWRAP (estrin_7) (x, x2, x4, poly + 8), x8, + VWRAP (estrin_7) (x, x2, x4, poly)); +} +static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + VTYPE x16, const VTYPE *poly) +{ + return FMA (poly[16], x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); +} +static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + VTYPE x16, const VTYPE *poly) +{ + VTYPE p16_17 = FMA (poly[17], x, poly[16]); + return FMA (p16_17, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); +} +static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + VTYPE x16, const VTYPE *poly) +{ + VTYPE p16_17 = FMA (poly[17], x, poly[16]); + VTYPE p16_18 = FMA (poly[18], x2, p16_17); + return FMA (p16_18, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); +} +static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8, + VTYPE x16, const VTYPE *poly) +{ + VTYPE p16_19 = VWRAP (pairwise_poly_3) (x, x2, poly + 16); + return FMA (p16_19, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly)); +} + +static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly) +{ + VTYPE p = FMA (poly[3], x, poly[2]); + p = FMA (x, p, poly[1]); + p = FMA (x, p, poly[0]); + return p; +} +static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly) +{ + VTYPE p = FMA (poly[4], x, poly[3]); + p = FMA (x, p, poly[2]); + p = FMA (x, p, poly[1]); + p = FMA (x, p, poly[0]); + return p; +} +static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly) +{ + return FMA (x, VWRAP (horner_4) (x, poly + 1), poly[0]); +} +static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly) +{ + return FMA (x, VWRAP (horner_5) (x, poly + 1), poly[0]); +} +static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly) +{ + return FMA (x, VWRAP (horner_6) (x, poly + 1), poly[0]); +} +static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly) +{ + return FMA (x, VWRAP (horner_7) (x, poly + 1), poly[0]); +} +static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly) +{ + return FMA (x, VWRAP (horner_8) (x, poly + 1), poly[0]); +} +static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly) +{ + return FMA (x, VWRAP (horner_9) (x, poly + 1), poly[0]); +} +static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly) +{ + return FMA (x, VWRAP (horner_10) (x, poly + 1), poly[0]); +} +static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly) +{ + return FMA (x, VWRAP (horner_11) (x, poly + 1), poly[0]); +} + +static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p01 = FMA (poly[1], x, poly[0]); + VTYPE p23 = FMA (poly[3], x, poly[2]); + VTYPE p; + p = FMA (x2, poly[4], p23); + p = FMA (x2, p, p01); + return p; +} +static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p01 = FMA (poly[1], x, poly[0]); + VTYPE p23 = FMA (poly[3], x, poly[2]); + VTYPE p45 = FMA (poly[5], x, poly[4]); + VTYPE p; + p = FMA (x2, p45, p23); + p = FMA (x2, p, p01); + return p; +} +static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p26 = VWRAP (pw_horner_4) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p26, p01); +} +static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p27 = VWRAP (pw_horner_5) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p27, p01); +} +static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p28 = VWRAP (pw_horner_6) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p28, p01); +} +static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p29 = VWRAP (pw_horner_7) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p29, p01); +} +static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p2_10 = VWRAP (pw_horner_8) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p2_10, p01); +} +static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p2_11 = VWRAP (pw_horner_9) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p2_11, p01); +} +static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p2_12 = VWRAP (pw_horner_10) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p2_12, p01); +} +static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p2_13 = VWRAP (pw_horner_11) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p2_13, p01); +} +static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p2_14 = VWRAP (pw_horner_12) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p2_14, p01); +} +static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p2_15 = VWRAP (pw_horner_13) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p2_15, p01); +} +static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p2_16 = VWRAP (pw_horner_14) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p2_16, p01); +} +static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p2_17 = VWRAP (pw_horner_15) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p2_17, p01); +} +static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly) +{ + VTYPE p2_18 = VWRAP (pw_horner_16) (x, x2, poly + 2); + VTYPE p01 = FMA (poly[1], x, poly[0]); + return FMA (x2, p2_18, p01); +} diff --git a/sysdeps/aarch64/fpu/poly_sve_f32.h b/sysdeps/aarch64/fpu/poly_sve_f32.h new file mode 100644 index 0000000000..dcf2fab8dd --- /dev/null +++ b/sysdeps/aarch64/fpu/poly_sve_f32.h @@ -0,0 +1,38 @@ +/* Helpers for evaluating polynomials on single-precision SVE input, using + various schemes. + + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef AARCH64_FPU_POLY_SVE_F32_H +#define AARCH64_FPU_POLY_SVE_F32_H + +#include <arm_sve.h> + +/* Wrap SVE f32 helpers: evaluation of some scheme/order has form: + sv_[scheme]_[order]_f32_x. */ +#define VTYPE svfloat32_t +#define STYPE float +#define VWRAP(f) sv_##f##_f32_x +#define DUP svdup_n_f32 +#include "poly_sve_generic.h" +#undef DUP +#undef VWRAP +#undef STYPE +#undef VTYPE + +#endif diff --git a/sysdeps/aarch64/fpu/poly_sve_f64.h b/sysdeps/aarch64/fpu/poly_sve_f64.h new file mode 100644 index 0000000000..97a0b76637 --- /dev/null +++ b/sysdeps/aarch64/fpu/poly_sve_f64.h @@ -0,0 +1,38 @@ +/* Helpers for evaluating polynomials on double-precision SVE input, using + various schemes. + + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef AARCH64_FPU_POLY_SVE_F64_H +#define AARCH64_FPU_POLY_SVE_F64_H + +#include <arm_sve.h> + +/* Wrap SVE f64 helpers: evaluation of some scheme/order has form: + sv_[scheme]_[order]_f64_x. */ +#define VTYPE svfloat64_t +#define STYPE double +#define VWRAP(f) sv_##f##_f64_x +#define DUP svdup_n_f64 +#include "poly_sve_generic.h" +#undef DUP +#undef VWRAP +#undef STYPE +#undef VTYPE + +#endif diff --git a/sysdeps/aarch64/fpu/poly_sve_generic.h b/sysdeps/aarch64/fpu/poly_sve_generic.h new file mode 100644 index 0000000000..0ecf5ce45b --- /dev/null +++ b/sysdeps/aarch64/fpu/poly_sve_generic.h @@ -0,0 +1,313 @@ +/* Helpers for evaluating polynomials with various schemes - specific to SVE + but precision-agnostic. |
