diff options
| author | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2024-11-08 13:24:28 -0300 |
|---|---|---|
| committer | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2024-11-22 10:52:27 -0300 |
| commit | bccb0648ea29f89a7b1b64f3e5674d2338e3798e (patch) | |
| tree | 31849761a6d493625977c973db58c50669935fc9 | |
| parent | d846f4c12d7636efd5b7cff173456e616a185e24 (diff) | |
| download | glibc-bccb0648ea29f89a7b1b64f3e5674d2338e3798e.tar.xz glibc-bccb0648ea29f89a7b1b64f3e5674d2338e3798e.zip | |
math: Use tanf from CORE-MATH
The CORE-MATH implementation is correctly rounded (for any rounding mode)
and shows better performance to the generic tanf.
The code was adapted to glibc style, to use the definition of
math_config.h, to remove errno handling, and to use a generic
128 bit routine for ABIs that do not support it natively.
Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (neoverse1,
gcc 13.2.1), and powerpc (POWER10, gcc 13.2.1):
latency master patched improvement
x86_64 82.3961 54.8052 33.49%
x86_64v2 82.3415 54.8052 33.44%
x86_64v3 69.3661 50.4864 27.22%
i686 219.271 45.5396 79.23%
aarch64 29.2127 19.1951 34.29%
power10 19.5060 16.2760 16.56%
reciprocal-throughput master patched improvement
x86_64 28.3976 19.7334 30.51%
x86_64v2 28.4568 19.7334 30.65%
x86_64v3 21.1815 16.1811 23.61%
i686 105.016 15.1426 85.58%
aarch64 18.1573 10.7681 40.70%
power10 8.7207 8.7097 0.13%
Signed-off-by: Alexei Sibidanov <sibid@uvic.ca>
Signed-off-by: Paul Zimmermann <Paul.Zimmermann@inria.fr>
Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Reviewed-by: DJ Delorie <dj@redhat.com>
29 files changed, 321 insertions, 270 deletions
diff --git a/SHARED-FILES b/SHARED-FILES index 033ce7f092..9f4de81b66 100644 --- a/SHARED-FILES +++ b/SHARED-FILES @@ -288,3 +288,9 @@ sysdeps/ieee754/flt-32/e_lgammaf_r.c: - remove the errno stuff (this is done by the wrapper) - replace 0x1p127f * 0x1p127f by math_narrow_eval (x * 0x1p127f) - add libm_alias_finite (__ieee754_lgammaf_r, __lgammaf_r) at the end +sysdeps/ieee754/flt-32/s_tanf.c: + (src/binary32/tan/tanf.c in CORE-MATH) + - The code was adapted to use glibc code style and internal + functions to handle errno, overflow, and underflow. It was changed + to use an internal wrapper for 128 bit unsigned integer operations + for ABIs that do not support the type natively. diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index 1d3d1f9b6a..89b166b71b 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -1561,7 +1561,6 @@ float: 3 ldouble: 4 Function: "tan": -float: 1 ldouble: 1 Function: "tan_advsimd": @@ -1570,7 +1569,6 @@ float: 2 Function: "tan_downward": double: 1 -float: 2 ldouble: 1 Function: "tan_sve": @@ -1579,12 +1577,10 @@ float: 2 Function: "tan_towardzero": double: 1 -float: 1 ldouble: 1 Function: "tan_upward": double: 1 -float: 1 ldouble: 1 Function: "tanh": diff --git a/sysdeps/alpha/fpu/libm-test-ulps b/sysdeps/alpha/fpu/libm-test-ulps index 7256e674bb..0f7628b75b 100644 --- a/sysdeps/alpha/fpu/libm-test-ulps +++ b/sysdeps/alpha/fpu/libm-test-ulps @@ -1342,22 +1342,18 @@ float: 3 ldouble: 4 Function: "tan": -float: 1 ldouble: 1 Function: "tan_downward": double: 1 -float: 2 ldouble: 1 Function: "tan_towardzero": double: 1 -float: 1 ldouble: 1 Function: "tan_upward": double: 1 -float: 1 ldouble: 1 Function: "tanh": diff --git a/sysdeps/arc/fpu/libm-test-ulps b/sysdeps/arc/fpu/libm-test-ulps index 66a2b541c6..4d4b22db47 100644 --- a/sysdeps/arc/fpu/libm-test-ulps +++ b/sysdeps/arc/fpu/libm-test-ulps @@ -1081,19 +1081,15 @@ float: 3 Function: "tan": double: 1 -float: 1 Function: 
"tan_downward": double: 1 -float: 2 Function: "tan_towardzero": double: 1 -float: 2 Function: "tan_upward": double: 1 -float: 2 Function: "tanh": double: 3 diff --git a/sysdeps/arc/nofpu/libm-test-ulps b/sysdeps/arc/nofpu/libm-test-ulps index 38836ddc38..4faf784aad 100644 --- a/sysdeps/arc/nofpu/libm-test-ulps +++ b/sysdeps/arc/nofpu/libm-test-ulps @@ -259,9 +259,6 @@ Function: "sinh": double: 2 float: 2 -Function: "tan": -float: 1 - Function: "tanh": double: 2 float: 2 diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps index 2651046cfa..c80122de79 100644 --- a/sysdeps/arm/libm-test-ulps +++ b/sysdeps/arm/libm-test-ulps @@ -1078,20 +1078,14 @@ Function: "sinh_upward": double: 3 float: 3 -Function: "tan": -float: 1 - Function: "tan_downward": double: 1 -float: 2 Function: "tan_towardzero": double: 1 -float: 1 Function: "tan_upward": double: 1 -float: 1 Function: "tanh": double: 2 diff --git a/sysdeps/csky/fpu/libm-test-ulps b/sysdeps/csky/fpu/libm-test-ulps index 02b4cb4934..d67cfe1785 100644 --- a/sysdeps/csky/fpu/libm-test-ulps +++ b/sysdeps/csky/fpu/libm-test-ulps @@ -1000,20 +1000,14 @@ Function: "sinh_upward": double: 3 float: 3 -Function: "tan": -float: 1 - Function: "tan_downward": double: 1 -float: 2 Function: "tan_towardzero": double: 1 -float: 1 Function: "tan_upward": double: 1 -float: 1 Function: "tanh": double: 2 diff --git a/sysdeps/csky/nofpu/libm-test-ulps b/sysdeps/csky/nofpu/libm-test-ulps index 34312f5a06..6cdf9fd034 100644 --- a/sysdeps/csky/nofpu/libm-test-ulps +++ b/sysdeps/csky/nofpu/libm-test-ulps @@ -1031,20 +1031,14 @@ Function: "sinh_upward": double: 3 float: 3 -Function: "tan": -float: 1 - Function: "tan_downward": double: 1 -float: 2 Function: "tan_towardzero": double: 1 -float: 1 Function: "tan_upward": double: 1 -float: 1 Function: "tanh": double: 2 diff --git a/sysdeps/generic/math_uint128.h b/sysdeps/generic/math_uint128.h new file mode 100644 index 0000000000..1251f598f7 --- /dev/null +++ 
b/sysdeps/generic/math_uint128.h @@ -0,0 +1,150 @@ +/* Internal 128 bit int support. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _MATH_INT128_H +#define _MATH_INT128_H + +/* Limited support for internal 128 bit integer, used on some math + implementations. It uses compiler builtin type if supported, otherwise + it is emulated. Only unsigned and some operations are currently supported: + + - u128_t: the 128 bit unsigned type. + - u128_high: return the high part of the number. + - u128_low: return the low part of the number. + - u128_from_u64: create a 128 bit number from a 64 bit one. + - u128_mul: multiply two 128 bit numbers. + - u128_add: add two 128 bit numbers. + - u128_lshift: left shift a number. + - u128_rshift: right shift a number. 
+ */ + +#if defined __BITINT_MAXWIDTH__ && __BITINT_MAXWIDTH__ >= 128 +typedef unsigned _BitInt(128) u128; +# define __MATH_INT128_BUILTIN_TYPE 1 +#elif defined __SIZEOF_INT128__ +typedef unsigned __int128 u128; +# define __MATH_INT128_BUILTIN_TYPE 1 +#else +# define __MATH_INT128_BUILTIN_TYPE 0 +#endif + +#if __MATH_INT128_BUILTIN_TYPE +# define u128_high(__x) (uint64_t)((__x) >> 64) +# define u128_low(__x) (uint64_t)(__x) +# define u128_from_u64(__x) (u128)(__x) +# define u128_mul(__x, __y) (__x) * (__y) +# define u128_add(__x, __y) (__x) + (__y) +# define u128_lshift(__x, __y) (__x) << (__y) +# define u128_rshift(__x, __y) (__x) >> (__y) +#else +typedef struct +{ + uint64_t low; + uint64_t high; +} u128; + +# define u128_high(__x) (__x).high +# define u128_low(__x) (__x).low +# define u128_from_u64(__x) (u128){.low = (__x), .high = 0} + +# define MASK32 (UINT64_C(0xffffffff)) + +static u128 u128_add (u128 x, u128 y) +{ + bool carry = x.low + y.low < x.low; + return (u128) { .high = x.high + y.high + carry, .low = x.low + y.low }; +} + +static u128 u128_lshift (u128 x, unsigned int n) +{ + switch (n) + { + case 0: return x; + case 1 ... 63: return (u128) { .high = (x.high << n) | (x.low >> (64 - n)), + .low = x.low << n }; + case 64 ...127: return (u128) { .high = x.low << (n - 64), .low = 0}; + default: return (u128) { .high = 0, .low = 0 }; + } +} + +static u128 u128_rshift (u128 x, unsigned int n) +{ + switch (n) + { + case 0: return x; + case 1 ... 
63: return (u128) { .high = x.high >> n, + .low = (x.high << (64 - n)) | (x.low >> n) }; + case 64 ...127: return (u128) { .high = 0, .low = x.high >> (n - 64) }; + default: return (u128) { .high = 0, .low = 0 }; + } +} + +static u128 u128_mul (u128 x, u128 y) +{ + if (x.high == 0 && y.high == 0) + { + uint64_t x0 = x.low & MASK32; + uint64_t x1 = x.low >> 32; + uint64_t y0 = y.low & MASK32; + uint64_t y1 = y.low >> 32; + u128 x0y0 = { .high = 0, .low = x0 * y0 }; + u128 x0y1 = { .high = 0, .low = x0 * y1 }; + u128 x1y0 = { .high = 0, .low = x1 * y0 }; + u128 x1y1 = { .high = x1 * y1, .low = 0 }; + /* x0y0 + ((x0y1 + x1y0) << 32) + x1y1 */ + return u128_add (u128_add (x0y0, + u128_lshift (u128_add (x0y1, x1y0), + 32)), + x1y1); + } + else + { + uint64_t x0 = x.low & MASK32; + uint64_t x1 = x.low >> 32; + uint64_t x2 = x.high & MASK32; + uint64_t x3 = x.high >> 32; + uint64_t y0 = y.low & MASK32; + uint64_t y1 = y.low >> 32; + uint64_t y2 = y.high & MASK32; + uint64_t y3 = y.high >> 32; + u128 x0y0 = { .high = 0, .low = x0 * y0 }; + u128 x0y1 = { .high = 0, .low = x0 * y1 }; + u128 x0y2 = { .high = 0, .low = x0 * y2 }; + u128 x0y3 = { .high = 0, .low = x0 * y3 }; + u128 x1y0 = { .high = 0, .low = x1 * y0 }; + u128 x1y1 = { .high = 0, .low = x1 * y1 }; + u128 x1y2 = { .high = 0, .low = x1 * y2 }; + u128 x2y0 = { .high = 0, .low = x2 * y0 }; + u128 x2y1 = { .high = 0, .low = x2 * y1 }; + u128 x3y0 = { .high = 0, .low = x3 * y0 }; + /* x0y0 + ((x0y1 + x1y0) << 32) + ((x0y2 + x1y1 + x2y0) << 64) + + ((x0y3 + x1y2 + x2y1 + x3y0) << 96) */ + u128 r0 = u128_add (x0y0, + u128_lshift (u128_add (x0y1, x1y0), + 32)); + u128 r1 = u128_add (u128_lshift (u128_add (u128_add (x0y2, x1y1), x2y0), + 64), + u128_lshift (u128_add (u128_add (x0y3, x1y2), + u128_add (x2y1, x3y0)), + 96)); + return u128_add (r0, r1); + } +} +#endif /* __SIZEOF_INT128__ */ + +#endif diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps index 976661541b..9ed2204d38 100644 --- 
a/sysdeps/hppa/fpu/libm-test-ulps +++ b/sysdeps/hppa/fpu/libm-test-ulps @@ -1107,20 +1107,16 @@ float: 3 Function: "tan": double: 1 -float: 1 ldouble: 1 Function: "tan_downward": double: 1 -float: 2 Function: "tan_towardzero": double: 1 -float: 1 Function: "tan_upward": double: 1 -float: 1 Function: "tanh": double: 2 diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps index 170e7cfc65..c06da68b45 100644 --- a/sysdeps/i386/fpu/libm-test-ulps +++ b/sysdeps/i386/fpu/libm-test-ulps @@ -1614,25 +1614,21 @@ float128: 4 ldouble: 5 Function: "tan": -float: 1 float128: 1 ldouble: 2 Function: "tan_downward": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "tan_towardzero": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "tan_upward": double: 1 -float: 2 float128: 1 ldouble: 2 diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps index a9ce632e6a..43ffbd7978 100644 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps @@ -1619,25 +1619,21 @@ float128: 4 ldouble: 5 Function: "tan": -float: 1 float128: 1 ldouble: 2 Function: "tan_downward": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "tan_towardzero": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "tan_upward": double: 1 -float: 2 float128: 1 ldouble: 2 diff --git a/sysdeps/ieee754/flt-32/k_tanf.c b/sysdeps/ieee754/flt-32/k_tanf.c index e1c9d14104..1cc8931700 100644 --- a/sysdeps/ieee754/flt-32/k_tanf.c +++ b/sysdeps/ieee754/flt-32/k_tanf.c @@ -1,101 +1 @@ -/* k_tanf.c -- float version of k_tan.c - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: k_tanf.c,v 1.4 1995/05/10 20:46:39 jtc Exp $"; -#endif - -#include <float.h> -#include <math.h> -#include <math_private.h> -#include <math-underflow.h> -static const float -one = 1.0000000000e+00, /* 0x3f800000 */ -pio4 = 7.8539812565e-01, /* 0x3f490fda */ -pio4lo= 3.7748947079e-08, /* 0x33222168 */ -T[] = { - 3.3333334327e-01, /* 0x3eaaaaab */ - 1.3333334029e-01, /* 0x3e088889 */ - 5.3968254477e-02, /* 0x3d5d0dd1 */ - 2.1869488060e-02, /* 0x3cb327a4 */ - 8.8632395491e-03, /* 0x3c11371f */ - 3.5920790397e-03, /* 0x3b6b6916 */ - 1.4562094584e-03, /* 0x3abede48 */ - 5.8804126456e-04, /* 0x3a1a26c8 */ - 2.4646313977e-04, /* 0x398137b9 */ - 7.8179444245e-05, /* 0x38a3f445 */ - 7.1407252108e-05, /* 0x3895c07a */ - -1.8558637748e-05, /* 0xb79bae5f */ - 2.5907305826e-05, /* 0x37d95384 */ -}; - -float __kernel_tanf(float x, float y, int iy) -{ - float z,r,v,w,s; - int32_t ix,hx; - GET_FLOAT_WORD(hx,x); - ix = hx&0x7fffffff; /* high word of |x| */ - if(ix<0x39000000) /* x < 2**-13 */ - {if((int)x==0) { /* generate inexact */ - if((ix|(iy+1))==0) return one/fabsf(x); - else if (iy == 1) - { - math_check_force_underflow (x); - return x; - } - else - return -one / x; - } - } - if(ix>=0x3f2ca140) { /* |x|>=0.6744 */ - if(hx<0) {x |
