aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@gmail.com> 2011-10-24 20:19:17 -0400
committer Ulrich Drepper <drepper@gmail.com> 2011-10-24 20:19:17 -0400
commit af968f62f24c5c0ef4e7e5ab41acae946908c112 (patch)
tree e1e0570eeb00c434cc751cbadfbeae150eeea11a
parent 58985aa92f57ff46e96b32388ce65e7fdd8c8b9e (diff)
download glibc-af968f62f24c5c0ef4e7e5ab41acae946908c112.tar.xz
glibc-af968f62f24c5c0ef4e7e5ab41acae946908c112.zip
Optimize accurate 64-bit routines for FMA4 on x86-64
-rw-r--r--ChangeLog57
-rw-r--r--config.make.in1
-rwxr-xr-xconfigure2
-rw-r--r--configure.in1
-rw-r--r--math/Makefile2
-rw-r--r--sysdeps/ieee754/dbl-64/dosincos.c23
-rw-r--r--sysdeps/ieee754/dbl-64/e_asin.c4
-rw-r--r--sysdeps/ieee754/dbl-64/e_atan2.c13
-rw-r--r--sysdeps/ieee754/dbl-64/e_exp.c2
-rw-r--r--sysdeps/ieee754/dbl-64/e_log.c2
-rw-r--r--sysdeps/ieee754/dbl-64/e_pow.c2
-rw-r--r--sysdeps/ieee754/dbl-64/mpa.c58
-rw-r--r--sysdeps/ieee754/dbl-64/mpa.h27
-rw-r--r--sysdeps/ieee754/dbl-64/mpsqrt.c15
-rw-r--r--sysdeps/ieee754/dbl-64/s_atan.c16
-rw-r--r--sysdeps/ieee754/dbl-64/s_sin.c210
-rw-r--r--sysdeps/ieee754/dbl-64/sincostab.c (renamed from sysdeps/ieee754/dbl-64/sincos.tbl)9
-rw-r--r--sysdeps/x86_64/fpu/multiarch/Makefile32
-rw-r--r--sysdeps/x86_64/fpu/multiarch/brandred-fma4.c3
-rw-r--r--sysdeps/x86_64/fpu/multiarch/doasin-fma4.c3
-rw-r--r--sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c5
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_asin.c23
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c9
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_atan2.c16
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c5
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_exp.c15
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_log-fma4.c7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_log.c15
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c5
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_pow.c15
-rw-r--r--sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c3
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpa-fma4.c10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c8
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c8
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mplog-fma4.c7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mptan-fma4.c6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c8
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_atan.c14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c11
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sin.c22
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c9
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_tan.c14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c8
-rw-r--r--sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c10
48 files changed, 595 insertions, 177 deletions
diff --git a/ChangeLog b/ChangeLog
index d4052a67f4..a2e155ab8c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,62 @@
2011-10-24 Ulrich Drepper <drepper@gmail.com>
+ * config.make.in: Add have-mfma4 entry.
+ * configure.in: Substitute libc_cv_cc_fma4.
+ * math/Makefile (dbl-only-routines): Add sincostab.
+ * sysdeps/ieee754/dbl-64/dosincos.c: Don't include sincos.tbl.
+ Use __sincostab not sincos.
+ * sysdeps/ieee754/dbl-64/e_asin.c: Don't define aliases when function
+ name is a macro.
+ * sysdeps/ieee754/dbl-64/e_exp.c: Likewise.
+ * sysdeps/ieee754/dbl-64/e_log.c: Likewise.
+ * sysdeps/ieee754/dbl-64/e_pow.c: Likewise.
+ * sysdeps/ieee754/dbl-64/e_atan2.c: Likewise. Define signArctan2
+ using __copysign.
+ * sysdeps/ieee754/dbl-64/mpa.c: Don't export __acr. Don't define
+ __cr and __cpymn. Define __cpy unless NO___CPY is defined. Define
+ norm, denorm, and __mp_dbl unless NO___MP_DBL is defined.
+ * sysdeps/ieee754/dbl-64/mpa.h: Don't declare __acr, __cr, __cpymn,
+ and __inv.
+ * sysdeps/ieee754/dbl-64/mpsqrt.c: Make fastiroot static.
+ * sysdeps/ieee754/dbl-64/s_atan.c: Define __signArctan using
+ __copysign.
+ * sysdeps/ieee754/dbl-64/s_sin.c: Use __sincostab not sincos. Don't
+ define aliases when function name is a macro.
+ * sysdeps/ieee754/dbl-64/sincostab.c: Renamed from
+ sysdeps/ieee754/dbl-64/sincos.tbl.
+ * sysdeps/x86_64/fpu/multiarch/Makefile: Add entries to build
+ fma4-enabled routines.
+ * sysdeps/x86_64/fpu/multiarch/brandred-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/doasin-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_asin.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_atan2.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_exp.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_log-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_log.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/e_pow.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/mpa-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/mplog-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/mptan-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/s_atan.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/s_sin.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/s_tan.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c: New file.
+ * sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: New file.
+
* sysdeps/ieee754/dbl-64/doasin.c: Adjust for DLA_FMA -> DLA_FMS
rename.
* sysdeps/ieee754/dbl-64/dosincos.c: Likewise.
diff --git a/config.make.in b/config.make.in
index 2181d05ce3..d2baf6d3a9 100644
--- a/config.make.in
+++ b/config.make.in
@@ -59,6 +59,7 @@ have-cpp-asm-debuginfo = @libc_cv_cpp_asm_debuginfo@
enable-check-abi = @enable_check_abi@
have-forced-unwind = @libc_cv_forced_unwind@
have-fpie = @libc_cv_fpie@
+have-mfma4 = @libc_cv_cc_fma4@
gnu89-inline-CFLAGS = @gnu89_inline@
have-ssp = @libc_cv_ssp@
have-selinux = @have_selinux@
diff --git a/configure b/configure
index ec1d6514a4..55cb008354 100755
--- a/configure
+++ b/configure
@@ -623,6 +623,7 @@ elf
ldd_rewrite_script
use_ldconfig
libc_cv_as_i686
+libc_cv_cc_fma4
libc_cv_cc_novzeroupper
libc_cv_cc_avx
libc_cv_cc_sse4
@@ -7944,6 +7945,7 @@ fi
+
if test $elf = yes; then
$as_echo "#define HAVE_ELF 1" >>confdefs.h
diff --git a/configure.in b/configure.in
index 6977fe1015..9678cbe712 100644
--- a/configure.in
+++ b/configure.in
@@ -2339,6 +2339,7 @@ AC_SUBST(libc_cv_cpp_asm_debuginfo)
AC_SUBST(libc_cv_cc_sse4)
AC_SUBST(libc_cv_cc_avx)
AC_SUBST(libc_cv_cc_novzeroupper)
+AC_SUBST(libc_cv_cc_fma4)
AC_SUBST(libc_cv_as_i686)
AC_SUBST(use_ldconfig)
diff --git a/math/Makefile b/math/Makefile
index 431eb5aa4b..41340da1bd 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -66,7 +66,7 @@ include ../Makeconfig
dbl-only-routines := branred doasin dosincos halfulp mpa mpatan2 \
mpatan mpexp mplog mpsqrt mptan sincos32 slowexp \
- slowpow
+ slowpow sincostab
libm-routines = $(strip $(libm-support) $(libm-calls) \
$(patsubst %_rf,%f_r,$(libm-calls:=f)) \
$(long-m-$(long-double-fcts))) \
diff --git a/sysdeps/ieee754/dbl-64/dosincos.c b/sysdeps/ieee754/dbl-64/dosincos.c
index d5c6a14053..712d585b9e 100644
--- a/sysdeps/ieee754/dbl-64/dosincos.c
+++ b/sysdeps/ieee754/dbl-64/dosincos.c
@@ -35,11 +35,16 @@
#include "endian.h"
#include "mydefs.h"
-#include "sincos.tbl"
#include <dla.h>
#include "dosincos.h"
#include "math_private.h"
+extern const union
+{
+ int4 i[880];
+ double x[440];
+} __sincostab attribute_hidden;
+
/***********************************************************************/
/* Routine receive Double-Length number (x+dx) and computing sin(x+dx) */
/* as Double-Length number and store it at array v .It computes it by */
@@ -66,10 +71,10 @@ void __dubsin(double x, double dx, double v[]) {
dd=(x-d)+dx;
/* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
- sn=sincos.x[k]; /* */
- ssn=sincos.x[k+1]; /* sin(Xi) and cos(Xi) */
- cs=sincos.x[k+2]; /* */
- ccs=sincos.x[k+3]; /* */
+ sn=__sincostab.x[k]; /* */
+ ssn=__sincostab.x[k+1]; /* sin(Xi) and cos(Xi) */
+ cs=__sincostab.x[k+2]; /* */
+ ccs=__sincostab.x[k+3]; /* */
MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* Taylor */
ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* series */
@@ -118,10 +123,10 @@ void __dubcos(double x, double dx, double v[]) {
d=x+dx;
dd=(x-d)+dx; /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */
MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
- sn=sincos.x[k]; /* */
- ssn=sincos.x[k+1]; /* sin(Xi) and cos(Xi) */
- cs=sincos.x[k+2]; /* */
- ccs=sincos.x[k+3]; /* */
+ sn=__sincostab.x[k]; /* */
+ ssn=__sincostab.x[k+1]; /* sin(Xi) and cos(Xi) */
+ cs=__sincostab.x[k+2]; /* */
+ ccs=__sincostab.x[k+3]; /* */
MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
diff --git a/sysdeps/ieee754/dbl-64/e_asin.c b/sysdeps/ieee754/dbl-64/e_asin.c
index 02efb7ad2e..cd4cc2e2c2 100644
--- a/sysdeps/ieee754/dbl-64/e_asin.c
+++ b/sysdeps/ieee754/dbl-64/e_asin.c
@@ -324,7 +324,9 @@ double __ieee754_asin(double x){
return u.x/v.x; /* NaN */
}
}
+#ifndef __ieee754_asin
strong_alias (__ieee754_asin, __asin_finite)
+#endif
/*******************************************************************/
/* */
@@ -636,4 +638,6 @@ double __ieee754_acos(double x)
return u.x/v.x;
}
}
+#ifndef __ieee754_acos
strong_alias (__ieee754_acos, __acos_finite)
+#endif
diff --git a/sysdeps/ieee754/dbl-64/e_atan2.c b/sysdeps/ieee754/dbl-64/e_atan2.c
index 264791e0f9..9caacccf4c 100644
--- a/sysdeps/ieee754/dbl-64/e_atan2.c
+++ b/sysdeps/ieee754/dbl-64/e_atan2.c
@@ -51,7 +51,11 @@
/* round to nearest mode of IEEE 754 standard. */
/************************************************************************/
static double atan2Mp(double ,double ,const int[]);
-static double signArctan2(double ,double);
+ /* Fix the sign and return after stage 1 or stage 2 */
+static double signArctan2(double y,double z)
+{
+ return __copysign(z, y);
+}
static double normalized(double ,double,double ,double);
void __mpatan2(mp_no *,mp_no *,mp_no *,int);
@@ -375,7 +379,9 @@ double __ieee754_atan2(double y,double x) {
}
}
}
+#ifndef __ieee754_atan2
strong_alias (__ieee754_atan2, __atan2_finite)
+#endif
/* Treat the Denormalized case */
static double normalized(double ax,double ay,double y, double z)
@@ -387,11 +393,6 @@ static double normalized(double ax,double ay,double y, double z)
__sub(&mpz,&mperr,&mpz2,p); __mp_dbl(&mpz2,&z,p);
return signArctan2(y,z);
}
- /* Fix the sign and return after stage 1 or stage 2 */
-static double signArctan2(double y,double z)
-{
- return ((y<ZERO) ? -z : z);
-}
/* Stage 3: Perform a multi-Precision computation */
static double atan2Mp(double x,double y,const int pr[])
{
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index f4b34a6363..48bbb05ed8 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -145,7 +145,9 @@ double __ieee754_exp(double x) {
else return __slowexp(x);
}
}
+#ifndef __ieee754_exp
strong_alias (__ieee754_exp, __exp_finite)
+#endif
/************************************************************************/
/* Compute e^(x+xx)(Double-Length number) .The routine also receive */
diff --git a/sysdeps/ieee754/dbl-64/e_log.c b/sysdeps/ieee754/dbl-64/e_log.c
index b7df81b488..7a0a26f251 100644
--- a/sysdeps/ieee754/dbl-64/e_log.c
+++ b/sysdeps/ieee754/dbl-64/e_log.c
@@ -207,4 +207,6 @@ double __ieee754_log(double x) {
}
return y1;
}
+#ifndef __ieee754_log
strong_alias (__ieee754_log, __log_finite)
+#endif
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index 0c7abb6eeb..94b1ab8961 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -153,7 +153,9 @@ double __ieee754_pow(double x, double y) {
if (y<0) return (x<1.0)?INF.x:0;
return 0; /* unreachable, to make the compiler happy */
}
+#ifndef __ieee754_pow
strong_alias (__ieee754_pow, __pow_finite)
+#endif
/**************************************************************************/
/* Computing x^y using more accurate but more slow log routine */
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 68647ba335..ad5a639c4b 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -1,8 +1,7 @@
-
/*
* IBM Accurate Mathematical Library
* written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software F