From ee6189855aab3a9be8f3c2d95ce2b2cd17db4ec2 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Thu, 31 Mar 2005 10:02:53 +0000 Subject: * sysdeps/unix/sysv/linux/x86_64/getcontext.S: Use functionally equivalent, but shorter instructions. * sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/x86_64/setcontext.S: Likewise. * sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise. * sysdeps/unix/sysv/linux/x86_64/swapcontext.S: Likewise. * sysdeps/unix/x86_64/sysdep.S: Likewise. * sysdeps/x86_64/strchr.S: Likewise. * sysdeps/x86_64/memset.S: Likewise. * sysdeps/x86_64/strcspn.S: Likewise. * sysdeps/x86_64/strcmp.S: Likewise. * sysdeps/x86_64/elf/start.S: Likewise. * sysdeps/x86_64/strspn.S: Likewise. * sysdeps/x86_64/dl-machine.h: Likewise. * sysdeps/x86_64/bsd-_setjmp.S: Likewise. * sysdeps/x86_64/bsd-setjmp.S: Likewise. * sysdeps/x86_64/strtok.S: Likewise. --- sysdeps/ia64/fpu/Makefile | 5 +- sysdeps/ia64/fpu/e_acos.S | 1 + sysdeps/ia64/fpu/e_acosf.S | 1 + sysdeps/ia64/fpu/e_acosh.S | 1 + sysdeps/ia64/fpu/e_acoshf.S | 1 + sysdeps/ia64/fpu/e_acoshl.S | 1 + sysdeps/ia64/fpu/e_acosl.S | 3 +- sysdeps/ia64/fpu/e_asin.S | 1 + sysdeps/ia64/fpu/e_asinf.S | 1 + sysdeps/ia64/fpu/e_asinl.S | 3 +- sysdeps/ia64/fpu/e_atan2.S | 6 +- sysdeps/ia64/fpu/e_atan2f.S | 1 + sysdeps/ia64/fpu/e_atanh.S | 1 + sysdeps/ia64/fpu/e_atanhf.S | 1 + sysdeps/ia64/fpu/e_atanhl.S | 1 + sysdeps/ia64/fpu/e_cosh.S | 1 + sysdeps/ia64/fpu/e_coshf.S | 1 + sysdeps/ia64/fpu/e_coshl.S | 1 + sysdeps/ia64/fpu/e_exp.S | 13 +- sysdeps/ia64/fpu/e_exp10.S | 75 +- sysdeps/ia64/fpu/e_exp10f.S | 70 +- sysdeps/ia64/fpu/e_exp10l.S | 72 +- sysdeps/ia64/fpu/e_exp2.S | 3 +- sysdeps/ia64/fpu/e_exp2f.S | 3 +- sysdeps/ia64/fpu/e_exp2l.S | 1 + sysdeps/ia64/fpu/e_expf.S | 13 +- sysdeps/ia64/fpu/e_fmod.S | 1 + sysdeps/ia64/fpu/e_fmodf.S | 1 + sysdeps/ia64/fpu/e_fmodl.S | 865 ++++++++------ sysdeps/ia64/fpu/e_hypot.S | 2 + sysdeps/ia64/fpu/e_hypotf.S | 2 + sysdeps/ia64/fpu/e_hypotl.S | 2 + sysdeps/ia64/fpu/e_lgamma_r.c | 2 +- sysdeps/ia64/fpu/e_lgammaf_r.c | 2 +- sysdeps/ia64/fpu/e_lgammal_r.c | 2 +- sysdeps/ia64/fpu/e_log.S | 2 + sysdeps/ia64/fpu/e_log2.S | 1 + sysdeps/ia64/fpu/e_log2f.S | 1 + sysdeps/ia64/fpu/e_log2l.S | 1 + sysdeps/ia64/fpu/e_logf.S | 2 + sysdeps/ia64/fpu/e_logl.S | 2 + sysdeps/ia64/fpu/e_pow.S | 1 + sysdeps/ia64/fpu/e_powf.S | 984 +++++++-------- sysdeps/ia64/fpu/e_powl.S | 4 +- sysdeps/ia64/fpu/e_remainder.S | 1 + sysdeps/ia64/fpu/e_remainderf.S | 1 + sysdeps/ia64/fpu/e_remainderl.S | 1 + sysdeps/ia64/fpu/e_scalb.S | 630 +++++----- sysdeps/ia64/fpu/e_scalbf.S | 630 +++++----- sysdeps/ia64/fpu/e_scalbl.S | 630 +++++----- sysdeps/ia64/fpu/e_sinh.S | 1 + sysdeps/ia64/fpu/e_sinhf.S | 1 + sysdeps/ia64/fpu/e_sinhl.S | 1 + sysdeps/ia64/fpu/e_sqrt.S | 1 + sysdeps/ia64/fpu/e_sqrtf.S | 1 + sysdeps/ia64/fpu/e_sqrtl.S | 1 + sysdeps/ia64/fpu/gen_import_file_list | 14 +- sysdeps/ia64/fpu/import_file.awk | 19 +- sysdeps/ia64/fpu/import_intel_libm | 70 +- sysdeps/ia64/fpu/libm_error.c | 1392 ++++++++++----------- sysdeps/ia64/fpu/libm_lgamma.S | 80 +- sysdeps/ia64/fpu/libm_lgammaf.S | 23 +- sysdeps/ia64/fpu/libm_lgammal.S | 1 + sysdeps/ia64/fpu/libm_scalblnf.S | 384 +++--- sysdeps/ia64/fpu/libm_sincos.S | 164 +-- sysdeps/ia64/fpu/libm_sincos_large.S | 3 + sysdeps/ia64/fpu/libm_sincosf.S | 24 +- sysdeps/ia64/fpu/libm_sincosl.S | 51 +- sysdeps/ia64/fpu/libm_support.h | 1008 ++++++++++------ sysdeps/ia64/fpu/s_asinh.S | 1 + sysdeps/ia64/fpu/s_asinhl.S | 1 + sysdeps/ia64/fpu/s_atanf.S | 1 + sysdeps/ia64/fpu/s_atanl.S | 2 + sysdeps/ia64/fpu/s_cbrt.S | 2 +- sysdeps/ia64/fpu/s_cbrtf.S | 3 +- sysdeps/ia64/fpu/s_cbrtl.S | 1660 ++++++++++++++------------ sysdeps/ia64/fpu/s_cos.S | 147 ++- sysdeps/ia64/fpu/s_cosf.S | 2 + sysdeps/ia64/fpu/s_cosl.S | 46 +- sysdeps/ia64/fpu/s_erf.S | 1 + sysdeps/ia64/fpu/s_erfc.S | 1 + sysdeps/ia64/fpu/s_erfcf.S | 1 + sysdeps/ia64/fpu/s_erfcl.S | 1 + sysdeps/ia64/fpu/s_erfl.S | 1 + sysdeps/ia64/fpu/s_expm1.S | 1 + sysdeps/ia64/fpu/s_expm1f.S | 1 + sysdeps/ia64/fpu/s_expm1l.S | 19 +- sysdeps/ia64/fpu/s_fdim.S | 1 + sysdeps/ia64/fpu/s_fdimf.S | 1 + sysdeps/ia64/fpu/s_fdiml.S | 1 + sysdeps/ia64/fpu/s_frexp.c | 3 +- sysdeps/ia64/fpu/s_frexpf.c | 3 +- sysdeps/ia64/fpu/s_frexpl.c | 3 +- sysdeps/ia64/fpu/s_ilogb.S | 1 + sysdeps/ia64/fpu/s_ilogbf.S | 1 + sysdeps/ia64/fpu/s_ilogbl.S | 1 + sysdeps/ia64/fpu/s_ldexp.c | 3 +- sysdeps/ia64/fpu/s_ldexpf.c | 3 +- sysdeps/ia64/fpu/s_ldexpl.c | 3 +- sysdeps/ia64/fpu/s_libm_ldexp.S | 370 +++--- sysdeps/ia64/fpu/s_libm_ldexpf.S | 368 +++--- sysdeps/ia64/fpu/s_libm_ldexpl.S | 369 +++--- sysdeps/ia64/fpu/s_libm_scalbn.S | 369 +++--- sysdeps/ia64/fpu/s_libm_scalbnf.S | 369 +++--- sysdeps/ia64/fpu/s_libm_scalbnl.S | 369 +++--- sysdeps/ia64/fpu/s_log1p.S | 1 + sysdeps/ia64/fpu/s_log1pf.S | 2 + sysdeps/ia64/fpu/s_log1pl.S | 1 + sysdeps/ia64/fpu/s_logb.S | 1 + sysdeps/ia64/fpu/s_logbf.S | 1 + sysdeps/ia64/fpu/s_logbl.S | 1 + sysdeps/ia64/fpu/s_nearbyint.S | 229 ++-- sysdeps/ia64/fpu/s_nearbyintf.S | 227 ++-- sysdeps/ia64/fpu/s_nearbyintl.S | 226 ++-- sysdeps/ia64/fpu/s_nextafter.S | 271 ++--- sysdeps/ia64/fpu/s_nextafterf.S | 274 ++--- sysdeps/ia64/fpu/s_nextafterl.S | 266 +++-- sysdeps/ia64/fpu/s_nexttoward.S | 270 ++--- sysdeps/ia64/fpu/s_nexttowardf.S | 276 ++--- sysdeps/ia64/fpu/s_nexttowardl.S | 266 +++-- sysdeps/ia64/fpu/s_round.S | 68 +- sysdeps/ia64/fpu/s_roundf.S | 68 +- sysdeps/ia64/fpu/s_roundl.S | 68 +- sysdeps/ia64/fpu/s_scalblnf.c | 3 +- sysdeps/ia64/fpu/s_scalbn.c | 3 +- sysdeps/ia64/fpu/s_scalbnf.c | 3 +- sysdeps/ia64/fpu/s_scalbnl.c | 3 +- sysdeps/ia64/fpu/s_tan.S | 2 + sysdeps/ia64/fpu/s_tanf.S | 2 + sysdeps/ia64/fpu/s_tanh.S | 1 + sysdeps/ia64/fpu/s_tanhl.S | 1 + sysdeps/ia64/fpu/s_tanl.S | 54 +- sysdeps/ia64/fpu/w_lgamma.c | 2 +- sysdeps/ia64/fpu/w_lgammaf.c | 2 +- sysdeps/ia64/fpu/w_lgammal.c | 2 +- sysdeps/ia64/fpu/w_tgamma.S | 1 + sysdeps/ia64/fpu/w_tgammaf.S | 10 +- sysdeps/ia64/fpu/w_tgammal.S | 1 + sysdeps/unix/sysv/linux/x86_64/clone.S | 8 +- sysdeps/unix/sysv/linux/x86_64/getcontext.S | 16 +- sysdeps/unix/sysv/linux/x86_64/setcontext.S | 12 +- sysdeps/unix/sysv/linux/x86_64/swapcontext.S | 10 +- sysdeps/unix/sysv/linux/x86_64/sysdep.h | 12 +- sysdeps/unix/x86_64/sysdep.S | 4 +- sysdeps/x86_64/bsd-_setjmp.S | 5 +- sysdeps/x86_64/bsd-setjmp.S | 5 +- sysdeps/x86_64/dl-machine.h | 2 +- sysdeps/x86_64/elf/start.S | 4 +- sysdeps/x86_64/memset.S | 2 +- sysdeps/x86_64/strchr.S | 4 +- sysdeps/x86_64/strcmp.S | 4 +- sysdeps/x86_64/strcspn.S | 9 +- sysdeps/x86_64/strspn.S | 7 +- sysdeps/x86_64/strtok.S | 8 +- 154 files changed, 7702 insertions(+), 6435 deletions(-) (limited to 'sysdeps') diff --git a/sysdeps/ia64/fpu/Makefile b/sysdeps/ia64/fpu/Makefile index 7ec30c43d3..384fc836af 100644 --- a/sysdeps/ia64/fpu/Makefile +++ b/sysdeps/ia64/fpu/Makefile @@ -27,7 +27,8 @@ sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \ $(duplicated-routines) sysdep-CPPFLAGS += -include libm-symbols.h \ - -D__POSIX__ \ + -D__POSIX__ -Dopensource \ -D_LIB_VERSIONIMF=_LIB_VERSION \ - -DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64 + -DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64 \ + -DSIZE_LONG_64 -DIA64 endif diff --git a/sysdeps/ia64/fpu/e_acos.S b/sysdeps/ia64/fpu/e_acos.S index b515f01a1e..c2b31ab85e 100644 --- a/sysdeps/ia64/fpu/e_acos.S +++ b/sysdeps/ia64/fpu/e_acos.S @@ -824,6 +824,7 @@ acos_abs_gt_1: GLOBAL_LIBM_END(acos) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_acosf.S b/sysdeps/ia64/fpu/e_acosf.S index 417f5b7ddc..68b0b2ee8d 100644 --- a/sysdeps/ia64/fpu/e_acosf.S +++ b/sysdeps/ia64/fpu/e_acosf.S @@ -601,6 +601,7 @@ ACOSF_ABS_ONE: GLOBAL_LIBM_END(acosf) + // Stack operations when calling error support. // (1) (2) // sp -> + psp -> + diff --git a/sysdeps/ia64/fpu/e_acosh.S b/sysdeps/ia64/fpu/e_acosh.S index 675d5fe799..b55a6ab43c 100644 --- a/sysdeps/ia64/fpu/e_acosh.S +++ b/sysdeps/ia64/fpu/e_acosh.S @@ -1139,6 +1139,7 @@ ACOSH_LESS_ONE: GLOBAL_LIBM_END(acosh) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_acoshf.S b/sysdeps/ia64/fpu/e_acoshf.S index 4a54c264c1..58ef5f2adb 100644 --- a/sysdeps/ia64/fpu/e_acoshf.S +++ b/sysdeps/ia64/fpu/e_acoshf.S @@ -968,6 +968,7 @@ ACOSH_LESS_ONE: GLOBAL_LIBM_END(acoshf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_acoshl.S b/sysdeps/ia64/fpu/e_acoshl.S index 85282d16d0..5eb2b3466b 100644 --- a/sysdeps/ia64/fpu/e_acoshl.S +++ b/sysdeps/ia64/fpu/e_acoshl.S @@ -1650,6 +1650,7 @@ acoshl_lt_pone: GLOBAL_LIBM_END(acoshl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_acosl.S b/sysdeps/ia64/fpu/e_acosl.S index daa75b18a5..4fd345bedd 100644 --- a/sysdeps/ia64/fpu/e_acosl.S +++ b/sysdeps/ia64/fpu/e_acosl.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -2482,6 +2482,7 @@ acosl_SPECIAL_CASES: GLOBAL_LIBM_END(acosl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue // (1) diff --git a/sysdeps/ia64/fpu/e_asin.S b/sysdeps/ia64/fpu/e_asin.S index 398079eae4..f995c597f4 100644 --- a/sysdeps/ia64/fpu/e_asin.S +++ b/sysdeps/ia64/fpu/e_asin.S @@ -800,6 +800,7 @@ asin_abs_gt_1: GLOBAL_LIBM_END(asin) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_asinf.S b/sysdeps/ia64/fpu/e_asinf.S index f9a1312b26..af24165d8e 100644 --- a/sysdeps/ia64/fpu/e_asinf.S +++ b/sysdeps/ia64/fpu/e_asinf.S @@ -583,6 +583,7 @@ ASINF_ABS_ONE: ;; GLOBAL_LIBM_END(asinf) + // Stack operations when calling error support. // (1) (2) // sp -> + psp -> + diff --git a/sysdeps/ia64/fpu/e_asinl.S b/sysdeps/ia64/fpu/e_asinl.S index bf5feba155..ad65a731fc 100644 --- a/sysdeps/ia64/fpu/e_asinl.S +++ b/sysdeps/ia64/fpu/e_asinl.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -2459,6 +2459,7 @@ SMALL_S: GLOBAL_LIBM_END(asinl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue // (1) diff --git a/sysdeps/ia64/fpu/e_atan2.S b/sysdeps/ia64/fpu/e_atan2.S index 8be7c6cec5..7a17fbfed4 100644 --- a/sysdeps/ia64/fpu/e_atan2.S +++ b/sysdeps/ia64/fpu/e_atan2.S @@ -52,6 +52,7 @@ // 08/20/02 Corrected inexact flag and directed rounding symmetry bugs // 02/06/03 Reordered header: .section, .global, .proc, .align // 04/17/03 Added missing mutex directive +// 12/23/03 atan2(NaN1,NaN2) now QNaN1, for consistency with atan2f, atan2l // // API //============================================================== @@ -142,7 +143,7 @@ // -0 -0 -pi // // Nan anything quiet Y -// anything NaN quiet X +// Not NaN NaN quiet X // atan2(+-0/+-0) sets double error tag to 37 @@ -388,7 +389,7 @@ GLOBAL_IEEE754_ENTRY(atan2) } { .mfb ldfe atan2_P21 = [EXP_AD_P2],16 -(p10) fma.d.s0 f8 = atan2_Y,atan2_X,f0 // If y=nan, result quietized y +(p10) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If y=nan, result quietized y (p10) br.ret.spnt b0 // Exit if y=nan ;; } @@ -985,6 +986,7 @@ ATAN2_ERROR: } GLOBAL_IEEE754_END(atan2) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue // (1) diff --git a/sysdeps/ia64/fpu/e_atan2f.S b/sysdeps/ia64/fpu/e_atan2f.S index c483a7ad34..67618f0437 100644 --- a/sysdeps/ia64/fpu/e_atan2f.S +++ b/sysdeps/ia64/fpu/e_atan2f.S @@ -827,6 +827,7 @@ ATAN2F_XY_INF_NAN_ZERO: GLOBAL_IEEE754_END(atan2f) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue mov GR_Parameter_TAG = 38 diff --git a/sysdeps/ia64/fpu/e_atanh.S b/sysdeps/ia64/fpu/e_atanh.S index 7ddc3e3023..5ae96dc90b 100644 --- a/sysdeps/ia64/fpu/e_atanh.S +++ b/sysdeps/ia64/fpu/e_atanh.S @@ -1008,6 +1008,7 @@ atanh_ge_one: GLOBAL_LIBM_END(atanh) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_atanhf.S b/sysdeps/ia64/fpu/e_atanhf.S index 3675c5f4c1..1ec1408e35 100644 --- a/sysdeps/ia64/fpu/e_atanhf.S +++ b/sysdeps/ia64/fpu/e_atanhf.S @@ -782,6 +782,7 @@ atanhf_ge_one: GLOBAL_LIBM_END(atanhf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_atanhl.S b/sysdeps/ia64/fpu/e_atanhl.S index 8266bd56fb..cee1ba17b1 100644 --- a/sysdeps/ia64/fpu/e_atanhl.S +++ b/sysdeps/ia64/fpu/e_atanhl.S @@ -1101,6 +1101,7 @@ atanhl_gt_one: };; GLOBAL_LIBM_END(atanhl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_cosh.S b/sysdeps/ia64/fpu/e_cosh.S index 0c6c5b451e..38bd80e146 100644 --- a/sysdeps/ia64/fpu/e_cosh.S +++ b/sysdeps/ia64/fpu/e_cosh.S @@ -811,6 +811,7 @@ COSH_UNORM: GLOBAL_IEEE754_END(cosh) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_coshf.S b/sysdeps/ia64/fpu/e_coshf.S index 91846e4717..6d30064256 100644 --- a/sysdeps/ia64/fpu/e_coshf.S +++ b/sysdeps/ia64/fpu/e_coshf.S @@ -652,6 +652,7 @@ COSH_UNORM: GLOBAL_IEEE754_END(coshf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_coshl.S b/sysdeps/ia64/fpu/e_coshl.S index cef8be0b1a..b5872d0b24 100644 --- a/sysdeps/ia64/fpu/e_coshl.S +++ b/sysdeps/ia64/fpu/e_coshl.S @@ -1033,6 +1033,7 @@ COSH_HUGE: GLOBAL_IEEE754_END(coshl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp.S b/sysdeps/ia64/fpu/e_exp.S index 5ae8afeb99..d22fd18b77 100644 --- a/sysdeps/ia64/fpu/e_exp.S +++ b/sysdeps/ia64/fpu/e_exp.S @@ -1,7 +1,7 @@ .file "exp.s" -// Copyright (c) 2000 - 2002, Intel Corporation +// Copyright (c) 2000 - 2003, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -52,6 +52,7 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 09/07/02 Force inexact flag // 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path +// 05/30/03 Set inexact flag on unmasked overflow/underflow // API //============================================================== @@ -602,7 +603,7 @@ EXP_CERTAIN_OVERFLOW: } { .mfb mov GR_Parameter_TAG = 14 - fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result + fma.d.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result br.cond.sptk __libm_error_region } ;; @@ -685,6 +686,13 @@ EXP_CERTAIN_UNDERFLOW: } ;; +{ .mfi + nop.m 0 + fmerge.se fTmp = fTmp, fLn2_by_128_lo // Small with signif lsb 1 + nop.i 0 +} +;; + { .mfb nop.m 0 fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result @@ -730,6 +738,7 @@ EXP_UNDERFLOW_ZERO: GLOBAL_IEEE754_END(exp) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_exp10.S b/sysdeps/ia64/fpu/e_exp10.S index 1cc5bef406..6bfc21879d 100644 --- a/sysdeps/ia64/fpu/e_exp10.S +++ b/sysdeps/ia64/fpu/e_exp10.S @@ -1,7 +1,7 @@ .file "exp10.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -43,6 +43,7 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 09/06/02 Improved performance; no inexact flags on exact cases // 01/29/03 Added missing } to bundle templates +// 12/16/04 Call error handling on underflow. // // API //============================================================== @@ -81,8 +82,8 @@ // Registers used //============================================================== // r2-r3, r14-r40 -// f6-f15, f32-f51 -// p6-p9, p12 +// f6-f15, f32-f52 +// p6-p12 // @@ -104,6 +105,7 @@ GR_EXPMAX = r24 GR_BIAS53 = r25 GR_ROUNDVAL = r26 +GR_SNORM_LIMIT = r26 GR_MASK = r27 GR_KF0 = r28 GR_MASK_low = r29 @@ -161,6 +163,7 @@ FR_E = f49 FR_exact_limit = f50 FR_int_x = f51 +FR_SNORM_LIMIT = f52 // Data tables @@ -256,8 +259,12 @@ GLOBAL_IEEE754_ENTRY(exp10) } ;; -{.mib +{.mlx ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63) + movl GR_SNORM_LIMIT= 0xc0733a7146f72a41 // Smallest normal threshold +} +{.mib + nop.m 0 nop.i 0 (p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero } @@ -284,7 +291,7 @@ GLOBAL_IEEE754_ENTRY(exp10) ;; {.mfi - nop.m 0 + setf.d FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit fma.s1 FR_L2_10_high= FR_LOG2_10, FR_2P53, f0 // FR_LOG2_10= log2(10)_hi nop.i 0 } @@ -388,6 +395,13 @@ GLOBAL_IEEE754_ENTRY(exp10) } ;; +{.mfi + nop.m 0 + fcmp.ge.s1 p11,p0= f8, FR_SNORM_LIMIT // Test x for normal range + nop.i 0 +} +;; + {.mfi nop.m 0 fma.s1 FR_E= FR_E0, FR_COEFF1, f0 // E= C_1*e @@ -431,10 +445,17 @@ GLOBAL_IEEE754_ENTRY(exp10) {.mfb nop.m 0 (p9) fma.d.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1 - br.ret.sptk b0 // return + (p11) br.ret.sptk b0 // return, if result normal } ;; +// Here if result in denormal range (and not zero) +{.mib + nop.m 0 + mov GR_Parameter_TAG= 265 + br.cond.sptk __libm_error_region // Branch to error handling +} +;; SPECIAL_exp10: {.mfi @@ -487,53 +508,35 @@ SPECIAL_exp10: OUT_RANGE_exp10: +// underflow: p6= 1 // overflow: p8= 1 -{.mii +.pred.rel "mutex",p6,p8 +{.mmi (p8) mov GR_EXPMAX= 0x1fffe - nop.i 0 - nop.i 0 -} -;; - - -{.mmb - (p8) mov GR_Parameter_TAG= 166 - (p8) setf.exp FR_R= GR_EXPMAX - nop.b 999 -} -;; - -{.mfi - nop.m 999 - (p8) fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow - nop.i 999 -} -// underflow: p6= 1 -{.mii - nop.m 0 (p6) mov GR_EXPMAX= 1 nop.i 0 } ;; -{.mmb - nop.m 0 - (p6) setf.exp FR_R= GR_EXPMAX - nop.b 999 +{.mii + setf.exp FR_R= GR_EXPMAX + (p8) mov GR_Parameter_TAG= 166 + (p6) mov GR_Parameter_TAG= 265 } ;; {.mfb - nop.m 999 - (p6) fma.d.s0 f8= FR_R, FR_R, f0 // Create underflow - (p6) br.ret.sptk b0 // will not call libm_error for underflow + nop.m 0 + fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow + br.cond.sptk __libm_error_region // Branch to error handling } ;; GLOBAL_IEEE754_END(exp10) weak_alias (exp10, pow10) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp10f.S b/sysdeps/ia64/fpu/e_exp10f.S index f069b3afab..46615e98ff 100644 --- a/sysdeps/ia64/fpu/e_exp10f.S +++ b/sysdeps/ia64/fpu/e_exp10f.S @@ -1,7 +1,7 @@ .file "exp10f.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -43,6 +43,7 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 09/06/02 Improved performance and accuracy; no inexact flags on exact cases // 01/29/03 Added missing } to bundle templates +// 12/16/04 Call error handling on underflow. // // API //============================================================== @@ -80,8 +81,8 @@ // Registers used //============================================================== // r2-r3, r14-r40 -// f6-f15, f32-f51 -// p6-p9, p12 +// f6-f15, f32-f52 +// p6-p12 // @@ -102,6 +103,7 @@ GR_Fh_ADDR = r23 GR_EXPMAX = r24 GR_ROUNDVAL = r26 +GR_SNORM_LIMIT = r26 GR_MASK = r27 GR_KF0 = r28 GR_MASK_low = r29 @@ -153,6 +155,7 @@ FR_E = f49 FR_exact_limit = f50 FR_int_x = f51 +FR_SNORM_LIMIT = f52 // Data tables @@ -246,8 +249,12 @@ GLOBAL_IEEE754_ENTRY(exp10f) } ;; -{.mib +{.mlx ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63) + movl GR_SNORM_LIMIT= 0xc217b818 // Smallest normal threshold +} +{.mib + nop.m 0 nop.i 0 (p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero } @@ -261,7 +268,7 @@ GLOBAL_IEEE754_ENTRY(exp10f) ;; {.mfi - nop.m 0 + setf.s FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit (p8) fcvt.fx.s1 FR_int_x = f8 // Convert x to integer nop.i 0 } @@ -335,7 +342,7 @@ GLOBAL_IEEE754_ENTRY(exp10f) {.mfb ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high} - nop.f 0 + fcmp.ge.s1 p11, p0= f8, FR_SNORM_LIMIT // Test x for normal range (p12) br.cond.spnt OUT_RANGE_exp10 } ;; @@ -390,10 +397,17 @@ GLOBAL_IEEE754_ENTRY(exp10f) {.mfb nop.m 0 (p9) fma.s.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1 - br.ret.sptk b0 // return + (p11) br.ret.sptk b0 // return, if result normal } ;; +// Here if result in denormal range (and not zero) +{.mib + nop.m 0 + mov GR_Parameter_TAG= 266 + br.cond.sptk __libm_error_region // Branch to error handling +} +;; SPECIAL_exp10: {.mfi @@ -446,53 +460,35 @@ SPECIAL_exp10: OUT_RANGE_exp10: +// underflow: p6= 1 // overflow: p8= 1 -{.mii +.pred.rel "mutex",p6,p8 +{.mmi (p8) mov GR_EXPMAX= 0x1fffe - nop.i 0 - nop.i 0 -} -;; - - -{.mmb - (p8) mov GR_Parameter_TAG= 167 - (p8) setf.exp FR_R= GR_EXPMAX - nop.b 999 -} -;; - -{.mfi - nop.m 999 - (p8) fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow - nop.i 999 -} -// underflow: p6= 1 -{.mii - nop.m 0 (p6) mov GR_EXPMAX= 1 nop.i 0 } ;; -{.mmb - nop.m 0 - (p6) setf.exp FR_R= GR_EXPMAX - nop.b 999 +{.mii + setf.exp FR_R= GR_EXPMAX + (p8) mov GR_Parameter_TAG= 167 + (p6) mov GR_Parameter_TAG= 266 } ;; {.mfb - nop.m 999 - (p6) fma.s.s0 f8= FR_R, FR_R, f0 // Create underflow - (p6) br.ret.sptk b0 // will not call libm_error for underflow + nop.m 0 + fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow + br.cond.sptk __libm_error_region // Branch to error handling } ;; GLOBAL_IEEE754_END(exp10f) weak_alias (exp10f, pow10f) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp10l.S b/sysdeps/ia64/fpu/e_exp10l.S index 1b47258e73..a2e84b377c 100644 --- a/sysdeps/ia64/fpu/e_exp10l.S +++ b/sysdeps/ia64/fpu/e_exp10l.S @@ -1,7 +1,7 @@ .file "exp10l.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -44,6 +44,7 @@ // 02/06/03 Reordered header: .section, .global, .proc, .align // 05/08/03 Reformatted assembly source; corrected overflow result for round to // -inf and round to zero; exact results now don't set inexact flag +// 12/16/04 Call error handling on underflow. // // API //============================================================== @@ -79,9 +80,9 @@ // Registers used //============================================================== -// f6-f15, f32-f62 +// f6-f15, f32-f63 // r14-r30, r32-r40 -// p6-p8, p12-p14 +// p6-p8, p11-p14 // @@ -129,6 +130,7 @@ FR_4 = f60 FR_28 = f61 FR_32 = f62 + FR_SNORM_LIMIT = f63 GR_ADDR0 = r14 @@ -178,6 +180,7 @@ LOCAL_OBJECT_START(poly_coeffs) data8 0x3f55d87fe78a6731 // C_5 data8 0x3f2430912f86c787 // C_6 data8 0x9257edfe9b5fb698, 0x00003fbf // log2(10)_low (bits 64...127) + data8 0x9a1bc98027a81918, 0x0000c00b // Smallest normal threshold LOCAL_OBJECT_END(poly_coeffs) @@ -435,7 +438,7 @@ GLOBAL_IEEE754_ENTRY(exp10l) {.mmf // GR_D_ADDR = pointer to D table - add GR_D_ADDR = 2048-64+96+16, GR_ADDR0 + add GR_D_ADDR = 2048-64+96+32, GR_ADDR0 // load C_3, C_4 ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR0 ], 16 // y = x*log2(10)*2^8 @@ -471,7 +474,8 @@ GLOBAL_IEEE754_ENTRY(exp10l) } {.mfi - nop.m 0 + // load smallest normal limit + ldfe FR_SNORM_LIMIT = [ GR_ADDR0 ], 16 // x>overflow threshold ? fcmp.gt.s1 p12, p7 = f8, FR_OF_TEST nop.i 0 ;; @@ -596,6 +600,13 @@ GLOBAL_IEEE754_ENTRY(exp10l) nop.i 0 ;; } +{.mfi + nop.m 0 + // test if x >= smallest normal limit + fcmp.ge.s1 p11, p0 = f8, FR_SNORM_LIMIT + nop.i 0 ;; +} + {.mfi nop.m 0 // P36 = P34+r2*P56 @@ -646,9 +657,16 @@ GLOBAL_IEEE754_ENTRY(exp10l) // result = T+T*P (p14) fma.s0 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST // return - br.ret.sptk b0 ;; + (p11) br.ret.sptk b0 ;; // return, if result normal } +// Here if result in denormal range (and not zero) +{.mib + nop.m 0 + mov GR_Parameter_TAG= 264 + br.cond.sptk __libm_error_region // Branch to error handling +} +;; SPECIAL_EXP10: @@ -703,47 +721,35 @@ SPECIAL_EXP10: OUT_RANGE_EXP10: -{.mii - // overflow: p8 = 1 +// underflow: p6 = 1 +// overflow: p8 = 1 + +.pred.rel "mutex",p6,p8 +{.mmi (p8) mov GR_CONST1 = 0x1fffe + (p6) mov GR_CONST1 = 1 nop.i 0 - nop.i 0 ;; } +;; -{.mmb - (p8) mov GR_Parameter_TAG = 165 - (p8) setf.exp FR_KF0 = GR_CONST1 - nop.b 999 ;; -} - -{.mfi - nop.m 999 - (p8) fma.s0 f8 = FR_KF0, FR_KF0, f0 - nop.i 999 -} {.mii - nop.m 0 - // underflow: p6 = 1 - (p6) mov GR_CONST1 = 1 - nop.i 0 ;; -} - -{.mmb - nop.m 0 - (p6) setf.exp FR_KF0 = GR_CONST1 - nop.b 999 ;; + setf.exp FR_KF0 = GR_CONST1 + (p8) mov GR_Parameter_TAG = 165 + (p6) mov GR_Parameter_TAG = 264 } +;; {.mfb nop.m 999 - (p6) fma.s0 f8 = FR_KF0, FR_KF0, f0 - // will not call libm_error for underflow - (p6) br.ret.sptk b0 ;; + fma.s0 f8 = FR_KF0, FR_KF0, f0 // Create overflow/underflow + br.cond.sptk __libm_error_region // Branch to error handling } +;; GLOBAL_IEEE754_END(exp10l) weak_alias (exp10l, pow10l) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue {.mfi diff --git a/sysdeps/ia64/fpu/e_exp2.S b/sysdeps/ia64/fpu/e_exp2.S index e4a1dadd73..46fca2d3cd 100644 --- a/sysdeps/ia64/fpu/e_exp2.S +++ b/sysdeps/ia64/fpu/e_exp2.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -495,6 +495,7 @@ OUT_RANGE_exp2: GLOBAL_LIBM_END(exp2) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp2f.S b/sysdeps/ia64/fpu/e_exp2f.S index f785b70e65..8ee600c554 100644 --- a/sysdeps/ia64/fpu/e_exp2f.S +++ b/sysdeps/ia64/fpu/e_exp2f.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -470,6 +470,7 @@ OUT_RANGE_exp2: GLOBAL_LIBM_END(exp2f) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp2l.S b/sysdeps/ia64/fpu/e_exp2l.S index 6e2a62ad91..743ed3558b 100644 --- a/sysdeps/ia64/fpu/e_exp2l.S +++ b/sysdeps/ia64/fpu/e_exp2l.S @@ -747,6 +747,7 @@ OUT_RANGE_exp2l: GLOBAL_LIBM_END(exp2l) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue {.mfi diff --git a/sysdeps/ia64/fpu/e_expf.S b/sysdeps/ia64/fpu/e_expf.S index 8d620b6ffa..3dc0ba9bf6 100644 --- a/sysdeps/ia64/fpu/e_expf.S +++ b/sysdeps/ia64/fpu/e_expf.S @@ -1,7 +1,7 @@ .file "expf.s" -// Copyright (c) 2000 - 2002, Intel Corporation +// Copyright (c) 2000 - 2003, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -52,6 +52,7 @@ // 09/26/02 support of higher precision inputs added, underflow threshold // corrected // 11/15/02 Improved performance on Itanium 2, added possible over/under paths +// 05/30/03 Set inexact flag on unmasked overflow/underflow // // // API @@ -521,7 +522,7 @@ EXP_CERTAIN_OVERFLOW: } { .mfb mov GR_Parameter_TAG = 16 - fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result + fma.s.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result br.cond.sptk __libm_error_region } ;; @@ -604,6 +605,13 @@ EXP_CERTAIN_UNDERFLOW: } ;; +{ .mfi + nop.m 0 + fmerge.se fTmp = fTmp, f64DivLn2 // Small with non-trial signif + nop.i 0 +} +;; + { .mfb nop.m 0 fma.s.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result @@ -649,6 +657,7 @@ EXP_UNDERFLOW_ZERO: GLOBAL_IEEE754_END(expf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_fmod.S b/sysdeps/ia64/fpu/e_fmod.S index d801e0c128..dbd0a29698 100644 --- a/sysdeps/ia64/fpu/e_fmod.S +++ b/sysdeps/ia64/fpu/e_fmod.S @@ -499,6 +499,7 @@ FMOD_Y_ZERO: } GLOBAL_IEEE754_END(fmod) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_fmodf.S b/sysdeps/ia64/fpu/e_fmodf.S index fe1ec0304d..36e5807291 100644 --- a/sysdeps/ia64/fpu/e_fmodf.S +++ b/sysdeps/ia64/fpu/e_fmodf.S @@ -514,6 +514,7 @@ EXP_ERROR_RETURN: } GLOBAL_IEEE754_END(fmodf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_fmodl.S b/sysdeps/ia64/fpu/e_fmodl.S index da08ae3f5c..3e87eb090f 100644 --- a/sysdeps/ia64/fpu/e_fmodl.S +++ b/sysdeps/ia64/fpu/e_fmodl.S @@ -1,7 +1,7 @@ .file "fmodl.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -43,56 +43,88 @@ // 03/02/00 New Algorithm // 04/04/00 Unwind support added // 08/15/00 Bundle added after call to __libm_error_support to properly -// set [the previously overwritten] GR_Parameter_RESULT. +// set [ the previously overwritten ] GR_Parameter_RESULT. // 11/28/00 Set FR_Y to f9 -// 03/11/02 Fixed flags for fmodl(qnan,zero) +// 03/11/02 Fixed flags for fmodl(qnan, zero) // 05/20/02 Cleaned up namespace and sf0 syntax -// 02/10/03 Reordered header: .section, .global, .proc, .align -// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno +// 02/10/03 Reordered header:.section,.global,.proc,.align +// 04/28/03 Fix: fmod(sNaN, 0) no longer sets errno +// 11/23/04 Reformatted routine and improved speed // // API //==================================================================== -// long double fmodl(long double,long double); +// long double fmodl(long double, long double); // // Overview of operation //==================================================================== -// fmod(a,b)=a-i*b, -// where i is an integer such that, if b!=0, -// |i|<|a/b| and |a/b-i|<1 +// fmod(a, b)= a-i*b, +// where i is an integer such that, if b!= 0, +// |i|<|a/b| and |a/b-i|<1 // // Algorithm //==================================================================== // a). if |a|<|b|, return a // b). get quotient and reciprocal overestimates accurate to -// 33 bits (q2,y2) +// 33 bits (q2, y2) // c). if the exponent difference (exponent(a)-exponent(b)) -// is less than 32, truncate quotient to integer and -// finish in one iteration -// d). if exponent(a)-exponent(b)>=32 (q2>=2^32) -// round quotient estimate to single precision (k=RN(q2)), -// calculate partial remainder (a'=a-k*b), -// get quotient estimate (a'*y2), and repeat from c). +// is less than 32, truncate quotient to integer and +// finish in one iteration +// d). if exponent(a)-exponent(b)>= 32 (q2>= 2^32) +// round quotient estimate to single precision (k= RN(q2)), +// calculate partial remainder (a'= a-k*b), +// get quotient estimate (a'*y2), and repeat from c). // // Registers used //==================================================================== -// Predicate registers: p6-p11 -// General registers: r2,r29,r32 (ar.pfs), r33-r39 -// Floating point registers: f6-f15 - -GR_SAVE_B0 = r33 -GR_SAVE_PFS = r34 -GR_SAVE_GP = r35 -GR_SAVE_SP = r36 - -GR_Parameter_X = r37 -GR_Parameter_Y = r38 -GR_Parameter_RESULT = r39 -GR_Parameter_TAG = r40 - -FR_X = f10 -FR_Y = f9 -FR_RESULT = f8 +GR_SMALLBIASEXP = r2 +GR_2P32 = r3 +GR_SMALLBIASEXP = r20 +GR_ROUNDCONST = r21 +GR_SIG_B = r22 +GR_ARPFS = r23 +GR_TMP1 = r24 +GR_TMP2 = r25 +GR_TMP3 = r26 + +GR_SAVE_B0 = r33 +GR_SAVE_PFS = r34 +GR_SAVE_GP = r35 +GR_SAVE_SP = r36 + +GR_Parameter_X = r37 +GR_Parameter_Y = r38 +GR_Parameter_RESULT = r39 +GR_Parameter_TAG = r40 + +FR_X = f10 +FR_Y = f9 +FR_RESULT = f8 + +FR_ABS_A = f6 +FR_ABS_B = f7 +FR_Y_INV = f10 +FR_SMALLBIAS = f11 +FR_E0 = f12 +FR_Q = f13 +FR_E1 = f14 +FR_2P32 = f15 +FR_TMPX = f32 +FR_TMPY = f33 +FR_ROUNDCONST = f34 +FR_QINT = f35 +FR_QRND24 = f36 +FR_NORM_B = f37 +FR_TMP = f38 +FR_TMP2 = f39 +FR_DFLAG = f40 +FR_Y_INV0 = f41 +FR_Y_INV1 = f42 +FR_Q0 = f43 +FR_Q1 = f44 +FR_QINT_Z = f45 +FR_QREM = f46 +FR_B_SGN_A = f47 .section .text GLOBAL_IEEE754_ENTRY(fmodl) @@ -101,495 +133,540 @@ GLOBAL_IEEE754_ENTRY(fmodl) // result in f8 { .mfi - alloc r32=ar.pfs,1,4,4,0 - // f6=|a| - fmerge.s f6=f0,f8 - mov r2 = 0x0ffdd + getf.sig GR_SIG_B = f9 + // FR_ABS_A = |a| + fmerge.s FR_ABS_A = f0, f8 + mov GR_SMALLBIASEXP = 0x0ffdd } - {.mfi - getf.sig r29=f9 - // f7=|b| - fmerge.s f7=f0,f9 - nop.i 0;; +{ .mfi + nop.m 0 + // FR_ABS_B = |b| + fmerge.s FR_ABS_B = f0, f9 + nop.i 0 } +;; { .mfi - setf.exp f11 = r2 - // (1) y0 - frcpa.s1 f10,p6=f6,f7 - nop.i 0;; + setf.exp FR_SMALLBIAS = GR_SMALLBIASEXP + // (1) y0 + frcpa.s1 FR_Y_INV0, p6 = FR_ABS_A, FR_ABS_B + nop.i 0 +} +;; + +{ .mlx + nop.m 0 + movl GR_ROUNDCONST = 0x33a00000 } +;; // eliminate special cases -{.mmi -nop.m 0 -nop.m 0 -// y pseudo-zero ? -cmp.eq p7,p10=r29,r0;; +{ .mmi + nop.m 0 + nop.m 0 + // y pseudo-zero ? + cmp.eq p7, p10 = GR_SIG_B, r0 } +;; -// Y +-NAN, +-inf, +-0? p7 +// set p7 if b +/-NAN, +/-inf, +/-0 { .mfi - nop.m 999 -(p10) fclass.m p7,p10 = f9, 0xe7 - nop.i 999;; + nop.m 0 + (p10) fclass.m p7, p10 = f9, 0xe7 + nop.i 0 } +;; -// qnan snan inf norm unorm 0 -+ -// 1 1 1 0 0 0 11 -// e 3 -// X +-NAN, +-inf, ? p9 - { .mfi - nop.m 999 - fclass.m.unc p9,p11 = f8, 0xe3 - nop.i 999 + mov GR_2P32 = 0x1001f + // (2) q0 = a*y0 + (p6) fma.s1 FR_Q0 = FR_ABS_A, FR_Y_INV0, f0 + nop.i 0 +} +{ .mfi + nop.m 0 + // (3) e0 = 1 - b * y0 + (p6) fnma.s1 FR_E0 = FR_ABS_B, FR_Y_INV0, f1 + nop.i 0 } +;; -// |x| < |y|? Return x p8 +// set p9 if a +/-NAN, +/-inf +{ .mfi + nop.m 0 + fclass.m.unc p9, p11 = f8, 0xe3 + nop.i 0 +} + // |a| < |b|? Return a, p8=1 { .mfi - nop.m 999 -(p10) fcmp.lt.unc.s1 p8,p0 = f6,f7 - nop.i 999 ;; + nop.m 0 + (p10) fcmp.lt.unc.s1 p8, p0 = FR_ABS_A, FR_ABS_B + nop.i 0 } +;; - { .mfi - mov r2=0x1001f - // (2) q0=a*y0 - (p6) fma.s1 f13=f6,f10,f0 - nop.i 0 -} { .mfi - nop.m 0 - // (3) e0 = 1 - b * y0 - (p6) fnma.s1 f12=f7,f10,f1 - nop.i 0;; +// set p7 if b +/-NAN, +/-inf, +/-0 +{ .mfi + nop.m 0 + // pseudo-NaN ? + (p10) fclass.nm p7, p0 = f9, 0xff + nop.i 0 } +;; -// Y +-NAN, +-inf, +-0? p7 +// set p9 if a is +/-NaN, +/-Inf +{ .mfi + nop.m 0 + (p11) fclass.nm p9, p0 = f8, 0xff + nop.i 0 +} { .mfi - nop.m 999 - // pseudo-NaN ? -(p10) fclass.nm p7,p0 = f9, 0xff - nop.i 999 + nop.m 0 + // b denormal ? set D flag (if |a|<|b|) + (p8) fnma.s0 FR_DFLAG = f9, f1, f9 + nop.i 0 } +;; -// qnan snan inf norm unorm 0 -+ -// 1 1 1 0 0 0 11 -// e 3 -// X +-NAN, +-inf, ? p9 +{ .mfi + // FR_2P32 = 2^32 + setf.exp FR_2P32 = GR_2P32 + // (4) q1 = q0+e0*q0 + (p6) fma.s1 FR_Q1 = FR_E0, FR_Q0, FR_Q0 + nop.i 0 +} +{ .mfi + nop.m 0 + // (5) e1 = e0 * e0 + 2^-34 + (p6) fma.s1 FR_E1 = FR_E0, FR_E0, FR_SMALLBIAS + nop.i 0 +} +;; { .mfi - nop.m 999 -(p11) fclass.nm p9,p0 = f8, 0xff - nop.i 999;; + nop.m 0 + // normalize a (if |a|<|b|) + (p8) fma.s0 f8 = f8, f1, f0 + nop.i 0 +} +{ .bbb + (p9) br.cond.spnt FMOD_A_NAN_INF + (p7) br.cond.spnt FMOD_B_NAN_INF_ZERO + // if |a|<|b|, return + (p8) br.ret.spnt b0 } +;; + { .mfi - nop.m 0 - // y denormal ? set D flag (if |x|<|y|) - (p8) fnma.s0 f10=f9,f1,f9 - nop.i 0;; + nop.m 0 + // (6) y1 = y0 + e0 * y0 + (p6) fma.s1 FR_Y_INV1 = FR_E0, FR_Y_INV0, FR_Y_INV0 + nop.i 0 } +;; +{ .mfi + nop.m 0 + // a denormal ? set D flag + // b denormal ? set D flag + fcmp.eq.s0 p12,p0 = FR_ABS_A, FR_ABS_B + nop.i 0 +} +{ .mfi + // set FR_ROUNDCONST = 1.25*2^{-24} + setf.s FR_ROUNDCONST = GR_ROUNDCONST + // (7) q2 = q1+e1*q1 + (p6) fma.s1 FR_Q = FR_Q1, FR_E1, FR_Q1 + nop.i 0 +} +;; -{.mfi - nop.m 0 - // normalize x (if |x|<|y|) - (p8) fma.s0 f8=f8,f1,f0 - nop.i 0 +{ .mfi + nop.m 0 + fmerge.s FR_B_SGN_A = f8, f9 + nop.i 0 } -{.bbb - (p9) br.cond.spnt FMOD_X_NAN_INF - (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO - // if |x|<|y|, return - (p8) br.ret.spnt b0;; +{ .mfi + nop.m 0 + // (8) y2 = y1 + e1 * y1 + (p6) fma.s1 FR_Y_INV = FR_E1, FR_Y_INV1, FR_Y_INV1 + // set p6 = 0, p10 = 0 + cmp.ne.and p6, p10 = r0, r0 } +;; - {.mfi - nop.m 0 - // x denormal ? set D flag - fnma.s0 f32=f6,f1,f6 - nop.i 0 +// will compute integer quotient bits (24 bits per iteration) +.align 32 +loop64: +{ .mfi + nop.m 0 + // compare q2, 2^32 + fcmp.lt.unc.s1 p8, p7 = FR_Q, FR_2P32 + nop.i 0 } -{.mfi - nop.m 0 - // y denormal ? set D flag - fnma.s0 f33=f7,f1,f7 - nop.i 0;; +{ .mfi + nop.m 0 + // will truncate quotient to integer, if exponent<32 (in advance) + fcvt.fx.trunc.s1 FR_QINT = FR_Q + nop.i 0 } +;; - {.mfi - // f15=2^32 - setf.exp f15=r2 - // (4) q1=q0+e0*q0 - (p6) fma.s1 f13=f12,f13,f13 - nop.i 0 +{ .mfi + nop.m 0 + // if exponent>32 round quotient to single precision (perform in advance) + fma.s.s1 FR_QRND24 = FR_Q, f1, f0 + nop.i 0 } +;; + { .mfi - nop.m 0 - // (5) e1 = e0 * e0 + 2^-34 - (p6) fma.s1 f14=f12,f12,f11 - nop.i 0;; + nop.m 0 + // set FR_ROUNDCONST = sgn(a) + (p8) fmerge.s FR_ROUNDCONST = f8, f1 + nop.i 0 } -{.mlx - nop.m 0 - movl r2=0x33a00000;; +{ .mfi + nop.m 0 + // normalize truncated quotient + (p8) fcvt.xf FR_QRND24 = FR_QINT + nop.i 0 } +;; + { .mfi - nop.m 0 - // (6) y1 = y0 + e0 * y0 - (p6) fma.s1 f10=f12,f10,f10 - nop.i 0;; + nop.m 0 + // calculate remainder (assuming FR_QRND24 = RZ(Q)) + (p7) fnma.s1 FR_E1 = FR_QRND24, FR_ABS_B, FR_ABS_A + nop.i 0 } -{.mfi - // set f12=1.25*2^{-24} - setf.s f12=r2 - // (7) q2=q1+e1*q1 - (p6) fma.s1 f13=f13,f14,f13 - nop.i 0;; +{ .mfi + nop.m 0 + // also if exponent>32, round quotient to single precision + // and subtract 1 ulp: q = q-q*(1.25*2^{-24}) + (p7) fnma.s.s1 FR_QINT_Z = FR_QRND24, FR_ROUNDCONST, FR_QRND24 + nop.i 0 } -{.mfi - nop.m 0 - fmerge.s f9=f8,f9 - nop.i 0 +;; + +{ .mfi + nop.m 0 + // (p8) calculate remainder (82-bit format) + (p8) fnma.s1 FR_QREM = FR_QRND24, FR_ABS_B, FR_ABS_A + nop.i 0 } { .mfi - nop.m 0 - // (8) y2 = y1 + e1 * y1 - (p6) fma.s1 f10=f14,f10,f10 - // set p6=0, p10=0 - cmp.ne.and p6,p10=r0,r0;; + nop.m 0 + // (p7) calculate remainder (assuming FR_QINT_Z = RZ(Q)) + (p7) fnma.s1 FR_ABS_A = FR_QINT_Z, FR_ABS_B, FR_ABS_A + nop.i 0 } +;; +{ .mfi + nop.m 0 + // Final iteration (p8): is FR_ABS_A the correct remainder + // (quotient was not overestimated) ? + (p8) fcmp.lt.unc.s1 p6, p10 = FR_QREM, f0 + nop.i 0 +} +;; -.align 32 -loop64: - {.mfi - nop.m 0 - // compare q2, 2^32 - fcmp.lt.unc.s1 p8,p7=f13,f15 - nop.i 0 -} - {.mfi - nop.m 0 - // will truncate quotient to integer, if exponent<32 (in advance) - fcvt.fx.trunc.s1 f11=f13 - nop.i 0;; -} - {.mfi - nop.m 0 - // if exponent>32, round quotient to single precision (perform in advance) - fma.s.s1 f13=f13,f1,f0 - nop.i 0;; -} - - - {.mfi - nop.m 0 - // set f12=sgn(a) - (p8) fmerge.s f12=f8,f1 - nop.i 0 -} - {.mfi - nop.m 0 - // normalize truncated quotient - (p8) fcvt.xf f13=f11 - nop.i 0;; -} - { .mfi - nop.m 0 - // calculate remainder (assuming f13=RZ(Q)) - (p7) fnma.s1 f14=f13,f7,f6 - nop.i 0 -} - {.mfi - nop.m 0 - // also if exponent>32, round quotient to single precision - // and subtract 1 ulp: q=q-q*(1.25*2^{-24}) - (p7) fnma.s.s1 f11=f13,f12,f13 - nop.i 0;; -} - - {.mfi - nop.m 0 - // (p8) calculate remainder (82-bit format) - (p8) fnma.s1 f11=f13,f7,f6 - nop.i 0 -} - {.mfi - nop.m 0 - // (p7) calculate remainder (assuming f11=RZ(Q)) - (p7) fnma.s1 f6=f11,f7,f6 - nop.i 0;; -} - - - {.mfi - nop.m 0 - // Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ? - (p8) fcmp.lt.unc.s1 p6,p10=f11,f0 - nop.i 0;; -} - {.mfi - nop.m 0 - // get new quotient estimation: a'*y2 - (p7) fma.s1 f13=f14,f10,f0 - nop.i 0 -} - {.mfb - nop.m 0 - // was f13=RZ(Q) ? (then new remainder f14>=0) - (p7) fcmp.lt.unc.s1 p7,p9=f14,f0 - nop.b 0;; -} - - -.pred.rel "mutex",p6,p10 - {.mfb - nop.m 0 - // add b to estimated remainder (to cover the case when the quotient was overestimated) - // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a) - (p6) fma.s0 f8=f11,f12,f9 - nop.b 0 -} - {.mfb - nop.m 0 - // set correct sign of result before returning: f12=sgn(a) - (p10) fma.s0 f8=f11,f12,f0 - (p8) br.ret.sptk b0;; -} - {.mfi - nop.m 0 - // if f13!=RZ(Q), get alternative quotient estimation: a''*y2 - (p7) fma.s1 f13=f6,f10,f0 - nop.i 0 -} - {.mfb - nop.m 0 - // if f14 was RZ(Q), set remainder to f14 - (p9) mov f6=f14 - br.cond.sptk loop64;; +{ .mfi + nop.m 0 + // get new quotient estimation: a'*y2 + (p7) fma.s1 FR_Q = FR_E1, FR_Y_INV, f0 + nop.i 0 } +{ .mfb + nop.m 0 + // was FR_Q = RZ(Q) ? (then new remainder FR_E1> = 0) + (p7) fcmp.lt.unc.s1 p7, p9 = FR_E1, f0 + nop.b 0 +} +;; +.pred.rel "mutex", p6, p10 +{ .mfb + nop.m 0 + // add b to estimated remainder (to cover the case when the quotient was + // overestimated) + // also set correct sign by using + // FR_B_SGN_A = |b|*sgn(a), FR_ROUNDCONST = sgn(a) + (p6) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, FR_B_SGN_A + nop.b 0 +} +{ .mfb + nop.m 0 + // set correct sign of result before returning: FR_ROUNDCONST = sgn(a) + (p10) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, f0 + (p8) br.ret.sptk b0 +} +;; +{ .mfi + nop.m 0 + // if f13! = RZ(Q), get alternative quotient estimation: a''*y2 + (p7) fma.s1 FR_Q = FR_ABS_A, FR_Y_INV, f0 + nop.i 0 +} +{ .mfb + nop.m 0 + // if FR_E1 was RZ(Q), set remainder to FR_E1 + (p9) fma.s1 FR_ABS_A = FR_E1, f1, f0 + br.cond.sptk loop64 +} +;; -FMOD_X_NAN_INF: +FMOD_A_NAN_INF: -// Y zero ? -{.mfi - nop.m 0 - fclass.m p10,p0=f8,0xc3 // Test x=nan - nop.i 0 +// b zero ? +{ .mfi + nop.m 0 + fclass.m p10, p0 = f8, 0xc3 // Test a = nan + nop.i 0 } -{.mfi - nop.m 0 - fma.s1 f10=f9,f1,f0 - nop.i 0;; +{ .mfi + nop.m 0 + fma.s1 FR_NORM_B = f9, f1, f0 + nop.i 0 } +;; -{.mfi - nop.m 0 - fma.s0 f8=f8,f1,f0 - nop.i 0 +{ .mfi + nop.m 0 + fma.s0 f8 = f8, f1, f0 + nop.i 0 } -{.mfi - nop.m 0 -(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero - nop.i 0;; +{ .mfi + nop.m 0 + (p10) fclass.m p10, p0 = f9, 0x07 // Test x = nan, and y = zero + nop.i 0 } -{.mfb - nop.m 0 - fcmp.eq.unc.s1 p11,p0=f10,f0 -(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero +;; + +{ .mfb + nop.m 0 + fcmp.eq.unc.s1 p11, p0 = FR_NORM_B, f0 + (p10) br.ret.spnt b0 // Exit with result = a if a = nan and b = zero } -{.mib - nop.m 0 - nop.i 0 - // if Y zero - (p11) br.cond.spnt FMOD_Y_ZERO;; +;; + +{ .mib + nop.m 0 + nop.i 0 + // if Y zero + (p11) br.cond.spnt FMOD_B_ZERO } +;; -// X infinity? Return QNAN indefinite +// a= infinity? Return QNAN indefinite { .mfi - // set p7 t0 0 - cmp.ne p7,p0=r0,r0 - fclass.m.unc p8,p9 = f8, 0x23 - nop.i 999;; + // set p7 t0 0 + cmp.ne p7, p0 = r0, r0 + fclass.m.unc p8, p9 = f8, 0x23 + nop.i 0 } -// Y NaN ? -{.mfi - nop.m 999 -(p8) fclass.m p9,p8=f9,0xc3 - nop.i 0;; +;; + +// b NaN ? +{ .mfi + nop.m 0 + (p8) fclass.m p9, p8 = f9, 0xc3 + nop.i 0 } -// Y not pseudo-zero ? (r29 holds significand) -{.mii - nop.m 999 -(p8) cmp.ne p7,p0=r29,r0 - nop.i 0;; +;; + +// b not pseudo-zero ? (GR_SIG_B holds significand) +{ .mii + nop.m 0 + (p8) cmp.ne p7, p0 = GR_SIG_B, r0 + nop.i 0 } -{.mfi - nop.m 999 -(p8) frcpa.s0 f8,p0 = f8,f8 - nop.i 0 +;; + +{ .mfi + nop.m 0 + (p8) frcpa.s0 f8, p0 = f8, f8 + nop.i 0 } { .mfi - nop.m 999 - // also set Denormal flag if necessary -(p7) fnma.s0 f9=f9,f1,f9 - nop.i 999 ;; + nop.m 0 + // also set Denormal flag if necessary + (p7) fnma.s0 f9 = f9, f1, f9 + nop.i 0 } +;; { .mfb - nop.m 999 -(p8) fma.s0 f8=f8,f1,f0 - nop.b 999 ;; + nop.m 0 + (p8) fma.s0 f8 = f8, f1, f0 + nop.b 0 } +;; { .mfb - nop.m 999 -(p9) frcpa.s0 f8,p7=f8,f9 - br.ret.sptk b0 ;; + nop.m 0 + (p9) frcpa.s0 f8, p7 = f8, f9 + br.ret.sptk b0 } +;; - -FMOD_Y_NAN_INF_ZERO: -// Y INF +FMOD_B_NAN_INF_ZERO: +// b INF { .mfi - nop.m 999 - fclass.m.unc p7,p0 = f9, 0x23 - nop.i 999 ;; + nop.m 0 + fclass.m.unc p7, p0 = f9, 0x23 + nop.i 0 } +;; { .mfb - nop.m 999 -(p7) fma.s0 f8=f8,f1,f0 -(p7) br.ret.spnt b0 ;; + nop.m 0 + (p7) fma.s0 f8 = f8, f1, f0 + (p7) br.ret.spnt b0 } +;; -// Y NAN? +// b NAN? { .mfi - nop.m 999 - fclass.m.unc p9,p10 = f9, 0xc3 - nop.i 999 ;; + nop.m 0 + fclass.m.unc p9, p10 = f9, 0xc3 + nop.i 0 } +;; + { .mfi - nop.m 999 -(p10) fclass.nm p9,p0 = f9, 0xff - nop.i 999 ;; + nop.m 0 + (p10) fclass.nm p9, p0 = f9, 0xff + nop.i 0 } +;; { .mfb - nop.m 999 -(p9) fma.s0 f8=f9,f1,f0 -(p9) br.ret.spnt b0 ;; + nop.m 0 + (p9) fma.s0 f8 = f9, f1, f0 + (p9) br.ret.spnt b0 } +;; -FMOD_Y_ZERO: +FMOD_B_ZERO: // Y zero? Must be zero at this point // because it is the only choice left. // Return QNAN indefinite -{.mfi - nop.m 0 - // set Invalid - frcpa.s0 f12,p0=f0,f0 - nop.i 0 -} -// X NAN? { .mfi - nop.m 999 - fclass.m.unc p9,p10 = f8, 0xc3 - nop.i 999 ;; + nop.m 0 + // set Invalid + frcpa.s0 FR_TMP, p0 = f0, f0 + nop.i 0 } +;; + +// a NAN? { .mfi - nop.m 999 -(p10) fclass.nm p9,p10 = f8, 0xff - nop.i 999 ;; + nop.m 0 + fclass.m.unc p9, p10 = f8, 0xc3 + nop.i 0 } +;; -{.mfi - nop.m 999 - (p9) frcpa.s0 f11,p7=f8,f0 - nop.i 0;; +{ .mfi + alloc GR_ARPFS = ar.pfs, 1, 4, 4, 0 + (p10) fclass.nm p9, p10 = f8, 0xff + nop.i 0 } - +;; { .mfi - nop.m 999 -(p10) frcpa.s0 f11,p7 = f9,f9 - mov GR_Parameter_TAG = 120 ;; + nop.m 0 + (p9) frcpa.s0 FR_TMP2, p7 = f8, f0 + nop.i 0 } +;; { .mfi - nop.m 999 - fmerge.s f10 = f8, f8 - nop.i 999 + nop.m 0 + (p10) frcpa.s0 FR_TMP2, p7 = f9, f9 + mov GR_Parameter_TAG = 120 } +;; +{ .mfi + nop.m 0 + fmerge.s FR_X = f8, f8 + nop.i 0 +} { .mfb - nop.m 999 - fma.s0 f8=f11,f1,f0 - br.sptk __libm_error_region;; + nop.m 0 + fma.s0 f8 = FR_TMP2, f1, f0 + br.sptk __libm_error_region } +;; GLOBAL_IEEE754_END(fmodl) - LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi - add GR_Parameter_Y=-32,sp // Parameter 2 value - nop.f 0 -.save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + add GR_Parameter_Y = -32, sp // Parameter 2 value + nop.f 0 +.save ar.pfs, GR_SAVE_PFS + mov GR_SAVE_PFS = ar.pfs // Save ar.pfs } { .mfi .fframe 64 - add sp=-64,sp // Create new stack - nop.f 0 - mov GR_SAVE_GP=gp // Save gp -};; + add sp = -64, sp // Create new stack + nop.f 0 + mov GR_SAVE_GP = gp // Save gp +} +;; + { .mmi - stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack - add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 -};; + stfe [ GR_Parameter_Y ] = FR_Y, 16 // Save Parameter 2 on stack + add GR_Parameter_X = 16, sp // Parameter 1 address +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0 = b0 // Save b0 +} +;; + .body { .mib - stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y - nop.b 0 // Parameter 3 address + stfe [ GR_Parameter_X ] = FR_X // Store Parameter 1 on stack + add GR_Parameter_RESULT = 0, GR_Parameter_Y + nop.b 0 // Parameter 3 address } { .mib - stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# // Call error handling function -};; + stfe [ GR_Parameter_Y ] = FR_RESULT // Store Parameter 3 on stack + add GR_Parameter_Y = -16, GR_Parameter_Y + br.call.sptk b0 = __libm_error_support# // Call error handling function +} +;; + { .mmi - nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp -};; + nop.m 0 + nop.m 0 + add GR_Parameter_RESULT = 48, sp +} +;; + { .mmi - ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack + ldfe f8 = [ GR_Parameter_RESULT ] // Get return result off stack .restore sp - add sp = 64,sp // Restore stack pointer - mov b0 = GR_SAVE_B0 // Restore return address -};; + add sp = 64, sp // Restore stack pointer + mov b0 = GR_SAVE_B0 // Restore return address +} +;; + { .mib - mov gp = GR_SAVE_GP // Restore gp - mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs - br.ret.sptk b0 // Return -};; + mov gp = GR_SAVE_GP // Restore gp + mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs + br.ret.sptk b0 // Return +} +;; LOCAL_LIBM_END(__libm_error_region) - - - -.type __libm_error_support#,@function +.type __libm_error_support#, @function .global __libm_error_support# - - diff --git a/sysdeps/ia64/fpu/e_hypot.S b/sysdeps/ia64/fpu/e_hypot.S index 885c819326..36cfd1e667 100644 --- a/sysdeps/ia64/fpu/e_hypot.S +++ b/sysdeps/ia64/fpu/e_hypot.S @@ -106,6 +106,7 @@ FR_RESULT = f8 LOCAL_LIBM_ENTRY(cabs) LOCAL_LIBM_END(cabs) + GLOBAL_IEEE754_ENTRY(hypot) {.mfi @@ -384,6 +385,7 @@ GLOBAL_IEEE754_ENTRY(hypot) (p9) br.ret.sptk b0;; } GLOBAL_IEEE754_END(hypot) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_hypotf.S b/sysdeps/ia64/fpu/e_hypotf.S index 633bb67e59..d6fcbd1a01 100644 --- a/sysdeps/ia64/fpu/e_hypotf.S +++ b/sysdeps/ia64/fpu/e_hypotf.S @@ -106,6 +106,7 @@ FR_RESULT = f8 LOCAL_LIBM_ENTRY(cabsf) LOCAL_LIBM_END(cabsf) + GLOBAL_IEEE754_ENTRY(hypotf) {.mfi alloc r32= ar.pfs,0,4,4,0 @@ -337,6 +338,7 @@ GLOBAL_IEEE754_ENTRY(hypotf) (p9) br.ret.sptk b0;; } GLOBAL_IEEE754_END(hypotf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mii diff --git a/sysdeps/ia64/fpu/e_hypotl.S b/sysdeps/ia64/fpu/e_hypotl.S index 0aa94b69b8..988b86e761 100644 --- a/sysdeps/ia64/fpu/e_hypotl.S +++ b/sysdeps/ia64/fpu/e_hypotl.S @@ -105,6 +105,7 @@ FR_RESULT = f8 LOCAL_LIBM_ENTRY(cabsl) LOCAL_LIBM_END(cabsl) + GLOBAL_IEEE754_ENTRY(hypotl) {.mfi alloc r32= ar.pfs,0,4,4,0 @@ -421,6 +422,7 @@ GLOBAL_IEEE754_ENTRY(hypotl) (p9) br.ret.sptk b0;; } GLOBAL_IEEE754_END(hypotl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_lgamma_r.c b/sysdeps/ia64/fpu/e_lgamma_r.c index e892635eae..ebc90fc8f3 100644 --- a/sysdeps/ia64/fpu/e_lgamma_r.c +++ b/sysdeps/ia64/fpu/e_lgamma_r.c @@ -1,5 +1,6 @@ /* file: lgamma_r.c */ + // Copyright (c) 2002 Intel Corporation // All rights reserved. // @@ -20,7 +21,6 @@ // products derived from this software without specific prior written // permission. -// WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/sysdeps/ia64/fpu/e_lgammaf_r.c b/sysdeps/ia64/fpu/e_lgammaf_r.c index e5d4d2e0d8..4efa84064c 100644 --- a/sysdeps/ia64/fpu/e_lgammaf_r.c +++ b/sysdeps/ia64/fpu/e_lgammaf_r.c @@ -1,5 +1,6 @@ /* file: lgammaf_r.c */ + // Copyright (c) 2002 Intel Corporation // All rights reserved. // @@ -20,7 +21,6 @@ // products derived from this software without specific prior written // permission. -// WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/sysdeps/ia64/fpu/e_lgammal_r.c b/sysdeps/ia64/fpu/e_lgammal_r.c index a2b36d6394..3fbea703c8 100644 --- a/sysdeps/ia64/fpu/e_lgammal_r.c +++ b/sysdeps/ia64/fpu/e_lgammal_r.c @@ -1,5 +1,6 @@ /* file: lgammal_r.c */ + // Copyright (c) 2002 Intel Corporation // All rights reserved. // @@ -20,7 +21,6 @@ // products derived from this software without specific prior written // permission. -// WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/sysdeps/ia64/fpu/e_log.S b/sysdeps/ia64/fpu/e_log.S index f80f153679..7b277f8a40 100644 --- a/sysdeps/ia64/fpu/e_log.S +++ b/sysdeps/ia64/fpu/e_log.S @@ -1386,6 +1386,7 @@ GLOBAL_IEEE754_ENTRY(log10) };; GLOBAL_IEEE754_END(log10) + GLOBAL_IEEE754_ENTRY(log) { .mfi getf.exp GR_Exp = f8 // if x is unorm then must recompute @@ -1667,6 +1668,7 @@ log_libm_err: };; GLOBAL_IEEE754_END(log) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_log2.S b/sysdeps/ia64/fpu/e_log2.S index 76793574ea..660a9526b6 100644 --- a/sysdeps/ia64/fpu/e_log2.S +++ b/sysdeps/ia64/fpu/e_log2.S @@ -655,6 +655,7 @@ SPECIAL_LOG2: GLOBAL_LIBM_END(log2) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_log2f.S b/sysdeps/ia64/fpu/e_log2f.S index 6de2f38720..17d710a951 100644 --- a/sysdeps/ia64/fpu/e_log2f.S +++ b/sysdeps/ia64/fpu/e_log2f.S @@ -493,6 +493,7 @@ SPECIAL_log2f: GLOBAL_LIBM_END(log2f) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_log2l.S b/sysdeps/ia64/fpu/e_log2l.S index 37af2f2553..b3fe63f182 100644 --- a/sysdeps/ia64/fpu/e_log2l.S +++ b/sysdeps/ia64/fpu/e_log2l.S @@ -761,6 +761,7 @@ LOG2_PSEUDO_ZERO: GLOBAL_IEEE754_END(log2l) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_logf.S b/sysdeps/ia64/fpu/e_logf.S index 0ca6d3f2c8..186edab501 100644 --- a/sysdeps/ia64/fpu/e_logf.S +++ b/sysdeps/ia64/fpu/e_logf.S @@ -841,6 +841,7 @@ GLOBAL_IEEE754_ENTRY(log10f) br.cond.sptk logf_log10f_common };; GLOBAL_IEEE754_END(log10f) + GLOBAL_IEEE754_ENTRY(logf) { .mfi getf.exp GR_Exp = f8 // if x is unorm then must recompute @@ -1087,6 +1088,7 @@ logf_libm_err: };; GLOBAL_IEEE754_END(logf) + // Stack operations when calling error support. // (1) (2) (3) (call) (4) // sp -> + psp -> + psp -> + sp -> + diff --git a/sysdeps/ia64/fpu/e_logl.S b/sysdeps/ia64/fpu/e_logl.S index ba6b55bb9c..3ebb20a632 100644 --- a/sysdeps/ia64/fpu/e_logl.S +++ b/sysdeps/ia64/fpu/e_logl.S @@ -634,6 +634,7 @@ GLOBAL_IEEE754_ENTRY(logl) GLOBAL_IEEE754_END(logl) + GLOBAL_IEEE754_ENTRY(log10l) { .mfi alloc r32 = ar.pfs,0,21,4,0 @@ -1144,6 +1145,7 @@ LOGL_64_negative: GLOBAL_IEEE754_END(log10l) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_pow.S b/sysdeps/ia64/fpu/e_pow.S index 11fae53d72..86005f2f59 100644 --- a/sysdeps/ia64/fpu/e_pow.S +++ b/sysdeps/ia64/fpu/e_pow.S @@ -2234,6 +2234,7 @@ POW_OVER_UNDER_ERROR: GLOBAL_LIBM_END(pow) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_powf.S b/sysdeps/ia64/fpu/e_powf.S index 275843f1e2..4c839cba71 100644 --- a/sysdeps/ia64/fpu/e_powf.S +++ b/sysdeps/ia64/fpu/e_powf.S @@ -64,6 +64,8 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 08/29/02 Improved Itanium 2 performance // 02/10/03 Reordered header: .section, .global, .proc, .align +// 10/09/03 Modified algorithm to improve performance, reduce table size, and +// fix boundary case powf(2.0,-150.0) // // API //============================================================== @@ -106,37 +108,33 @@ // // Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255. // -// We tabluate as two doubles, T and t, where T +t is the value itself. +// We tabluate as one double, T for single precision power // -// Log(x) = (K Log(2)_hi + T) + (Log(2)_hi + t) + Log( 1 + (Bx-1)) -// Log(x) = G + delta + Log( 1 + (Bx-1)) +// Log(x) = (K Log(2)_hi + T) + (K Log(2)_lo) + Log( 1 + (Bx-1)) +// Log(x) = G + delta + Log( 1 + (Bx-1)) // // The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1. // // Log( 1 + (Bx-1)) = r - rsq/2 + p +// where p = r^3(P0 + P1*r + P2*r^2) // // Then, // // yLog(x) = yG + y delta + y(r-rsq/2) + yp -// yLog(x) = Z1 + e3 + Z2 + Z3 + (e2 + e3) +// yLog(x) = Z1 + e3 + Z2 + Z3 // // -// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3) +// exp(yLog(x)) = exp(Z1 + Z2) exp(Z3) exp(e3) // // // exp(Z3) is another series. -// exp(e1 + e2 + e3) is approximated as f3 = 1 + (e1 + e2 + e3) +// exp(e3) is approximated as f3 = 1 + e3 // -// Z1 (128/log2) = number of log2/128 in Z1 is N1 -//