From 0347518d6326846cd5fdbe4b472dfc85a2dfc78c Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 16 Apr 2012 22:08:04 -0400 Subject: ia64: strip trailing whitespace Many ia64 files have trailing whitespace which gets in the way and annoys me. So strip it away: find `find sysdeps/ -name ia64` -type f -exec sed -i 's:[[:space:]]*$::' {} + Signed-off-by: Mike Frysinger --- ChangeLog.ia64 | 107 + sysdeps/ia64/fpu/e_acosf.S | 238 +- sysdeps/ia64/fpu/e_acoshl.S | 482 ++-- sysdeps/ia64/fpu/e_acosl.S | 126 +- sysdeps/ia64/fpu/e_asinf.S | 254 +-- sysdeps/ia64/fpu/e_asinl.S | 126 +- sysdeps/ia64/fpu/e_atan2f.S | 256 +-- sysdeps/ia64/fpu/e_atanhl.S | 130 +- sysdeps/ia64/fpu/e_coshl.S | 108 +- sysdeps/ia64/fpu/e_exp.S | 2 +- sysdeps/ia64/fpu/e_expf.S | 4 +- sysdeps/ia64/fpu/e_fmodl.S | 4 +- sysdeps/ia64/fpu/e_hypot.S | 46 +- sysdeps/ia64/fpu/e_hypotf.S | 52 +- sysdeps/ia64/fpu/e_hypotl.S | 44 +- sysdeps/ia64/fpu/e_log.S | 6 +- sysdeps/ia64/fpu/e_log2.S | 78 +- sysdeps/ia64/fpu/e_log2f.S | 78 +- sysdeps/ia64/fpu/e_log2l.S | 92 +- sysdeps/ia64/fpu/e_logl.S | 226 +- sysdeps/ia64/fpu/e_powf.S | 6 +- sysdeps/ia64/fpu/e_remainder.S | 134 +- sysdeps/ia64/fpu/e_remainderf.S | 148 +- sysdeps/ia64/fpu/e_remainderl.S | 140 +- sysdeps/ia64/fpu/e_scalb.S | 2 +- sysdeps/ia64/fpu/e_scalbf.S | 2 +- sysdeps/ia64/fpu/e_scalbl.S | 2 +- sysdeps/ia64/fpu/e_sinhl.S | 110 +- sysdeps/ia64/fpu/e_sqrt.S | 32 +- sysdeps/ia64/fpu/e_sqrtf.S | 38 +- sysdeps/ia64/fpu/e_sqrtl.S | 26 +- sysdeps/ia64/fpu/libm_cpu_defs.h | 4 +- sysdeps/ia64/fpu/libm_error_codes.h | 10 +- sysdeps/ia64/fpu/libm_frexp.S | 34 +- sysdeps/ia64/fpu/libm_frexpf.S | 34 +- sysdeps/ia64/fpu/libm_frexpl.S | 34 +- sysdeps/ia64/fpu/libm_scalblnf.S | 4 +- sysdeps/ia64/fpu/libm_tan.S | 94 +- sysdeps/ia64/fpu/s_asinhl.S | 194 +- sysdeps/ia64/fpu/s_atanf.S | 248 +-- sysdeps/ia64/fpu/s_atanl.S | 150 +- sysdeps/ia64/fpu/s_cbrtl.S | 14 +- sysdeps/ia64/fpu/s_cos.S | 46 +- sysdeps/ia64/fpu/s_cosf.S | 36 +- sysdeps/ia64/fpu/s_erf.S | 114 +- sysdeps/ia64/fpu/s_erfc.S | 256 +-- sysdeps/ia64/fpu/s_erfcf.S | 186 +- sysdeps/ia64/fpu/s_erfcl.S | 448 ++-- sysdeps/ia64/fpu/s_erff.S | 66 +- sysdeps/ia64/fpu/s_erfl.S | 236 +- sysdeps/ia64/fpu/s_expm1.S | 2 +- sysdeps/ia64/fpu/s_expm1f.S | 6 +- sysdeps/ia64/fpu/s_expm1l.S | 186 +- sysdeps/ia64/fpu/s_fabs.S | 30 +- sysdeps/ia64/fpu/s_fabsf.S | 30 +- sysdeps/ia64/fpu/s_fabsl.S | 30 +- sysdeps/ia64/fpu/s_finite.S | 2 +- sysdeps/ia64/fpu/s_fma.S | 24 +- sysdeps/ia64/fpu/s_fmaf.S | 24 +- sysdeps/ia64/fpu/s_fmal.S | 24 +- sysdeps/ia64/fpu/s_fmax.S | 28 +- sysdeps/ia64/fpu/s_fmaxf.S | 28 +- sysdeps/ia64/fpu/s_fmaxl.S | 28 +- sysdeps/ia64/fpu/s_fpclassify.S | 2 +- sysdeps/ia64/fpu/s_frexp.c | 10 +- sysdeps/ia64/fpu/s_frexpf.c | 10 +- sysdeps/ia64/fpu/s_frexpl.c | 10 +- sysdeps/ia64/fpu/s_ldexp.c | 12 +- sysdeps/ia64/fpu/s_ldexpf.c | 12 +- sysdeps/ia64/fpu/s_ldexpl.c | 12 +- sysdeps/ia64/fpu/s_log1pl.S | 236 +- sysdeps/ia64/fpu/s_modf.S | 42 +- sysdeps/ia64/fpu/s_modff.S | 42 +- sysdeps/ia64/fpu/s_modfl.S | 42 +- sysdeps/ia64/fpu/s_nextafter.S | 66 +- sysdeps/ia64/fpu/s_nextafterf.S | 66 +- sysdeps/ia64/fpu/s_nextafterl.S | 66 +- sysdeps/ia64/fpu/s_nexttoward.S | 66 +- sysdeps/ia64/fpu/s_nexttowardf.S | 66 +- sysdeps/ia64/fpu/s_nexttowardl.S | 64 +- sysdeps/ia64/fpu/s_round.S | 2 +- sysdeps/ia64/fpu/s_roundf.S | 2 +- sysdeps/ia64/fpu/s_roundl.S | 2 +- sysdeps/ia64/fpu/s_scalblnf.c | 12 +- sysdeps/ia64/fpu/s_scalbn.c | 12 +- sysdeps/ia64/fpu/s_scalbnf.c | 12 +- sysdeps/ia64/fpu/s_scalbnl.c | 12 +- sysdeps/ia64/fpu/s_signbit.S | 2 +- sysdeps/ia64/fpu/s_significand.S | 34 +- sysdeps/ia64/fpu/s_significandf.S | 32 +- sysdeps/ia64/fpu/s_significandl.S | 38 +- sysdeps/ia64/fpu/s_tan.S | 18 +- sysdeps/ia64/fpu/s_tanf.S | 18 +- sysdeps/ia64/fpu/s_tanh.S | 130 +- sysdeps/ia64/fpu/s_tanhf.S | 70 +- sysdeps/ia64/fpu/s_tanhl.S | 234 +- sysdeps/ia64/fpu/s_tanl.S | 22 +- sysdeps/ia64/fpu/w_tgamma.S | 196 +- sysdeps/ia64/fpu/w_tgammaf.S | 112 +- sysdeps/ia64/fpu/w_tgammal.S | 3094 +++++++++++++------------- sysdeps/ia64/softpipe.h | 2 +- sysdeps/ia64/strchr.S | 8 +- sysdeps/ia64/strlen.S | 6 +- sysdeps/ia64/strncmp.S | 2 +- sysdeps/unix/sysv/linux/ia64/register-dump.h | 2 +- 105 files changed, 5377 insertions(+), 5270 deletions(-) diff --git a/ChangeLog.ia64 b/ChangeLog.ia64 index 198dd0be0b..4c9b9f136b 100644 --- a/ChangeLog.ia64 +++ b/ChangeLog.ia64 @@ -1,3 +1,110 @@ +2012-04-22 Mike Frysinger + + * sysdeps/ia64/fpu/e_acosf.S: Trim trailing whitespace. + * sysdeps/ia64/fpu/e_acoshl.S: Likewise. + * sysdeps/ia64/fpu/e_acosl.S: Likewise. + * sysdeps/ia64/fpu/e_asinf.S: Likewise. + * sysdeps/ia64/fpu/e_asinl.S: Likewise. + * sysdeps/ia64/fpu/e_atan2f.S: Likewise. + * sysdeps/ia64/fpu/e_atanhl.S: Likewise. + * sysdeps/ia64/fpu/e_coshl.S: Likewise. + * sysdeps/ia64/fpu/e_exp.S: Likewise. + * sysdeps/ia64/fpu/e_expf.S: Likewise. + * sysdeps/ia64/fpu/e_fmodl.S: Likewise. + * sysdeps/ia64/fpu/e_hypot.S: Likewise. + * sysdeps/ia64/fpu/e_hypotf.S: Likewise. + * sysdeps/ia64/fpu/e_hypotl.S: Likewise. + * sysdeps/ia64/fpu/e_log.S: Likewise. + * sysdeps/ia64/fpu/e_log2.S: Likewise. + * sysdeps/ia64/fpu/e_log2f.S: Likewise. + * sysdeps/ia64/fpu/e_log2l.S: Likewise. + * sysdeps/ia64/fpu/e_logl.S: Likewise. + * sysdeps/ia64/fpu/e_powf.S: Likewise. + * sysdeps/ia64/fpu/e_remainder.S: Likewise. + * sysdeps/ia64/fpu/e_remainderf.S: Likewise. + * sysdeps/ia64/fpu/e_remainderl.S: Likewise. + * sysdeps/ia64/fpu/e_scalb.S: Likewise. + * sysdeps/ia64/fpu/e_scalbf.S: Likewise. + * sysdeps/ia64/fpu/e_scalbl.S: Likewise. + * sysdeps/ia64/fpu/e_sinhl.S: Likewise. + * sysdeps/ia64/fpu/e_sqrt.S: Likewise. + * sysdeps/ia64/fpu/e_sqrtf.S: Likewise. + * sysdeps/ia64/fpu/e_sqrtl.S: Likewise. + * sysdeps/ia64/fpu/libm_cpu_defs.h: Likewise. + * sysdeps/ia64/fpu/libm_error_codes.h: Likewise. + * sysdeps/ia64/fpu/libm_frexp.S: Likewise. + * sysdeps/ia64/fpu/libm_frexpf.S: Likewise. + * sysdeps/ia64/fpu/libm_frexpl.S: Likewise. + * sysdeps/ia64/fpu/libm_scalblnf.S: Likewise. + * sysdeps/ia64/fpu/libm_tan.S: Likewise. + * sysdeps/ia64/fpu/s_asinhl.S: Likewise. + * sysdeps/ia64/fpu/s_atanf.S: Likewise. + * sysdeps/ia64/fpu/s_atanl.S: Likewise. + * sysdeps/ia64/fpu/s_cbrtl.S: Likewise. + * sysdeps/ia64/fpu/s_cos.S: Likewise. + * sysdeps/ia64/fpu/s_cosf.S: Likewise. + * sysdeps/ia64/fpu/s_erf.S: Likewise. + * sysdeps/ia64/fpu/s_erfc.S: Likewise. + * sysdeps/ia64/fpu/s_erfcf.S: Likewise. + * sysdeps/ia64/fpu/s_erfcl.S: Likewise. + * sysdeps/ia64/fpu/s_erff.S: Likewise. + * sysdeps/ia64/fpu/s_erfl.S: Likewise. + * sysdeps/ia64/fpu/s_expm1.S: Likewise. + * sysdeps/ia64/fpu/s_expm1f.S: Likewise. + * sysdeps/ia64/fpu/s_expm1l.S: Likewise. + * sysdeps/ia64/fpu/s_fabs.S: Likewise. + * sysdeps/ia64/fpu/s_fabsf.S: Likewise. + * sysdeps/ia64/fpu/s_fabsl.S: Likewise. + * sysdeps/ia64/fpu/s_finite.S: Likewise. + * sysdeps/ia64/fpu/s_fma.S: Likewise. + * sysdeps/ia64/fpu/s_fmaf.S: Likewise. + * sysdeps/ia64/fpu/s_fmal.S: Likewise. + * sysdeps/ia64/fpu/s_fmax.S: Likewise. + * sysdeps/ia64/fpu/s_fmaxf.S: Likewise. + * sysdeps/ia64/fpu/s_fmaxl.S: Likewise. + * sysdeps/ia64/fpu/s_fpclassify.S: Likewise. + * sysdeps/ia64/fpu/s_frexp.c: Likewise. + * sysdeps/ia64/fpu/s_frexpf.c: Likewise. + * sysdeps/ia64/fpu/s_frexpl.c: Likewise. + * sysdeps/ia64/fpu/s_ldexp.c: Likewise. + * sysdeps/ia64/fpu/s_ldexpf.c: Likewise. + * sysdeps/ia64/fpu/s_ldexpl.c: Likewise. + * sysdeps/ia64/fpu/s_log1pl.S: Likewise. + * sysdeps/ia64/fpu/s_modf.S: Likewise. + * sysdeps/ia64/fpu/s_modff.S: Likewise. + * sysdeps/ia64/fpu/s_modfl.S: Likewise. + * sysdeps/ia64/fpu/s_nextafter.S: Likewise. + * sysdeps/ia64/fpu/s_nextafterf.S: Likewise. + * sysdeps/ia64/fpu/s_nextafterl.S: Likewise. + * sysdeps/ia64/fpu/s_nexttoward.S: Likewise. + * sysdeps/ia64/fpu/s_nexttowardf.S: Likewise. + * sysdeps/ia64/fpu/s_nexttowardl.S: Likewise. + * sysdeps/ia64/fpu/s_round.S: Likewise. + * sysdeps/ia64/fpu/s_roundf.S: Likewise. + * sysdeps/ia64/fpu/s_roundl.S: Likewise. + * sysdeps/ia64/fpu/s_scalblnf.c: Likewise. + * sysdeps/ia64/fpu/s_scalbn.c: Likewise. + * sysdeps/ia64/fpu/s_scalbnf.c: Likewise. + * sysdeps/ia64/fpu/s_scalbnl.c: Likewise. + * sysdeps/ia64/fpu/s_signbit.S: Likewise. + * sysdeps/ia64/fpu/s_significand.S: Likewise. + * sysdeps/ia64/fpu/s_significandf.S: Likewise. + * sysdeps/ia64/fpu/s_significandl.S: Likewise. + * sysdeps/ia64/fpu/s_tan.S: Likewise. + * sysdeps/ia64/fpu/s_tanf.S: Likewise. + * sysdeps/ia64/fpu/s_tanh.S: Likewise. + * sysdeps/ia64/fpu/s_tanhf.S: Likewise. + * sysdeps/ia64/fpu/s_tanhl.S: Likewise. + * sysdeps/ia64/fpu/s_tanl.S: Likewise. + * sysdeps/ia64/fpu/w_tgamma.S: Likewise. + * sysdeps/ia64/fpu/w_tgammaf.S: Likewise. + * sysdeps/ia64/fpu/w_tgammal.S: Likewise. + * sysdeps/ia64/softpipe.h: Likewise. + * sysdeps/ia64/strchr.S: Likewise. + * sysdeps/ia64/strlen.S: Likewise. + * sysdeps/ia64/strncmp.S: Likewise. + * sysdeps/unix/sysv/linux/ia64/register-dump.h: Likewise. + 2012-04-22 Mike Frysinger * sysdeps/ia64/Implies: Copied from the main tree. diff --git a/sysdeps/ia64/fpu/e_acosf.S b/sysdeps/ia64/fpu/e_acosf.S index 68b0b2ee8d..bdcac59d22 100644 --- a/sysdeps/ia64/fpu/e_acosf.S +++ b/sysdeps/ia64/fpu/e_acosf.S @@ -61,7 +61,7 @@ // The acosf function returns the arc cosine in the range [0, +pi] radians. // acos(1) returns +0 -// acos(x) returns a Nan and raises the invalid exception for |x| >1 +// acos(x) returns a Nan and raises the invalid exception for |x| >1 // |x| <= sqrt(2)/2. get Ax and Bx @@ -249,355 +249,355 @@ LOCAL_OBJECT_END(acosf_coeff_2_table) .section .text GLOBAL_LIBM_ENTRY(acosf) - + // Load the addresses of the two tables. // Then, load the coefficients and other constants. -{ .mfi +{ .mfi alloc r32 = ar.pfs,1,8,4,0 fnma.s1 acosf_t = f8,f8,f1 dep.z ACOSF_GR_1by2 = 0x3f,24,8 // 0x3f000000 -} -{ .mfi +} +{ .mfi addl ACOSF_Addr1 = @ltoff(acosf_coeff_1_table),gp fma.s1 acosf_x2 = f8,f8,f0 addl ACOSF_Addr2 = @ltoff(acosf_coeff_2_table),gp ;; } - -{ .mfi + +{ .mfi ld8 ACOSF_Addr1 = [ACOSF_Addr1] fmerge.s acosf_abs_x = f1,f8 dep ACOSF_GR_3by2 = -1,r0,22,8 // 0x3fc00000 -} -{ .mlx +} +{ .mlx nop.m 999 movl ACOSF_GR_5by2 = 0x40200000;; } - -{ .mfi + +{ .mfi setf.s acosf_1by2 = ACOSF_GR_1by2 fmerge.s acosf_sgn_x = f8,f1 nop.i 999 -} -{ .mfi +} +{ .mfi ld8 ACOSF_Addr2 = [ACOSF_Addr2] nop.f 0 nop.i 999;; } - -{ .mfi + +{ .mfi setf.s acosf_5by2 = ACOSF_GR_5by2 fcmp.lt.s1 p11,p12 = f8,f0 nop.i 999;; } -{ .mmf +{ .mmf ldfpd acosf_coeff_P1,acosf_coeff_P4 = [ACOSF_Addr1],16 setf.s acosf_3by2 = ACOSF_GR_3by2 fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan } - -{ .mfi + +{ .mfi ldfpd acosf_coeff_P7,acosf_coeff_P6 = [ACOSF_Addr1],16 fma.s1 acosf_t2 = acosf_t,acosf_t,f0 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd acosf_coeff_P3,acosf_coeff_P8 = [ACOSF_Addr2],16 fma.s1 acosf_x4 = acosf_x2,acosf_x2,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd acosf_coeff_P9,acosf_const_sqrt2by2 = [ACOSF_Addr1] fclass.m.unc p10,p0 = f8, 0x07 //@zero nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd acosf_coeff_P5,acosf_coeff_P2 = [ACOSF_Addr2],16 fma.s1 acosf_x3 = f8,acosf_x2,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfd acosf_const_piby2 = [ACOSF_Addr2] frsqrta.s1 acosf_B,p0 = acosf_t nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 (p8) fma.s.s0 f8 = f8,f1,f0 (p8) br.ret.spnt b0 ;; // Exit if x=nan } - -{ .mfb + +{ .mfb nop.m 999 fcmp.eq.s1 p6,p0 = acosf_abs_x,f1 (p10) br.cond.spnt ACOSF_ZERO ;; // Branch if x=0 -} - -{ .mfi +} + +{ .mfi nop.m 999 fcmp.gt.s1 p9,p0 = acosf_abs_x,f1 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fma.s1 acosf_x8 = acosf_x4,acosf_x4,f0 nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 fma.s1 acosf_t4 = acosf_t2,acosf_t2,f0 (p6) br.cond.spnt ACOSF_ABS_ONE ;; // Branch if |x|=1 -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_x5 = acosf_x2,acosf_x3,f0 nop.i 999 } -{ .mfb +{ .mfb (p9) mov GR_Parameter_TAG = 59 fma.s1 acosf_yby2 = acosf_t,acosf_1by2,f0 (p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1 } -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_Az = acosf_t,acosf_B,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_B2 = acosf_B,acosf_B,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_poly_p1 = f8,acosf_coeff_P1,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p1 = acosf_coeff_P1,acosf_t,f1 nop.i 999;; } -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_poly_p3 = acosf_coeff_P4,acosf_x2,acosf_coeff_P3 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p6 = acosf_coeff_P7,acosf_t,acosf_coeff_P6 nop.i 999;; -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_poly_p7 = acosf_x2,acosf_coeff_P8,acosf_coeff_P7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p2 = acosf_coeff_P3,acosf_t,acosf_coeff_P2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_poly_p5 = acosf_x2,acosf_coeff_P6,acosf_coeff_P5 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p4 = acosf_coeff_P5,acosf_t,acosf_coeff_P4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_x11 = acosf_x8,acosf_x3,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fnma.s1 acosf_dz = acosf_B2,acosf_yby2,acosf_1by2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_poly_p1a = acosf_x2,acosf_poly_p1,f8 nop.i 999 } -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_2poly_p8 = acosf_coeff_P9,acosf_t,acosf_coeff_P8 nop.i 999;; } - + // Get the absolute value of x and determine the region in which x lies -{ .mfi +{ .mfi nop.m 999 fcmp.le.s1 p7,p8 = acosf_abs_x,acosf_const_sqrt2by2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_poly_p2 = acosf_x2,acosf_poly_p3,acosf_coeff_P2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_poly_p7a = acosf_x4,acosf_coeff_P9,acosf_poly_p7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p2a = acosf_2poly_p2,acosf_t2,acosf_2poly_p1 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 acosf_sgnx_t4 = acosf_sgn_x,acosf_t4,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 acosf_2poly_p4a = acosf_2poly_p6,acosf_t2,acosf_2poly_p4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 acosf_Sz = acosf_5by2,acosf_dz,acosf_3by2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 acosf_d2z = acosf_dz,acosf_dz,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fnma.d.s1 acosf_sgn_x_piby2 = acosf_sgn_x,acosf_const_piby2,acosf_const_piby2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p7) fma.s1 acosf_poly_Ax = acosf_x5,acosf_poly_p2,acosf_poly_p1a nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p7) fma.s1 acosf_poly_Bx = acosf_x4,acosf_poly_p7a,acosf_poly_p5 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 acosf_sgnx_2poly_p2 = acosf_sgn_x,acosf_2poly_p2a,f0 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal nop.i 999 } -{ .mfi +{ .mfi nop.m 999 (p8) fma.s1 acosf_2poly_p4b = acosf_2poly_p8,acosf_t4,acosf_2poly_p4a nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 acosf_Fz = acosf_d2z,acosf_Sz,acosf_dz nop.i 999;; -} +} - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.d.s1 acosf_Pt = acosf_2poly_p4b,acosf_sgnx_t4,acosf_sgnx_2poly_p2 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p8) fma.d.s1 acosf_z = acosf_Az,acosf_Fz,acosf_Az nop.i 999 ;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p7) fma.d.s1 acosf_sinf1 = acosf_x11,acosf_poly_Bx,acosf_poly_Ax nop.i 999;; -} - +} + .pred.rel "mutex",p8,p7 //acosf_pred_GTsqrt2by2,acosf_pred_LEsqrt2by2 -{ .mfi +{ .mfi nop.m 999 (p8) fma.s.s0 f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2 nop.i 999 -} - -{ .mfb +} + +{ .mfb nop.m 999 (p7) fms.s.s0 f8 = acosf_const_piby2,f1,acosf_sinf1 br.ret.sptk b0 ;; -} +} ACOSF_ZERO: // Here if x=0 -{ .mfb +{ .mfb nop.m 999 fma.s.s0 f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2 br.ret.sptk b0 ;; -} +} ACOSF_ABS_ONE: .pred.rel "mutex",p11,p12 // Here if |x|=1 -{ .mfi +{ .mfi nop.m 999 (p11) fma.s.s0 f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 (p12) fma.s.s0 f8 = f1,f0,f0 // acosf(1)=0 br.ret.sptk b0 ;; -} +} GLOBAL_LIBM_END(acosf) diff --git a/sysdeps/ia64/fpu/e_acoshl.S b/sysdeps/ia64/fpu/e_acoshl.S index 42e1f394ef..1ce292c88e 100644 --- a/sysdeps/ia64/fpu/e_acoshl.S +++ b/sysdeps/ia64/fpu/e_acoshl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // //********************************************************************* // -// History: +// History: // 10/01/01 Initial version // 10/10/01 Performance inproved // 12/11/01 Changed huges_logp to not be global @@ -57,7 +57,7 @@ // // Overview of operation //============================================================== -// +// // There are 6 paths: // 1. x = 1 // Return acoshl(x) = 0; @@ -67,37 +67,37 @@ // // 3. x = [S,Q]Nan or +INF // Return acoshl(x) = x + x; -// +// // 4. 'Near 1': 1 < x < 1+1/8 -// Return acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)), +// Return acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)), // where y = 1, P(y)/Q(y) - rational approximation // // 5. 'Huges': x > 0.5*2^64 // Return acoshl(x) = (logl(2*x-1)); -// +// // 6. 'Main path': 1+1/8 < x < 0.5*2^64 // b_hi + b_lo = x + sqrt(x^2 - 1); // acoshl(x) = logl_special(b_hi, b_lo); -// -// Algorithm description +// +// Algorithm description //============================================================== // // I. Near 1 path algorithm // ************************************************************** -// The formula is acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)), +// The formula is acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)), // where y = 1, P(y)/Q(y) - rational approximation // // 1) y = x - 1, y2 = 2 * y // // 2) Compute in parallel sqrtl(2*y) and P(y)/Q(y) // a) sqrtl computation method described below (main path algorithm, item 2)) -// As result we obtain (gg+gl) - multiprecision result +// As result we obtain (gg+gl) - multiprecision result // as pair of double extended values // b) P(y) and Q(y) calculated without any extra precision manipulations // c) P/Q division: // y = frcpa(Q) initial approximation of 1/Q // z = P*y initial approximation of P/Q -// +// // e = 1 - b*y // e2 = e + e^2 // e1 = e^2 @@ -121,7 +121,7 @@ // b) res = ((((gl + ll) + lh) + hl) + hh) + gg; // (exactly in this order) // -// II. Main path algorithm +// II. Main path algorithm // ( thanks to Peter Markstein for the idea of sqrt(x^2+1) computation! ) // ********************************************************************** // @@ -130,19 +130,19 @@ // 1) m2 = (m2_hi+m2_lo) = x^2-1 obtaining // ------------------------------------ // m2_hi = x2_hi - 1, where x2_hi = x * x; -// m2_lo = x2_lo + p1_lo, where -// x2_lo = FMS(x*x-x2_hi), +// m2_lo = x2_lo + p1_lo, where +// x2_lo = FMS(x*x-x2_hi), // p1_lo = (1 + m2_hi) - x2_hi; // // 2) g = (g_hi+g_lo) = sqrt(m2) = sqrt(m2_hi+m2_lo) // ---------------------------------------------- // r = invsqrt(m2_hi) (8-bit reciprocal square root approximation); // g = m2_hi * r (first 8 bit-approximation of sqrt); -// +// // h = 0.5 * r; // e = 0.5 - g * h; // g = g * e + g (second 16 bit-approximation of sqrt); -// +// // h = h * e + h; // e = 0.5 - g * h; // g = g * e + g (third 32 bit-approximation of sqrt); @@ -150,7 +150,7 @@ // h = h * e + h; // e = 0.5 - g * h; // g_hi = g * e + g (fourth 64 bit-approximation of sqrt); -// +// // Remainder computation: // h = h * e + h; // d = (m2_hi - g_hi * g_hi) + m2_lo; @@ -160,15 +160,15 @@ // ------------------------------------------------------------------- // b_hi = (g_hi + x) + gl; // b_lo = (x - b_hi) + g_hi + gl; -// +// // Now we pass b presented as sum b_hi + b_lo to special version // of logl function which accept a pair of arguments as -// mutiprecision value. -// +// mutiprecision value. +// // Special log algorithm overview // ================================ // Here we use a table lookup method. The basic idea is that in -// order to compute logl(Arg) for an argument Arg in [1,2), +// order to compute logl(Arg) for an argument Arg in [1,2), // we construct a value G such that G*Arg is close to 1 and that // logl(1/G) is obtainable easily from a table of values calculated // beforehand. Thus @@ -198,7 +198,7 @@ // G := G_1 * G_2 * G_3 // r := (G * S_hi - 1) + G * S_lo // -// These G_j's have the property that the product is exactly +// These G_j's have the property that the product is exactly // representable and that |r| < 2^(-12) as a result. // // Step 2: Approximation @@ -217,11 +217,11 @@ // // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8, input // f32 -> f95 (64 registers) -// General registers used: +// General registers used: // r32 -> r67 (36 registers) // Predicate registers used: @@ -229,15 +229,15 @@ // p7 for 'NaNs, Inf' path // p8 for 'near 1' path // p9 for 'huges' path -// p10 for x = 1 +// p10 for x = 1 // p11 for x < 1 // //********************************************************************* // IEEE Special Conditions: // // acoshl(+inf) = +inf -// acoshl(-inf) = QNaN -// acoshl(1) = 0 +// acoshl(-inf) = QNaN +// acoshl(1) = 0 // acoshl(x<1) = QNaN // acoshl(SNaN) = QNaN // acoshl(QNaN) = QNaN @@ -245,38 +245,38 @@ // Data tables //============================================================== - + RODATA .align 64 // Near 1 path rational aproximation coefficients LOCAL_OBJECT_START(Poly_P) -data8 0xB0978143F695D40F, 0x3FF1 // .84205539791447100108478906277453574946e-4 -data8 0xB9800D841A8CAD29, 0x3FF6 // .28305085180397409672905983082168721069e-2 -data8 0xC889F455758C1725, 0x3FF9 // .24479844297887530847660233111267222945e-1 -data8 0x9BE1DFF006F45F12, 0x3FFB // .76114415657565879842941751209926938306e-1 -data8 0x9E34AF4D372861E0, 0x3FFB // .77248925727776366270605984806795850504e-1 -data8 0xF3DC502AEE14C4AE, 0x3FA6 // .3077953476682583606615438814166025592e-26 +data8 0xB0978143F695D40F, 0x3FF1 // .84205539791447100108478906277453574946e-4 +data8 0xB9800D841A8CAD29, 0x3FF6 // .28305085180397409672905983082168721069e-2 +data8 0xC889F455758C1725, 0x3FF9 // .24479844297887530847660233111267222945e-1 +data8 0x9BE1DFF006F45F12, 0x3FFB // .76114415657565879842941751209926938306e-1 +data8 0x9E34AF4D372861E0, 0x3FFB // .77248925727776366270605984806795850504e-1 +data8 0xF3DC502AEE14C4AE, 0x3FA6 // .3077953476682583606615438814166025592e-26 LOCAL_OBJECT_END(Poly_P) // LOCAL_OBJECT_START(Poly_Q) -data8 0xF76E3FD3C7680357, 0x3FF1 // .11798413344703621030038719253730708525e-3 -data8 0xD107D2E7273263AE, 0x3FF7 // .63791065024872525660782716786703188820e-2 -data8 0xB609BE5CDE206AEF, 0x3FFB // .88885771950814004376363335821980079985e-1 -data8 0xF7DEACAC28067C8A, 0x3FFD // .48412074662702495416825113623936037072302 -data8 0x8F9BE5890CEC7E38, 0x3FFF // 1.1219450873557867470217771071068369729526 -data8 0xED4F06F3D2BC92D1, 0x3FFE // .92698710873331639524734537734804056798748 +data8 0xF76E3FD3C7680357, 0x3FF1 // .11798413344703621030038719253730708525e-3 +data8 0xD107D2E7273263AE, 0x3FF7 // .63791065024872525660782716786703188820e-2 +data8 0xB609BE5CDE206AEF, 0x3FFB // .88885771950814004376363335821980079985e-1 +data8 0xF7DEACAC28067C8A, 0x3FFD // .48412074662702495416825113623936037072302 +data8 0x8F9BE5890CEC7E38, 0x3FFF // 1.1219450873557867470217771071068369729526 +data8 0xED4F06F3D2BC92D1, 0x3FFE // .92698710873331639524734537734804056798748 LOCAL_OBJECT_END(Poly_Q) -// Q coeffs +// Q coeffs LOCAL_OBJECT_START(Constants_Q) -data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 +data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000 data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000 data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000 data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000 -data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 +data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 LOCAL_OBJECT_END(Constants_Q) // Z1 - 16 bit fixed @@ -391,7 +391,7 @@ data4 0x3F71D488,0x3D693B9D data8 0xBE049391B6B7C239 LOCAL_OBJECT_END(Constants_G_H_h2) -// G3 and H3 - IEEE single and h3 - IEEE double +// G3 and H3 - IEEE single and h3 - IEEE double LOCAL_OBJECT_START(Constants_G_H_h3) data4 0x3F7FFC00,0x38800100 data8 0x3D355595562224CD @@ -481,16 +481,16 @@ FR_QQ3 = f41 FR_QQ4 = f42 FR_QQ5 = f43 -FR_Q1 = f44 -FR_Q2 = f45 -FR_Q3 = f46 -FR_Q4 = f47 +FR_Q1 = f44 +FR_Q2 = f45 +FR_Q3 = f46 +FR_Q4 = f47 FR_Half = f48 FR_Two = f49 -FR_log2_hi = f50 -FR_log2_lo = f51 +FR_log2_hi = f50 +FR_log2_lo = f51 FR_X2 = f52 @@ -512,14 +512,14 @@ FR_XM12 = f64 // Special logl registers -FR_XLog_Hi = f65 -FR_XLog_Lo = f66 +FR_XLog_Hi = f65 +FR_XLog_Lo = f66 -FR_Y_hi = f67 +FR_Y_hi = f67 FR_Y_lo = f68 -FR_S_hi = f69 -FR_S_lo = f70 +FR_S_hi = f69 +FR_S_lo = f70 FR_poly_lo = f71 FR_poly_hi = f72 @@ -530,19 +530,19 @@ FR_h = f75 FR_G2 = f76 FR_H2 = f77 -FR_h2 = f78 +FR_h2 = f78 -FR_r = f79 -FR_rsq = f80 -FR_rcub = f81 +FR_r = f79 +FR_rsq = f80 +FR_rcub = f81 -FR_float_N = f82 +FR_float_N = f82 -FR_G3 = f83 -FR_H3 = f84 -FR_h3 = f85 +FR_G3 = f83 +FR_H3 = f84 +FR_h3 = f85 -FR_2_to_minus_N = f86 +FR_2_to_minus_N = f86 // Near 1 registers @@ -561,7 +561,7 @@ FR_QV3 = f75 FR_QV2 = f76 FR_Y0 = f77 -FR_Q0 = f78 +FR_Q0 = f78 FR_E0 = f79 FR_E2 = f80 FR_E1 = f81 @@ -601,23 +601,23 @@ GR_Poly_P = r37 GR_Poly_Q = r38 // Special logl registers -GR_Index1 = r39 -GR_Index2 = r40 -GR_signif = r41 -GR_X_0 = r42 -GR_X_1 = r43 -GR_X_2 = r44 +GR_Index1 = r39 +GR_Index2 = r40 +GR_signif = r41 +GR_X_0 = r42 +GR_X_1 = r43 +GR_X_2 = r44 GR_minus_N = r45 -GR_Z_1 = r46 -GR_Z_2 = r47 -GR_N = r48 -GR_Bias = r49 -GR_M = r50 -GR_Index3 = r51 -GR_exp_2tom80 = r52 -GR_exp_mask = r53 -GR_exp_2tom7 = r54 -GR_ad_ln10 = r55 +GR_Z_1 = r46 +GR_Z_2 = r47 +GR_N = r48 +GR_Bias = r49 +GR_M = r50 +GR_Index3 = r51 +GR_exp_2tom80 = r52 +GR_exp_mask = r53 +GR_exp_2tom7 = r54 +GR_ad_ln10 = r55 GR_ad_tbl_1 = r56 GR_ad_tbl_2 = r57 GR_ad_tbl_3 = r58 @@ -652,29 +652,29 @@ GLOBAL_LIBM_ENTRY(acoshl) addl GR_Poly_Q = @ltoff(Poly_Q), gp // Address of Q-coeff table fma.s1 FR_X2 = FR_Arg, FR_Arg, f0 // Obtain x^2 addl GR_Poly_P = @ltoff(Poly_P), gp // Address of P-coeff table -};; +};; -{ .mfi +{ .mfi getf.d GR_Arg = FR_Arg // get arument as double (int64) fma.s0 FR_Two = f1, f1, f1 // construct 2.0 addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp // logl tables } -{ .mlx - nop.m 0 +{ .mlx + nop.m 0 movl GR_TwoP63 = 0x43E8000000000000 // 0.5*2^63 (huge arguments) -};; +};; -{ .mfi +{ .mfi ld8 GR_Poly_P = [GR_Poly_P] // get actual P-coeff table address fcmp.eq.s1 p10, p0 = FR_Arg, f1 // if arg == 1 (return 0) nop.i 0 } -{ .mlx +{ .mlx ld8 GR_Poly_Q = [GR_Poly_Q] // get actual Q-coeff table address movl GR_OneP125 = 0x3FF2000000000000 // 1.125 (near 1 path bound) };; -{ .mfi +{ .mfi ld8 GR_ad_z_1 = [GR_ad_z_1] // Get pointer to Constants_Z_1 fclass.m p7,p0 = FR_Arg, 0xe3 // if arg NaN inf cmp.le p9, p0 = GR_TwoP63, GR_Arg // if arg > 0.5*2^63 ('huges') @@ -683,31 +683,31 @@ GLOBAL_LIBM_ENTRY(acoshl) cmp.ge p8, p0 = GR_OneP125, GR_Arg // if arg<1.125 -near 1 path fms.s1 FR_XM1 = FR_Arg, f1, f1 // X0 = X-1 (for near 1 path) (p11) br.cond.spnt acoshl_lt_pone // error branch (less than 1) -};; +};; -{ .mmi +{ .mmi setf.exp FR_Half = GR_Half // construct 0.5 (p9) setf.s FR_XLog_Lo = r0 // Low of logl arg=0 (Huges path) mov GR_exp_mask = 0x1FFFF // Create exponent mask -};; +};; -{ .mmf +{ .mmf (p8) ldfe FR_PP5 = [GR_Poly_P],16 // Load P5 (p8) ldfe FR_QQ5 = [GR_Poly_Q],16 // Load Q5 fms.s1 FR_M2 = FR_X2, f1, f1 // m2 = x^2 - 1 };; -{ .mfi +{ .mfi (p8) ldfe FR_QQ4 = [GR_Poly_Q],16 // Load Q4 - fms.s1 FR_M2L = FR_Arg, FR_Arg, FR_X2 // low part of + fms.s1 FR_M2L = FR_Arg, FR_Arg, FR_X2 // low part of // m2 = fma(X*X - m2) add GR_ad_tbl_1 = 0x040, GR_ad_z_1 // Point to Constants_G_H_h1 } { .mfb -(p8) ldfe FR_PP4 = [GR_Poly_P],16 // Load P4 +(p8) ldfe FR_PP4 = [GR_Poly_P],16 // Load P4 (p7) fma.s0 FR_Res = FR_Arg,f1,FR_Arg // r = a + a (Nan, Inf) (p7) br.ret.spnt b0 // return (Nan, Inf) -};; +};; { .mfi (p8) ldfe FR_PP3 = [GR_Poly_P],16 // Load P3 @@ -719,9 +719,9 @@ GLOBAL_LIBM_ENTRY(acoshl) (p9) fms.s1 FR_XLog_Hi = FR_Two, FR_Arg, f1 // Hi of log arg = 2*X-1 (p9) br.cond.spnt huges_logl // special version of log } -;; +;; -{ .mfi +{ .mfi (p8) ldfe FR_PP2 = [GR_Poly_P],16 // Load P2 (p8) fma.s1 FR_2XM1 = FR_Two, FR_XM1, f0 // 2X0 = 2 * X0 add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2 @@ -729,18 +729,18 @@ GLOBAL_LIBM_ENTRY(acoshl) { .mfb (p8) ldfe FR_QQ2 = [GR_Poly_Q],16 // Load Q2 (p10) fma.s0 FR_Res = f0,f1,f0 // r = 0 (arg = 1) -(p10) br.ret.spnt b0 // return (arg = 1) -};; +(p10) br.ret.spnt b0 // return (arg = 1) +};; -{ .mmi +{ .mmi (p8) ldfe FR_PP1 = [GR_Poly_P],16 // Load P1 (p8) ldfe FR_QQ1 = [GR_Poly_Q],16 // Load Q1 add GR_ad_tbl_2 = 0x180, GR_ad_z_1 // Point to Constants_G_H_h2 } ;; -{ .mfi -(p8) ldfe FR_PP0 = [GR_Poly_P] // Load P0 +{ .mfi +(p8) ldfe FR_PP0 = [GR_Poly_P] // Load P0 fma.s1 FR_Tmp = f1, f1, FR_M2 // Tmp = 1 + m2 add GR_ad_tbl_3 = 0x280, GR_ad_z_1 // Point to Constants_G_H_h3 } @@ -748,17 +748,17 @@ GLOBAL_LIBM_ENTRY(acoshl) (p8) ldfe FR_QQ0 = [GR_Poly_Q] nop.f 0 (p8) br.cond.spnt near_1 // near 1 path -};; -{ .mfi +};; +{ .mfi ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi nop.f 0 mov GR_Bias = 0x0FFFF // Create exponent bias };; -{ .mfi +{ .mfi nop.m 0 frsqrta.s1 FR_Rcp, p0 = FR_M2 // Rcp = 1/m2 reciprocal appr. nop.i 0 -};; +};; { .mfi ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo @@ -773,7 +773,7 @@ GLOBAL_LIBM_ENTRY(acoshl) nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_Half, FR_Rcp, f0 // h = 0.5 * Rcp nop.i 0 };; @@ -783,14 +783,14 @@ GLOBAL_LIBM_ENTRY(acoshl) nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_M2L = FR_Tmp, f1, FR_M2L // low part of m2 = Tmp+m2l nop.i 0 };; { .mfi ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g // 16 bit Newton Raphson iteration nop.i 0 } @@ -807,7 +807,7 @@ GLOBAL_LIBM_ENTRY(acoshl) };; { .mfi nop.m 0 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g // 32 bit Newton Raphson iteration nop.i 0 } @@ -825,7 +825,7 @@ GLOBAL_LIBM_ENTRY(acoshl) { .mfi nop.m 0 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g // 64 bit Newton Raphson iteration nop.i 0 } @@ -920,7 +920,7 @@ GLOBAL_LIBM_ENTRY(acoshl) { .mfi nop.m 0 nop.f 0 - extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 + extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 };; { .mfi @@ -952,7 +952,7 @@ GLOBAL_LIBM_ENTRY(acoshl) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1 * Z_2 };; -// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) +// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) // BECAUSE OF POSSIBLE 10 CLOCKS STALL! // (Just nops added - nothing to do here) @@ -1093,7 +1093,7 @@ GLOBAL_LIBM_ENTRY(acoshl) { .mfi nop.m 0 - fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo + fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo nop.i 0 };; @@ -1166,7 +1166,7 @@ huges_logl: { .mmi ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo - sub GR_N = GR_N, GR_Bias + sub GR_N = GR_N, GR_Bias mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80 };; @@ -1185,7 +1185,7 @@ huges_logl: { .mmi ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 nop.m 0 - extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 + extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 };; { .mmi @@ -1218,7 +1218,7 @@ huges_logl: pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1*Z_2 };; -// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) +// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) // BECAUSE OF POSSIBLE 10 CLOCKS STALL! // (Just nops added - nothing to do here) @@ -1344,7 +1344,7 @@ huges_logl: };; { .mfi nop.m 0 - fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo + fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo nop.i 0 };; { .mfb @@ -1356,279 +1356,279 @@ huges_logl: // NEAR ONE INTERVAL near_1: -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 frsqrta.s1 FR_Rcp, p0 = FR_2XM1 // Rcp = 1/x reciprocal appr. &SQRT& - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PV6 = FR_PP5, FR_XM1, FR_PP4 // pv6 = P5*xm1+P4 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QV6 = FR_QQ5, FR_XM1, FR_QQ4 // qv6 = Q5*xm1+Q4 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PV4 = FR_PP3, FR_XM1, FR_PP2 // pv4 = P3*xm1+P2 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QV4 = FR_QQ3, FR_XM1, FR_QQ2 // qv4 = Q3*xm1+Q2 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_XM12 = FR_XM1, FR_XM1, f0 // xm1^2 = xm1 * xm1 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PV2 = FR_PP1, FR_XM1, FR_PP0 // pv2 = P1*xm1+P0 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QV2 = FR_QQ1, FR_XM1, FR_QQ0 // qv2 = Q1*xm1+Q0 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 - fma.s1 FR_GG = FR_Rcp, FR_2XM1, f0 // g = Rcp * x &SQRT& - nop.i 0 +{ .mfi + nop.m 0 + fma.s1 FR_GG = FR_Rcp, FR_2XM1, f0 // g = Rcp * x &SQRT& + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_Half, FR_Rcp, f0 // h = 0.5 * Rcp &SQRT& - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PV3 = FR_XM12, FR_PV6, FR_PV4//pv3=pv6*xm1^2+pv4 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QV3 = FR_XM12, FR_QV6, FR_QV4//qv3=qv6*xm1^2+qv4 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT& - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PP = FR_XM12, FR_PV3, FR_PV2 //pp=pv3*xm1^2+pv2 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QQ = FR_XM12, FR_QV3, FR_QV2 //qq=qv3*xm1^2+qv2 $POLY$ - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT& - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 frcpa.s1 FR_Y0,p0 = f1,FR_QQ // y = frcpa(b) #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g*h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_Q0 = FR_PP,FR_Y0,f0 // q = a*y #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_E0 = FR_Y0,FR_QQ,f1 // e = 1 - b*y #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT& - nop.i 0 + nop.m 0 + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT& + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_E2 = FR_E0,FR_E0,FR_E0 // e2 = e+e^2 #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_E1 = FR_E0,FR_E0,f0 // e1 = e^2 #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT& - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_Y1 = FR_Y0,FR_E2,FR_Y0 // y1 = y+y*e2 #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_E3 = FR_E1,FR_E1,FR_E0 // e3 = e+e1^2 #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_GG = FR_DD, FR_HH, FR_GG // g = d * h + g &SQRT& - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_Y2 = FR_Y1,FR_E3,FR_Y0 // y2 = y+y1*e3 #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_R0 = FR_QQ,FR_Q0,FR_PP // r = a-b*q #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 - fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT& - nop.i 0 + nop.m 0 + fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT& + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_E4 = FR_QQ,FR_Y2,f1 // e4 = 1-b*y2 #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_X_Hi = FR_R0,FR_Y2,FR_Q0 // x = q+r*y2 #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_GL = FR_DD, FR_HH, f0 // gl = d * h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_Y3 = FR_Y2,FR_E4,FR_Y2 // y3 = y2+y2*e4 #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_R1 = FR_QQ,FR_X_Hi,FR_PP // r1 = a-b*x #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_GG, FR_X_Hi, f0 // hh = gg * x_hi - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_LH = FR_GL, FR_X_Hi, f0 // lh = gl * x_hi - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_X_lo = FR_R1,FR_Y3,f0 // x_lo = r1*y3 #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_LL = FR_GL, FR_X_lo, f0 // ll = gl*x_lo - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HL = FR_GG, FR_X_lo, f0 // hl = gg * x_lo - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fms.s1 FR_Res = FR_GL, f1, FR_LL // res = gl + ll - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fms.s1 FR_Res = FR_Res, f1, FR_LH // res = res + lh - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fms.s1 FR_Res = FR_Res, f1, FR_HL // res = res + hl - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fms.s1 FR_Res = FR_Res, f1, FR_HH // res = res + hh - nop.i 0 + nop.i 0 };; { .mfb - nop.m 0 + nop.m 0 fma.s0 FR_Res = FR_Res, f1, FR_GG // result = res + gg br.ret.sptk b0 // Exit for near 1 path };; @@ -1639,9 +1639,9 @@ near_1: acoshl_lt_pone: { .mfi - nop.m 0 + nop.m 0 fmerge.s FR_Arg_X = FR_Arg, FR_Arg - nop.i 0 + nop.i 0 };; { .mfb mov GR_Parameter_TAG = 135 @@ -1679,7 +1679,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfe [GR_Parameter_X] = FR_Arg_X // Parameter 1 to stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfe [GR_Parameter_Y] = FR_Res // Parameter 3 to stack diff --git a/sysdeps/ia64/fpu/e_acosl.S b/sysdeps/ia64/fpu/e_acosl.S index 4fd345bedd..0983bc42ef 100644 --- a/sysdeps/ia64/fpu/e_acosl.S +++ b/sysdeps/ia64/fpu/e_acosl.S @@ -690,70 +690,70 @@ F_CS6 = f36 F_CS7 = f37 F_CS8 = f38 F_CS9 = f39 -F_S23 = f40 -F_S45 = f41 -F_S67 = f42 -F_S89 = f43 -F_S25 = f44 -F_S69 = f45 -F_S29 = f46 -F_X2 = f47 -F_X4 = f48 -F_TSQRT = f49 -F_DTX = f50 -F_R = f51 -F_R2 = f52 -F_R3 = f53 -F_R4 = f54 - -F_C3 = f55 -F_C5 = f56 -F_C7 = f57 -F_C9 = f58 -F_P79 = f59 -F_P35 = f60 -F_P39 = f61 - -F_ATHI = f62 -F_ATLO = f63 - -F_T1 = f64 -F_Y = f65 -F_Y2 = f66 -F_ANDMASK = f67 -F_ORMASK = f68 -F_S = f69 -F_05 = f70 -F_SQRT_1S2 = f71 -F_DS = f72 -F_Z = f73 -F_1T2 = f74 -F_DZ = f75 -F_ZE = f76 -F_YZ = f77 -F_Y1S2 = f78 -F_Y1S2X = f79 -F_1X = f80 -F_ST = f81 -F_1T2_ST = f82 -F_TSS = f83 -F_Y1S2X2 = f84 -F_DZ_TERM = f85 -F_DTS = f86 -F_DS2X = f87 -F_T2 = f88 -F_ZY1S2S = f89 -F_Y1S2_1X = f90 +F_S23 = f40 +F_S45 = f41 +F_S67 = f42 +F_S89 = f43 +F_S25 = f44 +F_S69 = f45 +F_S29 = f46 +F_X2 = f47 +F_X4 = f48 +F_TSQRT = f49 +F_DTX = f50 +F_R = f51 +F_R2 = f52 +F_R3 = f53 +F_R4 = f54 + +F_C3 = f55 +F_C5 = f56 +F_C7 = f57 +F_C9 = f58 +F_P79 = f59 +F_P35 = f60 +F_P39 = f61 + +F_ATHI = f62 +F_ATLO = f63 + +F_T1 = f64 +F_Y = f65 +F_Y2 = f66 +F_ANDMASK = f67 +F_ORMASK = f68 +F_S = f69 +F_05 = f70 +F_SQRT_1S2 = f71 +F_DS = f72 +F_Z = f73 +F_1T2 = f74 +F_DZ = f75 +F_ZE = f76 +F_YZ = f77 +F_Y1S2 = f78 +F_Y1S2X = f79 +F_1X = f80 +F_ST = f81 +F_1T2_ST = f82 +F_TSS = f83 +F_Y1S2X2 = f84 +F_DZ_TERM = f85 +F_DTS = f86 +F_DS2X = f87 +F_T2 = f88 +F_ZY1S2S = f89 +F_Y1S2_1X = f90 F_TS = f91 -F_PI2_LO = f92 -F_PI2_HI = f93 -F_S19 = f94 -F_INV1T2_2 = f95 -F_CORR = f96 -F_DZ0 = f97 - -F_C11 = f98 -F_C13 = f99 +F_PI2_LO = f92 +F_PI2_HI = f93 +F_S19 = f94 +F_INV1T2_2 = f95 +F_CORR = f96 +F_DZ0 = f97 + +F_C11 = f98 +F_C13 = f99 F_C15 = f100 F_C17 = f101 F_P1113 = f102 diff --git a/sysdeps/ia64/fpu/e_asinf.S b/sysdeps/ia64/fpu/e_asinf.S index af24165d8e..74a18dd24a 100644 --- a/sysdeps/ia64/fpu/e_asinf.S +++ b/sysdeps/ia64/fpu/e_asinf.S @@ -40,9 +40,9 @@ // History //============================================================== // 02/02/00 Initial version -// 06/28/00 Improved speed +// 06/28/00 Improved speed // 06/31/00 Changed register allocation because of some duplicate macros -// moved nan exit bundle up to gain a cycle. +// moved nan exit bundle up to gain a cycle. // 08/08/00 Improved speed by avoiding SIR flush. // 08/15/00 Bundle added after call to __libm_error_support to properly // set [the previously overwritten] GR_Parameter_RESULT. @@ -53,13 +53,13 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 02/06/03 Reordered header: .section, .global, .proc, .align - + // Description //========================================= // The asinf function computes the arc sine of x in the range [-pi,+pi]. // A doman error occurs for arguments not in the range [-1,+1]. // asinf(+-0) returns +-0 -// asinf(x) returns a Nan and raises the invalid exception for |x| >1 +// asinf(x) returns a Nan and raises the invalid exception for |x| >1 // The acosf function returns the arc cosine in the range [0, +pi] radians. // A doman error occurs for arguments not in the range [-1,+1]. @@ -252,351 +252,351 @@ LOCAL_OBJECT_END(asinf_coeff_2_table) .section .text GLOBAL_LIBM_ENTRY(asinf) - + // Load the addresses of the two tables. // Then, load the coefficients and other constants. -{ .mfi +{ .mfi alloc r32 = ar.pfs,1,8,4,0 fnma.s1 asinf_t = f8,f8,f1 dep.z ASINF_GR_1by2 = 0x3f,24,8 // 0x3f000000 -} -{ .mfi +} +{ .mfi addl ASINF_Addr1 = @ltoff(asinf_coeff_1_table),gp fma.s1 asinf_x2 = f8,f8,f0 addl ASINF_Addr2 = @ltoff(asinf_coeff_2_table),gp ;; } - -{ .mfi + +{ .mfi ld8 ASINF_Addr1 = [ASINF_Addr1] fmerge.s asinf_abs_x = f1,f8 dep ASINF_GR_3by2 = -1,r0,22,8 // 0x3fc00000 -} -{ .mlx +} +{ .mlx nop.m 999 movl ASINF_GR_5by2 = 0x40200000;; } - -{ .mfi + +{ .mfi setf.s asinf_1by2 = ASINF_GR_1by2 fmerge.s asinf_sgn_x = f8,f1 nop.i 999 -} -{ .mfi +} +{ .mfi ld8 ASINF_Addr2 = [ASINF_Addr2] nop.f 0 nop.i 999;; } - -{ .mfi + +{ .mfi setf.s asinf_5by2 = ASINF_GR_5by2 fcmp.lt.s1 p11,p12 = f8,f0 nop.i 999;; } -{ .mmf +{ .mmf ldfpd asinf_coeff_P1,asinf_coeff_P4 = [ASINF_Addr1],16 setf.s asinf_3by2 = ASINF_GR_3by2 fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan } - -{ .mfi + +{ .mfi ldfpd asinf_coeff_P7,asinf_coeff_P6 = [ASINF_Addr1],16 fma.s1 asinf_t2 = asinf_t,asinf_t,f0 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd asinf_coeff_P3,asinf_coeff_P8 = [ASINF_Addr2],16 fma.s1 asinf_x4 = asinf_x2,asinf_x2,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd asinf_coeff_P9,asinf_const_sqrt2by2 = [ASINF_Addr1] fclass.m.unc p10,p0 = f8, 0x07 //@zero nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd asinf_coeff_P5,asinf_coeff_P2 = [ASINF_Addr2],16 fma.s1 asinf_x3 = f8,asinf_x2,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfd asinf_const_piby2 = [ASINF_Addr2] frsqrta.s1 asinf_B,p0 = asinf_t nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 (p8) fma.s.s0 f8 = f8,f1,f0 (p8) br.ret.spnt b0 ;; // Exit if x=nan } - -{ .mfb + +{ .mfb nop.m 999 fcmp.eq.s1 p6,p0 = asinf_abs_x,f1 (p10) br.ret.spnt b0 ;; // Exit if x=0 -} - -{ .mfi +} + +{ .mfi nop.m 999 fcmp.gt.s1 p9,p0 = asinf_abs_x,f1 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fma.s1 asinf_x8 = asinf_x4,asinf_x4,f0 nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 fma.s1 asinf_t4 = asinf_t2,asinf_t2,f0 (p6) br.cond.spnt ASINF_ABS_ONE ;; // Branch if |x|=1 -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 asinf_x5 = asinf_x2,asinf_x3,f0 nop.i 999 } -{ .mfb +{ .mfb (p9) mov GR_Parameter_TAG = 62 fma.s1 asinf_yby2 = asinf_t,asinf_1by2,f0 (p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1 } -{ .mfi +{ .mfi nop.m 999 fma.s1 asinf_Az = asinf_t,asinf_B,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_B2 = asinf_B,asinf_B,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 asinf_poly_p1 = f8,asinf_coeff_P1,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p1 = asinf_coeff_P1,asinf_t,f1 nop.i 999;; } -{ .mfi +{ .mfi nop.m 999 fma.s1 asinf_poly_p3 = asinf_coeff_P4,asinf_x2,asinf_coeff_P3 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p6 = asinf_coeff_P7,asinf_t,asinf_coeff_P6 nop.i 999;; -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 asinf_poly_p7 = asinf_x2,asinf_coeff_P8,asinf_coeff_P7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p2 = asinf_coeff_P3,asinf_t,asinf_coeff_P2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 asinf_poly_p5 = asinf_x2,asinf_coeff_P6,asinf_coeff_P5 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p4 = asinf_coeff_P5,asinf_t,asinf_coeff_P4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.d.s1 asinf_x11 = asinf_x8,asinf_x3,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fnma.s1 asinf_dz = asinf_B2,asinf_yby2,asinf_1by2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 asinf_poly_p1a = asinf_x2,asinf_poly_p1,f8 nop.i 999 } -{ .mfi +{ .mfi nop.m 999 fma.s1 asinf_2poly_p8 = asinf_coeff_P9,asinf_t,asinf_coeff_P8 nop.i 999;; } - + // Get the absolute value of x and determine the region in which x lies -{ .mfi +{ .mfi nop.m 999 fcmp.le.s1 p7,p8 = asinf_abs_x,asinf_const_sqrt2by2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_poly_p2 = asinf_x2,asinf_poly_p3,asinf_coeff_P2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 asinf_poly_p7a = asinf_x4,asinf_coeff_P9,asinf_poly_p7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p2a = asinf_2poly_p2,asinf_t2,asinf_2poly_p1 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 asinf_sgnx_t4 = asinf_sgn_x,asinf_t4,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 asinf_2poly_p4a = asinf_2poly_p6,asinf_t2,asinf_2poly_p4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 asinf_Sz = asinf_5by2,asinf_dz,asinf_3by2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 asinf_d2z = asinf_dz,asinf_dz,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 asinf_sgn_x_piby2 = asinf_sgn_x,asinf_const_piby2,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p7) fma.d.s1 asinf_poly_Ax = asinf_x5,asinf_poly_p2,asinf_poly_p1a nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p7) fma.d.s1 asinf_poly_Bx = asinf_x4,asinf_poly_p7a,asinf_poly_p5 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 asinf_sgnx_2poly_p2 = asinf_sgn_x,asinf_2poly_p2a,f0 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal nop.i 999 } -{ .mfi +{ .mfi nop.m 999 (p8) fma.s1 asinf_2poly_p4b = asinf_2poly_p8,asinf_t4,asinf_2poly_p4a nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 asinf_Fz = asinf_d2z,asinf_Sz,asinf_dz nop.i 999;; -} +} + - -{ .mfi +{ .mfi nop.m 999 (p8) fma.d.s1 asinf_Pt = asinf_2poly_p4b,asinf_sgnx_t4,asinf_sgnx_2poly_p2 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p8) fma.d.s1 asinf_z = asinf_Az,asinf_Fz,asinf_Az nop.i 999;; -} - +} + .pred.rel "mutex",p8,p7 //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2 -{ .mfi +{ .mfi nop.m 999 (p8) fnma.s.s0 f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2 nop.i 999 -} - -{ .mfb +} + +{ .mfb nop.m 999 (p7) fma.s.s0 f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax br.ret.sptk b0 ;; -} +} ASINF_ABS_ONE: // Here for short exit if |x|=1 -{ .mfb +{ .mfb nop.m 999 fma.s.s0 f8 = asinf_sgn_x,asinf_const_piby2,