diff options
| author | Noah Goldstein <goldstein.w.n@gmail.com> | 2022-06-09 11:16:36 -0700 |
|---|---|---|
| committer | Noah Goldstein <goldstein.w.n@gmail.com> | 2022-06-09 12:51:22 -0700 |
| commit | bcc41f66a48bf764ee85fea56b8e32719e230a0a (patch) | |
| tree | 227a84a2baf472eac75532ef4c10ec4bf884dae7 | |
| parent | 3a49ce8799e7233946f8154ba05c185277f33f11 (diff) | |
| download | glibc-bcc41f66a48bf764ee85fea56b8e32719e230a0a.tar.xz glibc-bcc41f66a48bf764ee85fea56b8e32719e230a0a.zip | |
x86: Optimize svml_s_tanhf8_core_avx2.S
Optimizations are:
1. Reduce code size (-81 bytes).
2. Remove redundant move instructions.
3. Slightly improve instruction selection/scheduling where
possible.
4. Prefer registers which get short instruction encoding.
5. Reduce rodata size (-32 bytes).
Result is roughly a 17-18% speedup:
Function, New Time, Old Time, New / Old
_ZGVdN8v_tanhf, 1.977, 2.402, 0.823
| -rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S | 912 |
1 files changed, 171 insertions, 741 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S index c5c87bf5b0..55df346a00 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S @@ -70,773 +70,203 @@ * */ -/* Offsets for data table __svml_stanh_data_internal - */ -#define _dbP 0 -#define _sSignMask 4288 -#define _sAbsMask 4320 -#define _iExpMantMask 4352 -#define _iExpMask 4384 -#define _iMinIdxOfsMask 4416 -#define _iMaxIdxMask 4448 - #include <sysdep.h> +/* tanhf data tables for avx2 and sse4 implementations defined here. + */ +#include "svml_s_tanhf_rodata.S" + .section .text.avx2, "ax", @progbits ENTRY(_ZGVdN8v_tanhf_avx2) - pushq %rbp - cfi_def_cfa_offset(16) - movq %rsp, %rbp - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - andq $-32, %rsp - pushq %r12 - subq $120, %rsp - lea _dbP+16+__svml_stanh_data_internal(%rip), %r10 - vmovaps %ymm0, %ymm12 - /* Here huge arguments, INF and NaNs are filtered out to callout. */ - vpand _iExpMantMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm14 + vpand TANHF_DATA(_iExpMantMask)(%rip), %ymm0, %ymm4 + vpsubd TANHF_DATA(_iMinIdxOfsMask)(%rip), %ymm4, %ymm2 + + /* Selection of arguments between [0, 0x04280000] into ymm2.
*/ + vpxor %ymm3, %ymm3, %ymm3 + vpmaxsd %ymm3, %ymm2, %ymm2 + vpminsd TANHF_DATA(_iMaxIdxMask)(%rip), %ymm2, %ymm2 /* * small table specific variables * * Constant loading */ - vmovups _iMaxIdxMask+__svml_stanh_data_internal(%rip), %ymm8 - vpsubd _iMinIdxOfsMask+__svml_stanh_data_internal(%rip), %ymm14, %ymm9 - - /* if VMIN, VMAX is defined for I type */ - vxorps %ymm15, %ymm15, %ymm15 - vpcmpgtd %ymm15, %ymm9, %ymm0 - vpand %ymm0, %ymm9, %ymm7 - vpcmpgtd %ymm8, %ymm9, %ymm6 - vblendvps %ymm6, %ymm8, %ymm7, %ymm3 - vpsrld $14, %ymm3, %ymm1 - vpcmpgtd _iExpMask+__svml_stanh_data_internal(%rip), %ymm14, %ymm13 - vmovmskps %ymm13, %r11d - vandps _sAbsMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm10 - vandps _sSignMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm11 - vextractf128 $1, %ymm1, %xmm2 - vmovd %xmm1, %r9d - vmovd %xmm2, %ecx - vpextrd $1, %xmm2, %edx - vpextrd $1, %xmm1, %r8d - movslq %r9d, %r9 - movslq %edx, %rdx - movslq %r8d, %r8 - vpextrd $2, %xmm1, %edi - movslq %ecx, %rcx - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -8; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22 - vpextrd $3, %xmm2, %r12d - vpextrd $3, %xmm1, %esi - vpextrd $2, %xmm2, %eax - movslq %edi, %rdi - movslq %r12d, %r12 - movslq %esi, %rsi - movslq %eax, %rax - vmovupd -16(%r9, %r10), %xmm5 - vmovupd -16(%rdx, %r10), %xmm14 - vmovupd -16(%rcx, %r10), %xmm13 - vmovupd (%r9, %r10), %xmm1 - vmovupd (%r8, %r10), %xmm2 - vmovupd -16(%r8, %r10), %xmm4 - vinsertf128 $1, -16(%rdi, %r10), %ymm5, %ymm15 - vinsertf128 $1, -16(%r12, %r10), %ymm14, %ymm3 - vinsertf128 $1, -16(%rax, %r10), %ymm13, %ymm6 - vinsertf128 $1, (%rdi, %r10), %ymm1, %ymm5 - vinsertf128 $1, (%rsi, %r10), %ymm2, %ymm14 - vunpcklpd %ymm3, %ymm6, %ymm8 + vpsrld $14, %ymm2, %ymm1 + + /* We are splitting xmm1 into 8 GPRs. 
This may be faster to do with + store/load as we can take advantage of store-forwarding. */ + vmovq %xmm1, %r8 + /* We have eliminated all negative values for ymm1 so no need to sign + extend. */ + movl %r8d, %r9d + shrq $32, %r8 + + /* Store base of lookup table in rax. */ + leaq TANHF_DATA(_lookupTable)(%rip), %rax + + /* Instead of using cross-lane permutes on ymm vectors, use vinsertf128 + with memory operand. This helps alleviate bottleneck on p5. */ + vmovupd 16(%r9, %rax), %xmm5 + + vpextrq $1, %xmm1, %rsi + movl %esi, %edi + shrq $32, %rsi + + vinsertf128 $1, 16(%rdi, %rax), %ymm5, %ymm5 + + vextracti128 $1, %ymm1, %xmm2 + vmovq %xmm2, %rdx + movl %edx, %ecx + shrq $32, %rdx + + vmovupd (%rcx, %rax), %xmm6 + + vpextrq $1, %xmm2, %r10 + movl %r10d, %r11d + shrq $32, %r10 + + vinsertf128 $1, (%r11, %rax), %ymm6, %ymm6 + + vmovupd 16(%r8, %rax), %xmm1 + vinsertf128 $1, 16(%rsi, %rax), %ymm1, %ymm1 + vmovupd (%rdx, %rax), %xmm3 + vinsertf128 $1, (%r10, %rax), %ymm3, %ymm3 + + vunpcklpd %ymm3, %ymm6, %ymm7 vunpckhpd %ymm3, %ymm6, %ymm6 - vunpcklpd %ymm14, %ymm5, %ymm3 - vunpckhpd %ymm14, %ymm5, %ymm2 - vmovupd (%rcx, %r10), %xmm13 - vcvtps2pd %xmm10, %ymm5 - vextractf128 $1, %ymm10, %xmm10 - vfmadd213pd %ymm3, %ymm5, %ymm2 - vinsertf128 $1, -16(%rsi, %r10), %ymm4, %ymm0 - vmovupd (%rdx, %r10), %xmm4 - vunpcklpd %ymm0, %ymm15, %ymm9 - vunpckhpd %ymm0, %ymm15, %ymm7 - vfmadd213pd %ymm7, %ymm5, %ymm2 - vfmadd213pd %ymm9, %ymm5, %ymm2 - vinsertf128 $1, (%r12, %r10), %ymm4, %ymm0 - vcvtps2pd %xmm10, %ymm4 - vinsertf128 $1, (%rax, %r10), %ymm13, %ymm15 - vunpcklpd %ymm0, %ymm15, %ymm1 - vunpckhpd %ymm0, %ymm15, %ymm0 - vfmadd213pd %ymm1, %ymm4, %ymm0 - vcvtpd2ps %ymm2, %xmm1 - vfmadd213pd %ymm6, %ymm4, %ymm0 - vfmadd213pd %ymm8, %ymm4, %ymm0 - vcvtpd2ps %ymm0, %xmm0 - vinsertf128 $1, %xmm0, %ymm1, %ymm2 - vorps %ymm11, %ymm2, %ymm0 - testl %r11d, %r11d - /* Go to special inputs processing branch */ - jne L(SPECIAL_VALUES_BRANCH) - # LOE rbx r13 r14 r15 r11d ymm0
ymm12 + vunpcklpd %ymm1, %ymm5, %ymm3 + vunpckhpd %ymm1, %ymm5, %ymm1 - /* Restore registers - * and exit the function - */ + vmovaps TANHF_DATA(_sAbsMask)(%rip), %ymm11 + /* Store special cases in ymm15. */ + vpcmpgtd TANHF_DATA(_iExpMask)(%rip), %ymm4, %ymm15 -L(EXIT): - addq $120, %rsp - cfi_restore(12) - popq %r12 - movq %rbp, %rsp - popq %rbp - cfi_def_cfa(7, 8) - cfi_restore(6) - ret - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -8; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22 + vandps %ymm11, %ymm0, %ymm4 - /* Branch to process - * special inputs - */ + vcvtps2pd %xmm4, %ymm5 -L(SPECIAL_VALUES_BRANCH): - vmovups %ymm12, 32(%rsp) - vmovups %ymm0, 64(%rsp) - # LOE rbx r13 r14 r15 r11d ymm0 + vextractf128 $1, %ymm4, %xmm4 + vcvtps2pd %xmm4, %ymm4 - xorl %r12d, %r12d - # LOE rbx r13 r14 r15 r11d r12d + vmovupd 16(%rcx, %rax), %xmm2 + vinsertf128 $1, 16(%r11, %rax), %ymm2, %ymm2 - vzeroupper - movq %r13, 8(%rsp) - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22 - movl %r11d, %r13d - movq %r14, (%rsp) - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -128; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r15 r12d r13d - - /* Range mask - * bits check - */ + vfmadd213pd %ymm3, %ymm5, %ymm1 + + vmovupd 16(%rdx, %rax), %xmm3 + vinsertf128 $1, 16(%r10, %rax), %ymm3, %ymm3 + + vunpcklpd %ymm3, %ymm2, %ymm10 + vunpckhpd %ymm3, %ymm2, %ymm2 + + vfmadd213pd %ymm10, %ymm4, %ymm2 + vfmadd213pd %ymm6, %ymm4, %ymm2 + vfmadd213pd %ymm7, %ymm4, %ymm2 + 
vcvtpd2ps %ymm2, %xmm2 + + vmovupd (%r9, %rax), %xmm7 + vinsertf128 $1, (%rdi, %rax), %ymm7, %ymm7 + + vmovupd (%r8, %rax), %xmm3 + vinsertf128 $1, (%rsi, %rax), %ymm3, %ymm3 + + vunpckhpd %ymm3, %ymm7, %ymm4 + vunpcklpd %ymm3, %ymm7, %ymm7 -L(RANGEMASK_CHECK): - btl %r12d, %r13d + vfmadd213pd %ymm4, %ymm5, %ymm1 + vfmadd213pd %ymm7, %ymm5, %ymm1 + + + vcvtpd2ps %ymm1, %xmm1 + vinsertf128 $1, %xmm2, %ymm1, %ymm1 + + vmovmskps %ymm15, %edx + vandnps %ymm0, %ymm11, %ymm2 + testl %edx, %edx + /* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 ymm0 ymm1 ymm2 + /* Wait until after the branch to write over ymm0. */ + vorps %ymm2, %ymm1, %ymm0 + /* No stack restoration on the fastpath. */ + ret - /* Call scalar math function */ - jc L(SCALAR_MATH_CALL) - # LOE rbx r15 r12d r13d - /* Special inputs - * processing loop + /* Cold case. edx has 1s where there was a special value that + needs to be handled by a tanhf call. Optimize for code size + more so than speed here. */ +L(SPECIAL_VALUES_BRANCH): + # LOE rbx rdx r12 r13 r14 r15 ymm0 ymm1 ymm2 + /* Use r13 to save/restore the stack. This allows us to use rbp as + callee save register saving code size. */ + pushq %r13 + cfi_adjust_cfa_offset(8) + cfi_offset(r13, -16) + /* Need to callee save registers to preserve state across tanhf calls. */ + pushq %rbx + cfi_adjust_cfa_offset(8) + cfi_offset(rbx, -24) + pushq %rbp + cfi_adjust_cfa_offset(8) + cfi_offset(rbp, -32) + movq %rsp, %r13 + cfi_def_cfa_register(r13) + + /* Align stack and make room for 2x ymm vectors. */ + andq $-32, %rsp + addq $-64, %rsp + + /* Save all already computed results. */ + vorps %ymm2, %ymm1, %ymm1 + vmovaps %ymm1, (%rsp) + /* Save original input (ymm0 unchanged up to this point). */ + vmovaps %ymm0, 32(%rsp) + + vzeroupper + /* edx has 1s where there was a special value that needs to be handled + by a tanhf call.
*/ + movl %edx, %ebx L(SPECIAL_VALUES_LOOP): - incl %r12d - cmpl $8, %r12d - - /* Check bits in range mask */ - jl L(RANGEMASK_CHECK) - # LOE rbx r15 r12d r13d - - movq 8(%rsp), %r13 - cfi_restore(13) - movq (%rsp), %r14 - cfi_restore(14) - vmovups 64(%rsp), %ymm0 - - /* Go to exit */ - jmp L(EXIT) - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -128; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r13 r14 r15 ymm0 - - /* Scalar math fucntion call - * to process special input - */ + # LOE rbx rbp r12 r13 r14 r15 + /* Use rbp as index for special value that is saved across calls to + tanhf. We technically don't need a callee save register here as offset + to rsp is always [0, 28] so we can restore rsp by realigning to 64. + Essentially the tradeoff is 1 extra save/restore vs 2 extra instructions + in the loop. Realigning also costs more code size. */ + xorl %ebp, %ebp + tzcntl %ebx, %ebp -L(SCALAR_MATH_CALL): - movl %r12d, %r14d - movss 32(%rsp, %r14, 4), %xmm0 + /* Scalar math function call to process special input. */ + movss 32(%rsp, %rbp, 4), %xmm0 call tanhf@PLT - # LOE rbx r14 r15 r12d r13d xmm0 - movss %xmm0, 64(%rsp, %r14, 4) + /* No good way to avoid the store-forwarding fault this will cause on + return. `lfence` avoids the SF fault but at greater cost as it + serializes stack/callee save restoration.
*/ + movss %xmm0, (%rsp, %rbp, 4) + + blsrl %ebx, %ebx + jnz L(SPECIAL_VALUES_LOOP) + # LOE r12 r13 r14 r15 - /* Process special inputs in loop */ - jmp L(SPECIAL_VALUES_LOOP) - # LOE rbx r15 r12d r13d -END(_ZGVdN8v_tanhf_avx2) - .section .rodata, "a" - .align 32 - -#ifdef __svml_stanh_data_internal_typedef -typedef unsigned int VUINT32; -typedef struct { - __declspec(align(32)) VUINT32 _dbP[(134*4)][2]; - __declspec(align(32)) VUINT32 _sSignMask[8][1]; - __declspec(align(32)) VUINT32 _sAbsMask[8][1]; - __declspec(align(32)) VUINT32 _iExpMantMask[8][1]; - __declspec(align(32)) VUINT32 _iExpMask[8][1]; - __declspec(align(32)) VUINT32 _iMinIdxOfsMask[8][1]; - __declspec(align(32)) VUINT32 _iMaxIdxMask[8][1]; -} __svml_stanh_data_internal; -#endif -__svml_stanh_data_internal: - /* Pol_000: err=7.93e-09, x in [0.0000000; 0.0312500]. */ - .quad 0x0000000000000000 /* A00 = +0.000000000000000000000e-01 */ - .quad 0x3FF00000022C70EB /* A01 = +1.000000008097283510367e+00 */ - .quad 0xBED00E878CFFA194 /* A02 = -3.828228912518614443549e-06 */ - .quad 0xBFD551766D0607A9 /* A03 = -3.330970825846813476723e-01 */ - .quad 0xBE53D60CE3E4C297 /* A00 = -1.847383956330407336230e-08 */ - .quad 0x3FF000024177CF5C /* A01 = +1.000002151235967140508e+00 */ - .quad 0xBF1758BC94A51A25 /* A02 = -8.906031613262943753568e-05 */ - .quad 0xBFD53EAE67E0D4F0 /* A03 = -3.319507612644221339337e-01 */ - .quad 0xBE5A9E47EF32D6FE /* A00 = -2.479020984039698285657e-08 */ - .quad 0x3FF00002DA983057 /* A01 = +1.000002721676556793895e+00 */ - .quad 0xBF1BD953509E94AA /* A02 = -1.062352277175377670507e-04 */ - .quad 0xBFD53BDB562EEDD5 /* A03 = -3.317783681520414806876e-01 */ - .quad 0xBE6191BBE496D294 /* A00 = -3.272532162914017685901e-08 */ - .quad 0x3FF0000390492017 /* A01 = +1.000003398528866105366e+00 */ - .quad 0xBF20727E814A57CE /* A02 = -1.254825043772153972919e-04 */ - .quad 0xBFD538DE060A6F22 /* A03 = -3.315959033004550748913e-01 */ - .quad 0xBE66DAFA2A893A25 /* A00 = -4.257146219278012568149e-08 */ 
- .quad 0x3FF0000465E08CD1 /* A01 = +1.000004194219219266770e+00 */ - .quad 0xBF2341C765EF91B6 /* A02 = -1.469188600530365522261e-04 */ - .quad 0xBFD535B6841FAF9E /* A03 = -3.314033785124993469751e-01 */ - .quad 0xBE6D5794E361E964 /* A00 = -5.465394929765249413434e-08 */ - .quad 0x3FF000055EE2A0CB /* A01 = +1.000005121846742950353e+00 */ - .quad 0xBF265E6C77E66C8B /* A02 = -1.706607253709506650304e-04 */ - .quad 0xBFD53264DDCCEDA6 /* A03 = -3.312008062382240103361e-01 */ - .quad 0xBE729C844D374A6E /* A00 = -6.933284462462096107184e-08 */ - .quad 0x3FF000067F019093 /* A01 = +1.000006195180536350264e+00 */ - .quad 0xBF29CC5348D6DCE5 /* A02 = -1.968242326435338705130e-04 */ - .quad 0xBFD52EE92121ED35 /* A03 = -3.309881995734998416658e-01 */ - .quad 0xBE775AEA17EAA872 /* A00 = -8.700465590574974405858e-08 */ - .quad 0x3FF00007CA1D66B8 /* A01 = +1.000007428656699559610e+00 */ - .quad 0xBF2D8F5EB98A2637 /* A02 = -2.255252009216044881395e-04 */ - .quad 0xBFD52B435CDF9128 /* A03 = -3.307655722585587376727e-01 */ - .quad 0xBE7D04DA28C343F0 /* A00 = -1.081040272327705484794e-07 */ - .quad 0x3FF000094443CCF5 /* A01 = +1.000008837375216730337e+00 */ - .quad 0xBF30D5B76C947AE5 /* A02 = -2.568791210978817814332e-04 */ - .quad 0xBFD52773A0776FAD /* A03 = -3.305329386764651045105e-01 */ - .quad 0xBE81DD77A12C51C7 /* A00 = -1.331054169875768625701e-07 */ - .quad 0x3FF0000AF1AFD2DA /* A01 = +1.000010437096696680470e+00 */ - .quad 0xBF331230624C1680 /* A02 = -2.910011410651516805537e-04 */ - .quad 0xBFD52379FC0B61DF /* A03 = -3.302903138515186909352e-01 */ - .quad 0xBE85D04EEEB3C435 /* A00 = -1.625247628488202841012e-07 */ - .quad 0x3FF0000CD6C9B1F2 /* A01 = +1.000012244238970726684e+00 */ - .quad 0xBF357F0742FADDD4 /* A02 = -3.280060509313874068243e-04 */ - .quad 0xBFD51F56806D0E81 /* A03 = -3.300377134475880880338e-01 */ - .quad 0xBE8A6E289B59681B /* A00 = -1.969211333326924655065e-07 */ - .quad 0x3FF0000EF8268F72 /* A01 = +1.000014275873550406715e+00 */ - .quad 0xBF381E277A1B747A 
/* A02 = -3.680082682942575423093e-04 */ - .quad 0xBFD51B093F1D6FD4 /* A03 = -3.297751537663746734808e-01 */ - .quad 0xBE8FCBC40EE9ABD5 /* A00 = -2.368983653301529373887e-07 */ - .quad 0x3FF000115A883B6C /* A01 = +1.000016549721943981410e+00 */ - .quad 0xBF3AF17AC974B3D9 /* A02 = -4.111218235774406434303e-04 */ - .quad 0xBFD516924A4C549C /* A03 = -3.295026517456081105450e-01 */ - .quad 0xBE92FFBC60A3F956 /* A00 = -2.831066871072026054144e-07 */ - .quad 0x3FF0001402DCED8A /* A01 = +1.000019084151832604590e+00 */ - .quad 0xBF3DFAE9390C4801 /* A02 = -4.574603454311488280083e-04 */ - .quad 0xBFD511F1B4D7DC3A /* A03 = -3.292202249571719585575e-01 */ - .quad 0xBE9690A22F96D5AD /* A00 = -3.362443262393081632612e-07 */ - .quad 0x3FF00016F63EFF5D /* A01 = +1.000021898173108825247e+00 */ - .quad 0xBF409E2C839605BB /* A02 = -5.071370461992499986334e-04 */ - .quad 0xBFD50D27924BEE00 /* A03 = -3.289278916051614487515e-01 */ - .quad 0xBE9AA56C65E72A73 /* A00 = -3.970591019557469835586e-07 */ - .quad 0x3FF0001A39F4A43E /* A01 = +1.000025011433776978009e+00 */ - .quad 0xBF425BD74C3D6667 /* A02 = -5.602647074553602319844e-04 */ - .quad 0xBFD50833F6E1ABA2 /* A03 = -3.286256705238718156536e-01 */ - .quad 0xBE9F4BD4FF1A83B0 /* A00 = -4.663500013744687071912e-07 */ - .quad 0x3FF0001DD36F9EC2 /* A01 = +1.000028444215715683896e+00 */ - .quad 0xBF44376634149405 /* A02 = -6.169556656102642569831e-04 */ - .quad 0xBFD50316F77EDEE5 /* A03 = -3.283135811757190158922e-01 */ - .quad 0xBEA3B625387BB079 /* A00 = -5.874486399249461304297e-07 */ - .quad 0x3FF00023E14CFBA9 /* A01 = +1.000034217911642153709e+00 */ - .quad 0xBF47392F923218D2 /* A02 = -7.087213783883111826306e-04 */ - .quad 0xBFD4FB1FACDEB938 /* A03 = -3.278273761924483942209e-01 */ - .quad 0xBEAA6E24F543500A /* A00 = -7.876828740601738750574e-07 */ - .quad 0x3FF0002D5C6E8412 /* A01 = +1.000043259679163742959e+00 */ - .quad 0xBF4BAF02BD7FDD70 /* A02 = -8.448375110664940040861e-04 */ - .quad 0xBFD4EFEE6527A7DE /* A03 = 
-3.271442401734229177279e-01 */ - .quad 0xBEB16E3EBE2157D0 /* A00 = -1.038947396133402500647e-06 */ - .quad 0x3FF00038990FEE2F /* A01 = +1.000053975962952312884e+00 */ - .quad 0xBF50569481C574CB /* A02 = -9.972048056490652716971e-04 */ - .quad 0xBFD4E419278DA2B4 /* A03 = -3.264220129263251113372e-01 */ - .quad 0xBEB6A7B6723165D4 /* A00 = -1.350350836279403750524e-06 */ - .quad 0x3FF00045CAB4158E /* A01 = +1.000066558657042303793e+00 */ - .quad 0xBF531D7C9C849108 /* A02 = -1.166698160951775212202e-03 */ - .quad 0xBFD4D7A0BB33B152 /* A03 = -3.256608799117844954552e-01 */ - .quad 0xBEBD0EE2A8654AFD /* A00 = -1.732000471561702711532e-06 */ - .quad 0x3FF00055276F18D6 /* A01 = +1.000081209219890521211e+00 */ - .quad 0xBF562FDBA3FB6C6C /* A02 = -1.354183666925102939860e-03 */ - .quad 0xBFD4CA85F1B93DB2 /* A03 = -3.248610363561638125773e-01 */ - .quad 0xBEC269D4036A207E /* A00 = -2.195047297096822741730e-06 */ - .quad 0x3FF00066E7DA6E4E /* A01 = +1.000098138500919997540e+00 */ - .quad 0xBF5991499FC36B3A /* A02 = -1.560518167983372759405e-03 */ - .quad 0xBFD4BCC9A72283D6 /* A03 = -3.240226871658341556426e-01 */ - .quad 0xBEC7154B6C09CFE1 /* A00 = -2.751729738565190291276e-06 */ - .quad 0x3FF0007B47086B80 /* A01 = +1.000117566559055148900e+00 */ - .quad 0xBF5D455433B4F8F4 /* A02 = -1.786548832412968197680e-03 */ - .quad 0xBFD4AE6CC1BFE145 /* A03 = -3.231460468373550942722e-01 */ - .quad 0xBECCA68CC64A0F8A /* A00 = -3.415415948561670285790e-06 */ - .quad 0x3FF00092827742F7 /* A01 = +1.000139722473418535387e+00 */ - .quad 0xBF60A7BF15A527AF /* A02 = -2.033112728132522705610e-03 */ - .quad 0xBFD49F703214084C /* A03 = -3.222313393636155876010e-01 */ - .quad 0xBED19E68676B241B /* A00 = -4.200644630977303616698e-06 */ - .quad 0x3FF000ACDA037B26 /* A01 = +1.000164844146362863597e+00 */ - .quad 0xBF62D99F836A02F8 /* A02 = -2.301036405072284102280e-03 */ - .quad 0xBFD48FD4F2B91B28 /* A03 = -3.212787981359945810311e-01 */ - .quad 0xBED57CF4B0C7AA54 /* A00 = 
-5.123164339408145209103e-06 */ - .quad 0x3FF000CA8FD9E1A1 /* A01 = +1.000193178099017865534e+00 */ - .quad 0xBF653A014548E686 /* A02 = -2.591135484433962181405e-03 */ - .quad 0xBFD47F9C0844B38F /* A03 = -3.202886658426046806447e-01 */ - .quad 0xBEDA012B1B1A41E2 /* A00 = -6.199971197454598722328e-06 */ - .quad 0x3FF000EBE868FDF4 /* A01 = +1.000224979259539459520e+00 */ - .quad 0xBF67CA9427E0A544 /* A02 = -2.904214255086275467410e-03 */ - .quad 0xBFD46EC6812ADB37 /* A03 = -3.192611943626845749655e-01 */ - .quad 0xBEDF3EAC5BF12194 /* A00 = -7.449344990702664567927e-06 */ - .quad 0x3FF001112A520784 /* A01 = +1.000260510744255704196e+00 */ - .quad 0xBF6A8D01ABDA4DC4 /* A02 = -3.241065277345108255891e-03 */ - .quad 0xBFD45D55759FFA4A /* A03 = -3.181966446572103146551e-01 */ - .quad 0xBEE2A541BC274267 /* A00 = -8.890883582164319970972e-06 */ - .quad 0x3FF0013A9E5961F2 /* A01 = +1.000300043631906721231e+00 */ - .quad 0xBF6D82ECD080C540 /* A02 = -3.602468994380686462264e-03 */ - .quad 0xBFD44B4A0779C0AD /* A03 = -3.170952866557950611259e-01 */ - .quad 0xBEE61D97609A27F4 /* A00 = -1.054553560499505625520e-05 */ - .quad 0x3FF001688F56A3AF /* A01 = +1.000343856731187974773e+00 */ - .quad 0xBF7056F8EFB683EC /* A02 = -3.989193351487490407647e-03 */ - .quad 0xBFD438A5620F0F74 /* A03 = -3.159573991399533543500e-01 */ - .quad 0xBEEA145429EDD370 /* A00 = -1.243563138839952927732e-05 */ - .quad 0x3FF0019B4A242A67 /* A01 = +1.000392236341804297339e+00 */ - .quad 0xBF7207D31CA78D9B /* A02 = -4.401993423445739288258e-03 */ - .quad 0xBFD42568BA16E7CD /* A03 = -3.147832696228050619602e-01 */ - .quad 0xBEEE96370D52680F /* A00 = -1.458491207477835326165e-05 */ - .quad 0x3FF001D31D8E4115 /* A01 = +1.000445476009251821736e+00 */ - .quad 0xBF73D4CC11EDC094 /* A02 = -4.841611050196221316400e-03 */ - .quad 0xBFD411954D8664E7 /* A03 = -3.135731942252974469021e-01 */ - .quad 0xBEF338C046215EF8 /* A00 = -1.833122622260562810219e-05 */ - .quad 0x3FF00230C32C2EC1 /* A01 = 
+1.000534784691737621998e+00 */ - .quad 0xBF76BD019BCC5DAF /* A02 = -5.551344188254799492943e-03 */ - .quad 0xBFD3F2C7156DC21E /* A03 = -3.116929730668135389848e-01 */ - .quad 0xBEF9B15EAE411EAE /* A00 = -2.450261207822986676092e-05 */ - .quad 0x3FF002C2DF057A4D /* A01 = +1.000674124886830940184e+00 */ - .quad 0xBF7B08CCD9AC1E30 /* A02 = -6.600189396301511801646e-03 */ - .quad 0xBFD3C7A7A114FED8 /* A03 = -3.090609620157755976777e-01 */ - .quad 0xBF00E36483C373B3 /* A00 = -3.221178528332122595812e-05 */ - .quad 0x3FF0036F419480D7 /* A01 = +1.000838524028997644777e+00 */ - .quad 0xBF7FD255D1777007 /* A02 = -7.768950679260206403087e-03 */ - .quad 0xBFD39A453911D6CE /* A03 = -3.062909180947429588215e-01 */ - .quad 0xBF05DFA04DD12059 /* A00 = -4.172046622180685472624e-05 */ - .quad 0x3FF00438B2A03D8D /* A01 = +1.001030633695197069599e+00 */ - .quad 0xBF828F8DBB4A9D10 /* A02 = -9.062869337255224921890e-03 */ - .quad 0xBFD36AAB704697D9 /* A03 = -3.033856007044711255993e-01 */ - .quad 0xBF0BF3E0C647DEFB /* A00 = -5.331544597092331081714e-05 */ - .quad 0x3FF005221063D36D /* A01 = +1.001253189109060359741e+00 */ - .quad 0xBF857A2CB3C96102 /* A02 = -1.048693584122917590862e-02 */ - .quad 0xBFD338E65BBB4FEC /* A03 = -3.003478904549854444639e-01 */ - .quad 0xBF11A506ED7C9D31 /* A00 = -6.730894835681591541979e-05 */ - .quad 0x3FF0062E4D0EA92A /* A01 = +1.001508999829250345925e+00 */ - .quad 0xBF88AB82C2761AF3 /* A02 = -1.204588085125866091241e-02 */ - .quad 0xBFD305028D6BD206 /* A03 = -2.971807843271395688234e-01 */ - .quad 0xBF1607C0922D9BF1 /* A00 = -8.403885708006799337092e-05 */ - .quad 0x3FF007606C341961 /* A01 = +1.001800940198869449560e+00 */ - .quad 0xBF8C25E6DA487BCF /* A02 = -1.374416688582682892494e-02 */ - .quad 0xBFD2CF0D0EE8F7B5 /* A03 = -2.938873906713255768075e-01 */ - .quad 0xBF1B3A8480A0A16D /* A00 = -1.038688061788578038307e-04 */ - .quad 0x3FF008BB802D02D6 /* A01 = +1.002131939589323561535e+00 */ - .quad 0xBF8FEB8AE99FD100 /* A02 = 
-1.558598065819483124983e-02 */ - .quad 0xBFD297135BD0911B /* A03 = -2.904709240558688843059e-01 */ - .quad 0xBF20ABB9BDB75C65 /* A00 = -1.271881327357976163798e-04 */ - .quad 0x3FF00A42A76D8CD1 /* A01 = +1.002504972472525901495e+00 */ - .quad 0xBF91FF3D752BB9E6 /* A02 = -1.757522609380570560722e-02 */ - .quad 0xBFD25D235C1F88B4 /* A03 = -2.869346999779154305799e-01 */ - .quad 0xBF243D3254425461 /* A00 = -1.544116913733432829448e-04 */ - .quad 0x3FF00BF909D1795E /* A01 = +1.002923048355647051011e+00 */ - .quad 0xBF94304E04D44942 /* A02 = -1.971551804042204897316e-02 */ - .quad 0xBFD2214B5E61CFA6 /* A03 = -2.832821294498394371075e-01 */ - .quad 0xBF286070011B61CE /* A00 = -1.859795307186510085994e-04 */ - .quad 0x3FF00DE1D5E1627E /* A01 = +1.003389201612804537689e+00 */ - .quad 0xBF9689D5F4163F59 /* A02 = -2.201017668045266231780e-02 */ - .quad 0xBFD1E39A11C3B42C /* A03 = -2.795167134743816728104e-01 */ - .quad 0xBF2D250B366A79E8 /* A00 = -2.223564326486314902259e-04 */ - .quad 0x3FF010003E134001 /* A01 = +1.003906481248123094829e+00 */ - .quad 0xBF990C9FF91F6F81 /* A02 = -2.446222265267250853271e-02 */ - .quad 0xBFD1A41E80084CDC /* A03 = -2.756420374218586655246e-01 */ - .quad 0xBF314DB5DDC2A30E /* A00 = -2.640313157465248123865e-04 */ - .quad 0x3FF012577608921B /* A01 = +1.004477940624503018441e+00 */ - .quad 0xBF9BB9626875B0C9 /* A02 = -2.707437288829409385849e-02 */ - .quad 0xBFD162E80768A9D0 /* A03 = -2.716617653228725615122e-01 */ - .quad 0xBF346A6133808864 /* A00 = -3.115165050094957730625e-04 */ - .quad 0x3FF014EAAFCC88A3 /* A01 = +1.005106627192198898157e+00 */ - .quad 0xBF9E90BEF9BF7419 /* A02 = -2.984903716411588595059e-02 */ - .quad 0xBFD12006545F7FAD /* A03 = -2.675796340899932457269e-01 */ - .quad 0xBF37F180DC3848EA /* A00 = -3.653468704395550778821e-04 */ - .quad 0x3FF017BD19147861 /* A01 = +1.005795572250939295955e+00 */ - .quad 0xBFA0C9A14C702E07 /* A02 = -3.278831537326359207851e-02 */ - .quad 0xBFD0DB895B650092 /* A03 = 
-2.633994476818851682154e-01 */ - .quad 0xBF3BEC6AAC6D7635 /* A00 = -4.260788377246944457107e-04 */ - .quad 0x3FF01AD1D884E719 /* A01 = +1.006547780778822565040e+00 */ - .quad 0xBFA260B2A1B1434A /* A02 = -3.589399551186163439542e-02 */ - .quad 0xBFD09581529E93D6 /* A03 = -2.591250712233067465817e-01 */ - .quad 0xBF4164E26167882B /* A00 = -5.308251737086202562063e-04 */ - .quad 0x3FF01FEF14B62B81 /* A01 = +1.007796364693348545316e+00 */ - .quad 0xBFA4EB014538AA42 /* A02 = -4.085544557559163403315e-02 */ - .quad 0xBFD029D36FEAF41F /* A03 = -2.525528519580024222613e-01 */ - .quad 0xBF46F6FFF4E53DC8 /* A00 = -7.008313930700277652464e-04 */ - .quad 0x3FF027CBB51CBBA0 /* A01 = +1.009715754956893363214e+00 */ - .quad 0xBFA89DEC9FEC112E /* A02 = -4.807986690687680864098e-02 */ - .quad 0xBFCF2A99464D0DB4 /* A03 = -2.434875100390009317053e-01 */ - .quad 0xBF4DCC9C4F66A4D9 /* A00 = -9.094012482836712945103e-04 */ - .quad 0x3FF030E7CFCCD583 /* A01 = +1.011939822882909068014e+00 */ - .quad 0xBFACAA3B95814081 /* A02 = -5.598627281199331645611e-02 */ - .quad 0xBFCDF78F15 |
