diff options
| author | Sunil K Pandey <skpgkp2@gmail.com> | 2022-03-07 10:47:15 -0800 |
|---|---|---|
| committer | Sunil K Pandey <skpgkp2@gmail.com> | 2022-03-07 21:44:09 -0800 |
| commit | 28ba5ee77fa61eb951736ec751e1921d4580aa2a (patch) | |
| tree | 832acfa0bbce4a73547c44bab12d690def40d86e | |
| parent | 06c7208f27b8c44f115ae357a97a7bbc7085497a (diff) | |
| download | glibc-28ba5ee77fa61eb951736ec751e1921d4580aa2a.tar.xz glibc-28ba5ee77fa61eb951736ec751e1921d4580aa2a.zip | |
x86_64: Fix svml_d_tanh4_core_avx2.S code formatting
This commit contains following formatting changes
1. Instructions preceded by a tab.
2. Instruction less than 8 characters in length have a tab
between it and the first operand.
3. Instruction greater than 7 characters in length have a
space between it and the first operand.
4. Tabs after `#define`d names and their value.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant .text section.
8. 1 space between line content and line comment.
9. Space after all commas.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
| -rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S | 2330 |
1 file changed, 1164 insertions, 1166 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S index d2a971ead5..7ddf145b25 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S @@ -28,7 +28,7 @@ * and to approximate tanh(.) with a polynomial on each of them. * * IEEE SPECIAL CONDITIONS: - * x = [+,-]0, r = [+,-]0 + * x = [+, -]0, r = [+, -]0 * x = +Inf, r = +1 * x = -Inf, r = -1 * x = QNaN, r = QNaN @@ -72,1208 +72,1206 @@ /* Offsets for data table __svml_dtanh_data_internal */ -#define _dbP 0 -#define _dbSignMask 7680 -#define _dbAbsMask 7712 -#define _iExpMantMask 7744 -#define _iExpMask 7776 -#define _iMinIdxOfsMask 7808 -#define _iMaxIdxMask 7840 +#define _dbP 0 +#define _dbSignMask 7680 +#define _dbAbsMask 7712 +#define _iExpMantMask 7744 +#define _iExpMask 7776 +#define _iMinIdxOfsMask 7808 +#define _iMaxIdxMask 7840 #include <sysdep.h> - .text - .section .text.avx2,"ax",@progbits + .section .text.avx2, "ax", @progbits ENTRY(_ZGVdN4v_tanh_avx2) - pushq %rbp - cfi_def_cfa_offset(16) - movq %rsp, %rbp - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - andq $-32, %rsp - subq $96, %rsp - lea _dbP+96+__svml_dtanh_data_internal(%rip), %r8 - vmovupd %ymm0, (%rsp) + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-32, %rsp + subq $96, %rsp + lea _dbP+96+__svml_dtanh_data_internal(%rip), %r8 + vmovupd %ymm0, (%rsp) -/* if VMIN, VMAX is defined for I type */ - vpxor %xmm11, %xmm11, %xmm11 + /* if VMIN, VMAX is defined for I type */ + vpxor %xmm11, %xmm11, %xmm11 -/* Constant loading */ - vmovups _iMaxIdxMask+__svml_dtanh_data_internal(%rip), %xmm8 - vandpd _dbAbsMask+__svml_dtanh_data_internal(%rip), %ymm0, %ymm1 - vandpd _dbSignMask+__svml_dtanh_data_internal(%rip), %ymm0, %ymm2 - vextractf128 $1, %ymm0, %xmm15 - vshufps $221, %xmm15, %xmm0, %xmm14 + /* Constant loading */ + vmovups 
_iMaxIdxMask+__svml_dtanh_data_internal(%rip), %xmm8 + vandpd _dbAbsMask+__svml_dtanh_data_internal(%rip), %ymm0, %ymm1 + vandpd _dbSignMask+__svml_dtanh_data_internal(%rip), %ymm0, %ymm2 + vextractf128 $1, %ymm0, %xmm15 + vshufps $221, %xmm15, %xmm0, %xmm14 -/* Here huge arguments, INF and NaNs are filtered out to callout. */ - vpand _iExpMantMask+__svml_dtanh_data_internal(%rip), %xmm14, %xmm12 - vpsubd _iMinIdxOfsMask+__svml_dtanh_data_internal(%rip), %xmm12, %xmm9 - vpcmpgtd %xmm11, %xmm9, %xmm10 - vpcmpgtd %xmm8, %xmm9, %xmm0 - vpand %xmm10, %xmm9, %xmm7 - blendvps %xmm0, %xmm8, %xmm7 + /* Here huge arguments, INF and NaNs are filtered out to callout. */ + vpand _iExpMantMask+__svml_dtanh_data_internal(%rip), %xmm14, %xmm12 + vpsubd _iMinIdxOfsMask+__svml_dtanh_data_internal(%rip), %xmm12, %xmm9 + vpcmpgtd %xmm11, %xmm9, %xmm10 + vpcmpgtd %xmm8, %xmm9, %xmm0 + vpand %xmm10, %xmm9, %xmm7 + blendvps %xmm0, %xmm8, %xmm7 -/* - * VSHRIMM( I, iIndex, = iIndex, (17 - 4) ); - * VGATHER_MATRIX( L2D, p, TAB._dbP, iIndex, 0, T_ITEM_SIZE, T_ITEM_GRAN, 13, 0, 0 ); - */ - vpsrld $10, %xmm7, %xmm6 - vmovd %xmm6, %edx - vpcmpgtd _iExpMask+__svml_dtanh_data_internal(%rip), %xmm12, %xmm13 - vmovmskps %xmm13, %eax - vpextrd $1, %xmm6, %ecx - movslq %edx, %rdx - movslq %ecx, %rcx - vpextrd $2, %xmm6, %esi - vpextrd $3, %xmm6, %edi - movslq %esi, %rsi - movslq %edi, %rdi - vmovupd -96(%rdx,%r8), %xmm3 - vmovupd -96(%rcx,%r8), %xmm4 - vmovupd -80(%rcx,%r8), %xmm13 - vmovupd -64(%rcx,%r8), %xmm9 - vmovupd -80(%rdx,%r8), %xmm14 - vmovupd -64(%rdx,%r8), %xmm10 - vmovupd -48(%rdx,%r8), %xmm6 - vinsertf128 $1, -96(%rsi,%r8), %ymm3, %ymm0 - vinsertf128 $1, -96(%rdi,%r8), %ymm4, %ymm15 - vmovupd -48(%rcx,%r8), %xmm3 - vunpckhpd %ymm15, %ymm0, %ymm0 - vinsertf128 $1, -80(%rsi,%r8), %ymm14, %ymm12 - vinsertf128 $1, -64(%rsi,%r8), %ymm10, %ymm8 - vinsertf128 $1, -80(%rdi,%r8), %ymm13, %ymm11 - vinsertf128 $1, -64(%rdi,%r8), %ymm9, %ymm7 - vunpcklpd %ymm11, %ymm12, %ymm15 - vunpckhpd %ymm11, 
%ymm12, %ymm14 - vunpcklpd %ymm7, %ymm8, %ymm13 - vunpckhpd %ymm7, %ymm8, %ymm12 - vmovupd -32(%rdx,%r8), %xmm9 - vmovupd -32(%rcx,%r8), %xmm8 - vinsertf128 $1, -48(%rsi,%r8), %ymm6, %ymm4 - vinsertf128 $1, -48(%rdi,%r8), %ymm3, %ymm5 - vunpcklpd %ymm5, %ymm4, %ymm11 - vunpckhpd %ymm5, %ymm4, %ymm10 - vmovupd -16(%rdx,%r8), %xmm3 - vmovupd -16(%rcx,%r8), %xmm4 - vinsertf128 $1, -32(%rsi,%r8), %ymm9, %ymm7 - vinsertf128 $1, -32(%rdi,%r8), %ymm8, %ymm6 - vunpcklpd %ymm6, %ymm7, %ymm9 - vunpckhpd %ymm6, %ymm7, %ymm8 - vinsertf128 $1, -16(%rsi,%r8), %ymm3, %ymm5 - vinsertf128 $1, -16(%rdi,%r8), %ymm4, %ymm6 - vunpcklpd %ymm6, %ymm5, %ymm7 - vunpckhpd %ymm6, %ymm5, %ymm6 - vmovupd (%rdx,%r8), %xmm3 - vmovupd (%rcx,%r8), %xmm5 - vinsertf128 $1, (%rsi,%r8), %ymm3, %ymm4 - vinsertf128 $1, (%rdi,%r8), %ymm5, %ymm5 - vunpcklpd %ymm5, %ymm4, %ymm3 - vaddpd %ymm3, %ymm1, %ymm1 - vfmadd213pd %ymm7, %ymm1, %ymm6 - vfmadd213pd %ymm8, %ymm1, %ymm6 - vfmadd213pd %ymm9, %ymm1, %ymm6 - vfmadd213pd %ymm10, %ymm1, %ymm6 - vfmadd213pd %ymm11, %ymm1, %ymm6 - vfmadd213pd %ymm12, %ymm1, %ymm6 - vfmadd213pd %ymm13, %ymm1, %ymm6 - vfmadd213pd %ymm14, %ymm1, %ymm6 - vfmadd213pd %ymm15, %ymm1, %ymm6 - vfmadd213pd %ymm0, %ymm1, %ymm6 - vorpd %ymm2, %ymm6, %ymm0 - testl %eax, %eax + /* + * VSHRIMM( I, iIndex, = iIndex, (17 - 4) ); + * VGATHER_MATRIX( L2D, p, TAB._dbP, iIndex, 0, T_ITEM_SIZE, T_ITEM_GRAN, 13, 0, 0 ); + */ + vpsrld $10, %xmm7, %xmm6 + vmovd %xmm6, %edx + vpcmpgtd _iExpMask+__svml_dtanh_data_internal(%rip), %xmm12, %xmm13 + vmovmskps %xmm13, %eax + vpextrd $1, %xmm6, %ecx + movslq %edx, %rdx + movslq %ecx, %rcx + vpextrd $2, %xmm6, %esi + vpextrd $3, %xmm6, %edi + movslq %esi, %rsi + movslq %edi, %rdi + vmovupd -96(%rdx, %r8), %xmm3 + vmovupd -96(%rcx, %r8), %xmm4 + vmovupd -80(%rcx, %r8), %xmm13 + vmovupd -64(%rcx, %r8), %xmm9 + vmovupd -80(%rdx, %r8), %xmm14 + vmovupd -64(%rdx, %r8), %xmm10 + vmovupd -48(%rdx, %r8), %xmm6 + vinsertf128 $1, -96(%rsi, %r8), %ymm3, %ymm0 + 
vinsertf128 $1, -96(%rdi, %r8), %ymm4, %ymm15 + vmovupd -48(%rcx, %r8), %xmm3 + vunpckhpd %ymm15, %ymm0, %ymm0 + vinsertf128 $1, -80(%rsi, %r8), %ymm14, %ymm12 + vinsertf128 $1, -64(%rsi, %r8), %ymm10, %ymm8 + vinsertf128 $1, -80(%rdi, %r8), %ymm13, %ymm11 + vinsertf128 $1, -64(%rdi, %r8), %ymm9, %ymm7 + vunpcklpd %ymm11, %ymm12, %ymm15 + vunpckhpd %ymm11, %ymm12, %ymm14 + vunpcklpd %ymm7, %ymm8, %ymm13 + vunpckhpd %ymm7, %ymm8, %ymm12 + vmovupd -32(%rdx, %r8), %xmm9 + vmovupd -32(%rcx, %r8), %xmm8 + vinsertf128 $1, -48(%rsi, %r8), %ymm6, %ymm4 + vinsertf128 $1, -48(%rdi, %r8), %ymm3, %ymm5 + vunpcklpd %ymm5, %ymm4, %ymm11 + vunpckhpd %ymm5, %ymm4, %ymm10 + vmovupd -16(%rdx, %r8), %xmm3 + vmovupd -16(%rcx, %r8), %xmm4 + vinsertf128 $1, -32(%rsi, %r8), %ymm9, %ymm7 + vinsertf128 $1, -32(%rdi, %r8), %ymm8, %ymm6 + vunpcklpd %ymm6, %ymm7, %ymm9 + vunpckhpd %ymm6, %ymm7, %ymm8 + vinsertf128 $1, -16(%rsi, %r8), %ymm3, %ymm5 + vinsertf128 $1, -16(%rdi, %r8), %ymm4, %ymm6 + vunpcklpd %ymm6, %ymm5, %ymm7 + vunpckhpd %ymm6, %ymm5, %ymm6 + vmovupd (%rdx, %r8), %xmm3 + vmovupd (%rcx, %r8), %xmm5 + vinsertf128 $1, (%rsi, %r8), %ymm3, %ymm4 + vinsertf128 $1, (%rdi, %r8), %ymm5, %ymm5 + vunpcklpd %ymm5, %ymm4, %ymm3 + vaddpd %ymm3, %ymm1, %ymm1 + vfmadd213pd %ymm7, %ymm1, %ymm6 + vfmadd213pd %ymm8, %ymm1, %ymm6 + vfmadd213pd %ymm9, %ymm1, %ymm6 + vfmadd213pd %ymm10, %ymm1, %ymm6 + vfmadd213pd %ymm11, %ymm1, %ymm6 + vfmadd213pd %ymm12, %ymm1, %ymm6 + vfmadd213pd %ymm13, %ymm1, %ymm6 + vfmadd213pd %ymm14, %ymm1, %ymm6 + vfmadd213pd %ymm15, %ymm1, %ymm6 + vfmadd213pd %ymm0, %ymm1, %ymm6 + vorpd %ymm2, %ymm6, %ymm0 + testl %eax, %eax -/* Go to special inputs processing branch */ - jne L(SPECIAL_VALUES_BRANCH) - # LOE rbx r12 r13 r14 r15 eax ymm0 + /* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 eax ymm0 -/* Restore registers - * and exit the function - */ + /* Restore registers + * and exit the function + */ L(EXIT): - movq 
%rbp, %rsp - popq %rbp - cfi_def_cfa(7, 8) - cfi_restore(6) - ret - cfi_def_cfa(6, 16) - cfi_offset(6, -16) + movq %rbp, %rsp + popq %rbp + cfi_def_cfa(7, 8) + cfi_restore(6) + ret + cfi_def_cfa(6, 16) + cfi_offset(6, -16) -/* Branch to process - * special inputs - */ + /* Branch to process + * special inputs + */ L(SPECIAL_VALUES_BRANCH): - vmovupd (%rsp), %ymm1 - vmovupd %ymm0, 64(%rsp) - vmovupd %ymm1, 32(%rsp) - # LOE rbx r12 r13 r14 r15 eax ymm0 + vmovupd (%rsp), %ymm1 + vmovupd %ymm0, 64(%rsp) + vmovupd %ymm1, 32(%rsp) + # LOE rbx r12 r13 r14 r15 eax ymm0 - xorl %edx, %edx - # LOE rbx r12 r13 r14 r15 eax edx + xorl %edx, %edx + # LOE rbx r12 r13 r14 r15 eax edx - vzeroupper - movq %r12, 16(%rsp) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 - movl %edx, %r12d - movq %r13, 8(%rsp) - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 - movl %eax, %r13d - movq %r14, (%rsp) - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r15 r12d r13d + vzeroupper + movq %r12, 16(%rsp) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + movl %edx, %r12d + movq %r13, 8(%rsp) + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 
0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + movl %eax, %r13d + movq %r14, (%rsp) + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r15 r12d r13d -/* Range mask - * bits check - */ + /* Range mask + * bits check + */ L(RANGEMASK_CHECK): - btl %r12d, %r13d + btl %r12d, %r13d -/* Call scalar math function */ - jc L(SCALAR_MATH_CALL) - # LOE rbx r15 r12d r13d + /* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + # LOE rbx r15 r12d r13d -/* Special inputs - * processing loop - */ + /* Special inputs + * processing loop + */ L(SPECIAL_VALUES_LOOP): - incl %r12d - cmpl $4, %r12d + incl %r12d + cmpl $4, %r12d -/* Check bits in range mask */ - jl L(RANGEMASK_CHECK) - # LOE rbx r15 r12d r13d + /* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx r15 r12d r13d - movq 16(%rsp), %r12 - cfi_restore(12) - movq 8(%rsp), %r13 - cfi_restore(13) - movq (%rsp), %r14 - cfi_restore(14) - vmovupd 64(%rsp), %ymm0 + movq 16(%rsp), %r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + vmovupd 64(%rsp), %ymm0 -/* Go to exit */ - jmp L(EXIT) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 
0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r12 r13 r14 r15 ymm0 + /* Go to exit */ + jmp L(EXIT) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r12 r13 r14 r15 ymm0 -/* Scalar math fucntion call - * to process special input - */ + /* Scalar math fucntion call + * to process special input + */ L(SCALAR_MATH_CALL): - movl %r12d, %r14d - movsd 32(%rsp,%r14,8), %xmm0 - call tanh@PLT - # LOE rbx r14 r15 r12d r13d xmm0 + movl %r12d, %r14d + movsd 32(%rsp, %r14, 8), %xmm0 + call tanh@PLT + # LOE rbx r14 r15 r12d r13d xmm0 - movsd %xmm0, 64(%rsp,%r14,8) + movsd %xmm0, 64(%rsp, %r14, 8) -/* Process special inputs in loop */ - jmp L(SPECIAL_VALUES_LOOP) - # LOE rbx r15 r12d r13d + /* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx r15 r12d r13d END(_ZGVdN4v_tanh_avx2) - .section .rodata, "a" - .align 32 + .section .rodata, "a" + .align 32 #ifdef __svml_dtanh_data_internal_typedef typedef unsigned int VUINT32; -typedef struct -{ - __declspec(align(32)) VUINT32 _dbP[60*16][2]; - __declspec(align(32)) VUINT32 _dbSignMask[4][2]; - __declspec(align(32)) VUINT32 _dbAbsMask[4][2]; - __declspec(align(32)) VUINT32 _iExpMantMask[8][1]; - __declspec(align(32)) VUINT32 _iExpMask[8][1]; - __declspec(align(32)) VUINT32 
_iMinIdxOfsMask[8][1]; - __declspec(align(32)) VUINT32 _iMaxIdxMask[8][1]; +typedef struct { + __declspec(align(32)) VUINT32 _dbP[60*16][2]; + __declspec(align(32)) VUINT32 _dbSignMask[4][2]; + __declspec(align(32)) VUINT32 _dbAbsMask[4][2]; + __declspec(align(32)) VUINT32 _iExpMantMask[8][1]; + __declspec(align(32)) VUINT32 _iExpMask[8][1]; + __declspec(align(32)) VUINT32 _iMinIdxOfsMask[8][1]; + __declspec(align(32)) VUINT32 _iMaxIdxMask[8][1]; } __svml_dtanh_data_internal; #endif __svml_dtanh_data_internal: - /* Polynomial coefficients */ - .quad 0x0000000000000000 /* PL0 = +0.000000000000000000000e-01 */ - .quad 0x0000000000000000 /* PH0 = +0.000000000000000000000e-01 */ - .quad 0x3FF0000000000000 /* P1 = +1.000000000000000014103e+00 */ - .quad 0xBD197DEAD79668D3 /* P2 = -2.264132406596103056796e-14 */ - .quad 0xBFD555555553AF3C /* P3 = -3.333333333273349741024e-01 */ - .quad 0xBE052F7CCA134846 /* P4 = -6.165791385711493738399e-10 */ - .quad 0x3FC11111563849D6 /* P5 = +1.333333655353061107201e-01 */ - .quad 0xBEB038623673FFB2 /* P6 = -9.668021563879858950855e-07 */ - .quad 0xBFAB9F685E64022E /* P7 = -5.395055916051593179252e-02 */ - .quad 0xBF2A54E2B28F2207 /* P8 = -2.008940439550829012647e-04 */ - .quad 0x3F97CFB9328A230E /* P9 = +2.325333949059698582189e-02 */ - .quad 0xBF75CA6D61723E02 /* P10 = -5.320002811586290441790e-03 */ - .quad 0x0000000000000000 /* B = +0 */ - .quad 0x3FF0000000000000 /* A = +1.0 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x3C3708A564FAD29A /* PL0 = +1.248663375337163807466e-18 */ - .quad 0x3FC0E6973998DA48 /* PH0 = +1.320370703922029154143e-01 */ - .quad 0x3FEF712EB25C0888 /* P1 = +9.825662120422444519229e-01 */ - .quad 0xBFC09B296F7C1EA9 /* P2 = -1.297351641044220078331e-01 */ - .quad 0xBFD3DD77541EDDA7 /* P3 = -3.103922196855485849143e-01 */ - .quad 0x3FB58FFCF4309615 /* P4 = +8.422833406128689275566e-02 */ - .quad 0x3FBD3ABE845DCF49 /* P5 = 
+1.141776154670967208833e-01 */ - .quad 0xBFA791DF538C37FA /* P6 = -4.603479285115947936529e-02 */ - .quad 0xBFA4F872F69CD6E8 /* P7 = -4.095801601799370195284e-02 */ - .quad 0x3F9772E49EF6412B /* P8 = +2.289921970583567527179e-02 */ - .quad 0x3F8CBC0807393909 /* P9 = +1.403051635784581776625e-02 */ - .quad 0xBF85F06A30F93319 /* P10 = -1.071246110873285040939e-02 */ - .quad 0xBFC1000000000000 /* B = -.132813 */ - .quad 0x3FF0000000000000 /* A = +1 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x3C6004EE5739DEAC /* PL0 = +6.947247374112211856530e-18 */ - .quad 0x3FC2DC968E6E0D62 /* PH0 = +1.473568149050193398786e-01 */ - .quad 0x3FEF4E1E606D96DF /* P1 = +9.782859691010478680677e-01 */ - .quad 0xBFC273BD70994AB9 /* P2 = -1.441571044730005866646e-01 */ - .quad 0xBFD382B548270D2C /* P3 = -3.048527912726111386771e-01 */ - .quad 0x3FB7CD2D582A6B29 /* P4 = +9.297450449450351894400e-02 */ - .quad 0x3FBC1278CCCBF0DB /* P5 = +1.096568584434324642303e-01 */ - .quad 0xBFA9C7F5115B86A1 /* P6 = -5.035367810138536095866e-02 */ - .quad 0xBFA371C21BAF618E /* P7 = -3.797728145554222910481e-02 */ - .quad 0x3F9958943F68417E /* P8 = +2.475196492201935923783e-02 */ - .quad 0x3F8930D5CFFD4152 /* P9 = +1.230017701132682667572e-02 */ - .quad 0xBF875CF7ADD31B76 /* P10 = -1.140779017658897660092e-02 */ - .quad 0xBFC3000000000000 /* B = -.148438 */ - .quad 0x3FF0000000000000 /* A = +1 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x3C7EABE24E052A1F /* PL0 = +2.660321779421749543501e-17 */ - .quad 0x3FC4D04783618C71 /* PH0 = +1.626061812886266111366e-01 */ - .quad 0x3FEF2765AF97A4B3 /* P1 = +9.735592298067302883212e-01 */ - .quad 0xBFC443654205FEA5 /* P2 = -1.583067486171689074207e-01 */ - .quad 0xBFD31F2E208A5B97 /* P3 = -2.987780874040536844467e-01 */ - .quad 0x3FB9F235BD339878 /* P4 = +1.013520800512156573576e-01 */ - .quad 0x3FBAD0B0DFCCA141 /* P5 = 
+1.047468706498238100104e-01 */ - .quad 0xBFABD1B9600E608E /* P6 = -5.433444306908184548967e-02 */ - .quad 0xBFA1CEBEAF07DB58 /* P7 = -3.478046309094534453598e-02 */ - .quad 0x3F9AFC9FB1D8EFD2 /* P8 = +2.635430834764902126383e-02 */ - .quad 0x3F8573444F1AB502 /* P9 = +1.047376028449287564018e-02 */ - .quad 0xBF8874FBC8F24406 /* P10 = -1.194187838544459322219e-02 */ - .quad 0xBFC5000000000000 /* B = -.164063 */ - .quad 0x3FF0000000000000 /* A = +1 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x3C7FB199D361A790 /* PL0 = +2.748994907060158996213e-17 */ - .quad 0x3FC6C170259E21F7 /* PH0 = +1.777782615356639783766e-01 */ - .quad 0x3FEEFD17479F7C65 /* P1 = +9.683948897253570478266e-01 */ - .quad 0xBFC609530FE4DF8D /* P2 = -1.721595599753950294577e-01 */ - .quad 0xBFD2B3465D71B4DE /* P3 = -2.921920692959484052676e-01 */ - .quad 0x3FBBFD2D34AC509B /* P4 = +1.093319181057403192166e-01 */ - .quad 0x3FB9778C3C16A0FE /* P5 = +9.948040453912551395183e-02 */ - .quad 0xBFADAC4D9E63C665 /* P6 = -5.795519407719210697372e-02 */ - .quad 0xBFA0139CCAD02D60 /* P7 = -3.139963126894929339124e-02 */ - .quad 0x3F9C5BF43BA6F19D /* P8 = +2.769452680671379432854e-02 */ - .quad 0x3F8190B703350341 /* P9 = +8.576803002712575184772e-03 */ - .quad 0xBF8936606782858A /* P10 = -1.231074634444230850234e-02 */ - .quad 0xBFC7000000000000 /* B = -.179688 */ - .quad 0x3FF0000000000000 /* A = +1 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x3C6A917CA3624D50 /* PL0 = +1.152216693509785660691e-17 */ - .quad 0x3FC8AFD7B974FABB /* PH0 = +1.928662925292508878439e-01 */ - .quad 0x3FEECF47624A5D03 /* P1 = +9.628025932060214187231e-01 */ - .quad 0xBFC7C4C2CB4FDE4D /* P2 = -1.856921665891938814679e-01 */ - .quad 0xBFD23F69CB2C1F9D /* P3 = -2.851204380135586155453e-01 */ - .quad 0x3FBDEC5703A03814 /* P4 = +1.168875106670557712458e-01 */ - .quad 0x3FB8095003D0CF15 /* P5 = 
+9.389209836154706616487e-02 */ - .quad 0xBFAF554B47B10CBB /* P6 = -6.119761705533607365968e-02 */ - .quad 0xBF9C89743FE7BC1B /* P7 = -2.786809577986213853937e-02 */ - .quad 0x3F9D74725B746E7C /* P8 = +2.876452143855921824991e-02 */ - .quad 0x3F7B2D8AFB70B88C /* P9 = +6.635229968237631511880e-03 */ - .quad 0xBF89A0A2883EF6CB /* P10 = -1.251341799058582545252e-02 */ - .quad 0xBFC9000000000000 /* B = -.195313 */ - .quad 0x3FF0000000000000 /* A = +1 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x3C7608279E8609CB /* PL0 = +1.910958764623660748269e-17 */ - .quad 0x3FCA9B46D2DDC5E3 /* PH0 = +2.078636674519166172015e-01 */ - .quad 0x3FEE9E0BB72A01A1 /* P1 = +9.567926957534390123919e-01 */ - .quad 0xBFC974FAD10C5330 /* P2 = -1.988824387305156976885e-01 */ - .quad 0xBFD1C40ACCBA4044 /* P3 = -2.775904654781735703430e-01 */ - .quad 0x3FBFBE24E2987853 /* P4 = +1.239951184474830487522e-01 */ - .quad 0x3FB6885B4345E47F /* P5 = +8.801813499839460539687e-02 */ - .quad 0xBFB06563D5670584 /* P6 = -6.404708824176991770896e-02 */ - .quad 0xBF98CD1D620DF6E2 /* P7 = -2.421995078065365147772e-02 */ - .quad 0x3F9E44EF3E844D21 /* P8 = +2.955983943054463683119e-02 */ - .quad 0x3F7325FA0148CAAE /* P9 = +4.674889165971292322643e-03 */ - .quad 0xBF89B4C8556C2D92 /* P10 = -1.255184660614964011319e-02 */ - .quad 0xBFCB000000000000 /* B = -.210938 */ - .quad 0x3FF0000000000000 /* A = +1 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x3C6F19DAA20F51D5 /* PL0 = +1.348790537832000351176e-17 */ - .quad 0x3FCC83876CA98E15 /* PH0 = +2.227639465883021474557e-01 */ - .quad 0x3FEE697B662D07CD /* P1 = +9.503762241004040620296e-01 */ - .quad 0xBFCB194C7ED76ACF /* P2 = -2.117095584242946953999e-01 */ - .quad 0xBFD141A19E419762 /* P3 = -2.696308179350720680191e-01 */ - .quad 0x3FC0B89C64BC7B98 /* P4 = +1.306338779331468503007e-01 */ - .quad 0x3FB4F721150BBFC5 /* P5 = 
+8.189589275184434216748e-02 */ - .quad 0xBFB105AAFAB87898 /* P6 = -6.649273511036069461061e-02 */ - .quad 0xBF94FB3B31248C01 /* P7 = -2.048962104266749732921e-02 */ - .quad 0x3F9ECD31E588709C /* P8 = +3.007963145692880855964e-02 */ - .quad 0x3F664A91A335C105 /* P9 = +2.721104095762541127495e-03 */ - .quad 0xBF89754E32E1E26E /* P10 = -1.243077366619723806134e-02 */ - .quad 0xBFCD000000000000 /* B = -.226563 */ - .quad 0x3FF0000000000000 /* A = +1 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x0000000000000000 /* Align value = +0 */ - .quad 0x3C6AC6C889D8111D /* PL0 = +1.161245469312620769170e-17 */ - .quad 0x3FCE6864FE55A3D0 /* PH0 = +2.375608674877001114112e-01 */ - .quad 0x3FEE31AEE116B82B /* P1 = +9.435648342384913826391e-01 */ - .quad 0xBFCCB114B69E808B /* P2 = -2.241540805525839833707e-01 */ - .quad 0xBFD0B8AB913BA99D /* P3 = -2.612713735858507980441e-01 */ - .quad 0x3FC1823322BED48A /* P4 = +1.367858810096190233514e-01 */ - .quad 0x3FB35822B7929893 /* P5 = +7.556359273675842651653e-02 */ - |
