diff options
| author | Sunil K Pandey <skpgkp2@gmail.com> | 2022-03-07 10:47:14 -0800 |
|---|---|---|
| committer | Sunil K Pandey <skpgkp2@gmail.com> | 2022-03-07 21:44:09 -0800 |
| commit | 38f0c40f28f6e90384a193318b1d6fdacdc6c2fd (patch) | |
| tree | 1112bdb3a73d0652273a0bf8fb2a8adc25318ff1 | |
| parent | 9db25a9b138e96300fad11f65d1cd7f6d72bb52e (diff) | |
| download | glibc-38f0c40f28f6e90384a193318b1d6fdacdc6c2fd.tar.xz glibc-38f0c40f28f6e90384a193318b1d6fdacdc6c2fd.zip | |
x86_64: Fix svml_d_sinh4_core_avx2.S code formatting
This commit contains the following formatting changes:
1. Instructions preceded by a tab.
2. Instructions less than 8 characters in length have a tab
between them and the first operand.
3. Instructions greater than 7 characters in length have a
space between them and the first operand.
4. Tabs after `#define`d names and their value.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant .text section.
8. 1 space between line content and line comment.
9. Space after all commas.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
| -rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_sinh4_core_avx2.S | 814 |
1 files changed, 406 insertions, 408 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sinh4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sinh4_core_avx2.S index 53b8a32426..ae16600579 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sinh4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sinh4_core_avx2.S @@ -34,437 +34,435 @@ /* Offsets for data table __svml_dsinh_data_internal */ -#define _dbInvLn2 0 -#define _dbLn2hi 32 -#define _dbLn2lo 64 -#define _dSign 96 -#define _dbT 128 -#define _dbShifter 2176 -#define _iDomainRange 2208 -#define _dPC2 2240 -#define _dPC3 2272 -#define _dPC4 2304 -#define _dPC5 2336 -#define _lIndexMask 2368 +#define _dbInvLn2 0 +#define _dbLn2hi 32 +#define _dbLn2lo 64 +#define _dSign 96 +#define _dbT 128 +#define _dbShifter 2176 +#define _iDomainRange 2208 +#define _dPC2 2240 +#define _dPC3 2272 +#define _dPC4 2304 +#define _dPC5 2336 +#define _lIndexMask 2368 #include <sysdep.h> - .text - .section .text.avx2,"ax",@progbits + .section .text.avx2, "ax", @progbits ENTRY(_ZGVdN4v_sinh_avx2) - pushq %rbp - cfi_def_cfa_offset(16) - movq %rsp, %rbp - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - andq $-32, %rsp - subq $96, %rsp - lea _dbT+8+__svml_dsinh_data_internal(%rip), %r8 - vmovupd _dbShifter+__svml_dsinh_data_internal(%rip), %ymm12 - -/* - * Load argument - * dM = x*2^K/log(2) + RShifter - */ - vmovupd _dbInvLn2+__svml_dsinh_data_internal(%rip), %ymm5 - vmovupd _dbLn2hi+__svml_dsinh_data_internal(%rip), %ymm13 - vmovapd %ymm0, %ymm8 - -/* - * VLOAD_CONST( D, dPC[0], TAB._dPC1 ); - * Abs argument - */ - vandpd _dSign+__svml_dsinh_data_internal(%rip), %ymm8, %ymm7 - vxorpd %ymm8, %ymm7, %ymm6 - vfmadd213pd %ymm12, %ymm6, %ymm5 - -/* - * R - * dN = dM - RShifter - */ - vsubpd %ymm12, %ymm5, %ymm3 - -/* - * Index and lookup - * j - */ - vandps _lIndexMask+__svml_dsinh_data_internal(%rip), %ymm5, %ymm4 - -/* - * Check for overflow\underflow - * - */ - vextractf128 $1, %ymm6, %xmm9 - vshufps $221, %xmm9, %xmm6, %xmm10 - -/* dR = dX - dN*Log2_hi/2^K */ - vfnmadd231pd 
%ymm13, %ymm3, %ymm6 - vpcmpgtd _iDomainRange+__svml_dsinh_data_internal(%rip), %xmm10, %xmm11 - vmovmskps %xmm11, %eax - -/* dR = (dX - dN*Log2_hi/2^K) - dN*Log2_lo/2^K */ - vfnmadd231pd _dbLn2lo+__svml_dsinh_data_internal(%rip), %ymm3, %ymm6 - vextractf128 $1, %ymm4, %xmm0 - vmovd %xmm4, %edx - vmovd %xmm0, %esi - shll $4, %edx - vpextrd $2, %xmm4, %ecx - -/* split j and N */ - vxorps %ymm4, %ymm5, %ymm3 - shll $4, %esi - vpextrd $2, %xmm0, %edi - shll $4, %ecx - -/* - * G1,G2,G3: dTdif,dTn * 2^N,2^(-N) - * lM now is an EXP(2^N) - */ - vpsllq $45, %ymm3, %ymm4 - vmovq (%rdx,%r8), %xmm14 - vmovq (%rsi,%r8), %xmm1 - vmovhpd (%rcx,%r8), %xmm14, %xmm15 - shll $4, %edi - vmovhpd (%rdi,%r8), %xmm1, %xmm2 - -/* dR2 = dR^2 */ - vmulpd %ymm6, %ymm6, %ymm1 - vmovq -8(%rdx,%r8), %xmm9 - vmovq -8(%rsi,%r8), %xmm11 - vmovhpd -8(%rcx,%r8), %xmm9, %xmm10 - vmovhpd -8(%rdi,%r8), %xmm11, %xmm12 - vinsertf128 $1, %xmm2, %ymm15, %ymm2 - -/* */ - vpaddq %ymm4, %ymm2, %ymm5 - -/* */ - vpsubq %ymm4, %ymm2, %ymm14 - -/* dG3 = dTn*2^N + dTn*2^-N */ - vaddpd %ymm14, %ymm5, %ymm2 - -/* dG2 = dTn*2^N - dTn*2^-N */ - vsubpd %ymm14, %ymm5, %ymm14 - -/* - * sinh(r) = r*((a1=1)+r^2*(a3+r^2*a5)) = r + r*(r^2*(a3+r^2*a5)) .... 
- * dSinh_r = (a3+r^2*a5) - */ - vmovupd _dPC5+__svml_dsinh_data_internal(%rip), %ymm5 - vfmadd213pd _dPC3+__svml_dsinh_data_internal(%rip), %ymm1, %ymm5 - vinsertf128 $1, %xmm12, %ymm10, %ymm13 - vpaddq %ymm4, %ymm13, %ymm0 - -/* dSinh_r = r^2*(a3+r^2*a5) */ - vmulpd %ymm5, %ymm1, %ymm4 - -/* dG2 += dG1 */ - vaddpd %ymm14, %ymm0, %ymm3 - -/* dG1 += dG3 */ - vaddpd %ymm2, %ymm0, %ymm0 - -/* dSinh_r = r + r*(r^2*(a3+r^2*a5)) */ - vfmadd213pd %ymm6, %ymm6, %ymm4 - -/* - * poly(r) = (dG2+dG1)+dG3*sinh(dR)+dG1*sinh(dR)+(dG1+dG2)*dR2*(a2 +a4*dR2) - * dOut = (a2 +a4*dR2) - */ - vmovupd _dPC4+__svml_dsinh_data_internal(%rip), %ymm6 - vfmadd213pd _dPC2+__svml_dsinh_data_internal(%rip), %ymm1, %ymm6 - -/* dOut = dR2*(a2 +a4*dR2) */ - vmulpd %ymm6, %ymm1, %ymm1 - -/* dOut = dG2*dR2*(a2 +a4*dR2) */ - vmulpd %ymm3, %ymm1, %ymm6 - -/* dOut = dG1*sinh(dR)+dG2*dR2*(a2 +a4*dR2) */ - vfmadd213pd %ymm6, %ymm0, %ymm4 - -/* dOut = dG2 + dG1*sinh(dR)+dG2*dR2*(a2 +a4*dR2) */ - vaddpd %ymm4, %ymm3, %ymm5 - -/* Ret H */ - vorpd %ymm5, %ymm7, %ymm0 - testl %eax, %eax - -/* Go to special inputs processing branch */ - jne L(SPECIAL_VALUES_BRANCH) - # LOE rbx r12 r13 r14 r15 eax ymm0 ymm8 - -/* Restore registers - * and exit the function - */ + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-32, %rsp + subq $96, %rsp + lea _dbT+8+__svml_dsinh_data_internal(%rip), %r8 + vmovupd _dbShifter+__svml_dsinh_data_internal(%rip), %ymm12 + + /* + * Load argument + * dM = x*2^K/log(2) + RShifter + */ + vmovupd _dbInvLn2+__svml_dsinh_data_internal(%rip), %ymm5 + vmovupd _dbLn2hi+__svml_dsinh_data_internal(%rip), %ymm13 + vmovapd %ymm0, %ymm8 + + /* + * VLOAD_CONST( D, dPC[0], TAB._dPC1 ); + * Abs argument + */ + vandpd _dSign+__svml_dsinh_data_internal(%rip), %ymm8, %ymm7 + vxorpd %ymm8, %ymm7, %ymm6 + vfmadd213pd %ymm12, %ymm6, %ymm5 + + /* + * R + * dN = dM - RShifter + */ + vsubpd %ymm12, %ymm5, %ymm3 + + /* + * Index and lookup + * j + */ + 
vandps _lIndexMask+__svml_dsinh_data_internal(%rip), %ymm5, %ymm4 + + /* + * Check for overflow\underflow + * + */ + vextractf128 $1, %ymm6, %xmm9 + vshufps $221, %xmm9, %xmm6, %xmm10 + + /* dR = dX - dN*Log2_hi/2^K */ + vfnmadd231pd %ymm13, %ymm3, %ymm6 + vpcmpgtd _iDomainRange+__svml_dsinh_data_internal(%rip), %xmm10, %xmm11 + vmovmskps %xmm11, %eax + + /* dR = (dX - dN*Log2_hi/2^K) - dN*Log2_lo/2^K */ + vfnmadd231pd _dbLn2lo+__svml_dsinh_data_internal(%rip), %ymm3, %ymm6 + vextractf128 $1, %ymm4, %xmm0 + vmovd %xmm4, %edx + vmovd %xmm0, %esi + shll $4, %edx + vpextrd $2, %xmm4, %ecx + + /* split j and N */ + vxorps %ymm4, %ymm5, %ymm3 + shll $4, %esi + vpextrd $2, %xmm0, %edi + shll $4, %ecx + + /* + * G1, G2, G3: dTdif, dTn * 2^N, 2^(-N) + * lM now is an EXP(2^N) + */ + vpsllq $45, %ymm3, %ymm4 + vmovq (%rdx, %r8), %xmm14 + vmovq (%rsi, %r8), %xmm1 + vmovhpd (%rcx, %r8), %xmm14, %xmm15 + shll $4, %edi + vmovhpd (%rdi, %r8), %xmm1, %xmm2 + + /* dR2 = dR^2 */ + vmulpd %ymm6, %ymm6, %ymm1 + vmovq -8(%rdx, %r8), %xmm9 + vmovq -8(%rsi, %r8), %xmm11 + vmovhpd -8(%rcx, %r8), %xmm9, %xmm10 + vmovhpd -8(%rdi, %r8), %xmm11, %xmm12 + vinsertf128 $1, %xmm2, %ymm15, %ymm2 + + /* */ + vpaddq %ymm4, %ymm2, %ymm5 + + /* */ + vpsubq %ymm4, %ymm2, %ymm14 + + /* dG3 = dTn*2^N + dTn*2^-N */ + vaddpd %ymm14, %ymm5, %ymm2 + + /* dG2 = dTn*2^N - dTn*2^-N */ + vsubpd %ymm14, %ymm5, %ymm14 + + /* + * sinh(r) = r*((a1=1)+r^2*(a3+r^2*a5)) = r + r*(r^2*(a3+r^2*a5)) .... 
+ * dSinh_r = (a3+r^2*a5) + */ + vmovupd _dPC5+__svml_dsinh_data_internal(%rip), %ymm5 + vfmadd213pd _dPC3+__svml_dsinh_data_internal(%rip), %ymm1, %ymm5 + vinsertf128 $1, %xmm12, %ymm10, %ymm13 + vpaddq %ymm4, %ymm13, %ymm0 + + /* dSinh_r = r^2*(a3+r^2*a5) */ + vmulpd %ymm5, %ymm1, %ymm4 + + /* dG2 += dG1 */ + vaddpd %ymm14, %ymm0, %ymm3 + + /* dG1 += dG3 */ + vaddpd %ymm2, %ymm0, %ymm0 + + /* dSinh_r = r + r*(r^2*(a3+r^2*a5)) */ + vfmadd213pd %ymm6, %ymm6, %ymm4 + + /* + * poly(r) = (dG2+dG1)+dG3*sinh(dR)+dG1*sinh(dR)+(dG1+dG2)*dR2*(a2 +a4*dR2) + * dOut = (a2 +a4*dR2) + */ + vmovupd _dPC4+__svml_dsinh_data_internal(%rip), %ymm6 + vfmadd213pd _dPC2+__svml_dsinh_data_internal(%rip), %ymm1, %ymm6 + + /* dOut = dR2*(a2 +a4*dR2) */ + vmulpd %ymm6, %ymm1, %ymm1 + + /* dOut = dG2*dR2*(a2 +a4*dR2) */ + vmulpd %ymm3, %ymm1, %ymm6 + + /* dOut = dG1*sinh(dR)+dG2*dR2*(a2 +a4*dR2) */ + vfmadd213pd %ymm6, %ymm0, %ymm4 + + /* dOut = dG2 + dG1*sinh(dR)+dG2*dR2*(a2 +a4*dR2) */ + vaddpd %ymm4, %ymm3, %ymm5 + + /* Ret H */ + vorpd %ymm5, %ymm7, %ymm0 + testl %eax, %eax + + /* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 eax ymm0 ymm8 + + /* Restore registers + * and exit the function + */ L(EXIT): - movq %rbp, %rsp - popq %rbp - cfi_def_cfa(7, 8) - cfi_restore(6) - ret - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - -/* Branch to process - * special inputs - */ + movq %rbp, %rsp + popq %rbp + cfi_def_cfa(7, 8) + cfi_restore(6) + ret + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + + /* Branch to process + * special inputs + */ L(SPECIAL_VALUES_BRANCH): - vmovupd %ymm8, 32(%rsp) - vmovupd %ymm0, 64(%rsp) - # LOE rbx r12 r13 r14 r15 eax ymm0 - - xorl %edx, %edx - # LOE rbx r12 r13 r14 r15 eax edx - - vzeroupper - movq %r12, 16(%rsp) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 
0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 - movl %edx, %r12d - movq %r13, 8(%rsp) - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 - movl %eax, %r13d - movq %r14, (%rsp) - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r15 r12d r13d - -/* Range mask - * bits check - */ + vmovupd %ymm8, 32(%rsp) + vmovupd %ymm0, 64(%rsp) + # LOE rbx r12 r13 r14 r15 eax ymm0 + + xorl %edx, %edx + # LOE rbx r12 r13 r14 r15 eax edx + + vzeroupper + movq %r12, 16(%rsp) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + movl %edx, %r12d + movq %r13, 8(%rsp) + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + movl %eax, %r13d + movq %r14, (%rsp) + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r15 r12d r13d + + /* Range mask + * bits check + */ L(RANGEMASK_CHECK): - btl %r12d, %r13d + btl %r12d, %r13d -/* Call scalar math function */ - jc L(SCALAR_MATH_CALL) - # LOE rbx r15 r12d r13d + /* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + # LOE rbx r15 r12d r13d -/* Special inputs - * processing loop - */ + /* Special inputs + 
* processing loop + */ L(SPECIAL_VALUES_LOOP): - incl %r12d - cmpl $4, %r12d - -/* Check bits in range mask */ - jl L(RANGEMASK_CHECK) - # LOE rbx r15 r12d r13d - - movq 16(%rsp), %r12 - cfi_restore(12) - movq 8(%rsp), %r13 - cfi_restore(13) - movq (%rsp), %r14 - cfi_restore(14) - vmovupd 64(%rsp), %ymm0 - -/* Go to exit */ - jmp L(EXIT) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r12 r13 r14 r15 ymm0 - -/* Scalar math fucntion call - * to process special input - */ + incl %r12d + cmpl $4, %r12d + + /* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx r15 r12d r13d + + movq 16(%rsp), %r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + vmovupd 64(%rsp), %ymm0 + + /* Go to exit */ + jmp L(EXIT) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; 
DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r12 r13 r14 r15 ymm0 + + /* Scalar math fucntion call + * to process special input + */ L(SCALAR_MATH_CALL): - movl %r12d, %r14d - movsd 32(%rsp,%r14,8), %xmm0 - call sinh@PLT - # LOE rbx r14 r15 r12d r13d xmm0 + movl %r12d, %r14d + movsd 32(%rsp, %r14, 8), %xmm0 + call sinh@PLT + # LOE rbx r14 r15 r12d r13d xmm0 - movsd %xmm0, 64(%rsp,%r14,8) + movsd %xmm0, 64(%rsp, %r14, 8) -/* Process special inputs in loop */ - jmp L(SPECIAL_VALUES_LOOP) - # LOE rbx r15 r12d r13d + /* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx r15 r12d r13d END(_ZGVdN4v_sinh_avx2) - .section .rodata, "a" - .align 32 + .section .rodata, "a" + .align 32 #ifdef __svml_dsinh_data_internal_typedef typedef unsigned int VUINT32; -typedef struct -{ - __declspec(align(32)) VUINT32 _dbInvLn2[4][2]; - __declspec(align(32)) VUINT32 _dbLn2hi[4][2]; - __declspec(align(32)) VUINT32 _dbLn2lo[4][2]; - __declspec(align(32)) VUINT32 _dSign[4][2]; //0x8000000000000000 - __declspec(align(32)) VUINT32 _dbT[(1<<7)][2][2]; //precalc poly coeff - __declspec(align(32)) VUINT32 _dbShifter[4][2]; - __declspec(align(32)) VUINT32 _iDomainRange[8][1]; - __declspec(align(32)) VUINT32 _dPC2[4][2]; - __declspec(align(32)) VUINT32 _dPC3[4][2]; - __declspec(align(32)) VUINT32 _dPC4[4][2]; - __declspec(align(32)) VUINT32 _dPC5[4][2]; - __declspec(align(32)) VUINT32 _lIndexMask[4][2]; +typedef struct { + __declspec(align(32)) VUINT32 _dbInvLn2[4][2]; + __declspec(align(32)) VUINT32 _dbLn2hi[4][2]; + __declspec(align(32)) VUINT32 _dbLn2lo[4][2]; + __declspec(align(32)) VUINT32 _dSign[4][2]; // 0x8000000000000000 + __declspec(align(32)) VUINT32 _dbT[(1<<7)][2][2]; // precalc poly coeff + __declspec(align(32)) VUINT32 _dbShifter[4][2]; + __declspec(align(32)) VUINT32 _iDomainRange[8][1]; + __declspec(align(32)) 
VUINT32 _dPC2[4][2]; + __declspec(align(32)) VUINT32 _dPC3[4][2]; + __declspec(align(32)) VUINT32 _dPC4[4][2]; + __declspec(align(32)) VUINT32 _dPC5[4][2]; + __declspec(align(32)) VUINT32 _lIndexMask[4][2]; } __svml_dsinh_data_internal; #endif __svml_dsinh_data_internal: - .quad 0x3FF71547652B82FE, 0x3FF71547652B82FE, 0x3FF71547652B82FE, 0x3FF71547652B82FE /* _dbInvLn2 = 1/log(2) */ - .align 32 - .quad 0x3FE62E42FEFA0000, 0x3FE62E42FEFA0000, 0x3FE62E42FEFA0000, 0x3FE62E42FEFA0000 /* _dbLn2hi = log(2) hi*/ - .align 32 - .quad 0x3D7CF79ABC9E3B3A, 0x3D7CF79ABC9E3B3A, 0x3D7CF79ABC9E3B3A, 0x3D7CF79ABC9E3B3A /* _dbLn2lo = log(2) lo*/ - .align 32 - .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 /* _dSign */ - //_dbT - .align 32 - .quad 0x0000000000000000, 0x3FE0000000000000 //2^( 0 /128-1) - 2^(- 0 /128-1), 2^(- 0 /128-1) - .quad 0x3F762E4A19BD1E74, 0x3FDFD3C22B8F71F1 //2^( 1 /128-1) - 2^(- 1 /128-1), 2^(- 1 /128-1) - .quad 0x3F862E5F6A0DFD36, 0x3FDFA7C1819E90D8 //2^( 2 /128-1) - 2^(- 2 /128-1), 2^(- 2 /128-1) - .quad 0x3F90A2E234040F5F, 0x3FDF7BFDAD9CBE14 //2^( 3 /128-1) - 2^(- 3 /128-1), 2^(- 3 /128-1) - .quad 0x3F962EB4ABCC5A81, 0x3FDF50765B6E4540 //2^( 4 /128-1) - 2^(- 4 /128-1), 2^(- 4 /128-1) - .quad 0x3F9BBAB1C5033244, 0x3FDF252B376BBA97 //2^( 5 /128-1) - 2^(- 5 /128-1), 2^(- 5 /128-1) - .quad 0x3FA0A372144EEB45, 0x3FDEFA1BEE615A27 //2^( 6 /128-1) - 2^(- 6 /128-1), 2^(- 6 /128-1) - .quad 0x3FA369AB3FFBF8B0, 0x3FDECF482D8E67F1 //2^( 7 /128-1) - 2^(- 7 /128-1), 2^(- 7 /128-1) - .quad 0x3FA63009BA740A2A, 0x3FDEA4AFA2A490DA //2^( 8 /128-1) - 2^(- 8 /128-1), 2^(- 8 /128-1) - .quad 0x3FA8F692D8EA1B5A, 0x3FDE7A51FBC74C83 //2^( 9 /128-1) - 2^(- 9 /128-1), 2^(- 9 /128-1) - .quad 0x3FABBD4BF0E31A6F, 0x3FDE502EE78B3FF6 //2^( 10 /128-1) - 2^(- 10 /128-1), 2^(- 10 /128-1) - .quad 0x3FAE843A5840286A, 0x3FDE264614F5A129 //2^( 11 /128-1) - 2^(- 11 /128-1), 2^(- 11 /128-1) - .quad 0x3FB0A5B1B2A46D0A, 0x3FDDFC97337B9B5F //2^( 12 /128-1) - 2^(- 
12 /128-1), 2^(- 12 /128-1) - .quad 0x3FB20966375ABCDF, 0x3FDDD321F301B460 //2^( 13 /128-1) - 2^(- 13 /128-1), 2^(- 13 /128-1) - .quad 0x3FB36D3D65DCA4E8, 0x3FDDA9E603DB3285 //2^( 14 /128-1) - 2^(- 14 /128-1), 2^(- 14 /128-1) - .quad 0x3FB4D139EA06642A, 0x3FDD80E316C98398 //2^( 15 /128-1) - 2^(- 15 /128-1), 2^(- 15 /128-1) - .quad 0x3FB6355E6FFBF9BA, 0x3FDD5818DCFBA487 //2^( 16 /128-1) - 2^(- 16 /128-1), 2^(- 16 /128-1) - .quad 0x3FB799ADA42E4788, 0x3FDD2F87080D89F2 //2^( 17 /128-1) - 2^(- 17 /128-1), 2^(- 17 /128-1) - .quad 0x3FB8FE2A336035BC, 0x3FDD072D4A07897C //2^( 18 /128-1) - 2^(- 18 /128-1), 2^(- 18 /128-1) - .quad 0x3FBA62D6CAABD6B6, 0x3FDCDF0B555DC3FA //2^( 19 /128-1) - 2^(- 19 /128-1), 2^(- 19 /128-1) - .quad 0x3FBBC7B617878BAF, 0x3FDCB720DCEF9069 //2^( 20 /128-1) - 2^(- 20 /128-1), 2^(- 20 /128-1) - .quad 0x3FBD2CCAC7CB2A11, 0x3FDC8F6D9406E7B5 //2^( 21 /128-1) - 2^(- 21 /128-1), 2^(- 21 /128-1) - .quad 0x3FBE921789B52185, 0x3FDC67F12E57D14B //2^( 22 /128-1) - 2^(- 22 /128-1), 2^(- 22 /128-1) - .quad 0x3FBFF79F0BEFA2C7, 0x3FDC40AB5FFFD07A //2^( 23 /128-1) - 2^(- 23 /128-1), 2^(- 23 /128-1) - .quad 0x3FC0AEB1FECAE3A9, 0x3FDC199BDD85529C //2^( 24 /128-1) - 2^(- 24 /128-1), 2^(- 24 /128-1) - .quad 0x3FC161B4871C5CEC, 0x3FDBF2C25BD71E09 //2^( 25 /128-1) - 2^(- 25 /128-1), 2^(- 25 /128-1) - .quad 0x3FC214D876F26FD0, 0x3FDBCC1E904BC1D2 //2^( 26 /128-1) - 2^(- 26 /128-1), 2^(- 26 /128-1) - .quad 0x3FC2C81F2693816F, 0x3FDBA5B030A1064A //2^( 27 /128-1) - 2^(- 27 /128-1), 2^(- 27 /128-1) - .quad 0x3FC37B89EE88BEF7, 0x3FDB7F76F2FB5E47 //2^( 28 /128-1) - 2^(- 28 /128-1), 2^(- 28 /128-1) - .quad 0x3FC42F1A27A0B3CD, 0x3FDB59728DE5593A //2^( 29 /128-1) - 2^(- 29 /128-1), 2^(- 29 /128-1) - .quad 0x3FC4E2D12AF1E037, 0x3FDB33A2B84F15FB //2^( 30 /128-1) - 2^(- 30 /128-1), 2^(- 30 /128-1) - .quad 0x3FC596B051DD508D, 0x3FDB0E07298DB666 //2^( 31 /128-1) - 2^(- 31 /128-1), 2^(- 31 /128-1) - .quad 0x3FC64AB8F61134FA, 0x3FDAE89F995AD3AD //2^( 32 /128-1) - 2^(- 32 /128-1), 2^(- 32 
/128-1) - .quad 0x3FC6FEEC718B79D1, 0x3FDAC36BBFD3F37A //2^( 33 /128-1) - 2^(- 33 /128-1), 2^(- 33 /128-1) - .quad 0x3FC7B34C1E9C607F, 0x3FDA9E6B5579FDBF //2^( 34 /128-1) - 2^(- 34 /128-1), 2^(- 34 /128-1) - .quad 0x3FC867D957E91912, 0x3FDA799E1330B358 //2^( 35 /128-1) - 2^(- 35 /128-1), 2^(- 35 /128-1) - .quad 0x3FC91C95786E5C72, 0x3FDA5503B23E255D //2^( 36 /128-1) - 2^(- 36 /128-1), 2^(- 36 /128-1) - .quad 0x3FC9D181DB83072F, 0x3FDA309BEC4A2D33 //2^( 37 /128-1) - 2^(- 37 /128-1), 2^(- 37 /128-1) - .quad 0x3FCA869FDCDAB512, 0x3FDA0C667B5DE565 //2^( 38 /128-1) - 2^(- 38 /128-1), 2^(- 38 /128-1) - .quad 0x3FCB3BF0D8885D4C, 0x3FD9E86319E32323 //2^( 39 /128-1) - 2^(- 39 /128-1), 2^(- 39 /128-1) - .quad 0x3FCBF1762B00EF69, 0x3FD9C49182A3F090 //2^( 40 /128-1) - 2^(- 40 /128-1), 2^(- 40 /128-1) - .quad 0x3FCCA731311DF0FB, 0x3FD9A0F170CA07BA //2^( 41 /128-1) - 2^(- 41 /128-1), 2^(- 41 /128-1) - .quad 0x3FCD5D2348201C09, 0x3FD97D829FDE4E50 //2^( 42 /128-1) - 2^(- 42 /128-1), 2^(- 42 /128-1) - .quad 0x3FCE134DCDB1FE3E, 0x3FD95A44CBC8520F //2^( 43 /128-1) - 2^(- 43 /128-1), 2^(- 43 /128-1) - .quad 0x3FCEC9B21FEA98EA, 0x3FD93737B0CDC5E5 //2^( 44 /128-1) - 2^(- 44 /128-1), 2^(- 44 /128-1) - .quad 0x3FCF80519D5001D3, 0x3FD9145B0B91FFC6 //2^( 45 /128-1) - 2^(- 45 /128-1), 2^(- 45 /128-1) - .quad 0x3FD01B96D26D026A, 0x3FD8F1AE99157736 //2^( 46 /128-1) - 2^(- 46 /128-1), 2^(- 46 /128-1) - .quad 0x3FD07723CAFA6331, 0x3FD8CF3216B5448C //2^( 47 /128-1) - 2^(- 47 /128-1), 2^(- 47 /128-1) - .quad 0x3FD0D2D06841B373, 0x3FD8ACE5422AA0DB //2^( 48 /128-1) - 2^(- 48 /128-1), 2^(- 48 /128-1) - .quad 0x3FD12E9D5A715381, 0x3FD88AC7D98A6699 //2^( 49 /128-1) - 2^(- 49 /128-1), 2^(- 49 /128-1) - .quad 0x3FD18A8B51F5C661, 0x3FD868D99B4492ED //2^( 50 /128-1) - 2^(- 50 /128-1), 2^(- 50 /128-1) - .quad 0x3FD1E69AFF7B04D7, 0x3FD8471A4623C7AD //2^( 51 /128-1) - 2^(- 51 /128-1), 2^(- 51 /128-1) - .quad 0x3FD242CD13EDD0F1, 0x3FD82589994CCE13 //2^( 52 /128-1) - 2^(- 52 /128-1), 2^(- 52 /128-1) - .quad 
0x3FD29F22407D0A0C, 0x3FD80427543E1A12 //2^( 53 /128-1) - 2^(- 53 /128-1), 2^(- 53 /128-1) - .quad 0x3FD2FB9B369B0153, 0x3FD7E2F336CF4E62 //2^( 54 /128-1) - 2^(- 54 /128-1), 2^(- 54 /128-1) - .quad 0x3FD35838A7FECEC8, 0x3FD7C1ED0130C132 //2^( 55 /128-1) - 2^(- 55 /128-1), 2^(- 55 /128-1) - .quad 0x3FD3B4FB46A5A6CC, 0x3FD7A11473EB0187 //2^( 56 /128-1) - 2^(- 56 /128-1), 2^(- 56 /128-1) - .quad 0x3FD411E3C4D4302F, 0x3FD780694FDE5D3F //2^( 57 /128-1) - 2^(- 57 /128-1), 2^(- 57 /128-1) - .quad 0x3FD46EF2D517DAC8, 0x3FD75FEB564267C9 //2^( 58 /128-1) - 2^(- 58 /128-1), 2^(- 58 /128-1) - .quad 0x3FD4CC292A48369E, 0x3FD73F9A48A58174 //2^( 59 /128-1) - 2^(- 59 /128-1), 2^(- 59 /128-1) - .quad 0x3FD5298777884B96, 0x3FD71F75E8EC5F74 //2^( 60 /128-1) - 2^(- 60 /128-1), 2^(- 60 /128-1) - .quad 0x3FD5870E7047F1BC, 0x3FD6FF7DF9519484 //2^( 61 /128-1) - 2^(- 61 /128-1), 2^(- 61 /128-1) - .quad 0x3FD5E4BEC8452A1A, 0x3FD6DFB23C651A2F //2^( 62 /128-1) - 2^(- 62 /128-1), 2^(- 62 /128-1) - .quad 0x3FD64299338D7827, 0x3FD6C012750BDABF //2^( 63 /128-1) - 2^(- 63 /128-1), 2^(- 63 /128-1) - .quad 0x3FD6A09E667F3BCD, 0x3FD6A09E667F3BCD //2^( 64 /128-1) - 2^(- 64 /128-1), 2^(- 64 /128-1) - .quad 0x3FD6FECF15CB0C0B, 0x3FD68155D44CA973 //2^( 65 /128-1) - 2^(- 65 /128-1), 2^(- 65 /128-1) - .quad 0x3FD75D2BF6751239, 0x3FD6623882552225 //2^( 66 /128-1) - 2^(- 66 /128-1), 2^(- 66 /128-1) - .quad 0x3FD7BBB5BDD665E8, 0x3FD6434634CCC320 //2^( 67 /128-1) - 2^(- 67 /128-1), 2^(- 67 /128-1) - .quad 0x3FD81A6D219E6963, 0x3FD6247EB03A5585 //2^( 68 /128-1) - 2^(- 68 /128-1), 2^(- 68 /128-1) - .quad 0x3FD87952D7D426DF, 0x3FD605E1B976DC09 //2^( 69 /128-1) - 2^(- 69 /128-1), 2^(- 69 /128-1) - .quad 0x3FD8D86796D7AE49, 0x3FD5E76F15AD2148 //2^( 70 /128-1) - 2^(- 70 /128-1), 2^(- 70 /128-1) - .quad 0x3FD937AC156373C8, 0x3FD5C9268A5946B7 //2^( 71 /128-1) - 2^(- 71 /128-1 |
