diff options
| author | Sunil K Pandey <skpgkp2@gmail.com> | 2022-03-07 10:47:14 -0800 |
|---|---|---|
| committer | Sunil K Pandey <skpgkp2@gmail.com> | 2022-03-07 21:44:08 -0800 |
| commit | f2469622f550e477901df9735e04896db9eb4b8c (patch) | |
| tree | c8272deae204e20f7e7f89955e9952c5f07d2387 | |
| parent | 1025469bf64c1572524343531dfa553a9d0a9cc2 (diff) | |
| download | glibc-f2469622f550e477901df9735e04896db9eb4b8c.tar.xz glibc-f2469622f550e477901df9735e04896db9eb4b8c.zip | |
x86_64: Fix svml_d_log1p4_core_avx2.S code formatting
This commit contains following formatting changes
1. Instructions preceded by a tab.
2. Instructions less than 8 characters in length have a tab
between it and the first operand.
3. Instructions greater than 7 characters in length have a
space between it and the first operand.
4. Tabs after `#define`d names and their value.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant .text section.
8. 1 space between line content and line comment.
9. Space after all commas.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
| -rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S | 2587 |
1 files changed, 1293 insertions, 1294 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S index 9ae36d484b..13235793e8 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S @@ -19,7 +19,7 @@ /* * ALGORITHM DESCRIPTION: * - * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1, 2) * Get short reciprocal approximation Rcp ~ 1/xh * R = (Rcp*xh - 1.0) + Rcp*xl * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) @@ -30,1354 +30,1353 @@ /* Offsets for data table __svml_dlog1p_data_internal */ -#define Log_HA_table 0 -#define Log_LA_table 8224 -#define poly_coeff 12352 -#define ExpMask 12480 -#define Two10 12512 -#define MinLog1p 12544 -#define MaxLog1p 12576 -#define One 12608 -#define SgnMask 12640 -#define XThreshold 12672 -#define XhMask 12704 -#define Threshold 12736 -#define Bias 12768 -#define Bias1 12800 -#define ExpMask0 12832 -#define ExpMask2 12864 -#define L2 12896 +#define Log_HA_table 0 +#define Log_LA_table 8224 +#define poly_coeff 12352 +#define ExpMask 12480 +#define Two10 12512 +#define MinLog1p 12544 +#define MaxLog1p 12576 +#define One 12608 +#define SgnMask 12640 +#define XThreshold 12672 +#define XhMask 12704 +#define Threshold 12736 +#define Bias 12768 +#define Bias1 12800 +#define ExpMask0 12832 +#define ExpMask2 12864 +#define L2 12896 /* Lookup bias for data table __svml_dlog1p_data_internal. 
*/ -#define Table_Lookup_Bias -0x405fe0 +#define Table_Lookup_Bias -0x405fe0 #include <sysdep.h> - .text - .section .text.avx2,"ax",@progbits + .section .text.avx2, "ax", @progbits ENTRY(_ZGVdN4v_log1p_avx2) - pushq %rbp - cfi_def_cfa_offset(16) - movq %rsp, %rbp - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - andq $-32, %rsp - subq $96, %rsp - lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %r8 + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-32, %rsp + subq $96, %rsp + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %r8 -/* SgnMask used by all accuracies */ - vmovupd SgnMask+__svml_dlog1p_data_internal(%rip), %ymm12 - vmovupd One+__svml_dlog1p_data_internal(%rip), %ymm7 + /* SgnMask used by all accuracies */ + vmovupd SgnMask+__svml_dlog1p_data_internal(%rip), %ymm12 + vmovupd One+__svml_dlog1p_data_internal(%rip), %ymm7 -/* 2^ (-10-exp(X) ) */ - vmovupd ExpMask2+__svml_dlog1p_data_internal(%rip), %ymm3 - vmovapd %ymm0, %ymm9 - vandpd %ymm12, %ymm9, %ymm10 - vcmplt_oqpd XThreshold+__svml_dlog1p_data_internal(%rip), %ymm10, %ymm11 - vaddpd %ymm7, %ymm9, %ymm13 + /* 2^ (-10-exp(X) ) */ + vmovupd ExpMask2+__svml_dlog1p_data_internal(%rip), %ymm3 + vmovapd %ymm0, %ymm9 + vandpd %ymm12, %ymm9, %ymm10 + vcmplt_oqpd XThreshold+__svml_dlog1p_data_internal(%rip), %ymm10, %ymm11 + vaddpd %ymm7, %ymm9, %ymm13 -/* compute 1+x as high, low parts */ - vmaxpd %ymm9, %ymm7, %ymm15 - vminpd %ymm9, %ymm7, %ymm6 - vorpd XhMask+__svml_dlog1p_data_internal(%rip), %ymm11, %ymm14 - vandpd %ymm14, %ymm13, %ymm4 + /* compute 1+x as high, low parts */ + vmaxpd %ymm9, %ymm7, %ymm15 + vminpd %ymm9, %ymm7, %ymm6 + vorpd XhMask+__svml_dlog1p_data_internal(%rip), %ymm11, %ymm14 + vandpd %ymm14, %ymm13, %ymm4 -/* preserve mantissa, set input exponent to 2^(-10) */ - vandpd ExpMask+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm5 - vorpd Two10+__svml_dlog1p_data_internal(%rip), %ymm5, %ymm5 + /* preserve mantissa, set input 
exponent to 2^(-10) */ + vandpd ExpMask+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm5 + vorpd Two10+__svml_dlog1p_data_internal(%rip), %ymm5, %ymm5 -/* reciprocal approximation good to at least 11 bits */ - vcvtpd2ps %ymm5, %xmm2 - vsubpd %ymm4, %ymm15, %ymm0 + /* reciprocal approximation good to at least 11 bits */ + vcvtpd2ps %ymm5, %xmm2 + vsubpd %ymm4, %ymm15, %ymm0 -/* check range */ - vcmplt_oqpd MinLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm15 - vrcpps %xmm2, %xmm1 - vaddpd %ymm0, %ymm6, %ymm6 - vcmpnle_uqpd MaxLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm0 - vcvtps2pd %xmm1, %ymm11 + /* check range */ + vcmplt_oqpd MinLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm15 + vrcpps %xmm2, %xmm1 + vaddpd %ymm0, %ymm6, %ymm6 + vcmpnle_uqpd MaxLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm0 + vcvtps2pd %xmm1, %ymm11 -/* exponent of X needed to scale Xl */ - vandps ExpMask0+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm10 - vpsubq %ymm10, %ymm3, %ymm13 + /* exponent of X needed to scale Xl */ + vandps ExpMask0+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm10 + vpsubq %ymm10, %ymm3, %ymm13 -/* exponent bits */ - vpsrlq $20, %ymm4, %ymm4 + /* exponent bits */ + vpsrlq $20, %ymm4, %ymm4 -/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ - vroundpd $0, %ymm11, %ymm3 + /* round reciprocal to nearest integer, will have 1+9 mantissa bits */ + vroundpd $0, %ymm11, %ymm3 -/* scale DblRcp */ - vmulpd %ymm13, %ymm3, %ymm2 + /* scale DblRcp */ + vmulpd %ymm13, %ymm3, %ymm2 -/* exponent*log(2.0) */ - vmovupd Threshold+__svml_dlog1p_data_internal(%rip), %ymm13 - vfmsub213pd %ymm7, %ymm3, %ymm5 + /* exponent*log(2.0) */ + vmovupd Threshold+__svml_dlog1p_data_internal(%rip), %ymm13 + vfmsub213pd %ymm7, %ymm3, %ymm5 -/* Compute SignMask for all accuracies, including EP */ - vandnpd %ymm9, %ymm12, %ymm8 - vorpd %ymm0, %ymm15, %ymm7 + /* Compute SignMask for all accuracies, including EP */ + vandnpd %ymm9, %ymm12, %ymm8 + vorpd %ymm0, 
%ymm15, %ymm7 -/* - * prepare table index - * table lookup - */ - vpsrlq $40, %ymm3, %ymm0 + /* + * prepare table index + * table lookup + */ + vpsrlq $40, %ymm3, %ymm0 -/* - * argument reduction - * VQFMS( D, R, X, DblRcp1, One ); - */ - vfmadd213pd %ymm5, %ymm2, %ymm6 - vmovupd poly_coeff+64+__svml_dlog1p_data_internal(%rip), %ymm2 - vcmplt_oqpd %ymm3, %ymm13, %ymm3 - vmulpd %ymm6, %ymm6, %ymm5 - vfmadd213pd poly_coeff+96+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm2 + /* + * argument reduction + * VQFMS( D, R, X, DblRcp1, One ); + */ + vfmadd213pd %ymm5, %ymm2, %ymm6 + vmovupd poly_coeff+64+__svml_dlog1p_data_internal(%rip), %ymm2 + vcmplt_oqpd %ymm3, %ymm13, %ymm3 + vmulpd %ymm6, %ymm6, %ymm5 + vfmadd213pd poly_coeff+96+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm2 -/* combine and get argument value range mask */ - vmovmskpd %ymm7, %eax - vextractf128 $1, %ymm4, %xmm12 - vshufps $221, %xmm12, %xmm4, %xmm14 + /* combine and get argument value range mask */ + vmovmskpd %ymm7, %eax + vextractf128 $1, %ymm4, %xmm12 + vshufps $221, %xmm12, %xmm4, %xmm14 -/* biased exponent in DP format */ - vcvtdq2pd %xmm14, %ymm1 - vandpd Bias+__svml_dlog1p_data_internal(%rip), %ymm3, %ymm14 - vorpd Bias1+__svml_dlog1p_data_internal(%rip), %ymm14, %ymm15 - vsubpd %ymm15, %ymm1, %ymm1 - vmulpd L2+__svml_dlog1p_data_internal(%rip), %ymm1, %ymm3 + /* biased exponent in DP format */ + vcvtdq2pd %xmm14, %ymm1 + vandpd Bias+__svml_dlog1p_data_internal(%rip), %ymm3, %ymm14 + vorpd Bias1+__svml_dlog1p_data_internal(%rip), %ymm14, %ymm15 + vsubpd %ymm15, %ymm1, %ymm1 + vmulpd L2+__svml_dlog1p_data_internal(%rip), %ymm1, %ymm3 -/* polynomial */ - vmovupd poly_coeff+__svml_dlog1p_data_internal(%rip), %ymm1 - vfmadd213pd poly_coeff+32+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm1 - vfmadd213pd %ymm2, %ymm5, %ymm1 + /* polynomial */ + vmovupd poly_coeff+__svml_dlog1p_data_internal(%rip), %ymm1 + vfmadd213pd poly_coeff+32+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm1 + vfmadd213pd %ymm2, 
%ymm5, %ymm1 -/* reconstruction */ - vfmadd213pd %ymm6, %ymm5, %ymm1 - vextractf128 $1, %ymm0, %xmm10 - vmovd %xmm0, %edx - vmovd %xmm10, %esi - movslq %edx, %rdx - vpextrd $2, %xmm0, %ecx - movslq %esi, %rsi - vpextrd $2, %xmm10, %edi - movslq %ecx, %rcx - movslq %edi, %rdi - vmovsd (%r8,%rdx), %xmm4 - vmovsd (%r8,%rsi), %xmm11 - vmovhpd (%r8,%rcx), %xmm4, %xmm7 - vmovhpd (%r8,%rdi), %xmm11, %xmm12 - vinsertf128 $1, %xmm12, %ymm7, %ymm0 - vaddpd %ymm1, %ymm0, %ymm6 - vaddpd %ymm6, %ymm3, %ymm0 + /* reconstruction */ + vfmadd213pd %ymm6, %ymm5, %ymm1 + vextractf128 $1, %ymm0, %xmm10 + vmovd %xmm0, %edx + vmovd %xmm10, %esi + movslq %edx, %rdx + vpextrd $2, %xmm0, %ecx + movslq %esi, %rsi + vpextrd $2, %xmm10, %edi + movslq %ecx, %rcx + movslq %edi, %rdi + vmovsd (%r8, %rdx), %xmm4 + vmovsd (%r8, %rsi), %xmm11 + vmovhpd (%r8, %rcx), %xmm4, %xmm7 + vmovhpd (%r8, %rdi), %xmm11, %xmm12 + vinsertf128 $1, %xmm12, %ymm7, %ymm0 + vaddpd %ymm1, %ymm0, %ymm6 + vaddpd %ymm6, %ymm3, %ymm0 -/* OR in the Sign of input argument to produce correct log1p(-0) */ - vorpd %ymm8, %ymm0, %ymm0 - testl %eax, %eax + /* OR in the Sign of input argument to produce correct log1p(-0) */ + vorpd %ymm8, %ymm0, %ymm0 + testl %eax, %eax -/* Go to special inputs processing branch */ - jne L(SPECIAL_VALUES_BRANCH) - # LOE rbx r12 r13 r14 r15 eax ymm0 ymm9 + /* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 eax ymm0 ymm9 -/* Restore registers - * and exit the function - */ + /* Restore registers + * and exit the function + */ L(EXIT): - movq %rbp, %rsp - popq %rbp - cfi_def_cfa(7, 8) - cfi_restore(6) - ret - cfi_def_cfa(6, 16) - cfi_offset(6, -16) + movq %rbp, %rsp + popq %rbp + cfi_def_cfa(7, 8) + cfi_restore(6) + ret + cfi_def_cfa(6, 16) + cfi_offset(6, -16) -/* Branch to process - * special inputs - */ + /* Branch to process + * special inputs + */ L(SPECIAL_VALUES_BRANCH): - vmovupd %ymm9, 32(%rsp) - vmovupd %ymm0, 64(%rsp) - # LOE rbx r12 
r13 r14 r15 eax ymm0 + vmovupd %ymm9, 32(%rsp) + vmovupd %ymm0, 64(%rsp) + # LOE rbx r12 r13 r14 r15 eax ymm0 - xorl %edx, %edx - # LOE rbx r12 r13 r14 r15 eax edx + xorl %edx, %edx + # LOE rbx r12 r13 r14 r15 eax edx - vzeroupper - movq %r12, 16(%rsp) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 - movl %edx, %r12d - movq %r13, 8(%rsp) - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 - movl %eax, %r13d - movq %r14, (%rsp) - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r15 r12d r13d + vzeroupper + movq %r12, 16(%rsp) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + movl %edx, %r12d + movq %r13, 8(%rsp) + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + movl %eax, %r13d + movq %r14, (%rsp) + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r15 r12d r13d -/* Range mask - * bits check - */ + /* Range mask + * 
bits check + */ L(RANGEMASK_CHECK): - btl %r12d, %r13d + btl %r12d, %r13d -/* Call scalar math function */ - jc L(SCALAR_MATH_CALL) - # LOE rbx r15 r12d r13d + /* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + # LOE rbx r15 r12d r13d -/* Special inputs - * processing loop - */ + /* Special inputs + * processing loop + */ L(SPECIAL_VALUES_LOOP): - incl %r12d - cmpl $4, %r12d + incl %r12d + cmpl $4, %r12d -/* Check bits in range mask */ - jl L(RANGEMASK_CHECK) - # LOE rbx r15 r12d r13d + /* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx r15 r12d r13d - movq 16(%rsp), %r12 - cfi_restore(12) - movq 8(%rsp), %r13 - cfi_restore(13) - movq (%rsp), %r14 - cfi_restore(14) - vmovupd 64(%rsp), %ymm0 + movq 16(%rsp), %r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + vmovupd 64(%rsp), %ymm0 -/* Go to exit */ - jmp L(EXIT) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r12 r13 r14 r15 ymm0 + /* Go to exit */ + jmp L(EXIT) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; 
DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r12 r13 r14 r15 ymm0 -/* Scalar math fucntion call - * to process special input - */ + /* Scalar math fucntion call + * to process special input + */ L(SCALAR_MATH_CALL): - movl %r12d, %r14d - movsd 32(%rsp,%r14,8), %xmm0 - call log1p@PLT - # LOE rbx r14 r15 r12d r13d xmm0 + movl %r12d, %r14d + movsd 32(%rsp, %r14, 8), %xmm0 + call log1p@PLT + # LOE rbx r14 r15 r12d r13d xmm0 - movsd %xmm0, 64(%rsp,%r14,8) + movsd %xmm0, 64(%rsp, %r14, 8) -/* Process special inputs in loop */ - jmp L(SPECIAL_VALUES_LOOP) - # LOE rbx r15 r12d r13d + /* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx r15 r12d r13d END(_ZGVdN4v_log1p_avx2) - .section .rodata, "a" - .align 32 + .section .rodata, "a" + .align 32 #ifdef __svml_dlog1p_data_internal_typedef typedef unsigned int VUINT32; typedef struct { - __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2]; - __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2]; - __declspec(align(32)) VUINT32 poly_coeff[4][4][2]; - __declspec(align(32)) VUINT32 ExpMask[4][2]; - __declspec(align(32)) VUINT32 Two10[4][2]; - __declspec(align(32)) VUINT32 MinLog1p[4][2]; - __declspec(align(32)) VUINT32 MaxLog1p[4][2]; - __declspec(align(32)) VUINT32 One[4][2]; - __declspec(align(32)) VUINT32 SgnMask[4][2]; - __declspec(align(32)) VUINT32 XThreshold[4][2]; - __declspec(align(32)) VUINT32 XhMask[4][2]; - __declspec(align(32)) VUINT32 Threshold[4][2]; - __declspec(align(32)) VUINT32 Bias[4][2]; - __declspec(align(32)) VUINT32 Bias1[4][2]; - __declspec(align(32)) VUINT32 ExpMask0[4][2]; - 
__declspec(align(32)) VUINT32 ExpMask2[4][2]; - __declspec(align(32)) VUINT32 L2[4][2]; + __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2]; + __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2]; + __declspec(align(32)) VUINT32 poly_coeff[4][4][2]; + __declspec(align(32)) VUINT32 ExpMask[4][2]; + __declspec(align(32)) VUINT32 Two10[4][2]; + __declspec(align(32)) VUINT32 MinLog1p[4][2]; + __declspec(align(32)) VUINT32 MaxLog1p[4][2]; + __declspec(align(32)) VUINT32 One[4][2]; + __declspec(align(32)) VUINT32 SgnMask[4][2]; + __declspec(align(32)) VUINT32 XThreshold[4][2]; + __declspec(align(32)) VUINT32 XhMask[4][2]; + __declspec(align(32)) VUINT32 Threshold[4][2]; + __declspec(align(32)) VUINT32 Bias[4][2]; + __declspec(align(32)) VUINT32 Bias1[4][2]; + __declspec(align(32)) VUINT32 ExpMask0[4][2]; + __declspec(align(32)) VUINT32 ExpMask2[4][2]; + __declspec(align(32)) VUINT32 L2[4][2]; } __svml_dlog1p_data_internal; #endif __svml_dlog1p_data_internal: - /* Log_HA_table */ - .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 - .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a - .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff - .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a - .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb - .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e - .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b - .quad 0xc0862347acebaf68, 0xbe1cef3b152048af - .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e - .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 - .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 - .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 - .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 - .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 - .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 - .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b - .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed - .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed - .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f - .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce - .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 - .quad 
0xc086237e302d9028, 0xbe1cf0412830fbd1 - .quad 0xc086238206e94218, 0xbe1ceee898588610 - .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea - .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 - .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 - .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 - .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 - .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b - .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 - .quad 0xc08623a07b28ae60, 0xbe1cef359363787c - .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c - .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84 - .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 - .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b - .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf - .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 - .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b - .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 - .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87 - .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff - .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 - .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e - .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde - .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b - .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c - .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 - .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f - .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 - .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 - .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 - .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b - .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 - .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 - .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 - .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 - .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a - .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d - .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b - .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 - .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 - .quad 0xc086241263e87f50, 0xbe1cf16e74768529 - .quad 0xc0862415f6193658, 0xbe1cefec64b8becb - .quad 0xc086241986b28f30, 
0xbe1cf0838d210baa - .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 - .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 - .quad 0xc08624242f008380, 0xbe1ceea988c5a417 - .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 - .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 - .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 - .quad 0xc08624324ecbaf98, 0xbe1cefea60907739 - .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 - .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d - .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e - .quad 0xc086244055d2c968, 0xbe1cef345284c119 - .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 - .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 - .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 - .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f - .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f - .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 - .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 - .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d - .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb - .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 - .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f - .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 - .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 - .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc - .quad 0xc086247419475160, 0xbe1cf03dd9922331 - .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 - .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 - .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100 - .quad 0xc0862481af27c528, 0xbe1cee8a6593278a - .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 - .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8 - .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 - .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 - .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c - .quad 0xc0862495e5179270, 0xbe1cee757f20c326 - .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 - .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 - .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb - .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e - .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b - .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 - 
.quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 - .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 - .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 - .quad 0xc08624b72472a528, 0xbe1cf031c931c11f - .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 - .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d - .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 - .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 - .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 - .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe - .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f - .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968 - .quad 0xc08624d49c4a4b78, 0xbe1 |
