diff options
| author | Sunil K Pandey <skpgkp2@gmail.com> | 2022-03-07 10:47:14 -0800 |
|---|---|---|
| committer | Sunil K Pandey <skpgkp2@gmail.com> | 2022-03-07 21:44:08 -0800 |
| commit | 1025469bf64c1572524343531dfa553a9d0a9cc2 (patch) | |
| tree | 51ee400792fa1c13ebd53e0b0501a201107959af | |
| parent | b44b6f4288a772816d8d92785e9106086d593172 (diff) | |
| download | glibc-1025469bf64c1572524343531dfa553a9d0a9cc2.tar.xz glibc-1025469bf64c1572524343531dfa553a9d0a9cc2.zip | |
x86_64: Fix svml_d_log1p2_core_sse4.S code formatting
This commit contains the following formatting changes:
1. Instructions preceded by a tab.
2. Instructions less than 8 characters in length have a tab
between them and the first operand.
3. Instructions greater than 7 characters in length have a
space between them and the first operand.
4. Tabs after `#define`d names and their value.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant .text section.
8. 1 space between line content and line comment.
9. Space after all commas.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
| -rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S | 2615 |
1 files changed, 1307 insertions, 1308 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S index 04377f5593..eb0d063762 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p2_core_sse4.S @@ -19,7 +19,7 @@ /* * ALGORITHM DESCRIPTION: * - * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2) + * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1, 2) * Get short reciprocal approximation Rcp ~ 1/xh * R = (Rcp*xh - 1.0) + Rcp*xl * log1p(x) = k*log(2.0) - log(Rcp) + poly(R) @@ -30,1369 +30,1368 @@ /* Offsets for data table __svml_dlog1p_data_internal */ -#define Log_HA_table 0 -#define Log_LA_table 8208 -#define poly_coeff 12320 -#define ExpMask 12384 -#define Two10 12400 -#define MinLog1p 12416 -#define MaxLog1p 12432 -#define One 12448 -#define SgnMask 12464 -#define XThreshold 12480 -#define XhMask 12496 -#define Threshold 12512 -#define Bias 12528 -#define Bias1 12544 -#define ExpMask0 12560 -#define ExpMask2 12576 -#define L2 12592 +#define Log_HA_table 0 +#define Log_LA_table 8208 +#define poly_coeff 12320 +#define ExpMask 12384 +#define Two10 12400 +#define MinLog1p 12416 +#define MaxLog1p 12432 +#define One 12448 +#define SgnMask 12464 +#define XThreshold 12480 +#define XhMask 12496 +#define Threshold 12512 +#define Bias 12528 +#define Bias1 12544 +#define ExpMask0 12560 +#define ExpMask2 12576 +#define L2 12592 /* Lookup bias for data table __svml_dlog1p_data_internal. 
*/ -#define Table_Lookup_Bias -0x405ff0 +#define Table_Lookup_Bias -0x405ff0 #include <sysdep.h> - .text - .section .text.sse4,"ax",@progbits + .section .text.sse4, "ax", @progbits ENTRY(_ZGVbN2v_log1p_sse4) - pushq %rbp - cfi_def_cfa_offset(16) - movq %rsp, %rbp - cfi_def_cfa(6, 16) - cfi_offset(6, -16) - andq $-32, %rsp - subq $64, %rsp - movaps %xmm0, %xmm7 + pushq %rbp + cfi_def_cfa_offset(16) + movq %rsp, %rbp + cfi_def_cfa(6, 16) + cfi_offset(6, -16) + andq $-32, %rsp + subq $64, %rsp + movaps %xmm0, %xmm7 -/* SgnMask used by all accuracies */ - movups SgnMask+__svml_dlog1p_data_internal(%rip), %xmm6 - lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %rsi - movaps %xmm6, %xmm8 - movaps %xmm7, %xmm15 - movups One+__svml_dlog1p_data_internal(%rip), %xmm0 - andps %xmm7, %xmm8 - cmpltpd XThreshold+__svml_dlog1p_data_internal(%rip), %xmm8 - cmpnlepd MaxLog1p+__svml_dlog1p_data_internal(%rip), %xmm15 - movaps %xmm0, %xmm4 + /* SgnMask used by all accuracies */ + movups SgnMask+__svml_dlog1p_data_internal(%rip), %xmm6 + lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %rsi + movaps %xmm6, %xmm8 + movaps %xmm7, %xmm15 + movups One+__svml_dlog1p_data_internal(%rip), %xmm0 + andps %xmm7, %xmm8 + cmpltpd XThreshold+__svml_dlog1p_data_internal(%rip), %xmm8 + cmpnlepd MaxLog1p+__svml_dlog1p_data_internal(%rip), %xmm15 + movaps %xmm0, %xmm4 -/* compute 1+x as high, low parts */ - movaps %xmm0, %xmm9 - addpd %xmm7, %xmm4 - maxpd %xmm7, %xmm9 - orps XhMask+__svml_dlog1p_data_internal(%rip), %xmm8 - movaps %xmm0, %xmm5 + /* compute 1+x as high, low parts */ + movaps %xmm0, %xmm9 + addpd %xmm7, %xmm4 + maxpd %xmm7, %xmm9 + orps XhMask+__svml_dlog1p_data_internal(%rip), %xmm8 + movaps %xmm0, %xmm5 -/* preserve mantissa, set input exponent to 2^(-10) */ - movups ExpMask+__svml_dlog1p_data_internal(%rip), %xmm3 - andps %xmm8, %xmm4 - andps %xmm4, %xmm3 + /* preserve mantissa, set input exponent to 2^(-10) */ + movups ExpMask+__svml_dlog1p_data_internal(%rip), 
%xmm3 + andps %xmm8, %xmm4 + andps %xmm4, %xmm3 -/* check range */ - movaps %xmm7, %xmm8 - orps Two10+__svml_dlog1p_data_internal(%rip), %xmm3 + /* check range */ + movaps %xmm7, %xmm8 + orps Two10+__svml_dlog1p_data_internal(%rip), %xmm3 -/* Compute SignMask for all accuracies, including EP */ - andnps %xmm7, %xmm6 + /* Compute SignMask for all accuracies, including EP */ + andnps %xmm7, %xmm6 -/* reciprocal approximation good to at least 11 bits */ - cvtpd2ps %xmm3, %xmm10 - minpd %xmm7, %xmm5 - subpd %xmm4, %xmm9 - cmpltpd MinLog1p+__svml_dlog1p_data_internal(%rip), %xmm8 - addpd %xmm9, %xmm5 - movlhps %xmm10, %xmm10 - orps %xmm15, %xmm8 - rcpps %xmm10, %xmm11 + /* reciprocal approximation good to at least 11 bits */ + cvtpd2ps %xmm3, %xmm10 + minpd %xmm7, %xmm5 + subpd %xmm4, %xmm9 + cmpltpd MinLog1p+__svml_dlog1p_data_internal(%rip), %xmm8 + addpd %xmm9, %xmm5 + movlhps %xmm10, %xmm10 + orps %xmm15, %xmm8 + rcpps %xmm10, %xmm11 -/* combine and get argument value range mask */ - movmskpd %xmm8, %edx + /* combine and get argument value range mask */ + movmskpd %xmm8, %edx -/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ - movups .FLT_16(%rip), %xmm13 + /* round reciprocal to nearest integer, will have 1+9 mantissa bits */ + movups .FLT_16(%rip), %xmm13 -/* exponent of X needed to scale Xl */ - movdqu ExpMask0+__svml_dlog1p_data_internal(%rip), %xmm12 - cvtps2pd %xmm11, %xmm1 - addpd %xmm13, %xmm1 - subpd %xmm13, %xmm1 + /* exponent of X needed to scale Xl */ + movdqu ExpMask0+__svml_dlog1p_data_internal(%rip), %xmm12 + cvtps2pd %xmm11, %xmm1 + addpd %xmm13, %xmm1 + subpd %xmm13, %xmm1 -/* 2^ (-10-exp(X) ) */ - movdqu ExpMask2+__svml_dlog1p_data_internal(%rip), %xmm2 - pand %xmm4, %xmm12 - psubq %xmm12, %xmm2 - mulpd %xmm1, %xmm3 + /* 2^ (-10-exp(X) ) */ + movdqu ExpMask2+__svml_dlog1p_data_internal(%rip), %xmm2 + pand %xmm4, %xmm12 + psubq %xmm12, %xmm2 + mulpd %xmm1, %xmm3 -/* scale DblRcp */ - mulpd %xmm1, %xmm2 - subpd %xmm0, %xmm3 + /* 
scale DblRcp */ + mulpd %xmm1, %xmm2 + subpd %xmm0, %xmm3 -/* - * argument reduction - * VQFMS( D, R, X, DblRcp1, One ); - */ - mulpd %xmm2, %xmm5 - addpd %xmm5, %xmm3 + /* + * argument reduction + * VQFMS( D, R, X, DblRcp1, One ); + */ + mulpd %xmm2, %xmm5 + addpd %xmm5, %xmm3 -/* exponent*log(2.0) */ - movups Threshold+__svml_dlog1p_data_internal(%rip), %xmm10 + /* exponent*log(2.0) */ + movups Threshold+__svml_dlog1p_data_internal(%rip), %xmm10 -/* exponent bits */ - psrlq $20, %xmm4 - pshufd $221, %xmm4, %xmm14 + /* exponent bits */ + psrlq $20, %xmm4 + pshufd $221, %xmm4, %xmm14 -/* - * prepare table index - * table lookup - */ - movaps %xmm1, %xmm4 - cmpltpd %xmm1, %xmm10 + /* + * prepare table index + * table lookup + */ + movaps %xmm1, %xmm4 + cmpltpd %xmm1, %xmm10 -/* biased exponent in DP format */ - cvtdq2pd %xmm14, %xmm0 + /* biased exponent in DP format */ + cvtdq2pd %xmm14, %xmm0 -/* polynomial */ - movups poly_coeff+__svml_dlog1p_data_internal(%rip), %xmm1 - movaps %xmm3, %xmm5 - mulpd %xmm3, %xmm1 - mulpd %xmm3, %xmm5 - addpd poly_coeff+16+__svml_dlog1p_data_internal(%rip), %xmm1 - movups poly_coeff+32+__svml_dlog1p_data_internal(%rip), %xmm2 - psrlq $40, %xmm4 - mulpd %xmm3, %xmm2 - mulpd %xmm5, %xmm1 - addpd poly_coeff+48+__svml_dlog1p_data_internal(%rip), %xmm2 - movd %xmm4, %eax - andps Bias+__svml_dlog1p_data_internal(%rip), %xmm10 - addpd %xmm1, %xmm2 + /* polynomial */ + movups poly_coeff+__svml_dlog1p_data_internal(%rip), %xmm1 + movaps %xmm3, %xmm5 + mulpd %xmm3, %xmm1 + mulpd %xmm3, %xmm5 + addpd poly_coeff+16+__svml_dlog1p_data_internal(%rip), %xmm1 + movups poly_coeff+32+__svml_dlog1p_data_internal(%rip), %xmm2 + psrlq $40, %xmm4 + mulpd %xmm3, %xmm2 + mulpd %xmm5, %xmm1 + addpd poly_coeff+48+__svml_dlog1p_data_internal(%rip), %xmm2 + movd %xmm4, %eax + andps Bias+__svml_dlog1p_data_internal(%rip), %xmm10 + addpd %xmm1, %xmm2 -/* reconstruction */ - mulpd %xmm2, %xmm5 - orps Bias1+__svml_dlog1p_data_internal(%rip), %xmm10 - pshufd $2, 
%xmm4, %xmm9 - subpd %xmm10, %xmm0 - addpd %xmm5, %xmm3 - movd %xmm9, %ecx - mulpd L2+__svml_dlog1p_data_internal(%rip), %xmm0 - movslq %eax, %rax - movslq %ecx, %rcx - movsd (%rsi,%rax), %xmm11 - movhpd (%rsi,%rcx), %xmm11 - addpd %xmm3, %xmm11 - addpd %xmm11, %xmm0 + /* reconstruction */ + mulpd %xmm2, %xmm5 + orps Bias1+__svml_dlog1p_data_internal(%rip), %xmm10 + pshufd $2, %xmm4, %xmm9 + subpd %xmm10, %xmm0 + addpd %xmm5, %xmm3 + movd %xmm9, %ecx + mulpd L2+__svml_dlog1p_data_internal(%rip), %xmm0 + movslq %eax, %rax + movslq %ecx, %rcx + movsd (%rsi, %rax), %xmm11 + movhpd (%rsi, %rcx), %xmm11 + addpd %xmm3, %xmm11 + addpd %xmm11, %xmm0 -/* OR in the Sign of input argument to produce correct log1p(-0) */ - orps %xmm6, %xmm0 - testl %edx, %edx + /* OR in the Sign of input argument to produce correct log1p(-0) */ + orps %xmm6, %xmm0 + testl %edx, %edx -/* Go to special inputs processing branch */ - jne L(SPECIAL_VALUES_BRANCH) - # LOE rbx r12 r13 r14 r15 edx xmm0 xmm7 + /* Go to special inputs processing branch */ + jne L(SPECIAL_VALUES_BRANCH) + # LOE rbx r12 r13 r14 r15 edx xmm0 xmm7 -/* Restore registers - * and exit the function - */ + /* Restore registers + * and exit the function + */ L(EXIT): - movq %rbp, %rsp - popq %rbp - cfi_def_cfa(7, 8) - cfi_restore(6) - ret - cfi_def_cfa(6, 16) - cfi_offset(6, -16) + movq %rbp, %rsp + popq %rbp + cfi_def_cfa(7, 8) + cfi_restore(6) + ret + cfi_def_cfa(6, 16) + cfi_offset(6, -16) -/* Branch to process - * special inputs - */ + /* Branch to process + * special inputs + */ L(SPECIAL_VALUES_BRANCH): - movups %xmm7, 32(%rsp) - movups %xmm0, 48(%rsp) - # LOE rbx r12 r13 r14 r15 edx + movups %xmm7, 32(%rsp) + movups %xmm0, 48(%rsp) + # LOE rbx r12 r13 r14 r15 edx - xorl %eax, %eax - movq %r12, 16(%rsp) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 
0xff, 0xff, 0x22 - movl %eax, %r12d - movq %r13, 8(%rsp) - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 - movl %edx, %r13d - movq %r14, (%rsp) - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r15 r12d r13d + xorl %eax, %eax + movq %r12, 16(%rsp) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 + movl %eax, %r12d + movq %r13, 8(%rsp) + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 + movl %edx, %r13d + movq %r14, (%rsp) + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r15 r12d r13d -/* Range mask - * bits check - */ + /* Range mask + * bits check + */ L(RANGEMASK_CHECK): - btl %r12d, %r13d + btl %r12d, %r13d -/* Call scalar math function */ - jc L(SCALAR_MATH_CALL) - # LOE rbx r15 r12d r13d + /* Call scalar math function */ + jc L(SCALAR_MATH_CALL) + # LOE rbx r15 r12d r13d -/* Special inputs - * processing loop - */ + /* Special inputs + * processing loop + */ L(SPECIAL_VALUES_LOOP): - incl %r12d - cmpl $2, %r12d + incl %r12d + cmpl $2, %r12d -/* Check bits in range mask */ - jl L(RANGEMASK_CHECK) 
- # LOE rbx r15 r12d r13d + /* Check bits in range mask */ + jl L(RANGEMASK_CHECK) + # LOE rbx r15 r12d r13d - movq 16(%rsp), %r12 - cfi_restore(12) - movq 8(%rsp), %r13 - cfi_restore(13) - movq (%rsp), %r14 - cfi_restore(14) - movups 48(%rsp), %xmm0 + movq 16(%rsp), %r12 + cfi_restore(12) + movq 8(%rsp), %r13 + cfi_restore(13) + movq (%rsp), %r14 + cfi_restore(14) + movups 48(%rsp), %xmm0 -/* Go to exit */ - jmp L(EXIT) - /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ - .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ - .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 - /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ - .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 - # LOE rbx r12 r13 r14 r15 xmm0 + /* Go to exit */ + jmp L(EXIT) + /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -48; DW_OP_plus) */ + .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -56; DW_OP_plus) */ + .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xff, 0xff, 0xff, 0x22 + /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -64; DW_OP_plus) */ + .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x22 + # LOE rbx r12 r13 r14 r15 xmm0 -/* Scalar math fucntion call 
- * to process special input - */ + /* Scalar math fucntion call + * to process special input + */ L(SCALAR_MATH_CALL): - movl %r12d, %r14d - movsd 32(%rsp,%r14,8), %xmm0 - call log1p@PLT - # LOE rbx r14 r15 r12d r13d xmm0 + movl %r12d, %r14d + movsd 32(%rsp, %r14, 8), %xmm0 + call log1p@PLT + # LOE rbx r14 r15 r12d r13d xmm0 - movsd %xmm0, 48(%rsp,%r14,8) + movsd %xmm0, 48(%rsp, %r14, 8) -/* Process special inputs in loop */ - jmp L(SPECIAL_VALUES_LOOP) - # LOE rbx r15 r12d r13d + /* Process special inputs in loop */ + jmp L(SPECIAL_VALUES_LOOP) + # LOE rbx r15 r12d r13d END(_ZGVbN2v_log1p_sse4) - .section .rodata, "a" - .align 16 + .section .rodata, "a" + .align 16 #ifdef __svml_dlog1p_data_internal_typedef typedef unsigned int VUINT32; typedef struct { - __declspec(align(16)) VUINT32 Log_HA_table[(1<<10)+2][2]; - __declspec(align(16)) VUINT32 Log_LA_table[(1<<9)+1][2]; - __declspec(align(16)) VUINT32 poly_coeff[4][2][2]; - __declspec(align(16)) VUINT32 ExpMask[2][2]; - __declspec(align(16)) VUINT32 Two10[2][2]; - __declspec(align(16)) VUINT32 MinLog1p[2][2]; - __declspec(align(16)) VUINT32 MaxLog1p[2][2]; - __declspec(align(16)) VUINT32 One[2][2]; - __declspec(align(16)) VUINT32 SgnMask[2][2]; - __declspec(align(16)) VUINT32 XThreshold[2][2]; - __declspec(align(16)) VUINT32 XhMask[2][2]; - __declspec(align(16)) VUINT32 Threshold[2][2]; - __declspec(align(16)) VUINT32 Bias[2][2]; - __declspec(align(16)) VUINT32 Bias1[2][2]; - __declspec(align(16)) VUINT32 ExpMask0[2][2]; - __declspec(align(16)) VUINT32 ExpMask2[2][2]; - __declspec(align(16)) VUINT32 L2[2][2]; + __declspec(align(16)) VUINT32 Log_HA_table[(1<<10)+2][2]; + __declspec(align(16)) VUINT32 Log_LA_table[(1<<9)+1][2]; + __declspec(align(16)) VUINT32 poly_coeff[4][2][2]; + __declspec(align(16)) VUINT32 ExpMask[2][2]; + __declspec(align(16)) VUINT32 Two10[2][2]; + __declspec(align(16)) VUINT32 MinLog1p[2][2]; + __declspec(align(16)) VUINT32 MaxLog1p[2][2]; + __declspec(align(16)) VUINT32 One[2][2]; + 
__declspec(align(16)) VUINT32 SgnMask[2][2]; + __declspec(align(16)) VUINT32 XThreshold[2][2]; + __declspec(align(16)) VUINT32 XhMask[2][2]; + __declspec(align(16)) VUINT32 Threshold[2][2]; + __declspec(align(16)) VUINT32 Bias[2][2]; + __declspec(align(16)) VUINT32 Bias1[2][2]; + __declspec(align(16)) VUINT32 ExpMask0[2][2]; + __declspec(align(16)) VUINT32 ExpMask2[2][2]; + __declspec(align(16)) VUINT32 L2[2][2]; } __svml_dlog1p_data_internal; #endif __svml_dlog1p_data_internal: - /* Log_HA_table */ - .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100 - .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a - .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff - .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a - .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb - .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e - .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b - .quad 0xc0862347acebaf68, 0xbe1cef3b152048af - .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e - .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4 - .quad 0xc08623537ac30980, 0xbe1cefc4642ee597 - .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16 - .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6 - .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362 - .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557 - .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b - .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed - .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed - .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f - .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce - .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7 - .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1 - .quad 0xc086238206e94218, 0xbe1ceee898588610 - .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea - .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6 - .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6 - .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165 - .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1 - .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b - .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670 - .quad 0xc08623a07b28ae60, 0xbe1cef359363787c - .quad 0xc08623a44196b390, 
0xbe1cefdf1ab2e82c - .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84 - .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7 - .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b - .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf - .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62 - .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b - .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98 - .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87 - .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff - .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798 - .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e - .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde - .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b - .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c - .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98 - .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f - .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358 - .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380 - .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4 - .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b - .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2 - .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4 - .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400 - .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7 - .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a - .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d - .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b - .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575 - .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951 - .quad 0xc086241263e87f50, 0xbe1cf16e74768529 - .quad 0xc0862415f6193658, 0xbe1cefec64b8becb - .quad 0xc086241986b28f30, 0xbe1cf0838d210baa - .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11 - .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805 - .quad 0xc08624242f008380, 0xbe1ceea988c5a417 - .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5 - .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38 - .quad 0xc086242ec92eaee8, 0xbe1cef0946455411 - .quad 0xc08624324ecbaf98, 0xbe1cefea60907739 - .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42 - .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d - .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e - 
.quad 0xc086244055d2c968, 0xbe1cef345284c119 - .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219 - .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114 - .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189 - .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f - .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f - .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5 - .quad 0xc0862458a789e250, 0xbe1cf0b173796a31 - .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d - .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb - .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7 - .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f - .quad 0xc0862469d9a591c0, 0xbe1cef503d947663 - .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2 - .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc - .quad 0xc086247419475160, 0xbe1cf03dd9922331 - .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129 - .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6 - .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100 - .quad 0xc0862481af27c528, 0xbe1cee8a6593278a - .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7 - .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8 - .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002 - .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4 - .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c - .quad 0xc0862495e5179270, 0xbe1cee757f20c326 - .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4 - .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97 - .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb - .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e - .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b - .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80 - .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71 - .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9 - .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139 - .quad 0xc08624b72472a528, 0xbe1cf031c931c11f - .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7 - .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d - .quad 0xc08624c103245238, 0xbe1cefd492f1ba75 - .quad 0xc08624c44aacab08, 0xbe1cf1253e154466 - .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55 - .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe - .quad 0xc08624ce193c8120, 
0xbe1ceeca0809697f - .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d |
