aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSunil K Pandey <skpgkp2@gmail.com>2022-03-07 10:47:14 -0800
committerSunil K Pandey <skpgkp2@gmail.com>2022-03-07 21:44:08 -0800
commitf2469622f550e477901df9735e04896db9eb4b8c (patch)
treec8272deae204e20f7e7f89955e9952c5f07d2387
parent1025469bf64c1572524343531dfa553a9d0a9cc2 (diff)
downloadglibc-f2469622f550e477901df9735e04896db9eb4b8c.tar.xz
glibc-f2469622f550e477901df9735e04896db9eb4b8c.zip
x86_64: Fix svml_d_log1p4_core_avx2.S code formatting
This commit contains the following formatting changes: 1. Instructions preceded by a tab. 2. Instructions less than 8 characters in length have a tab between the mnemonic and the first operand. 3. Instructions greater than 7 characters in length have a space between the mnemonic and the first operand. 4. Tabs after `#define`d names and their values. 5. 8 spaces at the beginning of a line replaced by a tab. 6. Indent comments with code. 7. Remove redundant .text section. 8. 1 space between line content and line comment. 9. Space after all commas. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S2587
1 files changed, 1293 insertions, 1294 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S
index 9ae36d484b..13235793e8 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log1p4_core_avx2.S
@@ -19,7 +19,7 @@
/*
* ALGORITHM DESCRIPTION:
*
- * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1,2)
+ * 1+x = 2^k*(xh + xl) is computed in high-low parts; xh in [1, 2)
* Get short reciprocal approximation Rcp ~ 1/xh
* R = (Rcp*xh - 1.0) + Rcp*xl
* log1p(x) = k*log(2.0) - log(Rcp) + poly(R)
@@ -30,1354 +30,1353 @@
/* Offsets for data table __svml_dlog1p_data_internal
*/
-#define Log_HA_table 0
-#define Log_LA_table 8224
-#define poly_coeff 12352
-#define ExpMask 12480
-#define Two10 12512
-#define MinLog1p 12544
-#define MaxLog1p 12576
-#define One 12608
-#define SgnMask 12640
-#define XThreshold 12672
-#define XhMask 12704
-#define Threshold 12736
-#define Bias 12768
-#define Bias1 12800
-#define ExpMask0 12832
-#define ExpMask2 12864
-#define L2 12896
+#define Log_HA_table 0
+#define Log_LA_table 8224
+#define poly_coeff 12352
+#define ExpMask 12480
+#define Two10 12512
+#define MinLog1p 12544
+#define MaxLog1p 12576
+#define One 12608
+#define SgnMask 12640
+#define XThreshold 12672
+#define XhMask 12704
+#define Threshold 12736
+#define Bias 12768
+#define Bias1 12800
+#define ExpMask0 12832
+#define ExpMask2 12864
+#define L2 12896
/* Lookup bias for data table __svml_dlog1p_data_internal. */
-#define Table_Lookup_Bias -0x405fe0
+#define Table_Lookup_Bias -0x405fe0
#include <sysdep.h>
- .text
- .section .text.avx2,"ax",@progbits
+ .section .text.avx2, "ax", @progbits
ENTRY(_ZGVdN4v_log1p_avx2)
- pushq %rbp
- cfi_def_cfa_offset(16)
- movq %rsp, %rbp
- cfi_def_cfa(6, 16)
- cfi_offset(6, -16)
- andq $-32, %rsp
- subq $96, %rsp
- lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %r8
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-32, %rsp
+ subq $96, %rsp
+ lea Table_Lookup_Bias+__svml_dlog1p_data_internal(%rip), %r8
-/* SgnMask used by all accuracies */
- vmovupd SgnMask+__svml_dlog1p_data_internal(%rip), %ymm12
- vmovupd One+__svml_dlog1p_data_internal(%rip), %ymm7
+ /* SgnMask used by all accuracies */
+ vmovupd SgnMask+__svml_dlog1p_data_internal(%rip), %ymm12
+ vmovupd One+__svml_dlog1p_data_internal(%rip), %ymm7
-/* 2^ (-10-exp(X) ) */
- vmovupd ExpMask2+__svml_dlog1p_data_internal(%rip), %ymm3
- vmovapd %ymm0, %ymm9
- vandpd %ymm12, %ymm9, %ymm10
- vcmplt_oqpd XThreshold+__svml_dlog1p_data_internal(%rip), %ymm10, %ymm11
- vaddpd %ymm7, %ymm9, %ymm13
+ /* 2^ (-10-exp(X) ) */
+ vmovupd ExpMask2+__svml_dlog1p_data_internal(%rip), %ymm3
+ vmovapd %ymm0, %ymm9
+ vandpd %ymm12, %ymm9, %ymm10
+ vcmplt_oqpd XThreshold+__svml_dlog1p_data_internal(%rip), %ymm10, %ymm11
+ vaddpd %ymm7, %ymm9, %ymm13
-/* compute 1+x as high, low parts */
- vmaxpd %ymm9, %ymm7, %ymm15
- vminpd %ymm9, %ymm7, %ymm6
- vorpd XhMask+__svml_dlog1p_data_internal(%rip), %ymm11, %ymm14
- vandpd %ymm14, %ymm13, %ymm4
+ /* compute 1+x as high, low parts */
+ vmaxpd %ymm9, %ymm7, %ymm15
+ vminpd %ymm9, %ymm7, %ymm6
+ vorpd XhMask+__svml_dlog1p_data_internal(%rip), %ymm11, %ymm14
+ vandpd %ymm14, %ymm13, %ymm4
-/* preserve mantissa, set input exponent to 2^(-10) */
- vandpd ExpMask+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm5
- vorpd Two10+__svml_dlog1p_data_internal(%rip), %ymm5, %ymm5
+ /* preserve mantissa, set input exponent to 2^(-10) */
+ vandpd ExpMask+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm5
+ vorpd Two10+__svml_dlog1p_data_internal(%rip), %ymm5, %ymm5
-/* reciprocal approximation good to at least 11 bits */
- vcvtpd2ps %ymm5, %xmm2
- vsubpd %ymm4, %ymm15, %ymm0
+ /* reciprocal approximation good to at least 11 bits */
+ vcvtpd2ps %ymm5, %xmm2
+ vsubpd %ymm4, %ymm15, %ymm0
-/* check range */
- vcmplt_oqpd MinLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm15
- vrcpps %xmm2, %xmm1
- vaddpd %ymm0, %ymm6, %ymm6
- vcmpnle_uqpd MaxLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm0
- vcvtps2pd %xmm1, %ymm11
+ /* check range */
+ vcmplt_oqpd MinLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm15
+ vrcpps %xmm2, %xmm1
+ vaddpd %ymm0, %ymm6, %ymm6
+ vcmpnle_uqpd MaxLog1p+__svml_dlog1p_data_internal(%rip), %ymm9, %ymm0
+ vcvtps2pd %xmm1, %ymm11
-/* exponent of X needed to scale Xl */
- vandps ExpMask0+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm10
- vpsubq %ymm10, %ymm3, %ymm13
+ /* exponent of X needed to scale Xl */
+ vandps ExpMask0+__svml_dlog1p_data_internal(%rip), %ymm4, %ymm10
+ vpsubq %ymm10, %ymm3, %ymm13
-/* exponent bits */
- vpsrlq $20, %ymm4, %ymm4
+ /* exponent bits */
+ vpsrlq $20, %ymm4, %ymm4
-/* round reciprocal to nearest integer, will have 1+9 mantissa bits */
- vroundpd $0, %ymm11, %ymm3
+ /* round reciprocal to nearest integer, will have 1+9 mantissa bits */
+ vroundpd $0, %ymm11, %ymm3
-/* scale DblRcp */
- vmulpd %ymm13, %ymm3, %ymm2
+ /* scale DblRcp */
+ vmulpd %ymm13, %ymm3, %ymm2
-/* exponent*log(2.0) */
- vmovupd Threshold+__svml_dlog1p_data_internal(%rip), %ymm13
- vfmsub213pd %ymm7, %ymm3, %ymm5
+ /* exponent*log(2.0) */
+ vmovupd Threshold+__svml_dlog1p_data_internal(%rip), %ymm13
+ vfmsub213pd %ymm7, %ymm3, %ymm5
-/* Compute SignMask for all accuracies, including EP */
- vandnpd %ymm9, %ymm12, %ymm8
- vorpd %ymm0, %ymm15, %ymm7
+ /* Compute SignMask for all accuracies, including EP */
+ vandnpd %ymm9, %ymm12, %ymm8
+ vorpd %ymm0, %ymm15, %ymm7
-/*
- * prepare table index
- * table lookup
- */
- vpsrlq $40, %ymm3, %ymm0
+ /*
+ * prepare table index
+ * table lookup
+ */
+ vpsrlq $40, %ymm3, %ymm0
-/*
- * argument reduction
- * VQFMS( D, R, X, DblRcp1, One );
- */
- vfmadd213pd %ymm5, %ymm2, %ymm6
- vmovupd poly_coeff+64+__svml_dlog1p_data_internal(%rip), %ymm2
- vcmplt_oqpd %ymm3, %ymm13, %ymm3
- vmulpd %ymm6, %ymm6, %ymm5
- vfmadd213pd poly_coeff+96+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm2
+ /*
+ * argument reduction
+ * VQFMS( D, R, X, DblRcp1, One );
+ */
+ vfmadd213pd %ymm5, %ymm2, %ymm6
+ vmovupd poly_coeff+64+__svml_dlog1p_data_internal(%rip), %ymm2
+ vcmplt_oqpd %ymm3, %ymm13, %ymm3
+ vmulpd %ymm6, %ymm6, %ymm5
+ vfmadd213pd poly_coeff+96+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm2
-/* combine and get argument value range mask */
- vmovmskpd %ymm7, %eax
- vextractf128 $1, %ymm4, %xmm12
- vshufps $221, %xmm12, %xmm4, %xmm14
+ /* combine and get argument value range mask */
+ vmovmskpd %ymm7, %eax
+ vextractf128 $1, %ymm4, %xmm12
+ vshufps $221, %xmm12, %xmm4, %xmm14
-/* biased exponent in DP format */
- vcvtdq2pd %xmm14, %ymm1
- vandpd Bias+__svml_dlog1p_data_internal(%rip), %ymm3, %ymm14
- vorpd Bias1+__svml_dlog1p_data_internal(%rip), %ymm14, %ymm15
- vsubpd %ymm15, %ymm1, %ymm1
- vmulpd L2+__svml_dlog1p_data_internal(%rip), %ymm1, %ymm3
+ /* biased exponent in DP format */
+ vcvtdq2pd %xmm14, %ymm1
+ vandpd Bias+__svml_dlog1p_data_internal(%rip), %ymm3, %ymm14
+ vorpd Bias1+__svml_dlog1p_data_internal(%rip), %ymm14, %ymm15
+ vsubpd %ymm15, %ymm1, %ymm1
+ vmulpd L2+__svml_dlog1p_data_internal(%rip), %ymm1, %ymm3
-/* polynomial */
- vmovupd poly_coeff+__svml_dlog1p_data_internal(%rip), %ymm1
- vfmadd213pd poly_coeff+32+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm1
- vfmadd213pd %ymm2, %ymm5, %ymm1
+ /* polynomial */
+ vmovupd poly_coeff+__svml_dlog1p_data_internal(%rip), %ymm1
+ vfmadd213pd poly_coeff+32+__svml_dlog1p_data_internal(%rip), %ymm6, %ymm1
+ vfmadd213pd %ymm2, %ymm5, %ymm1
-/* reconstruction */
- vfmadd213pd %ymm6, %ymm5, %ymm1
- vextractf128 $1, %ymm0, %xmm10
- vmovd %xmm0, %edx
- vmovd %xmm10, %esi
- movslq %edx, %rdx
- vpextrd $2, %xmm0, %ecx
- movslq %esi, %rsi
- vpextrd $2, %xmm10, %edi
- movslq %ecx, %rcx
- movslq %edi, %rdi
- vmovsd (%r8,%rdx), %xmm4
- vmovsd (%r8,%rsi), %xmm11
- vmovhpd (%r8,%rcx), %xmm4, %xmm7
- vmovhpd (%r8,%rdi), %xmm11, %xmm12
- vinsertf128 $1, %xmm12, %ymm7, %ymm0
- vaddpd %ymm1, %ymm0, %ymm6
- vaddpd %ymm6, %ymm3, %ymm0
+ /* reconstruction */
+ vfmadd213pd %ymm6, %ymm5, %ymm1
+ vextractf128 $1, %ymm0, %xmm10
+ vmovd %xmm0, %edx
+ vmovd %xmm10, %esi
+ movslq %edx, %rdx
+ vpextrd $2, %xmm0, %ecx
+ movslq %esi, %rsi
+ vpextrd $2, %xmm10, %edi
+ movslq %ecx, %rcx
+ movslq %edi, %rdi
+ vmovsd (%r8, %rdx), %xmm4
+ vmovsd (%r8, %rsi), %xmm11
+ vmovhpd (%r8, %rcx), %xmm4, %xmm7
+ vmovhpd (%r8, %rdi), %xmm11, %xmm12
+ vinsertf128 $1, %xmm12, %ymm7, %ymm0
+ vaddpd %ymm1, %ymm0, %ymm6
+ vaddpd %ymm6, %ymm3, %ymm0
-/* OR in the Sign of input argument to produce correct log1p(-0) */
- vorpd %ymm8, %ymm0, %ymm0
- testl %eax, %eax
+ /* OR in the Sign of input argument to produce correct log1p(-0) */
+ vorpd %ymm8, %ymm0, %ymm0
+ testl %eax, %eax
-/* Go to special inputs processing branch */
- jne L(SPECIAL_VALUES_BRANCH)
- # LOE rbx r12 r13 r14 r15 eax ymm0 ymm9
+ /* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx r12 r13 r14 r15 eax ymm0 ymm9
-/* Restore registers
- * and exit the function
- */
+ /* Restore registers
+ * and exit the function
+ */
L(EXIT):
- movq %rbp, %rsp
- popq %rbp
- cfi_def_cfa(7, 8)
- cfi_restore(6)
- ret
- cfi_def_cfa(6, 16)
- cfi_offset(6, -16)
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
-/* Branch to process
- * special inputs
- */
+ /* Branch to process
+ * special inputs
+ */
L(SPECIAL_VALUES_BRANCH):
- vmovupd %ymm9, 32(%rsp)
- vmovupd %ymm0, 64(%rsp)
- # LOE rbx r12 r13 r14 r15 eax ymm0
+ vmovupd %ymm9, 32(%rsp)
+ vmovupd %ymm0, 64(%rsp)
+ # LOE rbx r12 r13 r14 r15 eax ymm0
- xorl %edx, %edx
- # LOE rbx r12 r13 r14 r15 eax edx
+ xorl %edx, %edx
+ # LOE rbx r12 r13 r14 r15 eax edx
- vzeroupper
- movq %r12, 16(%rsp)
- /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
- .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
- movl %edx, %r12d
- movq %r13, 8(%rsp)
- /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
- .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
- movl %eax, %r13d
- movq %r14, (%rsp)
- /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
- .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
- # LOE rbx r15 r12d r13d
+ vzeroupper
+ movq %r12, 16(%rsp)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r12d
+ movq %r13, 8(%rsp)
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r13d
+ movq %r14, (%rsp)
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r15 r12d r13d
-/* Range mask
- * bits check
- */
+ /* Range mask
+ * bits check
+ */
L(RANGEMASK_CHECK):
- btl %r12d, %r13d
+ btl %r12d, %r13d
-/* Call scalar math function */
- jc L(SCALAR_MATH_CALL)
- # LOE rbx r15 r12d r13d
+ /* Call scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx r15 r12d r13d
-/* Special inputs
- * processing loop
- */
+ /* Special inputs
+ * processing loop
+ */
L(SPECIAL_VALUES_LOOP):
- incl %r12d
- cmpl $4, %r12d
+ incl %r12d
+ cmpl $4, %r12d
-/* Check bits in range mask */
- jl L(RANGEMASK_CHECK)
- # LOE rbx r15 r12d r13d
+ /* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx r15 r12d r13d
- movq 16(%rsp), %r12
- cfi_restore(12)
- movq 8(%rsp), %r13
- cfi_restore(13)
- movq (%rsp), %r14
- cfi_restore(14)
- vmovupd 64(%rsp), %ymm0
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ vmovupd 64(%rsp), %ymm0
-/* Go to exit */
- jmp L(EXIT)
- /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
- .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
- /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
- .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
- /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
- .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
- # LOE rbx r12 r13 r14 r15 ymm0
+ /* Go to exit */
+ jmp L(EXIT)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r12 r13 r14 r15 ymm0
-/* Scalar math fucntion call
- * to process special input
- */
+ /* Scalar math fucntion call
+ * to process special input
+ */
L(SCALAR_MATH_CALL):
- movl %r12d, %r14d
- movsd 32(%rsp,%r14,8), %xmm0
- call log1p@PLT
- # LOE rbx r14 r15 r12d r13d xmm0
+ movl %r12d, %r14d
+ movsd 32(%rsp, %r14, 8), %xmm0
+ call log1p@PLT
+ # LOE rbx r14 r15 r12d r13d xmm0
- movsd %xmm0, 64(%rsp,%r14,8)
+ movsd %xmm0, 64(%rsp, %r14, 8)
-/* Process special inputs in loop */
- jmp L(SPECIAL_VALUES_LOOP)
- # LOE rbx r15 r12d r13d
+ /* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ # LOE rbx r15 r12d r13d
END(_ZGVdN4v_log1p_avx2)
- .section .rodata, "a"
- .align 32
+ .section .rodata, "a"
+ .align 32
#ifdef __svml_dlog1p_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
- __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2];
- __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2];
- __declspec(align(32)) VUINT32 poly_coeff[4][4][2];
- __declspec(align(32)) VUINT32 ExpMask[4][2];
- __declspec(align(32)) VUINT32 Two10[4][2];
- __declspec(align(32)) VUINT32 MinLog1p[4][2];
- __declspec(align(32)) VUINT32 MaxLog1p[4][2];
- __declspec(align(32)) VUINT32 One[4][2];
- __declspec(align(32)) VUINT32 SgnMask[4][2];
- __declspec(align(32)) VUINT32 XThreshold[4][2];
- __declspec(align(32)) VUINT32 XhMask[4][2];
- __declspec(align(32)) VUINT32 Threshold[4][2];
- __declspec(align(32)) VUINT32 Bias[4][2];
- __declspec(align(32)) VUINT32 Bias1[4][2];
- __declspec(align(32)) VUINT32 ExpMask0[4][2];
- __declspec(align(32)) VUINT32 ExpMask2[4][2];
- __declspec(align(32)) VUINT32 L2[4][2];
+ __declspec(align(32)) VUINT32 Log_HA_table[(1<<10)+2][2];
+ __declspec(align(32)) VUINT32 Log_LA_table[(1<<9)+1][2];
+ __declspec(align(32)) VUINT32 poly_coeff[4][4][2];
+ __declspec(align(32)) VUINT32 ExpMask[4][2];
+ __declspec(align(32)) VUINT32 Two10[4][2];
+ __declspec(align(32)) VUINT32 MinLog1p[4][2];
+ __declspec(align(32)) VUINT32 MaxLog1p[4][2];
+ __declspec(align(32)) VUINT32 One[4][2];
+ __declspec(align(32)) VUINT32 SgnMask[4][2];
+ __declspec(align(32)) VUINT32 XThreshold[4][2];
+ __declspec(align(32)) VUINT32 XhMask[4][2];
+ __declspec(align(32)) VUINT32 Threshold[4][2];
+ __declspec(align(32)) VUINT32 Bias[4][2];
+ __declspec(align(32)) VUINT32 Bias1[4][2];
+ __declspec(align(32)) VUINT32 ExpMask0[4][2];
+ __declspec(align(32)) VUINT32 ExpMask2[4][2];
+ __declspec(align(32)) VUINT32 L2[4][2];
} __svml_dlog1p_data_internal;
#endif
__svml_dlog1p_data_internal:
- /* Log_HA_table */
- .quad 0xc086232bdd7a8300, 0xbe1ce91eef3fb100
- .quad 0xc086232fdc7ad828, 0xbe1cefcffda73b6a
- .quad 0xc0862333d97d2ba0, 0xbe1cef406748f1ff
- .quad 0xc0862337d48378e0, 0xbe1cef2a9429925a
- .quad 0xc086233bcd8fb878, 0xbe1cf138d17ebecb
- .quad 0xc086233fc4a3e018, 0xbe1ceff2dbbbb29e
- .quad 0xc0862343b9c1e270, 0xbe1cf1a42aae437b
- .quad 0xc0862347acebaf68, 0xbe1cef3b152048af
- .quad 0xc086234b9e2333f0, 0xbe1cef20e127805e
- .quad 0xc086234f8d6a5a30, 0xbe1cf00ad6052cf4
- .quad 0xc08623537ac30980, 0xbe1cefc4642ee597
- .quad 0xc0862357662f2660, 0xbe1cf1f277d36e16
- .quad 0xc086235b4fb092a0, 0xbe1ceed009e8d8e6
- .quad 0xc086235f37492d28, 0xbe1cf1e4038cb362
- .quad 0xc08623631cfad250, 0xbe1cf0b0873b8557
- .quad 0xc086236700c75b98, 0xbe1cf15bb3227c0b
- .quad 0xc086236ae2b09fe0, 0xbe1cf151ef8ca9ed
- .quad 0xc086236ec2b87358, 0xbe1cefe1dc2cd2ed
- .quad 0xc0862372a0e0a780, 0xbe1cf0d1eec5454f
- .quad 0xc08623767d2b0b48, 0xbe1ceeefd570bbce
- .quad 0xc086237a57996af0, 0xbe1cee99ae91b3a7
- .quad 0xc086237e302d9028, 0xbe1cf0412830fbd1
- .quad 0xc086238206e94218, 0xbe1ceee898588610
- .quad 0xc0862385dbce4548, 0xbe1cee9a1fbcaaea
- .quad 0xc0862389aede5bc0, 0xbe1ceed8e7cc1ad6
- .quad 0xc086238d801b4500, 0xbe1cf10c8d059da6
- .quad 0xc08623914f86be18, 0xbe1ceee6c63a8165
- .quad 0xc08623951d228180, 0xbe1cf0c3592d2ff1
- .quad 0xc0862398e8f04758, 0xbe1cf0026cc4cb1b
- .quad 0xc086239cb2f1c538, 0xbe1cf15d48d8e670
- .quad 0xc08623a07b28ae60, 0xbe1cef359363787c
- .quad 0xc08623a44196b390, 0xbe1cefdf1ab2e82c
- .quad 0xc08623a8063d8338, 0xbe1cefe43c02aa84
- .quad 0xc08623abc91ec960, 0xbe1cf044f5ae35b7
- .quad 0xc08623af8a3c2fb8, 0xbe1cf0b0b4001e1b
- .quad 0xc08623b349975d98, 0xbe1cf1bae76dfbcf
- .quad 0xc08623b70731f810, 0xbe1cef0a72e13a62
- .quad 0xc08623bac30da1c8, 0xbe1cf184007d2b6b
- .quad 0xc08623be7d2bfb40, 0xbe1cf16f4b239e98
- .quad 0xc08623c2358ea2a0, 0xbe1cf0976acada87
- .quad 0xc08623c5ec3733d0, 0xbe1cf066318a16ff
- .quad 0xc08623c9a1274880, 0xbe1ceffaa7148798
- .quad 0xc08623cd54607820, 0xbe1cf23ab02e9b6e
- .quad 0xc08623d105e45800, 0xbe1cefdfef7d4fde
- .quad 0xc08623d4b5b47b20, 0xbe1cf17fece44f2b
- .quad 0xc08623d863d27270, 0xbe1cf18f907d0d7c
- .quad 0xc08623dc103fccb0, 0xbe1cee61fe072c98
- .quad 0xc08623dfbafe1668, 0xbe1cf022dd891e2f
- .quad 0xc08623e3640eda20, 0xbe1ceecc1daf4358
- .quad 0xc08623e70b73a028, 0xbe1cf0173c4fa380
- .quad 0xc08623eab12deec8, 0xbe1cf16a2150c2f4
- .quad 0xc08623ee553f4a30, 0xbe1cf1bf980b1f4b
- .quad 0xc08623f1f7a93480, 0xbe1cef8b731663c2
- .quad 0xc08623f5986d2dc0, 0xbe1cee9a664d7ef4
- .quad 0xc08623f9378cb3f0, 0xbe1cf1eda2af6400
- .quad 0xc08623fcd5094320, 0xbe1cf1923f9d68d7
- .quad 0xc086240070e45548, 0xbe1cf0747cd3e03a
- .quad 0xc08624040b1f6260, 0xbe1cf22ee855bd6d
- .quad 0xc0862407a3bbe078, 0xbe1cf0d57360c00b
- .quad 0xc086240b3abb4398, 0xbe1ceebc815cd575
- .quad 0xc086240ed01efdd0, 0xbe1cf03bfb970951
- .quad 0xc086241263e87f50, 0xbe1cf16e74768529
- .quad 0xc0862415f6193658, 0xbe1cefec64b8becb
- .quad 0xc086241986b28f30, 0xbe1cf0838d210baa
- .quad 0xc086241d15b5f448, 0xbe1cf0ea86e75b11
- .quad 0xc0862420a324ce28, 0xbe1cf1708d11d805
- .quad 0xc08624242f008380, 0xbe1ceea988c5a417
- .quad 0xc0862427b94a7910, 0xbe1cef166a7bbca5
- .quad 0xc086242b420411d0, 0xbe1cf0c9d9e86a38
- .quad 0xc086242ec92eaee8, 0xbe1cef0946455411
- .quad 0xc08624324ecbaf98, 0xbe1cefea60907739
- .quad 0xc0862435d2dc7160, 0xbe1cf1ed0934ce42
- .quad 0xc086243955624ff8, 0xbe1cf191ba746c7d
- .quad 0xc086243cd65ea548, 0xbe1ceeec78cf2a7e
- .quad 0xc086244055d2c968, 0xbe1cef345284c119
- .quad 0xc0862443d3c012b8, 0xbe1cf24f77355219
- .quad 0xc08624475027d5e8, 0xbe1cf05bf087e114
- .quad 0xc086244acb0b65d0, 0xbe1cef3504a32189
- .quad 0xc086244e446c1398, 0xbe1ceff54b2a406f
- .quad 0xc0862451bc4b2eb8, 0xbe1cf0757d54ed4f
- .quad 0xc086245532aa04f0, 0xbe1cf0c8099fdfd5
- .quad 0xc0862458a789e250, 0xbe1cf0b173796a31
- .quad 0xc086245c1aec1138, 0xbe1cf11d8734540d
- .quad 0xc086245f8cd1da60, 0xbe1cf1916a723ceb
- .quad 0xc0862462fd3c84d8, 0xbe1cf19a911e1da7
- .quad 0xc08624666c2d5608, 0xbe1cf23a9ef72e4f
- .quad 0xc0862469d9a591c0, 0xbe1cef503d947663
- .quad 0xc086246d45a67a18, 0xbe1cf0fceeb1a0b2
- .quad 0xc0862470b0314fa8, 0xbe1cf107e27e4fbc
- .quad 0xc086247419475160, 0xbe1cf03dd9922331
- .quad 0xc086247780e9bc98, 0xbe1cefce1a10e129
- .quad 0xc086247ae719cd18, 0xbe1ceea47f73c4f6
- .quad 0xc086247e4bd8bd10, 0xbe1ceec0ac56d100
- .quad 0xc0862481af27c528, 0xbe1cee8a6593278a
- .quad 0xc086248511081c70, 0xbe1cf2231dd9dec7
- .quad 0xc0862488717af888, 0xbe1cf0b4b8ed7da8
- .quad 0xc086248bd0818d68, 0xbe1cf1bd8d835002
- .quad 0xc086248f2e1d0d98, 0xbe1cf259acc107f4
- .quad 0xc08624928a4eaa20, 0xbe1cee897636b00c
- .quad 0xc0862495e5179270, 0xbe1cee757f20c326
- .quad 0xc08624993e78f490, 0xbe1cefafd3aa54a4
- .quad 0xc086249c9673fd10, 0xbe1cee7298d38b97
- .quad 0xc086249fed09d6f8, 0xbe1ceedc158d4ceb
- .quad 0xc08624a3423babe0, 0xbe1cf2282987cb2e
- .quad 0xc08624a6960aa400, 0xbe1cefe7381ecc4b
- .quad 0xc08624a9e877e600, 0xbe1cef328dbbce80
- .quad 0xc08624ad39849728, 0xbe1cefde45f3cc71
- .quad 0xc08624b08931db58, 0xbe1cefa8b89433b9
- .quad 0xc08624b3d780d500, 0xbe1cef6773c0b139
- .quad 0xc08624b72472a528, 0xbe1cf031c931c11f
- .quad 0xc08624ba70086b78, 0xbe1cf088f49275e7
- .quad 0xc08624bdba434630, 0xbe1cf17de0eaa86d
- .quad 0xc08624c103245238, 0xbe1cefd492f1ba75
- .quad 0xc08624c44aacab08, 0xbe1cf1253e154466
- .quad 0xc08624c790dd6ad0, 0xbe1cf0fb09ee6d55
- .quad 0xc08624cad5b7aa58, 0xbe1cf1f08dd048fe
- .quad 0xc08624ce193c8120, 0xbe1ceeca0809697f
- .quad 0xc08624d15b6d0538, 0xbe1cef8d5662d968
- .quad 0xc08624d49c4a4b78, 0xbe1