author    Sunil K Pandey <skpgkp2@gmail.com>  2022-03-07 10:47:15 -0800
committer Sunil K Pandey <skpgkp2@gmail.com>  2022-03-07 21:44:09 -0800
commit    28ba5ee77fa61eb951736ec751e1921d4580aa2a (patch)
tree      832acfa0bbce4a73547c44bab12d690def40d86e
parent    06c7208f27b8c44f115ae357a97a7bbc7085497a (diff)
x86_64: Fix svml_d_tanh4_core_avx2.S code formatting
This commit contains the following formatting changes:

1. Instructions preceded by a tab.
2. Instructions less than 8 characters long have a tab between the
   mnemonic and the first operand.
3. Instructions more than 7 characters long have a space between the
   mnemonic and the first operand.
4. Tabs after `#define'd names and their values.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Comments indented with the code.
7. Redundant .text section removed.
8. 1 space between line content and line comment.
9. Space after all commas.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
-rw-r--r--  sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S | 2330
1 file changed, 1164 insertions(+), 1166 deletions(-)
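To make rules 1-3, 5, and 9 concrete, here is a small before/after sketch
using two instructions that appear in this file (GAS/AT&T syntax; the
exact lines are illustrative, not quoted from the old file):

    # Before: 8 leading spaces, no spaces after commas.
            vpsrld $10,%xmm7,%xmm6
            vinsertf128 $1,(%rsi,%r8),%ymm3,%ymm4

    # After: leading tab; vpsrld (6 characters) is followed by a tab,
    # vinsertf128 (11 characters) by a single space; spaces after commas.
	vpsrld	$10, %xmm7, %xmm6
	vinsertf128 $1, (%rsi, %r8), %ymm3, %ymm4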
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S
index d2a971ead5..7ddf145b25 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S
@@ -28,7 +28,7 @@
* and to approximate tanh(.) with a polynomial on each of them.
*
* IEEE SPECIAL CONDITIONS:
- * x = [+,-]0, r = [+,-]0
+ * x = [+, -]0, r = [+, -]0
* x = +Inf, r = +1
* x = -Inf, r = -1
* x = QNaN, r = QNaN
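The header comment above summarizes the scheme: split the argument range
into subintervals, evaluate a per-interval polynomial, and send special
inputs to a scalar callout. A minimal C sketch of that idea follows
(hedged: the interval count, cutoff, and index selection are hypothetical
placeholders; the real code below derives the index from the
exponent/mantissa bits rather than a linear mapping):

    #include <math.h>

    #define NINTERVALS 60  /* hypothetical: one coefficient row per subinterval */
    #define NCOEFFS    11  /* hypothetical: PH0, P1 .. P10 per row */

    /* Hypothetical coefficient table, assumed filled elsewhere.  */
    static double coeffs[NINTERVALS][NCOEFFS];

    static double
    tanh_by_table (double x)
    {
      /* Huge arguments, Inf and NaN go to the scalar callout,
         as in the .S file below.  */
      if (!isfinite (x) || fabs (x) >= 32.0)
        return tanh (x);

      /* Pick the subinterval from |x| (illustrative linear mapping).  */
      int i = (int) (fabs (x) * (NINTERVALS / 32.0));

      /* Horner evaluation: the scalar analogue of the vfmadd213pd chain.  */
      const double *p = coeffs[i];
      double r = p[NCOEFFS - 1];
      for (int k = NCOEFFS - 2; k >= 0; k--)
        r = fma (r, fabs (x), p[k]);

      /* Reattach the sign of x, like the final vorpd with the sign mask.  */
      return copysign (r, x);
    }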
@@ -72,1208 +72,1206 @@
/* Offsets for data table __svml_dtanh_data_internal
*/
-#define _dbP 0
-#define _dbSignMask 7680
-#define _dbAbsMask 7712
-#define _iExpMantMask 7744
-#define _iExpMask 7776
-#define _iMinIdxOfsMask 7808
-#define _iMaxIdxMask 7840
+#define _dbP 0
+#define _dbSignMask 7680
+#define _dbAbsMask 7712
+#define _iExpMantMask 7744
+#define _iExpMask 7776
+#define _iMinIdxOfsMask 7808
+#define _iMaxIdxMask 7840
#include <sysdep.h>
- .text
- .section .text.avx2,"ax",@progbits
+ .section .text.avx2, "ax", @progbits
ENTRY(_ZGVdN4v_tanh_avx2)
- pushq %rbp
- cfi_def_cfa_offset(16)
- movq %rsp, %rbp
- cfi_def_cfa(6, 16)
- cfi_offset(6, -16)
- andq $-32, %rsp
- subq $96, %rsp
- lea _dbP+96+__svml_dtanh_data_internal(%rip), %r8
- vmovupd %ymm0, (%rsp)
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-32, %rsp
+ subq $96, %rsp
+ lea _dbP+96+__svml_dtanh_data_internal(%rip), %r8
+ vmovupd %ymm0, (%rsp)
-/* if VMIN, VMAX is defined for I type */
- vpxor %xmm11, %xmm11, %xmm11
+ /* if VMIN, VMAX is defined for I type */
+ vpxor %xmm11, %xmm11, %xmm11
-/* Constant loading */
- vmovups _iMaxIdxMask+__svml_dtanh_data_internal(%rip), %xmm8
- vandpd _dbAbsMask+__svml_dtanh_data_internal(%rip), %ymm0, %ymm1
- vandpd _dbSignMask+__svml_dtanh_data_internal(%rip), %ymm0, %ymm2
- vextractf128 $1, %ymm0, %xmm15
- vshufps $221, %xmm15, %xmm0, %xmm14
+ /* Constant loading */
+ vmovups _iMaxIdxMask+__svml_dtanh_data_internal(%rip), %xmm8
+ vandpd _dbAbsMask+__svml_dtanh_data_internal(%rip), %ymm0, %ymm1
+ vandpd _dbSignMask+__svml_dtanh_data_internal(%rip), %ymm0, %ymm2
+ vextractf128 $1, %ymm0, %xmm15
+ vshufps $221, %xmm15, %xmm0, %xmm14
-/* Here huge arguments, INF and NaNs are filtered out to callout. */
- vpand _iExpMantMask+__svml_dtanh_data_internal(%rip), %xmm14, %xmm12
- vpsubd _iMinIdxOfsMask+__svml_dtanh_data_internal(%rip), %xmm12, %xmm9
- vpcmpgtd %xmm11, %xmm9, %xmm10
- vpcmpgtd %xmm8, %xmm9, %xmm0
- vpand %xmm10, %xmm9, %xmm7
- blendvps %xmm0, %xmm8, %xmm7
+ /* Here huge arguments, INF and NaNs are filtered out to callout. */
+ vpand _iExpMantMask+__svml_dtanh_data_internal(%rip), %xmm14, %xmm12
+ vpsubd _iMinIdxOfsMask+__svml_dtanh_data_internal(%rip), %xmm12, %xmm9
+ vpcmpgtd %xmm11, %xmm9, %xmm10
+ vpcmpgtd %xmm8, %xmm9, %xmm0
+ vpand %xmm10, %xmm9, %xmm7
+ blendvps %xmm0, %xmm8, %xmm7
-/*
- * VSHRIMM( I, iIndex, = iIndex, (17 - 4) );
- * VGATHER_MATRIX( L2D, p, TAB._dbP, iIndex, 0, T_ITEM_SIZE, T_ITEM_GRAN, 13, 0, 0 );
- */
- vpsrld $10, %xmm7, %xmm6
- vmovd %xmm6, %edx
- vpcmpgtd _iExpMask+__svml_dtanh_data_internal(%rip), %xmm12, %xmm13
- vmovmskps %xmm13, %eax
- vpextrd $1, %xmm6, %ecx
- movslq %edx, %rdx
- movslq %ecx, %rcx
- vpextrd $2, %xmm6, %esi
- vpextrd $3, %xmm6, %edi
- movslq %esi, %rsi
- movslq %edi, %rdi
- vmovupd -96(%rdx,%r8), %xmm3
- vmovupd -96(%rcx,%r8), %xmm4
- vmovupd -80(%rcx,%r8), %xmm13
- vmovupd -64(%rcx,%r8), %xmm9
- vmovupd -80(%rdx,%r8), %xmm14
- vmovupd -64(%rdx,%r8), %xmm10
- vmovupd -48(%rdx,%r8), %xmm6
- vinsertf128 $1, -96(%rsi,%r8), %ymm3, %ymm0
- vinsertf128 $1, -96(%rdi,%r8), %ymm4, %ymm15
- vmovupd -48(%rcx,%r8), %xmm3
- vunpckhpd %ymm15, %ymm0, %ymm0
- vinsertf128 $1, -80(%rsi,%r8), %ymm14, %ymm12
- vinsertf128 $1, -64(%rsi,%r8), %ymm10, %ymm8
- vinsertf128 $1, -80(%rdi,%r8), %ymm13, %ymm11
- vinsertf128 $1, -64(%rdi,%r8), %ymm9, %ymm7
- vunpcklpd %ymm11, %ymm12, %ymm15
- vunpckhpd %ymm11, %ymm12, %ymm14
- vunpcklpd %ymm7, %ymm8, %ymm13
- vunpckhpd %ymm7, %ymm8, %ymm12
- vmovupd -32(%rdx,%r8), %xmm9
- vmovupd -32(%rcx,%r8), %xmm8
- vinsertf128 $1, -48(%rsi,%r8), %ymm6, %ymm4
- vinsertf128 $1, -48(%rdi,%r8), %ymm3, %ymm5
- vunpcklpd %ymm5, %ymm4, %ymm11
- vunpckhpd %ymm5, %ymm4, %ymm10
- vmovupd -16(%rdx,%r8), %xmm3
- vmovupd -16(%rcx,%r8), %xmm4
- vinsertf128 $1, -32(%rsi,%r8), %ymm9, %ymm7
- vinsertf128 $1, -32(%rdi,%r8), %ymm8, %ymm6
- vunpcklpd %ymm6, %ymm7, %ymm9
- vunpckhpd %ymm6, %ymm7, %ymm8
- vinsertf128 $1, -16(%rsi,%r8), %ymm3, %ymm5
- vinsertf128 $1, -16(%rdi,%r8), %ymm4, %ymm6
- vunpcklpd %ymm6, %ymm5, %ymm7
- vunpckhpd %ymm6, %ymm5, %ymm6
- vmovupd (%rdx,%r8), %xmm3
- vmovupd (%rcx,%r8), %xmm5
- vinsertf128 $1, (%rsi,%r8), %ymm3, %ymm4
- vinsertf128 $1, (%rdi,%r8), %ymm5, %ymm5
- vunpcklpd %ymm5, %ymm4, %ymm3
- vaddpd %ymm3, %ymm1, %ymm1
- vfmadd213pd %ymm7, %ymm1, %ymm6
- vfmadd213pd %ymm8, %ymm1, %ymm6
- vfmadd213pd %ymm9, %ymm1, %ymm6
- vfmadd213pd %ymm10, %ymm1, %ymm6
- vfmadd213pd %ymm11, %ymm1, %ymm6
- vfmadd213pd %ymm12, %ymm1, %ymm6
- vfmadd213pd %ymm13, %ymm1, %ymm6
- vfmadd213pd %ymm14, %ymm1, %ymm6
- vfmadd213pd %ymm15, %ymm1, %ymm6
- vfmadd213pd %ymm0, %ymm1, %ymm6
- vorpd %ymm2, %ymm6, %ymm0
- testl %eax, %eax
+ /*
+ * VSHRIMM( I, iIndex, = iIndex, (17 - 4) );
+ * VGATHER_MATRIX( L2D, p, TAB._dbP, iIndex, 0, T_ITEM_SIZE, T_ITEM_GRAN, 13, 0, 0 );
+ */
+ vpsrld $10, %xmm7, %xmm6
+ vmovd %xmm6, %edx
+ vpcmpgtd _iExpMask+__svml_dtanh_data_internal(%rip), %xmm12, %xmm13
+ vmovmskps %xmm13, %eax
+ vpextrd $1, %xmm6, %ecx
+ movslq %edx, %rdx
+ movslq %ecx, %rcx
+ vpextrd $2, %xmm6, %esi
+ vpextrd $3, %xmm6, %edi
+ movslq %esi, %rsi
+ movslq %edi, %rdi
+ vmovupd -96(%rdx, %r8), %xmm3
+ vmovupd -96(%rcx, %r8), %xmm4
+ vmovupd -80(%rcx, %r8), %xmm13
+ vmovupd -64(%rcx, %r8), %xmm9
+ vmovupd -80(%rdx, %r8), %xmm14
+ vmovupd -64(%rdx, %r8), %xmm10
+ vmovupd -48(%rdx, %r8), %xmm6
+ vinsertf128 $1, -96(%rsi, %r8), %ymm3, %ymm0
+ vinsertf128 $1, -96(%rdi, %r8), %ymm4, %ymm15
+ vmovupd -48(%rcx, %r8), %xmm3
+ vunpckhpd %ymm15, %ymm0, %ymm0
+ vinsertf128 $1, -80(%rsi, %r8), %ymm14, %ymm12
+ vinsertf128 $1, -64(%rsi, %r8), %ymm10, %ymm8
+ vinsertf128 $1, -80(%rdi, %r8), %ymm13, %ymm11
+ vinsertf128 $1, -64(%rdi, %r8), %ymm9, %ymm7
+ vunpcklpd %ymm11, %ymm12, %ymm15
+ vunpckhpd %ymm11, %ymm12, %ymm14
+ vunpcklpd %ymm7, %ymm8, %ymm13
+ vunpckhpd %ymm7, %ymm8, %ymm12
+ vmovupd -32(%rdx, %r8), %xmm9
+ vmovupd -32(%rcx, %r8), %xmm8
+ vinsertf128 $1, -48(%rsi, %r8), %ymm6, %ymm4
+ vinsertf128 $1, -48(%rdi, %r8), %ymm3, %ymm5
+ vunpcklpd %ymm5, %ymm4, %ymm11
+ vunpckhpd %ymm5, %ymm4, %ymm10
+ vmovupd -16(%rdx, %r8), %xmm3
+ vmovupd -16(%rcx, %r8), %xmm4
+ vinsertf128 $1, -32(%rsi, %r8), %ymm9, %ymm7
+ vinsertf128 $1, -32(%rdi, %r8), %ymm8, %ymm6
+ vunpcklpd %ymm6, %ymm7, %ymm9
+ vunpckhpd %ymm6, %ymm7, %ymm8
+ vinsertf128 $1, -16(%rsi, %r8), %ymm3, %ymm5
+ vinsertf128 $1, -16(%rdi, %r8), %ymm4, %ymm6
+ vunpcklpd %ymm6, %ymm5, %ymm7
+ vunpckhpd %ymm6, %ymm5, %ymm6
+ vmovupd (%rdx, %r8), %xmm3
+ vmovupd (%rcx, %r8), %xmm5
+ vinsertf128 $1, (%rsi, %r8), %ymm3, %ymm4
+ vinsertf128 $1, (%rdi, %r8), %ymm5, %ymm5
+ vunpcklpd %ymm5, %ymm4, %ymm3
+ vaddpd %ymm3, %ymm1, %ymm1
+ vfmadd213pd %ymm7, %ymm1, %ymm6
+ vfmadd213pd %ymm8, %ymm1, %ymm6
+ vfmadd213pd %ymm9, %ymm1, %ymm6
+ vfmadd213pd %ymm10, %ymm1, %ymm6
+ vfmadd213pd %ymm11, %ymm1, %ymm6
+ vfmadd213pd %ymm12, %ymm1, %ymm6
+ vfmadd213pd %ymm13, %ymm1, %ymm6
+ vfmadd213pd %ymm14, %ymm1, %ymm6
+ vfmadd213pd %ymm15, %ymm1, %ymm6
+ vfmadd213pd %ymm0, %ymm1, %ymm6
+ vorpd %ymm2, %ymm6, %ymm0
+ testl %eax, %eax
-/* Go to special inputs processing branch */
- jne L(SPECIAL_VALUES_BRANCH)
- # LOE rbx r12 r13 r14 r15 eax ymm0
+ /* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx r12 r13 r14 r15 eax ymm0
-/* Restore registers
- * and exit the function
- */
+ /* Restore registers
+ * and exit the function
+ */
L(EXIT):
- movq %rbp, %rsp
- popq %rbp
- cfi_def_cfa(7, 8)
- cfi_restore(6)
- ret
- cfi_def_cfa(6, 16)
- cfi_offset(6, -16)
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
-/* Branch to process
- * special inputs
- */
+ /* Branch to process
+ * special inputs
+ */
L(SPECIAL_VALUES_BRANCH):
- vmovupd (%rsp), %ymm1
- vmovupd %ymm0, 64(%rsp)
- vmovupd %ymm1, 32(%rsp)
- # LOE rbx r12 r13 r14 r15 eax ymm0
+ vmovupd (%rsp), %ymm1
+ vmovupd %ymm0, 64(%rsp)
+ vmovupd %ymm1, 32(%rsp)
+ # LOE rbx r12 r13 r14 r15 eax ymm0
- xorl %edx, %edx
- # LOE rbx r12 r13 r14 r15 eax edx
+ xorl %edx, %edx
+ # LOE rbx r12 r13 r14 r15 eax edx
- vzeroupper
- movq %r12, 16(%rsp)
- /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
- .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
- movl %edx, %r12d
- movq %r13, 8(%rsp)
- /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
- .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
- movl %eax, %r13d
- movq %r14, (%rsp)
- /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
- .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
- # LOE rbx r15 r12d r13d
+ vzeroupper
+ movq %r12, 16(%rsp)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r12d
+ movq %r13, 8(%rsp)
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r13d
+ movq %r14, (%rsp)
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r15 r12d r13d
-/* Range mask
- * bits check
- */
+ /* Range mask
+ * bits check
+ */
L(RANGEMASK_CHECK):
- btl %r12d, %r13d
+ btl %r12d, %r13d
-/* Call scalar math function */
- jc L(SCALAR_MATH_CALL)
- # LOE rbx r15 r12d r13d
+ /* Call scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx r15 r12d r13d
-/* Special inputs
- * processing loop
- */
+ /* Special inputs
+ * processing loop
+ */
L(SPECIAL_VALUES_LOOP):
- incl %r12d
- cmpl $4, %r12d
+ incl %r12d
+ cmpl $4, %r12d
-/* Check bits in range mask */
- jl L(RANGEMASK_CHECK)
- # LOE rbx r15 r12d r13d
+ /* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx r15 r12d r13d
- movq 16(%rsp), %r12
- cfi_restore(12)
- movq 8(%rsp), %r13
- cfi_restore(13)
- movq (%rsp), %r14
- cfi_restore(14)
- vmovupd 64(%rsp), %ymm0
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ vmovupd 64(%rsp), %ymm0
-/* Go to exit */
- jmp L(EXIT)
- /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
- .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
- /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
- .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
- /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
- .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
- # LOE rbx r12 r13 r14 r15 ymm0
+ /* Go to exit */
+ jmp L(EXIT)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r12 r13 r14 r15 ymm0
-/* Scalar math fucntion call
- * to process special input
- */
+ /* Scalar math fucntion call
+ * to process special input
+ */
L(SCALAR_MATH_CALL):
- movl %r12d, %r14d
- movsd 32(%rsp,%r14,8), %xmm0
- call tanh@PLT
- # LOE rbx r14 r15 r12d r13d xmm0
+ movl %r12d, %r14d
+ movsd 32(%rsp, %r14, 8), %xmm0
+ call tanh@PLT
+ # LOE rbx r14 r15 r12d r13d xmm0
- movsd %xmm0, 64(%rsp,%r14,8)
+ movsd %xmm0, 64(%rsp, %r14, 8)
-/* Process special inputs in loop */
- jmp L(SPECIAL_VALUES_LOOP)
- # LOE rbx r15 r12d r13d
+ /* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ # LOE rbx r15 r12d r13d
END(_ZGVdN4v_tanh_avx2)
- .section .rodata, "a"
- .align 32
+ .section .rodata, "a"
+ .align 32
#ifdef __svml_dtanh_data_internal_typedef
typedef unsigned int VUINT32;
-typedef struct
-{
- __declspec(align(32)) VUINT32 _dbP[60*16][2];
- __declspec(align(32)) VUINT32 _dbSignMask[4][2];
- __declspec(align(32)) VUINT32 _dbAbsMask[4][2];
- __declspec(align(32)) VUINT32 _iExpMantMask[8][1];
- __declspec(align(32)) VUINT32 _iExpMask[8][1];
- __declspec(align(32)) VUINT32 _iMinIdxOfsMask[8][1];
- __declspec(align(32)) VUINT32 _iMaxIdxMask[8][1];
+typedef struct {
+ __declspec(align(32)) VUINT32 _dbP[60*16][2];
+ __declspec(align(32)) VUINT32 _dbSignMask[4][2];
+ __declspec(align(32)) VUINT32 _dbAbsMask[4][2];
+ __declspec(align(32)) VUINT32 _iExpMantMask[8][1];
+ __declspec(align(32)) VUINT32 _iExpMask[8][1];
+ __declspec(align(32)) VUINT32 _iMinIdxOfsMask[8][1];
+ __declspec(align(32)) VUINT32 _iMaxIdxMask[8][1];
} __svml_dtanh_data_internal;
#endif
__svml_dtanh_data_internal:
- /* Polynomial coefficients */
- .quad 0x0000000000000000 /* PL0 = +0.000000000000000000000e-01 */
- .quad 0x0000000000000000 /* PH0 = +0.000000000000000000000e-01 */
- .quad 0x3FF0000000000000 /* P1 = +1.000000000000000014103e+00 */
- .quad 0xBD197DEAD79668D3 /* P2 = -2.264132406596103056796e-14 */
- .quad 0xBFD555555553AF3C /* P3 = -3.333333333273349741024e-01 */
- .quad 0xBE052F7CCA134846 /* P4 = -6.165791385711493738399e-10 */
- .quad 0x3FC11111563849D6 /* P5 = +1.333333655353061107201e-01 */
- .quad 0xBEB038623673FFB2 /* P6 = -9.668021563879858950855e-07 */
- .quad 0xBFAB9F685E64022E /* P7 = -5.395055916051593179252e-02 */
- .quad 0xBF2A54E2B28F2207 /* P8 = -2.008940439550829012647e-04 */
- .quad 0x3F97CFB9328A230E /* P9 = +2.325333949059698582189e-02 */
- .quad 0xBF75CA6D61723E02 /* P10 = -5.320002811586290441790e-03 */
- .quad 0x0000000000000000 /* B = +0 */
- .quad 0x3FF0000000000000 /* A = +1.0 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x3C3708A564FAD29A /* PL0 = +1.248663375337163807466e-18 */
- .quad 0x3FC0E6973998DA48 /* PH0 = +1.320370703922029154143e-01 */
- .quad 0x3FEF712EB25C0888 /* P1 = +9.825662120422444519229e-01 */
- .quad 0xBFC09B296F7C1EA9 /* P2 = -1.297351641044220078331e-01 */
- .quad 0xBFD3DD77541EDDA7 /* P3 = -3.103922196855485849143e-01 */
- .quad 0x3FB58FFCF4309615 /* P4 = +8.422833406128689275566e-02 */
- .quad 0x3FBD3ABE845DCF49 /* P5 = +1.141776154670967208833e-01 */
- .quad 0xBFA791DF538C37FA /* P6 = -4.603479285115947936529e-02 */
- .quad 0xBFA4F872F69CD6E8 /* P7 = -4.095801601799370195284e-02 */
- .quad 0x3F9772E49EF6412B /* P8 = +2.289921970583567527179e-02 */
- .quad 0x3F8CBC0807393909 /* P9 = +1.403051635784581776625e-02 */
- .quad 0xBF85F06A30F93319 /* P10 = -1.071246110873285040939e-02 */
- .quad 0xBFC1000000000000 /* B = -.132813 */
- .quad 0x3FF0000000000000 /* A = +1 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x3C6004EE5739DEAC /* PL0 = +6.947247374112211856530e-18 */
- .quad 0x3FC2DC968E6E0D62 /* PH0 = +1.473568149050193398786e-01 */
- .quad 0x3FEF4E1E606D96DF /* P1 = +9.782859691010478680677e-01 */
- .quad 0xBFC273BD70994AB9 /* P2 = -1.441571044730005866646e-01 */
- .quad 0xBFD382B548270D2C /* P3 = -3.048527912726111386771e-01 */
- .quad 0x3FB7CD2D582A6B29 /* P4 = +9.297450449450351894400e-02 */
- .quad 0x3FBC1278CCCBF0DB /* P5 = +1.096568584434324642303e-01 */
- .quad 0xBFA9C7F5115B86A1 /* P6 = -5.035367810138536095866e-02 */
- .quad 0xBFA371C21BAF618E /* P7 = -3.797728145554222910481e-02 */
- .quad 0x3F9958943F68417E /* P8 = +2.475196492201935923783e-02 */
- .quad 0x3F8930D5CFFD4152 /* P9 = +1.230017701132682667572e-02 */
- .quad 0xBF875CF7ADD31B76 /* P10 = -1.140779017658897660092e-02 */
- .quad 0xBFC3000000000000 /* B = -.148438 */
- .quad 0x3FF0000000000000 /* A = +1 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x3C7EABE24E052A1F /* PL0 = +2.660321779421749543501e-17 */
- .quad 0x3FC4D04783618C71 /* PH0 = +1.626061812886266111366e-01 */
- .quad 0x3FEF2765AF97A4B3 /* P1 = +9.735592298067302883212e-01 */
- .quad 0xBFC443654205FEA5 /* P2 = -1.583067486171689074207e-01 */
- .quad 0xBFD31F2E208A5B97 /* P3 = -2.987780874040536844467e-01 */
- .quad 0x3FB9F235BD339878 /* P4 = +1.013520800512156573576e-01 */
- .quad 0x3FBAD0B0DFCCA141 /* P5 = +1.047468706498238100104e-01 */
- .quad 0xBFABD1B9600E608E /* P6 = -5.433444306908184548967e-02 */
- .quad 0xBFA1CEBEAF07DB58 /* P7 = -3.478046309094534453598e-02 */
- .quad 0x3F9AFC9FB1D8EFD2 /* P8 = +2.635430834764902126383e-02 */
- .quad 0x3F8573444F1AB502 /* P9 = +1.047376028449287564018e-02 */
- .quad 0xBF8874FBC8F24406 /* P10 = -1.194187838544459322219e-02 */
- .quad 0xBFC5000000000000 /* B = -.164063 */
- .quad 0x3FF0000000000000 /* A = +1 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x3C7FB199D361A790 /* PL0 = +2.748994907060158996213e-17 */
- .quad 0x3FC6C170259E21F7 /* PH0 = +1.777782615356639783766e-01 */
- .quad 0x3FEEFD17479F7C65 /* P1 = +9.683948897253570478266e-01 */
- .quad 0xBFC609530FE4DF8D /* P2 = -1.721595599753950294577e-01 */
- .quad 0xBFD2B3465D71B4DE /* P3 = -2.921920692959484052676e-01 */
- .quad 0x3FBBFD2D34AC509B /* P4 = +1.093319181057403192166e-01 */
- .quad 0x3FB9778C3C16A0FE /* P5 = +9.948040453912551395183e-02 */
- .quad 0xBFADAC4D9E63C665 /* P6 = -5.795519407719210697372e-02 */
- .quad 0xBFA0139CCAD02D60 /* P7 = -3.139963126894929339124e-02 */
- .quad 0x3F9C5BF43BA6F19D /* P8 = +2.769452680671379432854e-02 */
- .quad 0x3F8190B703350341 /* P9 = +8.576803002712575184772e-03 */
- .quad 0xBF8936606782858A /* P10 = -1.231074634444230850234e-02 */
- .quad 0xBFC7000000000000 /* B = -.179688 */
- .quad 0x3FF0000000000000 /* A = +1 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x3C6A917CA3624D50 /* PL0 = +1.152216693509785660691e-17 */
- .quad 0x3FC8AFD7B974FABB /* PH0 = +1.928662925292508878439e-01 */
- .quad 0x3FEECF47624A5D03 /* P1 = +9.628025932060214187231e-01 */
- .quad 0xBFC7C4C2CB4FDE4D /* P2 = -1.856921665891938814679e-01 */
- .quad 0xBFD23F69CB2C1F9D /* P3 = -2.851204380135586155453e-01 */
- .quad 0x3FBDEC5703A03814 /* P4 = +1.168875106670557712458e-01 */
- .quad 0x3FB8095003D0CF15 /* P5 = +9.389209836154706616487e-02 */
- .quad 0xBFAF554B47B10CBB /* P6 = -6.119761705533607365968e-02 */
- .quad 0xBF9C89743FE7BC1B /* P7 = -2.786809577986213853937e-02 */
- .quad 0x3F9D74725B746E7C /* P8 = +2.876452143855921824991e-02 */
- .quad 0x3F7B2D8AFB70B88C /* P9 = +6.635229968237631511880e-03 */
- .quad 0xBF89A0A2883EF6CB /* P10 = -1.251341799058582545252e-02 */
- .quad 0xBFC9000000000000 /* B = -.195313 */
- .quad 0x3FF0000000000000 /* A = +1 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x3C7608279E8609CB /* PL0 = +1.910958764623660748269e-17 */
- .quad 0x3FCA9B46D2DDC5E3 /* PH0 = +2.078636674519166172015e-01 */
- .quad 0x3FEE9E0BB72A01A1 /* P1 = +9.567926957534390123919e-01 */
- .quad 0xBFC974FAD10C5330 /* P2 = -1.988824387305156976885e-01 */
- .quad 0xBFD1C40ACCBA4044 /* P3 = -2.775904654781735703430e-01 */
- .quad 0x3FBFBE24E2987853 /* P4 = +1.239951184474830487522e-01 */
- .quad 0x3FB6885B4345E47F /* P5 = +8.801813499839460539687e-02 */
- .quad 0xBFB06563D5670584 /* P6 = -6.404708824176991770896e-02 */
- .quad 0xBF98CD1D620DF6E2 /* P7 = -2.421995078065365147772e-02 */
- .quad 0x3F9E44EF3E844D21 /* P8 = +2.955983943054463683119e-02 */
- .quad 0x3F7325FA0148CAAE /* P9 = +4.674889165971292322643e-03 */
- .quad 0xBF89B4C8556C2D92 /* P10 = -1.255184660614964011319e-02 */
- .quad 0xBFCB000000000000 /* B = -.210938 */
- .quad 0x3FF0000000000000 /* A = +1 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x3C6F19DAA20F51D5 /* PL0 = +1.348790537832000351176e-17 */
- .quad 0x3FCC83876CA98E15 /* PH0 = +2.227639465883021474557e-01 */
- .quad 0x3FEE697B662D07CD /* P1 = +9.503762241004040620296e-01 */
- .quad 0xBFCB194C7ED76ACF /* P2 = -2.117095584242946953999e-01 */
- .quad 0xBFD141A19E419762 /* P3 = -2.696308179350720680191e-01 */
- .quad 0x3FC0B89C64BC7B98 /* P4 = +1.306338779331468503007e-01 */
- .quad 0x3FB4F721150BBFC5 /* P5 = +8.189589275184434216748e-02 */
- .quad 0xBFB105AAFAB87898 /* P6 = -6.649273511036069461061e-02 */
- .quad 0xBF94FB3B31248C01 /* P7 = -2.048962104266749732921e-02 */
- .quad 0x3F9ECD31E588709C /* P8 = +3.007963145692880855964e-02 */
- .quad 0x3F664A91A335C105 /* P9 = +2.721104095762541127495e-03 */
- .quad 0xBF89754E32E1E26E /* P10 = -1.243077366619723806134e-02 */
- .quad 0xBFCD000000000000 /* B = -.226563 */
- .quad 0x3FF0000000000000 /* A = +1 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x0000000000000000 /* Align value = +0 */
- .quad 0x3C6AC6C889D8111D /* PL0 = +1.161245469312620769170e-17 */
- .quad 0x3FCE6864FE55A3D0 /* PH0 = +2.375608674877001114112e-01 */
- .quad 0x3FEE31AEE116B82B /* P1 = +9.435648342384913826391e-01 */
- .quad 0xBFCCB114B69E808B /* P2 = -2.241540805525839833707e-01 */
- .quad 0xBFD0B8AB913BA99D /* P3 = -2.612713735858507980441e-01 */
- .quad 0x3FC1823322BED48A /* P4 = +1.367858810096190233514e-01 */
- .quad 0x3FB35822B7929893 /* P5 = +7.556359273675842651653e-02 */
-