aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSunil K Pandey <skpgkp2@gmail.com>2022-03-07 10:47:12 -0800
committerSunil K Pandey <skpgkp2@gmail.com>2022-03-07 21:14:10 -0800
commitc9102dec55e1d182ab617a0f0f2e1d39f3cd8b18 (patch)
treee5d92002a17af8a5a8a8d4e6e1a247c2d8121ec3
parent27be95ecd73a9a65ae6c981fd2b00c28d0642819 (diff)
downloadglibc-c9102dec55e1d182ab617a0f0f2e1d39f3cd8b18.tar.xz
glibc-c9102dec55e1d182ab617a0f0f2e1d39f3cd8b18.zip
x86_64: Fix svml_d_erfc8_core_avx512.S code formatting
This commit contains the following formatting changes: 1. Instructions preceded by a tab. 2. Instructions less than 8 characters in length have a tab between them and the first operand. 3. Instructions greater than 7 characters in length have a space between them and the first operand. 4. Tabs after `#define`d names and their values. 5. 8 spaces at the beginning of a line replaced by a tab. 6. Indent comments with code. 7. Remove redundant .text section. 8. 1 space between line content and line comment. 9. Space after all commas. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_erfc8_core_avx512.S7560
1 files changed, 3779 insertions, 3781 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_erfc8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc8_core_avx512.S
index bd2c3bef7d..77228814d3 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_erfc8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_erfc8_core_avx512.S
@@ -29,3832 +29,3830 @@
/* Offsets for data table __svml_derfc_data_internal
*/
-#define _erfc_tbl 0
-#define _AbsMask 55808
-#define _MaxThreshold 55872
-#define _SgnMask 55936
-#define _One 56000
-#define _TwoM128 56064
-#define _SRound 56128
-#define _poly1_0 56192
-#define _poly1_1 56256
-#define _poly3_0 56320
-#define _poly3_1 56384
-#define _poly5_0 56448
-#define _poly5_1 56512
-#define _poly1_2 56576
-#define _poly3_2 56640
-#define _poly5_2 56704
-#define _poly1_3 56768
-#define _poly3_3 56832
-#define _poly5_3 56896
-#define _poly1_4 56960
-#define _poly3_4 57024
-#define _poly1_5 57088
-#define _poly3_5 57152
-#define _poly3_6 57216
-#define _poly1_6 57280
-#define _poly1_7 57344
-#define _UF_Threshold 57408
-#define _Mask32 57472
+#define _erfc_tbl 0
+#define _AbsMask 55808
+#define _MaxThreshold 55872
+#define _SgnMask 55936
+#define _One 56000
+#define _TwoM128 56064
+#define _SRound 56128
+#define _poly1_0 56192
+#define _poly1_1 56256
+#define _poly3_0 56320
+#define _poly3_1 56384
+#define _poly5_0 56448
+#define _poly5_1 56512
+#define _poly1_2 56576
+#define _poly3_2 56640
+#define _poly5_2 56704
+#define _poly1_3 56768
+#define _poly3_3 56832
+#define _poly5_3 56896
+#define _poly1_4 56960
+#define _poly3_4 57024
+#define _poly1_5 57088
+#define _poly3_5 57152
+#define _poly3_6 57216
+#define _poly1_6 57280
+#define _poly1_7 57344
+#define _UF_Threshold 57408
+#define _Mask32 57472
#include <sysdep.h>
- .text
- .section .text.evex512,"ax",@progbits
+ .section .text.evex512, "ax", @progbits
ENTRY(_ZGVeN8v_erfc_skx)
- pushq %rbp
- cfi_def_cfa_offset(16)
- movq %rsp, %rbp
- cfi_def_cfa(6, 16)
- cfi_offset(6, -16)
- andq $-64, %rsp
- subq $192, %rsp
+ pushq %rbp
+ cfi_def_cfa_offset(16)
+ movq %rsp, %rbp
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
+ andq $-64, %rsp
+ subq $192, %rsp
-/* vector gather: erfc_h(x0), (erfc_l(x0), 2/sqrt(pi)*exp(-x0^2)) */
- lea __svml_derfc_data_internal(%rip), %rax
+ /* vector gather: erfc_h(x0), (erfc_l(x0), 2/sqrt(pi)*exp(-x0^2)) */
+ lea __svml_derfc_data_internal(%rip), %rax
-/*
- * erfc(27.25) underflows to 0
- * can compute all results in the main path
- */
- vmovups _MaxThreshold+__svml_derfc_data_internal(%rip), %zmm13
- vmovups _SRound+__svml_derfc_data_internal(%rip), %zmm7
- vmovups _One+__svml_derfc_data_internal(%rip), %zmm14
- kxnorw %k0, %k0, %k2
- kxnorw %k0, %k0, %k1
- vmovaps %zmm0, %zmm11
- vandpd _AbsMask+__svml_derfc_data_internal(%rip), %zmm11, %zmm12
- vandpd _SgnMask+__svml_derfc_data_internal(%rip), %zmm11, %zmm3
- vmovups _TwoM128+__svml_derfc_data_internal(%rip), %zmm0
- vminpd {sae}, %zmm13, %zmm12, %zmm6
+ /*
+ * erfc(27.25) underflows to 0
+ * can compute all results in the main path
+ */
+ vmovups _MaxThreshold+__svml_derfc_data_internal(%rip), %zmm13
+ vmovups _SRound+__svml_derfc_data_internal(%rip), %zmm7
+ vmovups _One+__svml_derfc_data_internal(%rip), %zmm14
+ kxnorw %k0, %k0, %k2
+ kxnorw %k0, %k0, %k1
+ vmovaps %zmm0, %zmm11
+ vandpd _AbsMask+__svml_derfc_data_internal(%rip), %zmm11, %zmm12
+ vandpd _SgnMask+__svml_derfc_data_internal(%rip), %zmm11, %zmm3
+ vmovups _TwoM128+__svml_derfc_data_internal(%rip), %zmm0
+ vminpd {sae}, %zmm13, %zmm12, %zmm6
-/* Start polynomial evaluation */
- vmovups _poly1_0+__svml_derfc_data_internal(%rip), %zmm12
- vmovups _poly3_0+__svml_derfc_data_internal(%rip), %zmm13
- vaddpd {rn-sae}, %zmm7, %zmm6, %zmm1
- vorpd %zmm3, %zmm14, %zmm15
- vmaxpd {sae}, %zmm0, %zmm6, %zmm2
- vmovups _poly1_1+__svml_derfc_data_internal(%rip), %zmm6
- vpsllq $4, %zmm1, %zmm4
- vsubpd {rn-sae}, %zmm7, %zmm1, %zmm5
+ /* Start polynomial evaluation */
+ vmovups _poly1_0+__svml_derfc_data_internal(%rip), %zmm12
+ vmovups _poly3_0+__svml_derfc_data_internal(%rip), %zmm13
+ vaddpd {rn-sae}, %zmm7, %zmm6, %zmm1
+ vorpd %zmm3, %zmm14, %zmm15
+ vmaxpd {sae}, %zmm0, %zmm6, %zmm2
+ vmovups _poly1_1+__svml_derfc_data_internal(%rip), %zmm6
+ vpsllq $4, %zmm1, %zmm4
+ vsubpd {rn-sae}, %zmm7, %zmm1, %zmm5
-/* 2.0 if x<0, 0.0 otherwise */
- vsubpd {rn-sae}, %zmm15, %zmm14, %zmm10
+ /* 2.0 if x<0, 0.0 otherwise */
+ vsubpd {rn-sae}, %zmm15, %zmm14, %zmm10
-/* 2^(-128) with sign of input */
- vorpd %zmm3, %zmm0, %zmm7
- vsubpd {rn-sae}, %zmm5, %zmm2, %zmm9
- vmovups _poly1_2+__svml_derfc_data_internal(%rip), %zmm15
- vmovups _poly5_1+__svml_derfc_data_internal(%rip), %zmm3
- vmovups _poly5_0+__svml_derfc_data_internal(%rip), %zmm14
- vmovups _poly1_3+__svml_derfc_data_internal(%rip), %zmm0
- vmovups _poly5_2+__svml_derfc_data_internal(%rip), %zmm1
- vmovups _poly3_2+__svml_derfc_data_internal(%rip), %zmm2
- vmulpd {rn-sae}, %zmm9, %zmm5, %zmm8
- vmovups _poly3_1+__svml_derfc_data_internal(%rip), %zmm5
- vfmadd231pd {rn-sae}, %zmm8, %zmm12, %zmm6
- vfmadd231pd {rn-sae}, %zmm8, %zmm14, %zmm3
- vfmadd231pd {rn-sae}, %zmm8, %zmm13, %zmm5
- vmovups _poly3_3+__svml_derfc_data_internal(%rip), %zmm12
- vmovups _poly5_3+__svml_derfc_data_internal(%rip), %zmm13
- vfmadd213pd {rn-sae}, %zmm15, %zmm8, %zmm6
- vfmadd213pd {rn-sae}, %zmm1, %zmm8, %zmm3
- vfmadd213pd {rn-sae}, %zmm2, %zmm8, %zmm5
- vmovups _poly3_5+__svml_derfc_data_internal(%rip), %zmm14
+ /* 2^(-128) with sign of input */
+ vorpd %zmm3, %zmm0, %zmm7
+ vsubpd {rn-sae}, %zmm5, %zmm2, %zmm9
+ vmovups _poly1_2+__svml_derfc_data_internal(%rip), %zmm15
+ vmovups _poly5_1+__svml_derfc_data_internal(%rip), %zmm3
+ vmovups _poly5_0+__svml_derfc_data_internal(%rip), %zmm14
+ vmovups _poly1_3+__svml_derfc_data_internal(%rip), %zmm0
+ vmovups _poly5_2+__svml_derfc_data_internal(%rip), %zmm1
+ vmovups _poly3_2+__svml_derfc_data_internal(%rip), %zmm2
+ vmulpd {rn-sae}, %zmm9, %zmm5, %zmm8
+ vmovups _poly3_1+__svml_derfc_data_internal(%rip), %zmm5
+ vfmadd231pd {rn-sae}, %zmm8, %zmm12, %zmm6
+ vfmadd231pd {rn-sae}, %zmm8, %zmm14, %zmm3
+ vfmadd231pd {rn-sae}, %zmm8, %zmm13, %zmm5
+ vmovups _poly3_3+__svml_derfc_data_internal(%rip), %zmm12
+ vmovups _poly5_3+__svml_derfc_data_internal(%rip), %zmm13
+ vfmadd213pd {rn-sae}, %zmm15, %zmm8, %zmm6
+ vfmadd213pd {rn-sae}, %zmm1, %zmm8, %zmm3
+ vfmadd213pd {rn-sae}, %zmm2, %zmm8, %zmm5
+ vmovups _poly3_5+__svml_derfc_data_internal(%rip), %zmm14
-/* P5 = P5 + D2*P07 */
- vmovups _poly3_6+__svml_derfc_data_internal(%rip), %zmm15
- vfmadd213pd {rn-sae}, %zmm0, %zmm8, %zmm6
- vfmadd213pd {rn-sae}, %zmm13, %zmm8, %zmm3
- vfmadd213pd {rn-sae}, %zmm12, %zmm8, %zmm5
- vmovups _poly3_4+__svml_derfc_data_internal(%rip), %zmm12
- vmovups _poly1_5+__svml_derfc_data_internal(%rip), %zmm13
- vfmadd213pd {rn-sae}, %zmm12, %zmm8, %zmm5
- vfmadd213pd {rn-sae}, %zmm14, %zmm8, %zmm5
- vpandq _Mask32+__svml_derfc_data_internal(%rip), %zmm4, %zmm4
- vpmovqd %zmm4, %ymm0
- vmovups _poly1_4+__svml_derfc_data_internal(%rip), %zmm4
- vfmadd213pd {rn-sae}, %zmm4, %zmm8, %zmm6
+ /* P5 = P5 + D2*P07 */
+ vmovups _poly3_6+__svml_derfc_data_internal(%rip), %zmm15
+ vfmadd213pd {rn-sae}, %zmm0, %zmm8, %zmm6
+ vfmadd213pd {rn-sae}, %zmm13, %zmm8, %zmm3
+ vfmadd213pd {rn-sae}, %zmm12, %zmm8, %zmm5
+ vmovups _poly3_4+__svml_derfc_data_internal(%rip), %zmm12
+ vmovups _poly1_5+__svml_derfc_data_internal(%rip), %zmm13
+ vfmadd213pd {rn-sae}, %zmm12, %zmm8, %zmm5
+ vfmadd213pd {rn-sae}, %zmm14, %zmm8, %zmm5
+ vpandq _Mask32+__svml_derfc_data_internal(%rip), %zmm4, %zmm4
+ vpmovqd %zmm4, %ymm0
+ vmovups _poly1_4+__svml_derfc_data_internal(%rip), %zmm4
+ vfmadd213pd {rn-sae}, %zmm4, %zmm8, %zmm6
-/* T^2 */
- vmulpd {rn-sae}, %zmm8, %zmm8, %zmm4
- vfmadd213pd {rn-sae}, %zmm13, %zmm8, %zmm6
- vpxord %zmm1, %zmm1, %zmm1
- vgatherdpd 8(%rax,%ymm0), %zmm1{%k2}
- vpxord %zmm2, %zmm2, %zmm2
- vgatherdpd (%rax,%ymm0), %zmm2{%k1}
+ /* T^2 */
+ vmulpd {rn-sae}, %zmm8, %zmm8, %zmm4
+ vfmadd213pd {rn-sae}, %zmm13, %zmm8, %zmm6
+ vpxord %zmm1, %zmm1, %zmm1
+ vgatherdpd 8(%rax, %ymm0), %zmm1{%k2}
+ vpxord %zmm2, %zmm2, %zmm2
+ vgatherdpd (%rax, %ymm0), %zmm2{%k1}
-/* Diff^2 */
- vmulpd {rn-sae}, %zmm9, %zmm9, %zmm0
- vfmadd231pd {rn-sae}, %zmm0, %zmm3, %zmm15
- vmovups _poly1_6+__svml_derfc_data_internal(%rip), %zmm3
- vfmadd213pd {rn-sae}, %zmm15, %zmm8, %zmm5
- vfmadd213pd {rn-sae}, %zmm3, %zmm8, %zmm6
- vmovups _poly1_7+__svml_derfc_data_internal(%rip), %zmm3
- vfmsub213pd {rn-sae}, %zmm8, %zmm0, %zmm5
- vfmadd213pd {rn-sae}, %zmm3, %zmm8, %zmm6
+ /* Diff^2 */
+ vmulpd {rn-sae}, %zmm9, %zmm9, %zmm0
+ vfmadd231pd {rn-sae}, %zmm0, %zmm3, %zmm15
+ vmovups _poly1_6+__svml_derfc_data_internal(%rip), %zmm3
+ vfmadd213pd {rn-sae}, %zmm15, %zmm8, %zmm5
+ vfmadd213pd {rn-sae}, %zmm3, %zmm8, %zmm6
+ vmovups _poly1_7+__svml_derfc_data_internal(%rip), %zmm3
+ vfmsub213pd {rn-sae}, %zmm8, %zmm0, %zmm5
+ vfmadd213pd {rn-sae}, %zmm3, %zmm8, %zmm6
-/* EXP_X0H *= Diff */
- vmulpd {rn-sae}, %zmm9, %zmm1, %zmm8
+ /* EXP_X0H *= Diff */
+ vmulpd {rn-sae}, %zmm9, %zmm1, %zmm8
-/* Special arguments (for flags only) */
- vmovups _UF_Threshold+__svml_derfc_data_internal(%rip), %zmm9
- vfmadd213pd {rn-sae}, %zmm5, %zmm4, %zmm6
- vcmppd $21, {sae}, %zmm9, %zmm11, %k0
+ /* Special arguments (for flags only) */
+ vmovups _UF_Threshold+__svml_derfc_data_internal(%rip), %zmm9
+ vfmadd213pd {rn-sae}, %zmm5, %zmm4, %zmm6
+ vcmppd $21, {sae}, %zmm9, %zmm11, %k0
-/* EXP_x0H*Diff*(1+P1) */
- vfmadd213pd {rn-sae}, %zmm8, %zmm8, %zmm6
- kmovw %k0, %edx
+ /* EXP_x0H*Diff*(1+P1) */
+ vfmadd213pd {rn-sae}, %zmm8, %zmm8, %zmm6
+ kmovw %k0, %edx
-/* erfc(|_VARG1|) = erfc_h(x0) - P1 */
- vsubpd {rn-sae}, %zmm6, %zmm2, %zmm0
- vfmadd213pd {rn-sae}, %zmm10, %zmm7, %zmm0
- testl %edx, %edx
+ /* erfc(|_VARG1|) = erfc_h(x0) - P1 */
+ vsubpd {rn-sae}, %zmm6, %zmm2, %zmm0
+ vfmadd213pd {rn-sae}, %zmm10, %zmm7, %zmm0
+ testl %edx, %edx
-/* Go to special inputs processing branch */
- jne L(SPECIAL_VALUES_BRANCH)
- # LOE rbx r12 r13 r14 r15 edx zmm0 zmm11
+ /* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx r12 r13 r14 r15 edx zmm0 zmm11
-/* Restore registers
- * and exit the function
- */
+ /* Restore registers
+ * and exit the function
+ */
L(EXIT):
- movq %rbp, %rsp
- popq %rbp
- cfi_def_cfa(7, 8)
- cfi_restore(6)
- ret
- cfi_def_cfa(6, 16)
- cfi_offset(6, -16)
+ movq %rbp, %rsp
+ popq %rbp
+ cfi_def_cfa(7, 8)
+ cfi_restore(6)
+ ret
+ cfi_def_cfa(6, 16)
+ cfi_offset(6, -16)
-/* Branch to process
- * special inputs
- */
+ /* Branch to process
+ * special inputs
+ */
L(SPECIAL_VALUES_BRANCH):
- vmovups %zmm11, 64(%rsp)
- vmovups %zmm0, 128(%rsp)
- # LOE rbx r12 r13 r14 r15 edx zmm0
+ vmovups %zmm11, 64(%rsp)
+ vmovups %zmm0, 128(%rsp)
+ # LOE rbx r12 r13 r14 r15 edx zmm0
- xorl %eax, %eax
- # LOE rbx r12 r13 r14 r15 eax edx
+ xorl %eax, %eax
+ # LOE rbx r12 r13 r14 r15 eax edx
- vzeroupper
- movq %r12, 16(%rsp)
- /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
- .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
- movl %eax, %r12d
- movq %r13, 8(%rsp)
- /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
- .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
- movl %edx, %r13d
- movq %r14, (%rsp)
- /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
- .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
- # LOE rbx r15 r12d r13d
+ vzeroupper
+ movq %r12, 16(%rsp)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ movl %eax, %r12d
+ movq %r13, 8(%rsp)
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ movl %edx, %r13d
+ movq %r14, (%rsp)
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r15 r12d r13d
-/* Range mask
- * bits check
- */
+ /* Range mask
+ * bits check
+ */
L(RANGEMASK_CHECK):
- btl %r12d, %r13d
+ btl %r12d, %r13d
-/* Call scalar math function */
- jc L(SCALAR_MATH_CALL)
- # LOE rbx r15 r12d r13d
+ /* Call scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx r15 r12d r13d
-/* Special inputs
- * processing loop
- */
+ /* Special inputs
+ * processing loop
+ */
L(SPECIAL_VALUES_LOOP):
- incl %r12d
- cmpl $8, %r12d
+ incl %r12d
+ cmpl $8, %r12d
-/* Check bits in range mask */
- jl L(RANGEMASK_CHECK)
- # LOE rbx r15 r12d r13d
+ /* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx r15 r12d r13d
- movq 16(%rsp), %r12
- cfi_restore(12)
- movq 8(%rsp), %r13
- cfi_restore(13)
- movq (%rsp), %r14
- cfi_restore(14)
- vmovups 128(%rsp), %zmm0
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ vmovups 128(%rsp), %zmm0
-/* Go to exit */
- jmp L(EXIT)
- /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
- .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
- /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
- .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
- /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
- .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
- # LOE rbx r12 r13 r14 r15 zmm0
+ /* Go to exit */
+ jmp L(EXIT)
+ /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+ /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
+ .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+ # LOE rbx r12 r13 r14 r15 zmm0
-/* Scalar math function call
- * to process special input
- */
+	/* Scalar math function call
+	 * to process special input
+	 */
L(SCALAR_MATH_CALL):
- movl %r12d, %r14d
- movsd 64(%rsp,%r14,8), %xmm0
- call erfc@PLT
- # LOE rbx r14 r15 r12d r13d xmm0
+ movl %r12d, %r14d
+ movsd 64(%rsp, %r14, 8), %xmm0
+ call erfc@PLT
+ # LOE rbx r14 r15 r12d r13d xmm0
- movsd %xmm0, 128(%rsp,%r14,8)
+ movsd %xmm0, 128(%rsp, %r14, 8)
-/* Process special inputs in loop */
- jmp L(SPECIAL_VALUES_LOOP)
- # LOE rbx r15 r12d r13d
+ /* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ # LOE rbx r15 r12d r13d
END(_ZGVeN8v_erfc_skx)
- .section .rodata, "a"
- .align 64
+ .section .rodata, "a"
+ .align 64
#ifdef __svml_derfc_data_internal_typedef
typedef unsigned int VUINT32;
-typedef struct
-{
- __declspec(align(64)) VUINT32 _erfc_tbl[3488*2][2];
- __declspec(align(64)) VUINT32 _AbsMask[8][2];
- __declspec(align(64)) VUINT32 _MaxThreshold[8][2];
- __declspec(align(64)) VUINT32 _SgnMask[8][2];
- __declspec(align(64)) VUINT32 _One[8][2];
- __declspec(align(64)) VUINT32 _TwoM128[8][2];
- __declspec(align(64)) VUINT32 _SRound[8][2];
- __declspec(align(64)) VUINT32 _poly1_0[8][2];
- __declspec(align(64)) VUINT32 _poly1_1[8][2];
- __declspec(align(64)) VUINT32 _poly3_0[8][2];
- __declspec(align(64)) VUINT32 _poly3_1[8][2];
- __declspec(align(64)) VUINT32 _poly5_0[8][2];
- __declspec(align(64)) VUINT32 _poly5_1[8][2];
- __declspec(align(64)) VUINT32 _poly1_2[8][2];
- __declspec(align(64)) VUINT32 _poly3_2[8][2];
- __declspec(align(64)) VUINT32 _poly5_2[8][2];
- __declspec(align(64)) VUINT32 _poly1_3[8][2];
- __declspec(align(64)) VUINT32 _poly3_3[8][2];
- __declspec(align(64)) VUINT32 _poly5_3[8][2];
- __declspec(align(64)) VUINT32 _poly1_4[8][2];
- __declspec(align(64)) VUINT32 _poly3_4[8][2];
- __declspec(align(64)) VUINT32 _poly1_5[8][2];
- __declspec(align(64)) VUINT32 _poly3_5[8][2];
- __declspec(align(64)) VUINT32 _poly3_6[8][2];
- __declspec(align(64)) VUINT32 _poly1_6[8][2];
- __declspec(align(64)) VUINT32 _poly1_7[8][2];
- __declspec(align(64)) VUINT32 _UF_Threshold[8][2];
- __declspec(align(64)) VUINT32 _Mask32[8][2];
+typedef struct {
+ __declspec(align(64)) VUINT32 _erfc_tbl[3488*2][2];
+ __declspec(align(64)) VUINT32 _AbsMask[8][2];
+ __declspec(align(64)) VUINT32 _MaxThreshold[8][2];
+ __declspec(align(64)) VUINT32 _SgnMask[8][2];
+ __declspec(align(64)) VUINT32 _One[8][2];
+ __declspec(align(64)) VUINT32 _TwoM128[8][2];
+ __declspec(align(64)) VUINT32 _SRound[8][2];
+ __declspec(align(64)) VUINT32 _poly1_0[8][2];
+ __declspec(align(64)) VUINT32 _poly1_1[8][2];
+ __declspec(align(64)) VUINT32 _poly3_0[8][2];
+ __declspec(align(64)) VUINT32 _poly3_1[8][2];
+ __declspec(align(64)) VUINT32 _poly5_0[8][2];
+ __declspec(align(64)) VUINT32 _poly5_1[8][2];
+ __declspec(align(64)) VUINT32 _poly1_2[8][2];
+ __declspec(align(64)) VUINT32 _poly3_2[8][2];
+ __declspec(align(64)) VUINT32 _poly5_2[8][2];
+ __declspec(align(64)) VUINT32 _poly1_3[8][2];
+ __declspec(align(64)) VUINT32 _poly3_3[8][2];
+ __declspec(align(64)) VUINT32 _poly5_3[8][2];
+ __declspec(align(64)) VUINT32 _poly1_4[8][2];
+ __declspec(align(64)) VUINT32 _poly3_4[8][2];
+ __declspec(align(64)) VUINT32 _poly1_5[8][2];
+ __declspec(align(64)) VUINT32 _poly3_5[8][2];
+ __declspec(align(64)) VUINT32 _poly3_6[8][2];
+ __declspec(align(64)) VUINT32 _poly1_6[8][2];
+ __declspec(align(64)) VUINT32 _poly1_7[8][2];
+ __declspec(align(64)) VUINT32 _UF_Threshold[8][2];
+ __declspec(align(64)) VUINT32 _Mask32[8][2];
} __svml_derfc_data_internal;
#endif
__svml_derfc_data_internal:
- /*== _erfc_tbl ==*/
- .quad 0x47f0000000000000, 0x47f20dd750429b6d
- .quad 0x47efb7c9030853b3, 0x47f20d8f1975c85d
- .quad 0x47ef6f9447be0743, 0x47f20cb67bd452c7
- .quad 0x47ef27640f9853d9, 0x47f20b4d8bac36c1
- .quad 0x47eedf3a9ba22dad, 0x47f209546ad13ccf
- .quad 0x47ee971a2c4436ae, 0x47f206cb4897b148
- .quad 0x47ee4f05010eca8c, 0x47f203b261cd0053
- .quad 0x47ee06fd58842c7e, 0x47f2000a00ae3804
- .quad 0x47edbf056fe2df35, 0x47f1fbd27cdc72d3
- .quad 0x47ed771f82f02f4e, 0x47f1f70c3b4f2cc8
- .quad 0x47ed2f4dcbc2f894, 0x47f1f1b7ae44867f
- .quad 0x47ece792828eae5c, 0x47f1ebd5552f795b
- .quad 0x47ec9fefdd6eaf19, 0x47f1e565bca400d4
- .quad 0x47ec58681031eb6a, 0x47f1de697e413d29
- .quad 0x47ec10fd4c26e896, 0x47f1d6e14099944a
- .quad 0x47ebc9b1bfe82687, 0x47f1cecdb718d61c
- .quad 0x47eb82879728f11e, 0x47f1c62fa1e869b6
- .quad 0x47eb3b80fa82a4bb, 0x47f1bd07cdd189ac
- .quad 0x47eaf4a00f426daa, 0x47f1b357141d95d5
- .quad 0x47eaade6f7378a0e, 0x47f1a91e5a748165
- .quad 0x47ea6757d08215d8, 0x47f19e5e92b964ab
- .quad 0x47ea20f4b5626818, 0x47f19318bae53a04
- .quad 0x47e9dabfbc090901, 0x47f1874ddcdfce24
- .quad 0x47e994baf66747ad, 0x47f17aff0e56ec10
- .quad 0x47e94ee8720076b6, 0x47f16e2d7093cd8c
- .quad 0x47e9094a37bbd66e, 0x47f160da304ed92f
- .quad 0x47e8c3e24bb73372, 0x47f153068581b781
- .quad 0x47e87eb2ad1a4032, 0x47f144b3b337c90c
- .quad 0x47e839bd55eaafc8, 0x47f135e3075d076b
- .quad 0x47e7f5043ae11862, 0x47f12695da8b5bde
- .quad 0x47e7b0894b3ea35c, 0x47f116cd8fd67618
- .quad 0x47e76c4e70a390e7, 0x47f1068b94962e5e
- .quad 0x47e728558ee694fc, 0x47f0f5d1602f7e41
- .quad 0x47e6e4a083ed132f, 0x47f0e4a073dc1b91
- .quad 0x47e6a13127843ec1, 0x47f0d2fa5a70c168
- .quad 0x47e65e094b3b2413, 0x47f0c0e0a8223359
- .quad 0x47e61b2aba3da093, 0x47f0ae54fa490723
- .quad 0x47e5d89739304dcf, 0x47f09b58f724416b
- .quad 0x47e59650860d6469, 0x47f087ee4d9ad247
- .quad 0x47e5545858029b39, 0x47f07416b4fbfe7c
- .quad 0x47e512b05f5006e1, 0x47f05fd3ecbec298
- .quad 0x47e4d15a4527fdc7, 0x47f04b27bc403d30
- .quad 0x47e49057ab900447, 0x47f03613f2812daf
- .quad 0x47e44faa2d42c4a0, 0x47f0209a65e29545
- .quad 0x47e40f535d93160e, 0x47f00abcf3e187a9
- .quad 0x47e3cf54c8501620, 0x47efe8fb01a47307
- .quad 0x47e38faff1aa574a, 0x47efbbbbef34b4b2
- .quad 0x47e35066561a275d, 0x47ef8dc092d58ff8
- .quad 0x47e311796a46f064, 0x47ef5f0cdaf15313
- .quad 0x47e2d2ea9aefb636, 0x47ef2fa4c16c0019
- .quad 0x47e294bb4cd4b2bd, 0x47eeff8c4b1375db
- .quad 0x47e256ecdca212cc, 0x47eecec7870ebca8
- .quad 0x47e219809edbd524, 0x47ee9d5a8e4c934e
- .quad 0x47e1dc77dfcacd02, 0x47ee6b4982f158b9
- .quad 0x47e19fd3e36ac96a, 0x47ee38988fc46e72
- .quad 0x47e16395e559e218, 0x47ee054be79d3042
- .quad 0x47e127bf18c8eadc, 0x47edd167c4cf9d2a
- .quad 0x47e0ec50a86d0dd4, 0x47ed9cf06898cdaf
- .quad 0x47e0b14bb6728cd8, 0x47ed67ea1a8b5368
- .quad 0x47e076b15c70aa28, 0x47ed325927fb9d89
- .quad 0x47e03c82ab5eb831, 0x47ecfc41e36c7df9
- .quad 0x47e002c0ab8a5018, 0x47ecc5a8a3fbea40
- .quad 0x47df92d8b91d5cc7, 0x47ec8e91c4d01368
- .quad 0x47df210d6a9a6a31, 0x47ec5701a484ef9d
- .quad 0x47deb02147ce245c, 0x47ec1efca49a5011
- .quad 0x47de40161b701275, 0x47ebe68728e29d5e
- .quad 0x47ddd0ed9ea4bdd6, 0x47ebada596f25436
- .quad 0x47dd62a978f7c957, 0x47eb745c55905bf8
- .quad 0x47dcf54b4058455f, 0x47eb3aafcc27502e
- .quad 0x47dc88d479173cce, 0x47eb00a46237d5be
- .quad 0x47dc1d4695e87644, 0x47eac63e7ecc1411
- .quad 0x47dbb2a2f7e56520, 0x47ea8b8287ec6a09
- .quad 0x47db48eaee924501, 0x47ea5074e2157620
- .quad 0x47dae01fb7e55a66, 0x47ea1519efaf889e
- .quad 0x47da78428050527e, 0x47e9d97610879642
- .quad 0x47da115462cbbc17, 0x47e99d8da149c13f
- .quad 0x47d9ab5668e4930a, 0x47e96164fafd8de3
- .quad 0x47d946498acbd766, 0x47e925007283d7aa
- .quad 0x47d8e22eaf68291e, 0x47e8e86458169af8