aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSunil K Pandey <skpgkp2@gmail.com>2022-03-07 10:47:15 -0800
committerSunil K Pandey <skpgkp2@gmail.com>2022-03-07 21:44:09 -0800
commit7f852d2592b50ef9c6daed656b8f33c65bfe594a (patch)
treea616daeb732d919c2f2b4aa389526f842b14b1d6
parent160e183a9a1d2bb54fb899f86ab67ce516a95304 (diff)
downloadglibc-7f852d2592b50ef9c6daed656b8f33c65bfe594a.tar.xz
glibc-7f852d2592b50ef9c6daed656b8f33c65bfe594a.zip
x86_64: Fix svml_d_tan2_core_sse4.S code formatting
This commit contains the following formatting changes: 1. Instructions preceded by a tab. 2. Instructions less than 8 characters in length have a tab between the mnemonic and the first operand. 3. Instructions greater than 7 characters in length have a space between the mnemonic and the first operand. 4. Tabs after `#define`d names and their values. 5. 8 spaces at the beginning of a line replaced by a tab. 6. Indent comments with code. 7. Remove redundant .text section. 8. 1 space between line content and line comment. 9. Space after all commas. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S12143
1 file changed, 6070 insertions, 6073 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S
index 879cfd631d..d572b886be 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S
@@ -47,6213 +47,6210 @@
/* Offsets for data table __svml_dtan_data_internal
*/
-#define _dAbsMask 0
-#define _dRangeVal 16
-#define _dRShift 32
-#define _dCoeffs 48
-#define _dReductionRangeVal 26672
-#define _dInvPi 26688
-#define _dPI1 26704
-#define _dPI2 26720
-#define _dPI3 26736
-#define _dP1 26752
-#define _dP2 26768
-#define _dP3 26784
-#define _dQ0 26800
-#define _dQ1 26816
-#define _dQ2 26832
-#define _dQ3 26848
+#define _dAbsMask 0
+#define _dRangeVal 16
+#define _dRShift 32
+#define _dCoeffs 48
+#define _dReductionRangeVal 26672
+#define _dInvPi 26688
+#define _dPI1 26704
+#define _dPI2 26720
+#define _dPI3 26736
+#define _dP1 26752
+#define _dP2 26768
+#define _dP3 26784
+#define _dQ0 26800
+#define _dQ1 26816
+#define _dQ2 26832
+#define _dQ3 26848
#include <sysdep.h>
- .text
- .section .text.sse4,"ax",@progbits
+ .section .text.sse4, "ax", @progbits
ENTRY(_ZGVbN2v_tan_sse4)
- subq $72, %rsp
- cfi_def_cfa_offset(80)
- movaps %xmm0, %xmm1
- movups _dAbsMask+__svml_dtan_data_internal(%rip), %xmm4
+ subq $72, %rsp
+ cfi_def_cfa_offset(80)
+ movaps %xmm0, %xmm1
+ movups _dAbsMask+__svml_dtan_data_internal(%rip), %xmm4
-/* Legacy Code */
- xorl %eax, %eax
+ /* Legacy Code */
+ xorl %eax, %eax
-/* b) Remove sign using AND 0x7fffffffffffffff operation */
- movaps %xmm4, %xmm5
+ /* b) Remove sign using AND 0x7fffffffffffffff operation */
+ movaps %xmm4, %xmm5
-/* 1) Range reduction to [-Pi/4; +Pi/4] interval */
- pxor %xmm11, %xmm11
+ /* 1) Range reduction to [-Pi/4; +Pi/4] interval */
+ pxor %xmm11, %xmm11
-/*
- * c) Getting octant Y by 2/Pi multiplication
- * d) Add "Right Shifter" (0x4330000000000000) value
- */
- movups _dInvPi+__svml_dtan_data_internal(%rip), %xmm3
- andps %xmm1, %xmm5
- mulpd %xmm5, %xmm3
- movups _dRShift+__svml_dtan_data_internal(%rip), %xmm6
+ /*
+ * c) Getting octant Y by 2/Pi multiplication
+ * d) Add "Right Shifter" (0x4330000000000000) value
+ */
+ movups _dInvPi+__svml_dtan_data_internal(%rip), %xmm3
+ andps %xmm1, %xmm5
+ mulpd %xmm5, %xmm3
+ movups _dRShift+__svml_dtan_data_internal(%rip), %xmm6
-/*
- * Range reduction
- * X = X - Y*PI1 - Y*PI2 - Y*PI3 - Y*PI4;
- */
- movaps %xmm5, %xmm2
- addpd %xmm6, %xmm3
+ /*
+ * Range reduction
+ * X = X - Y*PI1 - Y*PI2 - Y*PI3 - Y*PI4;
+ */
+ movaps %xmm5, %xmm2
+ addpd %xmm6, %xmm3
-/* g) Subtract "Right Shifter" (0x4330000000000000) value */
- movaps %xmm3, %xmm9
+ /* g) Subtract "Right Shifter" (0x4330000000000000) value */
+ movaps %xmm3, %xmm9
-/* a) Grab sign from source argument and save it. */
- andnps %xmm1, %xmm4
- subpd %xmm6, %xmm9
- movups _dPI1+__svml_dtan_data_internal(%rip), %xmm7
+ /* a) Grab sign from source argument and save it. */
+ andnps %xmm1, %xmm4
+ subpd %xmm6, %xmm9
+ movups _dPI1+__svml_dtan_data_internal(%rip), %xmm7
-/*
- * e) Treat obtained value as integer for destination sign setting.
- * Shift first bit of this value to the last (sign) position (S << 63)
- * f) Change destination sign if source sign is negative
- * using XOR operation.
- */
- movaps %xmm3, %xmm6
- mulpd %xmm9, %xmm7
- movups _dPI2+__svml_dtan_data_internal(%rip), %xmm8
- psllq $62, %xmm3
- mulpd %xmm9, %xmm8
- subpd %xmm7, %xmm2
- cmpneqpd %xmm11, %xmm3
- subpd %xmm8, %xmm2
- movups _dPI3+__svml_dtan_data_internal(%rip), %xmm10
+ /*
+ * e) Treat obtained value as integer for destination sign setting.
+ * Shift first bit of this value to the last (sign) position (S << 63)
+ * f) Change destination sign if source sign is negative
+ * using XOR operation.
+ */
+ movaps %xmm3, %xmm6
+ mulpd %xmm9, %xmm7
+ movups _dPI2+__svml_dtan_data_internal(%rip), %xmm8
+ psllq $62, %xmm3
+ mulpd %xmm9, %xmm8
+ subpd %xmm7, %xmm2
+ cmpneqpd %xmm11, %xmm3
+ subpd %xmm8, %xmm2
+ movups _dPI3+__svml_dtan_data_internal(%rip), %xmm10
-/*
- * c) Swap P and Q if first bit of obtained value after
- * Right Shifting is set to 1. Using And, Andnot & Or operations.
- */
- movaps %xmm3, %xmm0
- mulpd %xmm9, %xmm10
- subpd %xmm10, %xmm2
+ /*
+ * c) Swap P and Q if first bit of obtained value after
+ * Right Shifting is set to 1. Using And, Andnot & Or operations.
+ */
+ movaps %xmm3, %xmm0
+ mulpd %xmm9, %xmm10
+ subpd %xmm10, %xmm2
-/* a) Calculate X^2 = X * X */
- movaps %xmm2, %xmm15
- movaps %xmm3, %xmm14
- mulpd %xmm2, %xmm15
+ /* a) Calculate X^2 = X * X */
+ movaps %xmm2, %xmm15
+ movaps %xmm3, %xmm14
+ mulpd %xmm2, %xmm15
-/*
- * b) Calculate 2 polynomials:
- * P = X * (P0 + X^2 * (P1 + x^2 * (P2 + x^2 * (P3))));
- * Q = Q0 + X^2 * (Q1 + x^2 * (Q2 + x^2 * (Q3)));
- * Assume P0 = 1
- */
- movups _dP3+__svml_dtan_data_internal(%rip), %xmm13
- psllq $63, %xmm6
- mulpd %xmm15, %xmm13
- movups _dQ3+__svml_dtan_data_internal(%rip), %xmm12
- pxor %xmm4, %xmm6
- addpd _dP2+__svml_dtan_data_internal(%rip), %xmm13
- mulpd %xmm15, %xmm12
- mulpd %xmm15, %xmm13
- addpd _dQ2+__svml_dtan_data_internal(%rip), %xmm12
- addpd _dP1+__svml_dtan_data_internal(%rip), %xmm13
- mulpd %xmm15, %xmm12
- mulpd %xmm15, %xmm13
- addpd _dQ1+__svml_dtan_data_internal(%rip), %xmm12
- mulpd %xmm2, %xmm13
- mulpd %xmm12, %xmm15
- addpd %xmm13, %xmm2
- addpd _dQ0+__svml_dtan_data_internal(%rip), %xmm15
- andnps %xmm2, %xmm0
- andps %xmm15, %xmm14
- andps %xmm3, %xmm2
- andnps %xmm15, %xmm3
- orps %xmm14, %xmm0
- orps %xmm3, %xmm2
+ /*
+ * b) Calculate 2 polynomials:
+ * P = X * (P0 + X^2 * (P1 + x^2 * (P2 + x^2 * (P3))));
+ * Q = Q0 + X^2 * (Q1 + x^2 * (Q2 + x^2 * (Q3)));
+ * Assume P0 = 1
+ */
+ movups _dP3+__svml_dtan_data_internal(%rip), %xmm13
+ psllq $63, %xmm6
+ mulpd %xmm15, %xmm13
+ movups _dQ3+__svml_dtan_data_internal(%rip), %xmm12
+ pxor %xmm4, %xmm6
+ addpd _dP2+__svml_dtan_data_internal(%rip), %xmm13
+ mulpd %xmm15, %xmm12
+ mulpd %xmm15, %xmm13
+ addpd _dQ2+__svml_dtan_data_internal(%rip), %xmm12
+ addpd _dP1+__svml_dtan_data_internal(%rip), %xmm13
+ mulpd %xmm15, %xmm12
+ mulpd %xmm15, %xmm13
+ addpd _dQ1+__svml_dtan_data_internal(%rip), %xmm12
+ mulpd %xmm2, %xmm13
+ mulpd %xmm12, %xmm15
+ addpd %xmm13, %xmm2
+ addpd _dQ0+__svml_dtan_data_internal(%rip), %xmm15
+ andnps %xmm2, %xmm0
+ andps %xmm15, %xmm14
+ andps %xmm3, %xmm2
+ andnps %xmm15, %xmm3
+ orps %xmm14, %xmm0
+ orps %xmm3, %xmm2
-/* d) Divide R = P / Q; */
- divpd %xmm2, %xmm0
+ /* d) Divide R = P / Q; */
+ divpd %xmm2, %xmm0
-/* Large values check */
- movaps %xmm5, %xmm4
+ /* Large values check */
+ movaps %xmm5, %xmm4
-/*
- * 3) Destination sign setting
- * a) Set shifted destination sign using XOR operation:
- * R = XOR( R, S );
- */
- pxor %xmm6, %xmm0
- cmpnlepd _dReductionRangeVal+__svml_dtan_data_internal(%rip), %xmm4
- movmskpd %xmm4, %edx
- testl %edx, %edx
+ /*
+ * 3) Destination sign setting
+ * a) Set shifted destination sign using XOR operation:
+ * R = XOR( R, S );
+ */
+ pxor %xmm6, %xmm0
+ cmpnlepd _dReductionRangeVal+__svml_dtan_data_internal(%rip), %xmm4
+ movmskpd %xmm4, %edx
+ testl %edx, %edx
-/* Go to auxilary branch */
- jne L(AUX_BRANCH)
- # LOE rbx rbp r12 r13 r14 r15 eax xmm0 xmm1 xmm4 xmm5
+ /* Go to auxilary branch */
+ jne L(AUX_BRANCH)
+ # LOE rbx rbp r12 r13 r14 r15 eax xmm0 xmm1 xmm4 xmm5
-/* Return from auxilary branch
- * for out of main path inputs
- */
+ /* Return from auxilary branch
+ * for out of main path inputs
+ */
L(AUX_BRANCH_RETURN):
- testl %eax, %eax
+ testl %eax, %eax
-/* Go to special inputs processing branch */
- jne L(SPECIAL_VALUES_BRANCH)
- # LOE rbx rbp r12 r13 r14 r15 eax xmm0 xmm1
+ /* Go to special inputs processing branch */
+ jne L(SPECIAL_VALUES_BRANCH)
+ # LOE rbx rbp r12 r13 r14 r15 eax xmm0 xmm1
-/* Restore registers
- * and exit the function
- */
+ /* Restore registers
+ * and exit the function
+ */
L(EXIT):
- addq $72, %rsp
- cfi_def_cfa_offset(8)
- ret
- cfi_def_cfa_offset(80)
+ addq $72, %rsp
+ cfi_def_cfa_offset(8)
+ ret
+ cfi_def_cfa_offset(80)
-/* Branch to process
- * special inputs
- */
+ /* Branch to process
+ * special inputs
+ */
L(SPECIAL_VALUES_BRANCH):
- movups %xmm1, 32(%rsp)
- movups %xmm0, 48(%rsp)
- # LOE rbx rbp r12 r13 r14 r15 eax xmm0
+ movups %xmm1, 32(%rsp)
+ movups %xmm0, 48(%rsp)
+ # LOE rbx rbp r12 r13 r14 r15 eax xmm0
- xorl %edx, %edx
- movq %r12, 16(%rsp)
- cfi_offset(12, -64)
- movl %edx, %r12d
- movq %r13, 8(%rsp)
- cfi_offset(13, -72)
- movl %eax, %r13d
- movq %r14, (%rsp)
- cfi_offset(14, -80)
- # LOE rbx rbp r15 r12d r13d
+ xorl %edx, %edx
+ movq %r12, 16(%rsp)
+ cfi_offset(12, -64)
+ movl %edx, %r12d
+ movq %r13, 8(%rsp)
+ cfi_offset(13, -72)
+ movl %eax, %r13d
+ movq %r14, (%rsp)
+ cfi_offset(14, -80)
+ # LOE rbx rbp r15 r12d r13d
-/* Range mask
- * bits check
- */
+ /* Range mask
+ * bits check
+ */
L(RANGEMASK_CHECK):
- btl %r12d, %r13d
+ btl %r12d, %r13d
-/* Call scalar math function */
- jc L(SCALAR_MATH_CALL)
- # LOE rbx rbp r15 r12d r13d
+ /* Call scalar math function */
+ jc L(SCALAR_MATH_CALL)
+ # LOE rbx rbp r15 r12d r13d
-/* Special inputs
- * processing loop
- */
+ /* Special inputs
+ * processing loop
+ */
L(SPECIAL_VALUES_LOOP):
- incl %r12d
- cmpl $2, %r12d
+ incl %r12d
+ cmpl $2, %r12d
-/* Check bits in range mask */
- jl L(RANGEMASK_CHECK)
- # LOE rbx rbp r15 r12d r13d
+ /* Check bits in range mask */
+ jl L(RANGEMASK_CHECK)
+ # LOE rbx rbp r15 r12d r13d
- movq 16(%rsp), %r12
- cfi_restore(12)
- movq 8(%rsp), %r13
- cfi_restore(13)
- movq (%rsp), %r14
- cfi_restore(14)
- movups 48(%rsp), %xmm0
+ movq 16(%rsp), %r12
+ cfi_restore(12)
+ movq 8(%rsp), %r13
+ cfi_restore(13)
+ movq (%rsp), %r14
+ cfi_restore(14)
+ movups 48(%rsp), %xmm0
-/* Go to exit */
- jmp L(EXIT)
- cfi_offset(12, -64)
- cfi_offset(13, -72)
- cfi_offset(14, -80)
- # LOE rbx rbp r12 r13 r14 r15 xmm0
+ /* Go to exit */
+ jmp L(EXIT)
+ cfi_offset(12, -64)
+ cfi_offset(13, -72)
+ cfi_offset(14, -80)
+ # LOE rbx rbp r12 r13 r14 r15 xmm0
-/* Scalar math fucntion call
- * to process special input
- */
+ /* Scalar math fucntion call
+ * to process special input
+ */
L(SCALAR_MATH_CALL):
- movl %r12d, %r14d
- movsd 32(%rsp,%r14,8), %xmm0
- call tan@PLT
- # LOE rbx rbp r14 r15 r12d r13d xmm0
+ movl %r12d, %r14d
+ movsd 32(%rsp, %r14, 8), %xmm0
+ call tan@PLT
+ # LOE rbx rbp r14 r15 r12d r13d xmm0
- movsd %xmm0, 48(%rsp,%r14,8)
+ movsd %xmm0, 48(%rsp, %r14, 8)
-/* Process special inputs in loop */
- jmp L(SPECIAL_VALUES_LOOP)
- cfi_restore(12)
- cfi_restore(13)
- cfi_restore(14)
- # LOE rbx rbp r15 r12d r13d
+ /* Process special inputs in loop */
+ jmp L(SPECIAL_VALUES_LOOP)
+ cfi_restore(12)
+ cfi_restore(13)
+ cfi_restore(14)
+ # LOE rbx rbp r15 r12d r13d
-/* Auxilary branch
- * for out of main path inputs
- */
+ /* Auxilary branch
+ * for out of main path inputs
+ */
L(AUX_BRANCH):
- movdqu .FLT_17(%rip), %xmm3
+ movdqu .FLT_17(%rip), %xmm3
-/*
- * Get the (2^a / 2pi) mod 1 values from the table.
- * Because doesn't have L-type gather, we need a trivial cast
- */
- lea __svml_dtan_reduction_data_internal(%rip), %r8
- pand %xmm1, %xmm3
- psrlq $52, %xmm3
+ /*
+ * Get the (2^a / 2pi) mod 1 values from the table.
+ * Because doesn't have L-type gather, we need a trivial cast
+ */
+ lea __svml_dtan_reduction_data_internal(%rip), %r8
+ pand %xmm1, %xmm3
+ psrlq $52, %xmm3
-/*
- * Also get the significand as an integer
- * NB: adding in the integer bit is wrong for denorms!
- * To make this work for denorms we should do something slightly different
- */
- movdqu .FLT_18(%rip), %xmm2
- movd %xmm3, %edx
- pand %xmm1, %xmm2
- paddq .FLT_19(%rip), %xmm2
- pextrw $4, %xmm3, %esi
- movups _dRangeVal+__svml_dtan_data_internal(%rip), %xmm10
- lea (%rdx,%rdx,2), %ecx
- shll $3, %ecx
- lea (%rsi,%rsi,2), %edi
- shll $3, %edi
- movdqa %xmm2, %xmm6
- movq 16(%rcx,%r8), %xmm8
- andps %xmm10, %xmm5
- movhpd 16(%rdi,%r8), %xmm8
- psrlq $32, %xmm6
- movups %xmm0, 16(%rsp)
- movaps %xmm8, %xmm0
+ /*
+ * Also get the significand as an integer
+ * NB: adding in the integer bit is wrong for denorms!
+ * To make this work for denorms we should do something slightly different
+ */
+ movdqu .FLT_18(%rip), %xmm2
+ movd %xmm3, %edx
+ pand %xmm1, %xmm2
+ paddq .FLT_19(%rip), %xmm2
+ pextrw $4, %xmm3, %esi
+ movups _dRangeVal+__svml_dtan_data_internal(%rip), %xmm10
+ lea (%rdx, %rdx, 2), %ecx
+ shll $3, %ecx
+ lea (%rsi, %rsi, 2), %edi
+ shll $3, %edi
+ movdqa %xmm2, %xmm6
+ movq 16(%rcx, %r8), %xmm8
+ andps %xmm10, %xmm5
+ movhpd 16(%rdi, %r8), %xmm8
+ psrlq $32, %xmm6
+ movups %xmm0, 16(%rsp)
+ movaps %xmm8, %xmm0
-/*
- * Break the P_xxx and m into 32-bit chunks ready for
- * the long multiplication via 32x32->64 multiplications
- */
- movdqu .FLT_20(%rip), %xmm15
- psrlq $32, %xmm0
- movq 8(%rcx,%r8), %xmm13
- pand %xmm15, %xmm2
- cmpeqpd %xmm10, %xmm5
- movdqa %xmm6, %xmm10
- movdqa %xmm2, %xmm11
- movhpd 8(%rdi,%r8), %xmm13
- pand %xmm15, %xmm8
- pmuludq %xmm0, %xmm10
- movaps %xmm13, %xmm14
- pmuludq %xmm2, %xmm0
- pmuludq %xmm6, %xmm8
- movmskpd %xmm5, %eax
- pand %xmm15, %xmm13
- psrlq $32, %xmm0
- pmuludq %xmm13, %xmm11
- psrlq $32, %xmm14
- pmuludq %xmm6, %xmm13
- paddq %xmm0, %xmm10
- movdqa %xmm2, %xmm12
- movdqa %xmm15, %xmm3
- pmuludq %xmm14, %xmm12
- pand %xmm11, %xmm3
- pmuludq %xmm6, %xmm14
- paddq %xmm10, %xmm3
- movq (%rcx,%r8), %xmm7
- movdqa %xmm15, %xmm9
- movhpd (%rdi,%r8), %xmm7
- psrlq $32, %xmm8
- psrlq $32, %xmm11
- pand %xmm7, %xmm9
- movdqa %xmm2, %xmm5
- movdqa %xmm15, %xmm10
- paddq %xmm3, %xmm8
- paddq %xmm11, %xmm13
- pmuludq %xmm9, %xmm5
+ /*
+ * Break the P_xxx and m into 32-bit chunks ready for
+ * the long multiplication via 32x32->64 multiplications
+ */
+ movdqu .FLT_20(%rip), %xmm15
+ psrlq $32, %xmm0
+ movq 8(%rcx, %r8), %xmm13
+ pand %xmm15, %xmm2
+ cmpeqpd %xmm10, %xmm5
+ movdqa %xmm6, %xmm10
+ movdqa %xmm2, %xmm11
+ movhpd 8(%rdi, %r8), %xmm13
+ pand %xmm15, %xmm8
+ pmuludq %xmm0, %xmm10
+ movaps %xmm13, %xmm14
+ pmuludq %xmm2, %xmm0
+ pmuludq %xmm6, %xmm8
+ movmskpd %xmm5, %eax
+ pand %xmm15, %xmm13
+ psrlq $32, %xmm0
+ pmuludq %xmm13, %xmm11
+ psrlq $32, %xmm14
+ pmuludq %xmm6, %xmm13
+ paddq %xmm0, %xmm10
+ movdqa %xmm2, %xmm12
+ movdqa %xmm15, %xmm3
+ pmuludq %xmm14, %xmm12
+ pand %xmm11, %xmm3
+ pmuludq %xmm6, %xmm14
+ paddq %xmm10, %xmm3
+ movq (%rcx, %r8), %xmm7
+ movdqa %xmm15, %xmm9
+ movhpd (%rdi, %r8), %xmm7
+ psrlq $32, %xmm8
+ psrlq $32, %xmm11
+ pand %xmm7, %xmm9
+ movdqa %xmm2, %xmm5
+ movdqa %xmm15, %xmm10
+ paddq %xmm3, %xmm8
+ paddq %xmm11, %xmm13
+ pmuludq %xmm9, %xmm5
-/* Now do the big multiplication and carry propagation */
- pmuludq %xmm9, %xmm6
- pand %xmm12, %xmm10
- movaps %xmm8, %xmm0
- paddq %xmm13, %xmm10
- psrlq $32, %xmm0
- psrlq $32, %xmm12
- psrlq $32, %xmm7
- movdqa %xmm15, %xmm11
- paddq %xmm10, %xmm0
- paddq %xmm12, %xmm14
- pmuludq %xmm7, %xmm2
- pand %xmm5, %xmm11
- movdqa %xmm0, %xmm13
- paddq %xmm14, %xmm11
- psrlq $32, %xmm13
- psrlq $32, %xmm5
- paddq %xmm11, %xmm13
- paddq %xmm5, %xmm6
- pand %xmm15, %xmm2
- movdqa %xmm13, %xmm3
- paddq %xmm6, %xmm2
- psrlq $32, %xmm3
- pand %xmm15, %xmm13
- paddq %xmm2, %xmm3
- psllq $32, %xmm3
+ /* Now do the big multiplication and carry propagation */
+ pmuludq %xmm9, %xmm6
+ pand %xmm12, %xmm10
+ movaps %xmm8, %xmm0
+ paddq %xmm13, %xmm10
+ psrlq $32, %xmm0
+ psrlq $32, %xmm12
+ psrlq $32, %xmm7
+ movdqa %xmm15, %xmm11
+ paddq %xmm10, %xmm0
+ paddq %xmm12, %xmm14
+ pmuludq %xmm7, %xmm2
+ pand %xmm5, %xmm11
+ movdqa %xmm0, %xmm13
+ paddq %xmm14, %xmm11
+ psrlq $32, %xmm13
+ psrlq $32, %xmm5
+ paddq %xmm11, %xmm13
+ paddq %xmm5, %xmm6
+ pand %xmm15, %xmm2
+ movdqa %xmm13, %xmm3
+ paddq %xmm6, %xmm2
+ psrlq $32, %xmm3
+ pand %xmm15, %xmm13
+ paddq %xmm2, %xmm3
+ psllq $32, %xmm3
-/* Assemble reduced argument from the pieces */
- pand %xmm15, %xmm8
- paddq %xmm13, %xmm3
+ /* Assemble reduced argument from the pieces */
+ pand %xmm15, %xmm8
+ paddq %xmm13, %xmm3
-/*
- * We want to incorporate the original sign now too.
- * Do it here for convenience in getting the right N value,
- * though we could wait right to the end if we were prepared
- * to modify the sign of N later too.
- * So get the appropriate sign mask now (or sooner).
- */
- movdqu .FLT_21(%rip), %xmm9
- movdqa %xmm3, %xmm5
+ /*
+ * We want to incorporate the original sign now too.
+ * Do it here for convenience in getting the right N value,
+ * though we could wait right to the end if we were prepared
+ * to modify the sign of N later too.
+ * So get the appropriate sign mask now (or sooner).
+ */
+ movdqu .FLT_21(%rip), %xmm9
+ movdqa %xmm3, %xmm5
-/*
- * Create floating-point high part, implicitly adding integer bit 1
- * Incorporate overall sign at this stage too.
- */
- movdqu .FLT_22(%rip), %xmm15
- pand %xmm1, %xmm9
+ /*
+ * Create floating-point high part, implicitly adding integer bit 1
+ * Incorporate overall sign at this stage too.
+ */
+ movdqu .FLT_22(%rip), %xmm15
+ pand %xmm1, %xmm9
-/*
- * Now round at the 2^-9 bit position for reduction mod pi/2^8
- * instead of the original 2pi (but still with the same 2pi scaling).
- * Use a shifter of 2^43 + 2^42.
- * The N we get is our final version; it has an offset of
- * 2^9 because of the implicit integer bit, and anyway for negative
- * starting value it's a 2s complement thing. But we need to mask
- * off the exponent part anyway so it's fine.
- */
- movups .FLT_23(%rip), %xmm12
- psrlq $12, %xmm5
- pxor %xmm9, %xmm15
- movaps %xmm12, %xmm10
- por %xmm15, %xmm5
- psllq $32, %xmm0
- addpd %xmm5, %xmm10
- paddq %xmm8, %xmm0
- movaps %xmm10, %xmm14
+ /*
+ * Now round at the 2^-9 bit position for reduction mod pi/2^8
+ * instead of the original 2pi (but still with the same 2pi scaling).
+ * Use a shifter of 2^43 + 2^42.
+ * The N we get is our final version; it has an offset of
+ * 2^9 because of the implicit integer bit, and anyway for negative
+ * starting value it's a 2s complement thing. But we need to mask
+ * off the exponent part anyway so it's fine.
+ */
+ movups .FLT_23(%rip), %xmm12
+ psrlq $12, %xmm5
+ pxor %xmm9, %xmm15
+ movaps %xmm12, %xmm10
+ por %xmm15, %xmm5
+ psllq $32, %xmm0
+ addpd %xmm5, %xmm10
+ paddq %xmm8, %xmm0
+ movaps %xmm10, %xmm14
-/* Load constants (not all needed at once) */
- lea _dCoeffs+96+__svml_dtan_data_internal(%rip), %rdx
- movdqu .FLT_27(%rip), %xmm6
- movdqu .FLT_25(%rip), %xmm7
- pand %xmm3, %xmm6
+ /* Load constants (not all needed at once) */
+ lea _dCoeffs+96+__svml_dtan_data_internal(%rip), %rdx
+ movdqu .FLT_27(%rip), %xmm6
+ movdqu .FLT_25(%rip), %xmm7
+ pand %xmm3, %xmm6
-/*
- * Create floating-point low and medium parts, respectively
- * lo_23, ... lo_0, 0, ..., 0
- * hi_11, ... hi_0, lo_63, ..., lo_24
- * then subtract off the implicitly added integer bits,
- * 2^-104 and 2^-52, respectively.
- * Put the original sign into all of them at this stage.
- */
- movdqu .FLT_24(%rip), %xmm8
- pand %xmm0, %xmm7
- subpd %xmm12, %xmm14
- psllq $40, %xmm6
- psrlq $24, %xmm0
- pxor %xmm9, %xmm8
- por %xmm0, %xmm6
- pxor .FLT_26(%rip), %xmm9
- psllq $28, %xmm7
- subpd %xmm14, %xmm5
- por %xmm9, %xmm6