diff options
| author | Andrew Senkevich <andrew.senkevich@intel.com> | 2016-08-02 16:35:25 +0300 |
|---|---|---|
| committer | Andrew Senkevich <andrew.senkevich@intel.com> | 2016-08-02 16:35:25 +0300 |
| commit | 533f9bebf969060e64c66681e275c03d6e49fcc9 (patch) | |
| tree | 500694859540a9f7583760f1a99af70044ca726b | |
| parent | f88aab5d508c13ae4a88124e65773d7d827cd47b (diff) | |
| download | glibc-533f9bebf969060e64c66681e275c03d6e49fcc9.tar.xz glibc-533f9bebf969060e64c66681e275c03d6e49fcc9.zip | |
x86_64: Call finite scalar versions in vectorized log, pow, exp (bz #20033).
Vector math functions require -ffast-math, which sets -ffinite-math-only,
so the scalar functions that the vector functions call in some cases
must be the finite versions.
Since the finite version of pow() returns qNaN instead of 1.0 for several
inputs, those inputs are excluded from the tests of the vector math functions.
[BZ #20033]
* sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S: Call
finite version.
* sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_exp2_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_log2_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_pow2_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_expf4_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_logf4_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_powf4_core.S: Likewise.
* math/libm-test.inc (pow_test_data): Exclude tests of qNaN raised
to the power zero from vector math function testing.
26 files changed, 93 insertions, 58 deletions
@@ -1,3 +1,34 @@ +2016-08-02 Andrew Senkevich <andrew.senkevich@intel.com> + + [BZ #20033] + * sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S: Call + finite version. + * sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_exp2_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_log2_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_pow2_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_s_expf4_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_s_logf4_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_s_powf4_core.S: Likewise. + * math/libm-test.inc (pow_test_data): Exclude tests for qNaN + in zero power. 
+ 2016-08-02 Florian Weimer <fweimer@redhat.com> [BZ #20370] diff --git a/math/libm-test.inc b/math/libm-test.inc index 4ac7a0c80d..117057c315 100644 --- a/math/libm-test.inc +++ b/math/libm-test.inc @@ -179,6 +179,7 @@ #define IGNORE_RESULT 0x20000 #define NON_FINITE 0x40000 #define TEST_SNAN 0x80000 +#define NO_TEST_MATHVEC 0x100000 #define __CONCATX(a,b) __CONCAT(a,b) @@ -1056,6 +1057,9 @@ enable_test (int exceptions) return 0; if (!SNAN_TESTS (FLOAT) && (exceptions & TEST_SNAN) != 0) return 0; + if (TEST_MATHVEC && (exceptions & NO_TEST_MATHVEC) != 0) + return 0; + return 1; } @@ -10631,10 +10635,10 @@ nexttoward_test (void) static const struct test_ff_f_data pow_test_data[] = { - TEST_ff_f (pow, qnan_value, 0, 1, ERRNO_UNCHANGED), - TEST_ff_f (pow, -qnan_value, 0, 1, ERRNO_UNCHANGED), - TEST_ff_f (pow, qnan_value, minus_zero, 1, ERRNO_UNCHANGED), - TEST_ff_f (pow, -qnan_value, minus_zero, 1, ERRNO_UNCHANGED), + TEST_ff_f (pow, qnan_value, 0, 1, ERRNO_UNCHANGED|NO_TEST_MATHVEC), + TEST_ff_f (pow, -qnan_value, 0, 1, ERRNO_UNCHANGED|NO_TEST_MATHVEC), + TEST_ff_f (pow, qnan_value, minus_zero, 1, ERRNO_UNCHANGED|NO_TEST_MATHVEC), + TEST_ff_f (pow, -qnan_value, minus_zero, 1, ERRNO_UNCHANGED|NO_TEST_MATHVEC), TEST_ff_f (pow, 1.1L, plus_infty, plus_infty, ERRNO_UNCHANGED|NO_TEST_INLINE), TEST_ff_f (pow, plus_infty, plus_infty, plus_infty, ERRNO_UNCHANGED|NO_TEST_INLINE), diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S index 9a779593cd..1e119e24a6 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S @@ -207,7 +207,7 @@ ENTRY (_ZGVbN2v_exp_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call JUMPTARGET(exp) + call JUMPTARGET(__exp_finite) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -217,7 +217,7 @@ ENTRY (_ZGVbN2v_exp_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call JUMPTARGET(exp) + call JUMPTARGET(__exp_finite) movsd 
%xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S index 2a35fe3846..f350800dcb 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S @@ -193,7 +193,7 @@ ENTRY (_ZGVdN4v_exp_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call JUMPTARGET(exp) + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -204,7 +204,7 @@ ENTRY (_ZGVdN4v_exp_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call JUMPTARGET(exp) + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S index ea840911e7..18fb059a6f 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S @@ -223,7 +223,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call JUMPTARGET(exp) + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -231,7 +231,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call JUMPTARGET(exp) + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif @@ -438,7 +438,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp vmovsd 1160(%rsp,%r15), %xmm0 vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call JUMPTARGET(exp) + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -448,7 +448,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp vmovsd 1152(%rsp,%r15), %xmm0 vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call JUMPTARGET(exp) + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S index 67959729d7..67876997c8 100644 
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S @@ -211,7 +211,7 @@ ENTRY (_ZGVbN2v_log_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call JUMPTARGET(log) + call JUMPTARGET(__log_finite) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -221,7 +221,7 @@ ENTRY (_ZGVbN2v_log_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call JUMPTARGET(log) + call JUMPTARGET(__log_finite) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S index 267dae0a1f..8d21e5e94f 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S @@ -191,7 +191,7 @@ ENTRY (_ZGVdN4v_log_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call JUMPTARGET(log) + call JUMPTARGET(__log_finite) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -202,7 +202,7 @@ ENTRY (_ZGVdN4v_log_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call JUMPTARGET(log) + call JUMPTARGET(__log_finite) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S index 62854bb07d..cf2da9d769 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S @@ -222,7 +222,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call JUMPTARGET(log) + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -230,7 +230,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call JUMPTARGET(log) + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif @@ -443,7 +443,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call JUMPTARGET(log) + call JUMPTARGET(__log_finite) 
vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -455,7 +455,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call JUMPTARGET(log) + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S index 699f74ed44..c642f940cc 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S @@ -413,7 +413,7 @@ ENTRY (_ZGVbN2vv_pow_sse4) movsd 72(%rsp,%r15), %xmm0 movsd 136(%rsp,%r15), %xmm1 - call JUMPTARGET(pow) + call JUMPTARGET(__pow_finite) movsd %xmm0, 200(%rsp,%r15) jmp .LBL_1_8 @@ -424,7 +424,7 @@ ENTRY (_ZGVbN2vv_pow_sse4) movsd 64(%rsp,%r15), %xmm0 movsd 128(%rsp,%r15), %xmm1 - call JUMPTARGET(pow) + call JUMPTARGET(__pow_finite) movsd %xmm0, 192(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S index 35ba076caa..8ad9a57543 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S @@ -367,7 +367,7 @@ ENTRY (_ZGVdN4vv_pow_avx2) vmovsd 264(%rsp,%r15), %xmm1 vzeroupper - call JUMPTARGET(pow) + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 328(%rsp,%r15) jmp .LBL_1_8 @@ -379,7 +379,7 @@ ENTRY (_ZGVdN4vv_pow_avx2) vmovsd 256(%rsp,%r15), %xmm1 vzeroupper - call JUMPTARGET(pow) + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 320(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S index c6b6474438..026feee1ca 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S @@ -392,7 +392,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 vmovsd 1224(%rsp,%r15), %xmm1 - call JUMPTARGET(pow) + call 
JUMPTARGET(__pow_finite) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_1_8 @@ -401,7 +401,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 vmovsd 1216(%rsp,%r15), %xmm1 - call JUMPTARGET(pow) + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_1_7 @@ -720,7 +720,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call JUMPTARGET(pow) + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_2_8 @@ -732,7 +732,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call JUMPTARGET(pow) + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S index 18b8a5e3af..42bd67096d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S @@ -212,14 +212,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf cfi_restore_state movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call JUMPTARGET(expf) + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_1_8 .LBL_1_12: movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call JUMPTARGET(expf) + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_1_7 @@ -422,7 +422,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call JUMPTARGET(expf) + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 @@ -433,7 +433,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call JUMPTARGET(expf) + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S index d3db509ec4..59933da08b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S +++ 
b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S @@ -195,7 +195,7 @@ ENTRY (_ZGVbN4v_expf_sse4) movzbl %r12b, %r15d movss 196(%rsp,%r15,8), %xmm0 - call JUMPTARGET(expf) + call JUMPTARGET(__expf_finite) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -204,7 +204,7 @@ ENTRY (_ZGVbN4v_expf_sse4) movzbl %r12b, %r15d movss 192(%rsp,%r15,8), %xmm0 - call JUMPTARGET(expf) + call JUMPTARGET(__expf_finite) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2. |
