diff options
| author | Andrew Senkevich <andrew.senkevich@intel.com> | 2016-07-01 14:15:38 +0300 |
|---|---|---|
| committer | Andrew Senkevich <andrew.senkevich@intel.com> | 2016-07-01 14:15:38 +0300 |
| commit | ee2196bb6766ca7e63a1ba22ebb7619a3266776a (patch) | |
| tree | a99accc0d97a405f535249efd7657de270726850 | |
| parent | fd1cf1dc3b2d90c2a61332363feb1043f6916564 (diff) | |
| download | glibc-ee2196bb6766ca7e63a1ba22ebb7619a3266776a.tar.xz glibc-ee2196bb6766ca7e63a1ba22ebb7619a3266776a.zip | |
Fix the vector sincos/sincosf ABI to make it compatible with the
current vector function declaration "#pragma omp declare simd notinbranch",
according to which vector sincos must take vectors of pointers as its second
and third parameters. The vvv entry points are now implemented as wrappers
around the variants whose second and third parameters are pointers.
[BZ #20024]
* sysdeps/x86/fpu/test-math-vector-sincos.h: New.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Fixed ABI
of this implementation of vector function.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos2_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos4_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf4_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf8_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S: Likewise.
* sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Use another wrapper
for testing vector sincos with fixed ABI.
* sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c: New test.
* sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c: Likewise.
* sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c: Likewise.
* sysdeps/x86_64/fpu/test-double-libmvec-sincos.c: Likewise.
* sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c: Likewise.
* sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c: Likewise.
* sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c: Likewise.
* sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c: Likewise.
* sysdeps/x86_64/fpu/Makefile: Added new tests.
33 files changed, 2587 insertions, 39 deletions
@@ -1,3 +1,42 @@ +2016-07-01 Andrew Senkevich <andrew.senkevich@intel.com> + + [BZ #20024] + * sysdeps/x86/fpu/test-math-vector-sincos.h: New. + * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Fixed ABI + of this implementation of vector function. + * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S: + Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_sincos2_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_sincos4_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_s_sincosf4_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_s_sincosf8_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S: Likewise. + * sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Use another wrapper + for testing vector sincos with fixed ABI. + * sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c: New test. + * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c: Likewise. + * sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c: Likewise. + * sysdeps/x86_64/fpu/test-double-libmvec-sincos.c: Likewise. + * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c: Likewise. 
+ * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c: Likewise. + * sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c: Likewise. + * sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c: Likewise. + * sysdeps/x86_64/fpu/Makefile: Added new tests. + 2016-06-30 Aurelien Jarno <aurelien@aurel32.net> * sysdeps/unix/sysv/linux/sparc/sparc64/localplt.data: Add _Qp_cmp. diff --git a/sysdeps/x86/fpu/test-math-vector-sincos.h b/sysdeps/x86/fpu/test-math-vector-sincos.h new file mode 100644 index 0000000000..0263fc5935 --- /dev/null +++ b/sysdeps/x86/fpu/test-math-vector-sincos.h @@ -0,0 +1,98 @@ +/* Wrappers definitions for tests of ABI of vector sincos/sincosf having + vector declaration "#pragma omp declare simd notinbranch". + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define INIT_VEC_PTRS_LOOP(vec, val, len) \ + do \ + { \ + for (i = 0; i < len; i++) \ + { \ + vec[i] = &val[i]; \ + } \ + } \ + while (0) + +/* Wrapper for vector sincos/sincosf compatible with x86_64 and x32 variants + of _ZGVbN2vvv_sincos, _ZGVdN4vvv_sincos, _ZGVeN8vvv_sincos; + x32 variants of _ZGVbN4vvv_sincosf, _ZGVcN4vvv_sincos, _ZGVdN8vvv_sincosf, + _ZGVeN16vvv_sincosf. 
*/ +#define VECTOR_WRAPPER_fFF_2(scalar_func, vector_func) \ +extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE); \ +void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \ +{ \ + int i; \ + FLOAT r_loc[VEC_LEN], r1_loc[VEC_LEN]; \ + VEC_TYPE mx; \ + VEC_INT_TYPE mr, mr1; \ + INIT_VEC_LOOP (mx, x, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (((FLOAT **) &mr), r_loc, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (((FLOAT **) &mr1), r1_loc, VEC_LEN); \ + vector_func (mx, mr, mr1); \ + TEST_VEC_LOOP (r_loc, VEC_LEN); \ + TEST_VEC_LOOP (r1_loc, VEC_LEN); \ + *r = r_loc[0]; \ + *r1 = r1_loc[0]; \ + return; \ +} + +/* Wrapper for vector sincos/sincosf compatible with x86_64 variants of + _ZGVcN4vvv_sincos, _ZGVeN16vvv_sincosf, _ZGVbN4vvv_sincosf, + _ZGVdN8vvv_sincosf; x32 variant of _ZGVcN8vvv_sincosf. */ +#define VECTOR_WRAPPER_fFF_3(scalar_func, vector_func) \ +extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE); \ +void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \ +{ \ + int i; \ + FLOAT r_loc[VEC_LEN/2], r1_loc[VEC_LEN/2]; \ + VEC_TYPE mx; \ + VEC_INT_TYPE mr, mr1; \ + INIT_VEC_LOOP (mx, x, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (((FLOAT **) &mr), r_loc, VEC_LEN/2); \ + INIT_VEC_PTRS_LOOP (((FLOAT **) &mr1), r1_loc, VEC_LEN/2); \ + vector_func (mx, mr, mr, mr1, mr1); \ + TEST_VEC_LOOP (r_loc, VEC_LEN/2); \ + TEST_VEC_LOOP (r1_loc, VEC_LEN/2); \ + *r = r_loc[0]; \ + *r1 = r1_loc[0]; \ + return; \ +} + +/* Wrapper for vector sincosf compatible with x86_64 variant of + _ZGVcN8vvv_sincosf. 
*/ +#define VECTOR_WRAPPER_fFF_4(scalar_func, vector_func) \ +extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE); \ +void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \ +{ \ + int i; \ + FLOAT r_loc[VEC_LEN/4], r1_loc[VEC_LEN/4]; \ + VEC_TYPE mx; \ + VEC_INT_TYPE mr, mr1; \ + INIT_VEC_LOOP (mx, x, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (((FLOAT **) &mr), r_loc, VEC_LEN/4); \ + INIT_VEC_PTRS_LOOP (((FLOAT **) &mr1), r1_loc, VEC_LEN/4); \ + vector_func (mx, mr, mr, mr, mr, mr1, mr1, mr1, mr1); \ + TEST_VEC_LOOP (r_loc, VEC_LEN/4); \ + TEST_VEC_LOOP (r1_loc, VEC_LEN/4); \ + *r = r_loc[0]; \ + *r1 = r1_loc[0]; \ + return; \ +} diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile index 36c4ae99a2..034e1158a9 100644 --- a/sysdeps/x86_64/fpu/Makefile +++ b/sysdeps/x86_64/fpu/Makefile @@ -35,15 +35,16 @@ tests += test-double-libmvec-alias test-double-libmvec-alias-avx \ test-double-libmvec-alias-avx-main test-double-libmvec-alias-avx2-main \ test-float-libmvec-alias test-float-libmvec-alias-avx \ test-float-libmvec-alias-avx2 test-float-libmvec-alias-main \ - test-float-libmvec-alias-avx-main test-float-libmvec-alias-avx2-main - + test-float-libmvec-alias-avx-main test-float-libmvec-alias-avx2-main \ + test-double-libmvec-sincos test-double-libmvec-sincos-avx \ + test-double-libmvec-sincos-avx2 test-float-libmvec-sincosf \ + test-float-libmvec-sincosf-avx test-float-libmvec-sincosf-avx2 modules-names += test-double-libmvec-alias-mod \ test-double-libmvec-alias-avx-mod \ test-double-libmvec-alias-avx2-mod \ test-float-libmvec-alias-mod \ test-float-libmvec-alias-avx-mod \ test-float-libmvec-alias-avx2-mod - test-double-libmvec-alias-mod.so-no-z-defs = yes test-double-libmvec-alias-avx-mod.so-no-z-defs = yes test-double-libmvec-alias-avx2-mod.so-no-z-defs = yes @@ -105,12 +106,32 @@ $(objpfx)test-float-libmvec-alias-avx2-main: \ 
$(objpfx)test-float-libmvec-alias-avx2-mod.os \ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) +$(objpfx)test-double-libmvec-sincos: \ + $(objpfx)test-double-libmvec-sincos.o $(libmvec) + +$(objpfx)test-double-libmvec-sincos-avx: \ + $(objpfx)test-double-libmvec-sincos-avx.o $(libmvec) + +$(objpfx)test-double-libmvec-sincos-avx2: \ + $(objpfx)test-double-libmvec-sincos-avx2.o $(libmvec) + +$(objpfx)test-float-libmvec-sincosf: \ + $(objpfx)test-float-libmvec-sincosf.o $(libmvec) + +$(objpfx)test-float-libmvec-sincosf-avx: \ + $(objpfx)test-float-libmvec-sincosf-avx.o $(libmvec) + +$(objpfx)test-float-libmvec-sincosf-avx2: \ + $(objpfx)test-float-libmvec-sincosf-avx2.o $(libmvec) + ifeq (yes,$(config-cflags-avx512)) libmvec-tests += double-vlen8 float-vlen16 tests += test-double-libmvec-alias-avx512 \ test-float-libmvec-alias-avx512 \ test-double-libmvec-alias-avx512-main \ - test-float-libmvec-alias-avx512-main + test-float-libmvec-alias-avx512-main \ + test-double-libmvec-sincos-avx512 \ + test-float-libmvec-sincosf-avx512 modules-names += test-double-libmvec-alias-avx512-mod \ test-float-libmvec-alias-avx512-mod test-double-libmvec-alias-avx512-mod.so-no-z-defs = yes @@ -133,6 +154,12 @@ $(objpfx)test-float-libmvec-alias-avx512-mod.so: \ $(objpfx)test-float-libmvec-alias-avx512-main: \ $(objpfx)test-float-libmvec-alias-avx512-mod.os \ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-double-libmvec-sincos-avx512: \ + $(objpfx)test-double-libmvec-sincos-avx512.o $(libmvec) + +$(objpfx)test-float-libmvec-sincosf-avx512: \ + $(objpfx)test-float-libmvec-sincosf-avx512.o $(libmvec) endif double-vlen4-arch-ext-cflags = -mavx @@ -143,8 +170,8 @@ float-vlen8-arch-ext-cflags = -mavx float-vlen8-arch-ext2-cflags = -mavx2 float-vlen16-arch-ext-cflags = -mavx512f -libmvec-alias-cflags = $(libm-test-fast-math-cflags) -fno-inline -fopenmp \ - -ffloat-store -Wno-unknown-pragmas -ffinite-math-only +libmvec-sincos-cflags = $(libm-test-fast-math-cflags) 
-fno-inline -fopenmp -Wno-unknown-pragmas +libmvec-alias-cflags = $(libmvec-sincos-cflags) -ffloat-store -ffinite-math-only CFLAGS-test-double-libmvec-alias-mod.c = $(libmvec-alias-cflags) CFLAGS-test-double-libmvec-alias-avx-mod.c = $(double-vlen4-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX @@ -162,5 +189,14 @@ CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags) CFLAGS-test-float-vlen8-avx2.c = $(libm-test-vec-cflags) CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags) +CFLAGS-test-double-libmvec-sincos.c = $(libmvec-sincos-cflags) +CFLAGS-test-double-libmvec-sincos-avx.c = $(libmvec-sincos-cflags) $(double-vlen4-arch-ext-cflags) -DREQUIRE_AVX +CFLAGS-test-double-libmvec-sincos-avx2.c = $(libmvec-sincos-cflags) $(double-vlen4-arch-ext2-cflags) -DREQUIRE_AVX2 +CFLAGS-test-double-libmvec-sincos-avx512.c = $(libmvec-sincos-cflags) $(double-vlen8-arch-ext-cflags) -DREQUIRE_AVX512F + +CFLAGS-test-float-libmvec-sincosf.c = $(libmvec-sincos-cflags) +CFLAGS-test-float-libmvec-sincosf-avx.c = $(libmvec-sincos-cflags) $(float-vlen8-arch-ext-cflags) -DREQUIRE_AVX +CFLAGS-test-float-libmvec-sincosf-avx2.c = $(libmvec-sincos-cflags) $(float-vlen8-arch-ext2-cflags) -DREQUIRE_AVX2 +CFLAGS-test-float-libmvec-sincosf-avx512.c = $(libmvec-sincos-cflags) $(float-vlen16-arch-ext-cflags) -DREQUIRE_AVX512F endif endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S index d37275d7ab..6dfc61ee93 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S @@ -20,7 +20,7 @@ #include "svml_d_trig_data.h" .text -ENTRY (_ZGVbN2vvv_sincos_sse4) +ENTRY (_ZGVbN2vl8l8_sincos_sse4) /* ALGORITHM DESCRIPTION: @@ -311,4 +311,58 @@ ENTRY (_ZGVbN2vvv_sincos_sse4) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 +END (_ZGVbN2vl8l8_sincos_sse4) +libmvec_hidden_def(_ZGVbN2vl8l8_sincos_sse4) + +/* vvv version 
implemented with wrapper to vl8l8 variant. */ +ENTRY (_ZGVbN2vvv_sincos_sse4) +#ifndef __ILP32__ + subq $72, %rsp + .cfi_def_cfa_offset 80 + movdqu %xmm1, 32(%rsp) + lea (%rsp), %rdi + movdqu %xmm2, 48(%rdi) + lea 16(%rsp), %rsi + call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4) + movq 32(%rsp), %rdx + movq 48(%rsp), %rsi + movq 40(%rsp), %r8 + movq 56(%rsp), %r10 + movq (%rsp), %rax + movq 16(%rsp), %rcx + movq 8(%rsp), %rdi + movq 24(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq %rdi, (%r8) + movq %r9, (%r10) + addq $72, %rsp + .cfi_def_cfa_offset 8 + ret +#else + subl $72, %esp + .cfi_def_cfa_offset 80 + leal 48(%rsp), %esi + movaps %xmm1, 16(%esp) + leal 32(%rsp), %edi + movaps %xmm2, (%esp) + call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4) + movdqa 16(%esp), %xmm1 + movsd 32(%esp), %xmm0 + movq %xmm1, %rax + movdqa (%esp), %xmm2 + movsd %xmm0, (%eax) + movsd 40(%esp), %xmm0 + pextrd $1, %xmm1, %eax + movsd %xmm0, (%eax) + movsd 48(%esp), %xmm0 + movq %xmm2, %rax + movsd %xmm0, (%eax) + movsd 56(%esp), %xmm0 + pextrd $1, %xmm2, %eax + movsd %xmm0, (%eax) + addl $72, %esp + .cfi_def_cfa_offset 8 + ret +#endif END (_ZGVbN2vvv_sincos_sse4) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S index 24b57f4e8c..12f60100fa 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S @@ -20,7 +20,7 @@ #include "svml_d_trig_data.h" .text -ENTRY (_ZGVdN4vvv_sincos_avx2) +ENTRY (_ZGVdN4vl8l8_sincos_avx2) /* ALGORITHM DESCRIPTION: @@ -274,4 +274,100 @@ ENTRY (_ZGVdN4vvv_sincos_avx2) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 |
