| field | value | date |
|---|---|---|
| author | Igor Zamyatin <igor.zamyatin@intel.com> | 2014-03-13 11:10:22 -0700 |
| committer | H.J. Lu <hjl.tools@gmail.com> | 2014-03-13 11:19:08 -0700 |
| commit | 2d63a517e4084ec80403cd9f278690fa8b676cc4 (patch) | |
| tree | d5d5dd025d9a59bd41fedf9c16799271b5dbb722 | |
| parent | 44c4e5d598bfcbb309f05ceb7a57ab02662e7f34 (diff) | |
| download | glibc-2d63a517e4084ec80403cd9f278690fa8b676cc4.tar.xz, glibc-2d63a517e4084ec80403cd9f278690fa8b676cc4.zip | |
Save and restore AVX-512 zmm registers to x86-64 ld.so
The AVX-512 ISA adds 512-bit zmm registers. This patch updates
_dl_runtime_profile to pass zmm registers to run-time audit modules. It also
changes _dl_x86_64_save_sse and _dl_x86_64_restore_sse to support zmm
registers; these functions are called only when RTLD_PREPARE_FOREIGN_CALL
is used, so the performance impact is minimal.
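For context, the consumer of these saved registers is an rtld-audit (LD_AUDIT) module's pltenter hook. The following is a hypothetical, minimal sketch — not the tst-auditmod10b.c module added by this patch — of how such a module could read the first vector argument through the La_x86_64_vector union extended below. It assumes the patched <bits/link.h> (the new La_x86_64_zmm type and zmm union member) and a GCC recent enough to subscript vector types; on pre-AVX-512 hardware only the xmm/ymm portion of lr_vector[] carries meaningful data.

```c
/* Hypothetical LD_AUDIT module sketch (not from this patch).  */
#define _GNU_SOURCE 1
#include <link.h>
#include <stdint.h>
#include <stdio.h>

unsigned int
la_version (unsigned int version)
{
  /* Accept whatever audit interface version ld.so offers.  */
  return version;
}

unsigned int
la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
{
  /* Request pltenter notifications for every object.  */
  return LA_FLG_BINDTO | LA_FLG_BINDFROM;
}

Elf64_Addr
la_x86_64_gnu_pltenter (Elf64_Sym *sym, unsigned int ndx,
                        uintptr_t *refcook, uintptr_t *defcook,
                        La_x86_64_regs *regs, unsigned int *flags,
                        const char *symname, long int *framesizep)
{
  /* With this patch, _dl_runtime_profile stores the full 512-bit vector
     arguments in lr_vector[]; zmm[0] aliases the ymm/xmm members of the
     La_x86_64_vector union.  */
  double lane0 = regs->lr_vector[0].zmm[0][0];
  printf ("pltenter: %s, vector arg0 lane0 = %g\n", symname, lane0);
  return sym->st_value;   /* Call the resolved target unchanged.  */
}
```

Built as a shared object (e.g. `gcc -shared -fPIC auditmod.c -o auditmod.so`) and loaded via `LD_AUDIT=./auditmod.so`, the hook fires on each PLT entry, much as the Makefile below arranges for tst-auditmod10b.so in tst-audit10-ENV.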
* config.h.in (HAVE_AVX512_SUPPORT): New #undef.
(HAVE_AVX512_ASM_SUPPORT): Likewise.
* sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New.
(La_x86_64_vector): Add zmm.
* sysdeps/x86_64/Makefile (tests): Add tst-audit10.
(modules-names): Add tst-auditmod10a and tst-auditmod10b.
($(objpfx)tst-audit10): New target.
($(objpfx)tst-audit10.out): Likewise.
(tst-audit10-ENV): New.
(AVX512-CFLAGS): Likewise.
(CFLAGS-tst-audit10.c): Likewise.
(CFLAGS-tst-auditmod10a.c): Likewise.
(CFLAGS-tst-auditmod10b.c): Likewise.
* sysdeps/x86_64/configure.ac: Set config-cflags-avx512,
HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT.
* sysdeps/x86_64/configure: Regenerated.
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add
AVX-512 zmm register support.
(_dl_x86_64_save_sse): Likewise.
(_dl_x86_64_restore_sse): Likewise.
* sysdeps/x86_64/dl-trampoline.h: Updated to support different
size vector registers.
* sysdeps/x86_64/link-defines.sym (YMM_SIZE): New.
(ZMM_SIZE): Likewise.
* sysdeps/x86_64/tst-audit10.c: New file.
* sysdeps/x86_64/tst-auditmod10a.c: Likewise.
* sysdeps/x86_64/tst-auditmod10b.c: Likewise.
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | ChangeLog | 30 |
| -rw-r--r-- | config.h.in | 6 |
| -rw-r--r-- | sysdeps/x86/bits/link.h | 3 |
| -rw-r--r-- | sysdeps/x86_64/Makefile | 15 |
| -rw-r--r-- | sysdeps/x86_64/configure | 53 |
| -rw-r--r-- | sysdeps/x86_64/configure.ac | 24 |
| -rw-r--r-- | sysdeps/x86_64/dl-trampoline.S | 122 |
| -rw-r--r-- | sysdeps/x86_64/dl-trampoline.h | 40 |
| -rw-r--r-- | sysdeps/x86_64/link-defines.sym | 2 |
| -rw-r--r-- | sysdeps/x86_64/tst-audit10.c | 70 |
| -rw-r--r-- | sysdeps/x86_64/tst-auditmod10a.c | 65 |
| -rw-r--r-- | sysdeps/x86_64/tst-auditmod10b.c | 219 |

12 files changed, 609 insertions, 40 deletions
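For reference, the run-time test that the new trampoline code performs before touching zmm state reduces to three checks: CPUID.1:ECX must report AVX (bit 28) and OSXSAVE (bit 27), CPUID.(EAX=7,ECX=0):EBX bit 16 must report AVX512F, and XGETBV(0) must return mask 0xe6 in its low bits (SSE, AVX, opmask, ZMM_Hi256 and Hi16_ZMM state all enabled by the OS). Below is a hedged C sketch of the same logic — using GCC's <cpuid.h> helpers and inline asm for xgetbv, not code taken from the patch — mirroring the assembly in the diff that follows.

```c
/* Sketch of the AVX-512 state check done by the patched trampolines.  */
#include <cpuid.h>

static unsigned long long
read_xcr0 (void)
{
  unsigned int lo, hi;
  /* xgetbv with %ecx == 0 reads XCR0 (the XFEATURE_ENABLED_MASK).  */
  __asm__ ("xgetbv" : "=a" (lo), "=d" (hi) : "c" (0));
  return ((unsigned long long) hi << 32) | lo;
}

static int
zmm_state_usable (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* CPUID.1:ECX — AVX and OSXSAVE, the same pair the existing AVX path
     tests before going any further.  */
  __cpuid (1, eax, ebx, ecx, edx);
  if ((ecx & ((1u << 28) | (1u << 27))) != ((1u << 28) | (1u << 27)))
    return 0;

  /* CPUID.(EAX=7,ECX=0):EBX bit 16 — AVX512F implemented by the CPU.  */
  __cpuid_count (7, 0, eax, ebx, ecx, edx);
  if ((ebx & (1u << 16)) == 0)
    return 0;

  /* XCR0[7:5] = 111b and XCR0[2:1] = 11b, i.e. mask 0xe6: zmm state is
     enabled and will be saved/restored by the OS.  */
  return (read_xcr0 () & 0xe6) == 0xe6;
}
```

When this test succeeds, the assembly caches 0xe6 in L(have_avx), so later entries to _dl_runtime_profile and _dl_x86_64_save_sse/_dl_x86_64_restore_sse branch straight to the vmovdqu64 zmm save/restore paths.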
@@ -1,3 +1,33 @@ +2014-03-13 Igor Zamyatin <igor.zamyatin@intel.com> + + * config.h.in (HAVE_AVX512_SUPPORT): New #undef. + (HAVE_AVX512_ASM_SUPPORT): Likewise. + * sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New. + (La_x86_64_vector): Add zmm. + * sysdeps/x86_64/Makefile (tests): Add tst-audit10. + (modules-names): Add tst-auditmod10a and tst-auditmod10b. + ($(objpfx)tst-audit10): New target. + ($(objpfx)tst-audit10.out): Likewise. + (tst-audit10-ENV): New. + (AVX512-CFLAGS): Likewise. + (CFLAGS-tst-audit10.c): Likewise. + (CFLAGS-tst-auditmod10a.c): Likewise. + (CFLAGS-tst-auditmod10b.c): Likewise. + * sysdeps/x86_64/configure.ac: Set config-cflags-avx512, + HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT. + * sysdeps/x86_64/configure: Regenerated. + * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add + AVX-512 zmm register support. + (_dl_x86_64_save_sse): Likewise. + (_dl_x86_64_restore_sse): Likewise. + * sysdeps/x86_64/dl-trampoline.h: Updated to support different + size vector registers. + * sysdeps/x86_64/link-defines.sym (YMM_SIZE): New. + (ZMM_SIZE): Likewise. + * sysdeps/x86_64/tst-audit10.c: New file. + * sysdeps/x86_64/tst-auditmod10a.c: Likewise. + * sysdeps/x86_64/tst-auditmod10b.c: Likewise. + 2014-03-13 Roland McGrath <roland@hack.frob.com> * configure.ac (HAVE_EHDR_START): New check. diff --git a/config.h.in b/config.h.in index ed3c5933eb..3fc34bdb1e 100644 --- a/config.h.in +++ b/config.h.in @@ -98,6 +98,12 @@ /* Define if gcc supports VEX encoding. */ #undef HAVE_SSE2AVX_SUPPORT +/* Define if compiler supports AVX512. */ +#undef HAVE_AVX512_SUPPORT + +/* Define if assembler supports AVX512. */ +#undef HAVE_AVX512_ASM_SUPPORT + /* Define if gcc supports FMA4. */ #undef HAVE_FMA4_SUPPORT diff --git a/sysdeps/x86/bits/link.h b/sysdeps/x86/bits/link.h index 4ebc5c1743..8673b2119b 100644 --- a/sysdeps/x86/bits/link.h +++ b/sysdeps/x86/bits/link.h @@ -66,6 +66,8 @@ __END_DECLS typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16))); typedef float La_x86_64_ymm __attribute__ ((__vector_size__ (32), __aligned__ (16))); +typedef double La_x86_64_zmm + __attribute__ ((__vector_size__ (64), __aligned__ (16))); # else typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__))); # endif @@ -74,6 +76,7 @@ typedef union { # if __GNUC_PREREQ (4,0) La_x86_64_ymm ymm[2]; + La_x86_64_zmm zmm[1]; # endif La_x86_64_xmm xmm[4]; } La_x86_64_vector __attribute__ ((__aligned__ (16))); diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index 08db331923..58900a5c28 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -38,7 +38,7 @@ tests-pie += $(quad-pie-test) $(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o $(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o -tests += tst-audit3 tst-audit4 tst-audit5 +tests += tst-audit3 tst-audit4 tst-audit5 tst-audit10 ifeq (yes,$(config-cflags-avx)) tests += tst-audit6 tst-audit7 endif @@ -46,7 +46,8 @@ modules-names += tst-auditmod3a tst-auditmod3b \ tst-auditmod4a tst-auditmod4b \ tst-auditmod5a tst-auditmod5b \ tst-auditmod6a tst-auditmod6b tst-auditmod6c \ - tst-auditmod7a tst-auditmod7b + tst-auditmod7a tst-auditmod7b \ + tst-auditmod10a tst-auditmod10b $(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so $(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so @@ -69,6 +70,10 @@ $(objpfx)tst-audit7: $(objpfx)tst-auditmod7a.so $(objpfx)tst-audit7.out: $(objpfx)tst-auditmod7b.so tst-audit7-ENV = LD_AUDIT=$(objpfx)tst-auditmod7b.so +$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so +$(objpfx)tst-audit10.out: 
$(objpfx)tst-auditmod10b.so +tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so + ifeq (yes,$(config-cflags-avx)) AVX-CFLAGS=-mavx ifeq (yes,$(config-cflags-novzeroupper)) @@ -81,6 +86,12 @@ CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS) endif +ifeq (yes,$(config-cflags-avx512)) +AVX512-CFLAGS = -mavx512f +CFLAGS-tst-audit10.c += $(AVX512-CFLAGS) +CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS) +CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS) +endif endif ifeq ($(subdir),csu) diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure index 5a83a53aae..b931e682ea 100644 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -95,6 +95,59 @@ fi config_vars="$config_vars config-cflags-avx = $libc_cv_cc_avx" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support" >&5 +$as_echo_n "checking for AVX512 support... " >&6; } +if ${libc_cv_cc_avx512+:} false; then : + $as_echo_n "(cached) " >&6 +else + if { ac_try='${CC-cc} -mavx512f -xc /dev/null -S -o /dev/null' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + libc_cv_cc_avx512=yes +else + libc_cv_cc_avx512=no +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx512" >&5 +$as_echo "$libc_cv_cc_avx512" >&6; } +if test $libc_cv_cc_avx512 = yes; then + $as_echo "#define HAVE_AVX512_SUPPORT 1" >>confdefs.h + +fi +config_vars="$config_vars +config-cflags-avx512 = $libc_cv_cc_avx512" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5 +$as_echo_n "checking for AVX512 support in assembler... " >&6; } +if ${libc_cv_asm_avx512+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.s <<\EOF + vmovdqu64 %zmm0, (%rsp) +EOF +if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + libc_cv_asm_avx512=yes +else + libc_cv_asm_avx512=no +fi +rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512" >&5 +$as_echo "$libc_cv_asm_avx512" >&6; } +if test $libc_cv_asm_avx512 == yes; then + $as_echo "#define HAVE_AVX512_ASM_SUPPORT 1" >>confdefs.h + +fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5 $as_echo_n "checking for AVX encoding of SSE instructions... " >&6; } if ${libc_cv_cc_sse2avx+:} false; then : diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index c682f93c3e..5e5d61b317 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -23,6 +23,30 @@ if test $libc_cv_cc_avx = yes; then fi LIBC_CONFIG_VAR([config-cflags-avx], [$libc_cv_cc_avx]) +dnl Check if -mavx512f works. +AC_CACHE_CHECK(for AVX512 support, libc_cv_cc_avx512, [dnl +LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=yes], [libc_cv_cc_avx512=no]) +]) +if test $libc_cv_cc_avx512 = yes; then + AC_DEFINE(HAVE_AVX512_SUPPORT) +fi +LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512]) + +dnl Check if asm supports AVX512. 
+AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl +cat > conftest.s <<\EOF + vmovdqu64 %zmm0, (%rsp) +EOF +if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then + libc_cv_asm_avx512=yes +else + libc_cv_asm_avx512=no +fi +rm -f conftest*]) +if test $libc_cv_asm_avx512 == yes; then + AC_DEFINE(HAVE_AVX512_ASM_SUPPORT) +fi + dnl Check if -msse2avx works. AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl LIBC_TRY_CC_OPTION([-msse2avx], diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index ae38677e13..77c4d0f147 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -96,7 +96,7 @@ _dl_runtime_profile: /* Actively align the La_x86_64_regs structure. */ andq $0xfffffffffffffff0, %rsp -# ifdef HAVE_AVX_SUPPORT +# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT /* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers to detect if any xmm0-xmm7 registers are changed by audit module. */ @@ -130,7 +130,7 @@ _dl_runtime_profile: movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) -# ifdef HAVE_AVX_SUPPORT +# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT .data L(have_avx): .zero 4 @@ -138,7 +138,7 @@ L(have_avx): .previous cmpl $0, L(have_avx)(%rip) - jne 1f + jne L(defined) movq %rbx, %r11 # Save rbx movl $1, %eax cpuid @@ -147,18 +147,54 @@ L(have_avx): // AVX and XSAVE supported? andl $((1 << 28) | (1 << 27)), %ecx cmpl $((1 << 28) | (1 << 27)), %ecx - jne 2f + jne 10f +# ifdef HAVE_AVX512_ASM_SUPPORT + // AVX512 supported in processor? + movq %rbx, %r11 # Save rbx + xorl %ecx, %ecx + mov $0x7, %eax + cpuid + andl $(1 << 16), %ebx +# endif xorl %ecx, %ecx // Get XFEATURE_ENABLED_MASK xgetbv - andl $0x6, %eax -2: subl $0x5, %eax +# ifdef HAVE_AVX512_ASM_SUPPORT + test %ebx, %ebx + movq %r11, %rbx # Restore rbx + je 20f + // Verify that XCR0[7:5] = '111b' and + // XCR0[2:1] = '11b' which means + // that zmm state is enabled + andl $0xe6, %eax + cmpl $0xe6, %eax + jne 20f + movl %eax, L(have_avx)(%rip) +L(avx512): +# define RESTORE_AVX +# define VMOV vmovdqu64 +# define VEC(i) zmm##i +# define MORE_CODE +# include "dl-trampoline.h" +# undef VMOV +# undef VEC +# undef RESTORE_AVX +# endif +20: andl $0x6, %eax +10: subl $0x5, %eax movl %eax, L(have_avx)(%rip) cmpl $0, %eax -1: js L(no_avx) +L(defined): + js L(no_avx) +# ifdef HAVE_AVX512_ASM_SUPPORT + cmpl $0xe6, L(have_avx)(%rip) + je L(avx512) +# endif # define RESTORE_AVX +# define VMOV vmovdqu +# define VEC(i) ymm##i # define MORE_CODE # include "dl-trampoline.h" @@ -180,9 +216,9 @@ L(no_avx): .align 16 cfi_startproc _dl_x86_64_save_sse: -# ifdef HAVE_AVX_SUPPORT +# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT cmpl $0, L(have_avx)(%rip) - jne 1f + jne L(defined_5) movq %rbx, %r11 # Save rbx movl $1, %eax cpuid @@ -191,21 +227,43 @@ _dl_x86_64_save_sse: // AVX and XSAVE supported? andl $((1 << 28) | (1 << 27)), %ecx cmpl $((1 << 28) | (1 << 27)), %ecx - jne 2f + jne 1f +# ifdef HAVE_AVX512_ASM_SUPPORT + // AVX512 supported in a processor? + movq %rbx, %r11 # Save rbx + xorl %ecx,%ecx + mov $0x7,%eax + cpuid + andl $(1 << 16), %ebx +# endif xorl %ecx, %ecx // Get XFEATURE_ENABLED_MASK xgetbv - andl $0x6, %eax - cmpl $0x6, %eax - // Nonzero if SSE and AVX state saving is enabled. 
- sete %al -2: leal -1(%eax,%eax), %eax +# ifdef HAVE_AVX512_ASM_SUPPORT + test %ebx, %ebx + movq %r11, %rbx # Restore rbx + je 2f + // Verify that XCR0[7:5] = '111b' and + // XCR0[2:1] = '11b' which means + // that zmm state is enabled + andl $0xe6, %eax + movl %eax, L(have_avx)(%rip) + cmpl $0xe6, %eax + je L(avx512_5) +# endif + +2: andl $0x6, %eax +1: subl $0x5, %eax movl %eax, L(have_avx)(%rip) cmpl $0, %eax -1: js L(no_avx5) +L(defined_5): + js L(no_avx5) +# ifdef HAVE_AVX512_ASM_SUPPORT + cmpl $0xe6, L(have_avx)(%rip) + je L(avx512_5) +# endif -# define YMM_SIZE 32 vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE @@ -215,6 +273,18 @@ _dl_x86_64_save_sse: vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE ret +# ifdef HAVE_AVX512_ASM_SUPPORT +L(avx512_5): + vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE + vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE + vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE + vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE + vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE + vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE + vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE + vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE + ret +# endif L(no_avx5): # endif movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE @@ -235,9 +305,13 @@ L(no_avx5): .align 16 cfi_startproc _dl_x86_64_restore_sse: -# ifdef HAVE_AVX_SUPPORT +# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT cmpl $0, L(have_avx)(%rip) js L(no_avx6) +# ifdef HAVE_AVX512_ASM_SUPPORT + cmpl $0xe6, L(have_avx)(%rip) + je L(avx512_6) +# endif vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0 vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1 @@ -248,6 +322,18 @@ _dl_x86_64_restore_sse: vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6 vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7 ret +# ifdef HAVE_AVX512_ASM_SUPPORT +L(avx512_6): + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6 + vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7 + ret +# endif L(no_avx6): # endif movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0 diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h index 5d1b75ff54..161af0ff57 100644 --- a/sysdeps/x86_64/dl-trampoline.h +++ b/sysdeps/x86_64/dl-trampoline.h @@ -19,14 +19,14 @@ #ifdef RESTORE_AVX /* This is to support AVX audit modules. 
*/ - vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp) - vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) - vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) - vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) - vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) - vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) - vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) - vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) + VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp) + VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) + VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) + VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) + VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) + VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) + VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) + VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) /* Save xmm0-xmm7 registers to detect if any of them are changed by audit module. */ @@ -72,7 +72,7 @@ je 2f vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0 +2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0) vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8 @@ -81,7 +81,7 @@ je 2f vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1) vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8 @@ -90,7 +90,7 @@ je 2f vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2) vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8 @@ -99,7 +99,7 @@ je 2f vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3) vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8 @@ -108,7 +108,7 @@ je 2f vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4) vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8 @@ -117,7 +117,7 @@ je 2f vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5) vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8 @@ -126,7 +126,7 @@ je 2f vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6) vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8 @@ -135,7 +135,7 @@ je 2f vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) jmp 1f -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7) vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) 1: @@ -213,8 +213,8 @@ #ifdef RESTORE_AVX /* This is to support AVX audit modules. 
*/ - vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx) - vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx) + VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx) + VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx) /* Save xmm0/xmm1 registers to detect if they are changed by audit module. */ @@ -243,13 +243,13 @@ vpmovmskb %xmm2, %esi cmpl $0xffff, %esi jne 1f - vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0 + VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0) 1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2 vpmovmskb %xmm2, %esi cmpl $0xffff, %esi jne 1f - vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1 + VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1) 1: #endif diff --git a/sysdeps/x86_64/link-defines.sym b/sysdeps/x86_64/link-defines.sym index 1694d883ad..85d35ad034 100644 --- a/sysdeps/x86_64/link-defines.sym +++ b/sysdeps/x86_64/link-defines.sym @@ -4,6 +4,8 @@ -- VECTOR_SIZE sizeof (La_x86_64_vector) XMM_SIZE sizeof (La_x86_64_xmm) +YMM_SIZE sizeof (La_x86_64_ymm) +ZMM_SIZE sizeof (La_x86_64_zmm) LR_SIZE sizeof (struct La_x86_64_regs) LR_RDX_OFFSET offsetof (struct La_x86_64_regs, lr_rdx) diff --git a/sysdeps/x86_64/tst-audit10.c b/sysdeps/x86_64/tst-audit10.c new file mode 100644 index 0000000000..24c9696839 --- /dev/null +++ b/sysdeps/x86_64/tst-audit10.c @@ -0,0 +1,70 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Publi |
