diff options
| author | H.J. Lu <hjl.tools@gmail.com> | 2015-08-13 03:37:47 -0700 |
|---|---|---|
| committer | H.J. Lu <hjl.tools@gmail.com> | 2015-08-13 03:41:22 -0700 |
| commit | e2e4f56056adddc3c1efe676b40a4b4f2453103b (patch) | |
| tree | c9f54be6f6e8b57c8e58bdfac594aa3927378231 | |
| parent | 63e952d9be87db68f0e4164d4a5760b32e77ebff (diff) | |
| download | glibc-e2e4f56056adddc3c1efe676b40a4b4f2453103b.tar.xz glibc-e2e4f56056adddc3c1efe676b40a4b4f2453103b.zip | |
Add _dl_x86_cpu_features to rtld_global
This patch adds _dl_x86_cpu_features to rtld_global in x86 ld.so
and initializes it early before __libc_start_main is called so that
cpu_features is always available when it is used and we can avoid
calling __init_cpu_features in IFUNC selectors.
* sysdeps/i386/dl-machine.h: Include <cpu-features.c>.
(dl_platform_init): Call init_cpu_features.
* sysdeps/i386/dl-procinfo.c (_dl_x86_cpu_features): New.
* sysdeps/i386/i686/cacheinfo.c
(DISABLE_PREFERRED_MEMORY_INSTRUCTION): Removed.
* sysdeps/i386/i686/multiarch/Makefile (aux): Remove init-arch.
* sysdeps/i386/i686/multiarch/Versions: Removed.
* sysdeps/i386/i686/multiarch/ifunc-defines.sym (KIND_OFFSET):
Removed.
* sysdeps/i386/ldsodefs.h: Include <cpu-features.h>.
* sysdeps/unix/sysv/linux/x86/Makefile
(libpthread-sysdep_routines): Remove init-arch.
* sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c: Include
<sysdeps/x86_64/dl-procinfo.c> instead of
<sysdeps/generic/dl-procinfo.c>.
* sysdeps/x86/Makefile [$(subdir) == csu] (gen-as-const-headers):
Add cpu-features-offsets.sym and rtld-global-offsets.sym.
[$(subdir) == elf] (sysdep-dl-routines): Add dl-get-cpu-features.
[$(subdir) == elf] (tests): Add tst-get-cpu-features.
[$(subdir) == elf] (tests-static): Add
tst-get-cpu-features-static.
* sysdeps/x86/Versions: New file.
* sysdeps/x86/cpu-features-offsets.sym: Likewise.
* sysdeps/x86/cpu-features.c: Likewise.
* sysdeps/x86/cpu-features.h: Likewise.
* sysdeps/x86/dl-get-cpu-features.c: Likewise.
* sysdeps/x86/libc-start.c: Likewise.
* sysdeps/x86/rtld-global-offsets.sym: Likewise.
* sysdeps/x86/tst-get-cpu-features-static.c: Likewise.
* sysdeps/x86/tst-get-cpu-features.c: Likewise.
* sysdeps/x86_64/dl-procinfo.c: Likewise.
* sysdeps/x86_64/cacheinfo.c (__cpuid_count): Removed.
Assume USE_MULTIARCH is defined and don't check it.
(is_intel): Replace __cpu_features with GLRO(dl_x86_cpu_features).
(is_amd): Likewise.
(max_cpuid): Likewise.
(intel_check_word): Likewise.
(__cache_sysconf): Don't call __init_cpu_features.
(__x86_preferred_memory_instruction): Removed.
(init_cacheinfo): Don't call __init_cpu_features. Replace
__cpu_features with GLRO(dl_x86_cpu_features).
* sysdeps/x86_64/dl-machine.h: Include <cpu-features.c>.
(dl_platform_init): Call init_cpu_features.
* sysdeps/x86_64/ldsodefs.h: Include <cpu-features.h>.
* sysdeps/x86_64/multiarch/Makefile (aux): Remove init-arch.
* sysdeps/x86_64/multiarch/Versions: Removed.
* sysdeps/x86_64/multiarch/cacheinfo.c: Likewise.
* sysdeps/x86_64/multiarch/init-arch.c: Likewise.
* sysdeps/x86_64/multiarch/ifunc-defines.sym (KIND_OFFSET):
Removed.
* sysdeps/x86_64/multiarch/init-arch.h: Rewrite.
28 files changed, 565 insertions, 396 deletions
@@ -1,3 +1,57 @@ +2015-08-13 H.J. Lu <hongjiu.lu@intel.com> + + * sysdeps/i386/dl-machine.h: Include <cpu-features.c>. + (dl_platform_init): Call init_cpu_features. + * sysdeps/i386/dl-procinfo.c (_dl_x86_cpu_features): New. + * sysdeps/i386/i686/cacheinfo.c + (DISABLE_PREFERRED_MEMORY_INSTRUCTION): Removed. + * sysdeps/i386/i686/multiarch/Makefile (aux): Remove init-arch. + * sysdeps/i386/i686/multiarch/Versions: Removed. + * sysdeps/i386/i686/multiarch/ifunc-defines.sym (KIND_OFFSET): + Removed. + * sysdeps/i386/ldsodefs.h: Include <cpu-features.h>. + * sysdeps/unix/sysv/linux/x86/Makefile + (libpthread-sysdep_routines): Remove init-arch. + * sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c: Include + <sysdeps/x86_64/dl-procinfo.c> instead of + sysdeps/generic/dl-procinfo.c>. + * sysdeps/x86/Makefile [$(subdir) == csu] (gen-as-const-headers): + Add cpu-features-offsets.sym and rtld-global-offsets.sym. + [$(subdir) == elf] (sysdep-dl-routines): Add dl-get-cpu-features. + [$(subdir) == elf] (tests): Add tst-get-cpu-features. + [$(subdir) == elf] (tests-static): Add + tst-get-cpu-features-static. + * sysdeps/x86/Versions: New file. + * sysdeps/x86/cpu-features-offsets.sym: Likewise. + * sysdeps/x86/cpu-features.c: Likewise. + * sysdeps/x86/cpu-features.h: Likewise. + * sysdeps/x86/dl-get-cpu-features.c: Likewise. + * sysdeps/x86/libc-start.c: Likewise. + * sysdeps/x86/rtld-global-offsets.sym: Likewise. + * sysdeps/x86/tst-get-cpu-features-static.c: Likewise. + * sysdeps/x86/tst-get-cpu-features.c: Likewise. + * sysdeps/x86_64/dl-procinfo.c: Likewise. + * sysdeps/x86_64/cacheinfo.c (__cpuid_count): Removed. + Assume USE_MULTIARCH is defined and don't check it. + (is_intel): Replace __cpu_features with GLRO(dl_x86_cpu_features). + (is_amd): Likewise. + (max_cpuid): Likewise. + (intel_check_word): Likewise. + (__cache_sysconf): Don't call __init_cpu_features. + (__x86_preferred_memory_instruction): Removed. + (init_cacheinfo): Don't call __init_cpu_features. 
Replace + __cpu_features with GLRO(dl_x86_cpu_features). + * sysdeps/x86_64/dl-machine.h: <cpu-features.c>. + (dl_platform_init): Call init_cpu_features. + * sysdeps/x86_64/ldsodefs.h: Include <cpu-features.h>. + * sysdeps/x86_64/multiarch/Makefile (aux): Remove init-arch. + * sysdeps/x86_64/multiarch/Versions: Removed. + * sysdeps/x86_64/multiarch/cacheinfo.c: Likewise. + * sysdeps/x86_64/multiarch/init-arch.c: Likewise. + * sysdeps/x86_64/multiarch/ifunc-defines.sym (KIND_OFFSET): + Removed. + * sysdeps/x86_64/multiarch/init-arch.h: Rewrite. + 2015-08-12 Paul Pluzhnikov <ppluzhnikov@google.com> [BZ #18820] diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h index 04f9247bfa..4a28eb3765 100644 --- a/sysdeps/i386/dl-machine.h +++ b/sysdeps/i386/dl-machine.h @@ -25,6 +25,7 @@ #include <sysdep.h> #include <tls.h> #include <dl-tlsdesc.h> +#include <cpu-features.c> /* Return nonzero iff ELF header is compatible with the running host. */ static inline int __attribute__ ((unused)) @@ -235,6 +236,8 @@ dl_platform_init (void) if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') /* Avoid an empty string which would disturb us. 
*/ GLRO(dl_platform) = NULL; + + init_cpu_features (&GLRO(dl_x86_cpu_features)); } static inline Elf32_Addr diff --git a/sysdeps/i386/dl-procinfo.c b/sysdeps/i386/dl-procinfo.c index b673b3c848..e95f3352aa 100644 --- a/sysdeps/i386/dl-procinfo.c +++ b/sysdeps/i386/dl-procinfo.c @@ -43,6 +43,22 @@ # define PROCINFO_CLASS #endif +#if !IS_IN (ldconfig) +# if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_cpu_features +# else +PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features +# endif +# ifndef PROCINFO_DECL += { } +# endif +# if !defined SHARED || defined PROCINFO_DECL +; +# else +, +# endif +#endif + #if !defined PROCINFO_DECL && defined SHARED ._dl_x86_cap_flags #else diff --git a/sysdeps/i386/i686/cacheinfo.c b/sysdeps/i386/i686/cacheinfo.c index 0f869df4d8..0b50c6d346 100644 --- a/sysdeps/i386/i686/cacheinfo.c +++ b/sysdeps/i386/i686/cacheinfo.c @@ -1,4 +1,3 @@ #define DISABLE_PREFETCHW -#define DISABLE_PREFERRED_MEMORY_INSTRUCTION #include <sysdeps/x86_64/cacheinfo.c> diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 11ce4ba1e3..31bfd39bae 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -1,5 +1,4 @@ ifeq ($(subdir),csu) -aux += init-arch tests += test-multiarch gen-as-const-headers += ifunc-defines.sym endif diff --git a/sysdeps/i386/i686/multiarch/ifunc-defines.sym b/sysdeps/i386/i686/multiarch/ifunc-defines.sym index eb1538abcc..96e9cfaf61 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-defines.sym +++ b/sysdeps/i386/i686/multiarch/ifunc-defines.sym @@ -4,7 +4,6 @@ -- CPU_FEATURES_SIZE sizeof (struct cpu_features) -KIND_OFFSET offsetof (struct cpu_features, kind) CPUID_OFFSET offsetof (struct cpu_features, cpuid) CPUID_SIZE sizeof (struct cpuid_registers) CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) diff --git a/sysdeps/i386/ldsodefs.h b/sysdeps/i386/ldsodefs.h index d80cf0185e..dae2d04ed6 100644 --- a/sysdeps/i386/ldsodefs.h +++ 
b/sysdeps/i386/ldsodefs.h @@ -20,6 +20,7 @@ #define _I386_LDSODEFS_H 1 #include <elf.h> +#include <cpu-features.h> struct La_i86_regs; struct La_i86_retval; diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile index d6be472612..9e6ec44b3a 100644 --- a/sysdeps/unix/sysv/linux/x86/Makefile +++ b/sysdeps/unix/sysv/linux/x86/Makefile @@ -15,7 +15,6 @@ sysdep_headers += sys/elf.h sys/perm.h sys/reg.h sys/vm86.h sys/debugreg.h sys/i endif ifeq ($(subdir),nptl) -libpthread-sysdep_routines += init-arch libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \ elision-trylock endif diff --git a/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c b/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c index 8ac351ec97..a3c0c1931e 100644 --- a/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c +++ b/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c @@ -1,5 +1,5 @@ #if IS_IN (ldconfig) # include <sysdeps/i386/dl-procinfo.c> #else -# include <sysdeps/generic/dl-procinfo.c> +# include <sysdeps/x86_64/dl-procinfo.c> #endif diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 19f5eca741..c262fdf487 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -8,3 +8,14 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/x86/tst-ld-sse-use.sh $(objpfx)ld.so $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ $(evaluate-test) endif + +ifeq ($(subdir),csu) +gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym +endif + +ifeq ($(subdir),elf) +sysdep-dl-routines += dl-get-cpu-features + +tests += tst-get-cpu-features +tests-static += tst-get-cpu-features-static +endif diff --git a/sysdeps/i386/i686/multiarch/Versions b/sysdeps/x86/Versions index 59b185ac8d..e02923708e 100644 --- a/sysdeps/i386/i686/multiarch/Versions +++ b/sysdeps/x86/Versions @@ -1,4 +1,4 @@ -libc { +ld { GLIBC_PRIVATE { __get_cpu_features; } diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym new file mode 100644 index 
0000000000..a9d53d195f --- /dev/null +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86/cpu-features.c index aaad5fa841..587080c1a4 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86/cpu-features.c @@ -1,7 +1,6 @@ /* Initialize CPU feature data. This file is part of the GNU C Library. Copyright (C) 2008-2015 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@redhat.com>. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -17,48 +16,40 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <atomic.h> #include <cpuid.h> -#include "init-arch.h" +#include <cpu-features.h> - -struct cpu_features __cpu_features attribute_hidden; - - -static void -get_common_indeces (unsigned int *family, unsigned int *model) +static inline void +get_common_indeces (struct cpu_features *cpu_features, + unsigned int *family, unsigned int *model) { - __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx); - - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; + unsigned int eax; + __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); + GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax; *family = (eax >> 8) & 0x0f; *model = (eax >> 4) & 0x0f; } - -void -__init_cpu_features (void) +static inline void +init_cpu_features (struct cpu_features *cpu_features) { - unsigned 
int ebx; - unsigned int ecx; - unsigned int edx; + unsigned int ebx, ecx, edx; unsigned int family = 0; unsigned int model = 0; enum cpu_features_kind kind; - __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx); + __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx); /* This spells out "GenuineIntel". */ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) { kind = arch_kind_intel; - get_common_indeces (&family, &model); + get_common_indeces (cpu_features, &family, &model); - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; + unsigned int eax = cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax; unsigned int extended_family = (eax >> 20) & 0xff; unsigned int extended_model = (eax >> 12) & 0xf0; if (family == 0x0f) @@ -68,14 +59,14 @@ __init_cpu_features (void) } else if (family == 0x06) { - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; + ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; model += extended_model; switch (model) { case 0x1c: case 0x26: /* BSF is slow on Atom. 
*/ - __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF; + cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF; break; case 0x37: @@ -91,7 +82,7 @@ __init_cpu_features (void) #if index_Fast_Unaligned_Load != index_Slow_SSE4_2 # error index_Fast_Unaligned_Load != index_Slow_SSE4_2 #endif - __cpu_features.feature[index_Fast_Unaligned_Load] + cpu_features->feature[index_Fast_Unaligned_Load] |= (bit_Fast_Unaligned_Load | bit_Prefer_PMINUB_for_stringop | bit_Slow_SSE4_2); @@ -121,7 +112,7 @@ __init_cpu_features (void) #if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop # error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop #endif - __cpu_features.feature[index_Fast_Rep_String] + cpu_features->feature[index_Fast_Rep_String] |= (bit_Fast_Rep_String | bit_Fast_Copy_Backward | bit_Fast_Unaligned_Load @@ -135,31 +126,31 @@ __init_cpu_features (void) { kind = arch_kind_amd; - get_common_indeces (&family, &model); + get_common_indeces (cpu_features, &family, &model); - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; + ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; unsigned int eax; __cpuid (0x80000000, eax, ebx, ecx, edx); if (eax >= 0x80000001) __cpuid (0x80000001, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx); + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); } else kind = arch_kind_other; - if (__cpu_features.max_cpuid >= 7) + if (cpu_features->max_cpuid >= 7) __cpuid_count (7, 0, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx); + 
cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); /* Can we call xgetbv? */ - if (CPUID_OSXSAVE) + if (HAS_CPU_FEATURE (OSXSAVE)) { unsigned int xcrlow; unsigned int xcrhigh; @@ -169,15 +160,15 @@ __init_cpu_features (void) (bit_YMM_state | bit_XMM_state)) { /* Determine if AVX is usable. */ - if (CPUID_AVX) - __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; + if (HAS_CPU_FEATURE (AVX)) + cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable; #if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load # error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load #endif /* Determine if AVX2 is usable. Unaligned load with 256-bit AVX registers are faster on processors with AVX2. */ - if (CPUID_AVX2) - __cpu_features.feature[index_AVX2_Usable] + if (HAS_CPU_FEATURE (AVX2)) + cpu_features->feature[index_AVX2_Usable] |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled. */ @@ -186,38 +177,26 @@ __init_cpu_features (void) (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) { /* Determine if AVX512F is usable. */ - if (CPUID_AVX512F) + if (HAS_CPU_FEATURE (AVX512F)) { - __cpu_features.featur |
