diff options
| author | H.J. Lu <hjl.tools@gmail.com> | 2018-12-03 05:54:43 -0800 |
|---|---|---|
| committer | H.J. Lu <hjl.tools@gmail.com> | 2018-12-03 05:54:56 -0800 |
| commit | c22e4c2a1431c5e77bf4288d35bf7629f2f093aa (patch) | |
| tree | 666ae70088b899ad87a250a08b3be7a24bbc0157 | |
| parent | 7b36d26b22d147ffc347f427f9fd584700578a94 (diff) | |
| download | glibc-c22e4c2a1431c5e77bf4288d35bf7629f2f093aa.tar.xz glibc-c22e4c2a1431c5e77bf4288d35bf7629f2f093aa.zip | |
x86: Extend CPUID support in struct cpu_features
Extend CPUID support for all feature bits from CPUID. Add a new macro,
CPU_FEATURE_USABLE, which can be used to check if a feature is usable at
run-time, instead of HAS_CPU_FEATURE and HAS_ARCH_FEATURE.
Add COMMON_CPUID_INDEX_D_ECX_1, COMMON_CPUID_INDEX_80000007 and
COMMON_CPUID_INDEX_80000008 to check CPU feature bits in them.
Tested on i686 and x86-64 as well as using build-many-glibcs.py with
x86 targets.
* sysdeps/x86/cacheinfo.c (intel_check_word): Updated for
cpu_features_basic.
(__cache_sysconf): Likewise.
(init_cacheinfo): Likewise.
	* sysdeps/x86/cpu-features.c (get_extended_indices): Also
populate COMMON_CPUID_INDEX_80000007 and
COMMON_CPUID_INDEX_80000008.
(get_common_indices): Also populate COMMON_CPUID_INDEX_D_ECX_1.
Use CPU_FEATURES_CPU_P (cpu_features, XSAVEC) to check if
XSAVEC is available. Set the bit_arch_XXX_Usable bits.
(init_cpu_features): Use _Static_assert on
index_arch_Fast_Unaligned_Load.
	Removed __get_cpuid_registers and __get_arch_feature.  Updated for
cpu_features_basic. Set stepping in cpu_features.
* sysdeps/x86/cpu-features.h: (FEATURE_INDEX_1): Changed to enum.
(FEATURE_INDEX_2): New.
(FEATURE_INDEX_MAX): Changed to enum.
(COMMON_CPUID_INDEX_D_ECX_1): New.
(COMMON_CPUID_INDEX_80000007): Likewise.
(COMMON_CPUID_INDEX_80000008): Likewise.
(cpuid_registers): Likewise.
(cpu_features_basic): Likewise.
(CPU_FEATURE_USABLE): Likewise.
(bit_arch_XXX_Usable): Likewise.
(cpu_features): Use cpuid_registers and cpu_features_basic.
	(bit_arch_XXX): Rewritten.
(bit_cpu_XXX): Likewise.
(index_cpu_XXX): Likewise.
(reg_XXX): Likewise.
* sysdeps/x86/tst-get-cpu-features.c: Include <stdio.h> and
<support/check.h>.
(CHECK_CPU_FEATURE): New.
(CHECK_CPU_FEATURE_USABLE): Likewise.
(cpu_kinds): Likewise.
(do_test): Print vendor, family, model and stepping. Check
HAS_CPU_FEATURE and CPU_FEATURE_USABLE.
(TEST_FUNCTION): Removed.
Include <support/test-driver.c> instead of
"../../test-skeleton.c".
* sysdeps/x86_64/multiarch/sched_cpucount.c (__sched_cpucount):
Check POPCNT instead of POPCOUNT.
* sysdeps/x86_64/multiarch/test-multiarch.c (do_test): Likewise.
| -rw-r--r-- | ChangeLog | 45 | ||||
| -rw-r--r-- | sysdeps/x86/cacheinfo.c | 20 | ||||
| -rw-r--r-- | sysdeps/x86/cpu-features.c | 141 | ||||
| -rw-r--r-- | sysdeps/x86/cpu-features.h | 1063 | ||||
| -rw-r--r-- | sysdeps/x86/tst-get-cpu-features.c | 264 | ||||
| -rw-r--r-- | sysdeps/x86_64/multiarch/sched_cpucount.c | 2 | ||||
| -rw-r--r-- | sysdeps/x86_64/multiarch/test-multiarch.c | 4 |
7 files changed, 1294 insertions, 245 deletions
@@ -1,3 +1,48 @@ +2018-12-03 H.J. Lu <hongjiu.lu@intel.com> + + * sysdeps/x86/cacheinfo.c (intel_check_word): Updated for + cpu_features_basic. + (__cache_sysconf): Likewise. + (init_cacheinfo): Likewise. + * sysdeps/x86/cpu-features.c (get_extended_indeces): Also + populate COMMON_CPUID_INDEX_80000007 and + COMMON_CPUID_INDEX_80000008. + (get_common_indices): Also populate COMMON_CPUID_INDEX_D_ECX_1. + Use CPU_FEATURES_CPU_P (cpu_features, XSAVEC) to check if + XSAVEC is available. Set the bit_arch_XXX_Usable bits. + (init_cpu_features): Use _Static_assert on + index_arch_Fast_Unaligned_Load. + __get_cpuid_registers and __get_arch_feature. Updated for + cpu_features_basic. Set stepping in cpu_features. + * sysdeps/x86/cpu-features.h: (FEATURE_INDEX_1): Changed to enum. + (FEATURE_INDEX_2): New. + (FEATURE_INDEX_MAX): Changed to enum. + (COMMON_CPUID_INDEX_D_ECX_1): New. + (COMMON_CPUID_INDEX_80000007): Likewise. + (COMMON_CPUID_INDEX_80000008): Likewise. + (cpuid_registers): Likewise. + (cpu_features_basic): Likewise. + (CPU_FEATURE_USABLE): Likewise. + (bit_arch_XXX_Usable): Likewise. + (cpu_features): Use cpuid_registers and cpu_features_basic. + (bit_arch_XXX): Reweritten. + (bit_cpu_XXX): Likewise. + (index_cpu_XXX): Likewise. + (reg_XXX): Likewise. + * sysdeps/x86/tst-get-cpu-features.c: Include <stdio.h> and + <support/check.h>. + (CHECK_CPU_FEATURE): New. + (CHECK_CPU_FEATURE_USABLE): Likewise. + (cpu_kinds): Likewise. + (do_test): Print vendor, family, model and stepping. Check + HAS_CPU_FEATURE and CPU_FEATURE_USABLE. + (TEST_FUNCTION): Removed. + Include <support/test-driver.c> instead of + "../../test-skeleton.c". + * sysdeps/x86_64/multiarch/sched_cpucount.c (__sched_cpucount): + Check POPCNT instead of POPCOUNT. + * sysdeps/x86_64/multiarch/test-multiarch.c (do_test): Likewise. 
+ 2018-12-03 Samuel Thibault <samuel.thibault@ens-lyon.org> * scripts/gen-as-const.py (main): Avoid emitting empty line when diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c index b9444ddd52..58f0a3ccfb 100644 --- a/sysdeps/x86/cacheinfo.c +++ b/sysdeps/x86/cacheinfo.c @@ -205,8 +205,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2, /* Intel reused this value. For family 15, model 6 it specifies the 3rd level cache. Otherwise the 2nd level cache. */ - unsigned int family = cpu_features->family; - unsigned int model = cpu_features->model; + unsigned int family = cpu_features->basic.family; + unsigned int model = cpu_features->basic.model; if (family == 15 && model == 6) { @@ -258,7 +258,7 @@ intel_check_word (int name, unsigned int value, bool *has_level_2, static long int __attribute__ ((noinline)) handle_intel (int name, const struct cpu_features *cpu_features) { - unsigned int maxidx = cpu_features->max_cpuid; + unsigned int maxidx = cpu_features->basic.max_cpuid; /* Return -1 for older CPUs. */ if (maxidx < 2) @@ -443,10 +443,10 @@ __cache_sysconf (int name) { const struct cpu_features *cpu_features = __get_cpu_features (); - if (cpu_features->kind == arch_kind_intel) + if (cpu_features->basic.kind == arch_kind_intel) return handle_intel (name, cpu_features); - if (cpu_features->kind == arch_kind_amd) + if (cpu_features->basic.kind == arch_kind_amd) return handle_amd (name); // XXX Fill in more vendors. @@ -497,9 +497,9 @@ init_cacheinfo (void) unsigned int level; unsigned int threads = 0; const struct cpu_features *cpu_features = __get_cpu_features (); - int max_cpuid = cpu_features->max_cpuid; + int max_cpuid = cpu_features->basic.max_cpuid; - if (cpu_features->kind == arch_kind_intel) + if (cpu_features->basic.kind == arch_kind_intel) { data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features); @@ -538,8 +538,8 @@ init_cacheinfo (void) highest cache level. 
*/ if (max_cpuid >= 4) { - unsigned int family = cpu_features->family; - unsigned int model = cpu_features->model; + unsigned int family = cpu_features->basic.family; + unsigned int model = cpu_features->basic.model; int i = 0; @@ -700,7 +700,7 @@ intel_bug_no_cache_info: shared += core; } } - else if (cpu_features->kind == arch_kind_amd) + else if (cpu_features->basic.kind == arch_kind_amd) { data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 80b3054cf8..5f9eefd408 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -53,7 +53,18 @@ get_extended_indices (struct cpu_features *cpu_features) cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); - + if (eax >= 0x80000007) + __cpuid (0x80000007, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].edx); + if (eax >= 0x80000008) + __cpuid (0x80000008, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].edx); } static void @@ -79,13 +90,20 @@ get_common_indices (struct cpu_features *cpu_features, } } - if (cpu_features->max_cpuid >= 7) + if (cpu_features->basic.max_cpuid >= 7) __cpuid_count (7, 0, cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); + if (cpu_features->basic.max_cpuid >= 0xd) + __cpuid_count (0xd, 1, + cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].eax, + 
cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].edx); + /* Can we call xgetbv? */ if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) { @@ -117,6 +135,18 @@ get_common_indices (struct cpu_features *cpu_features, if (CPU_FEATURES_CPU_P (cpu_features, FMA)) cpu_features->feature[index_arch_FMA_Usable] |= bit_arch_FMA_Usable; + /* Determine if VAES is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, VAES)) + cpu_features->feature[index_arch_VAES_Usable] + |= bit_arch_VAES_Usable; + /* Determine if VPCLMULQDQ is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, VPCLMULQDQ)) + cpu_features->feature[index_arch_VPCLMULQDQ_Usable] + |= bit_arch_VPCLMULQDQ_Usable; + /* Determine if XOP is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, XOP)) + cpu_features->feature[index_arch_XOP_Usable] + |= bit_arch_XOP_Usable; } /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and @@ -130,17 +160,69 @@ get_common_indices (struct cpu_features *cpu_features, { cpu_features->feature[index_arch_AVX512F_Usable] |= bit_arch_AVX512F_Usable; + /* Determine if AVX512CD is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512CD)) + cpu_features->feature[index_arch_AVX512CD_Usable] + |= bit_arch_AVX512CD_Usable; + /* Determine if AVX512ER is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER)) + cpu_features->feature[index_arch_AVX512ER_Usable] + |= bit_arch_AVX512ER_Usable; + /* Determine if AVX512PF is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF)) + cpu_features->feature[index_arch_AVX512PF_Usable] + |= bit_arch_AVX512PF_Usable; + /* Determine if AVX512VL is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512VL)) + cpu_features->feature[index_arch_AVX512VL_Usable] + |= bit_arch_AVX512VL_Usable; /* Determine if AVX512DQ is usable. 
*/ if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ)) cpu_features->feature[index_arch_AVX512DQ_Usable] |= bit_arch_AVX512DQ_Usable; + /* Determine if AVX512BW is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW)) + cpu_features->feature[index_arch_AVX512BW_Usable] + |= bit_arch_AVX512BW_Usable; + /* Determine if AVX512_4FMAPS is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4FMAPS)) + cpu_features->feature[index_arch_AVX512_4FMAPS_Usable] + |= bit_arch_AVX512_4FMAPS_Usable; + /* Determine if AVX512_4VNNIW is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4VNNIW)) + cpu_features->feature[index_arch_AVX512_4VNNIW_Usable] + |= bit_arch_AVX512_4VNNIW_Usable; + /* Determine if AVX512_BITALG is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BITALG)) + cpu_features->feature[index_arch_AVX512_BITALG_Usable] + |= bit_arch_AVX512_BITALG_Usable; + /* Determine if AVX512_IFMA is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_IFMA)) + cpu_features->feature[index_arch_AVX512_IFMA_Usable] + |= bit_arch_AVX512_IFMA_Usable; + /* Determine if AVX512_VBMI is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI)) + cpu_features->feature[index_arch_AVX512_VBMI_Usable] + |= bit_arch_AVX512_VBMI_Usable; + /* Determine if AVX512_VBMI2 is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI2)) + cpu_features->feature[index_arch_AVX512_VBMI2_Usable] + |= bit_arch_AVX512_VBMI2_Usable; + /* Determine if is AVX512_VNNI usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VNNI)) + cpu_features->feature[index_arch_AVX512_VNNI_Usable] + |= bit_arch_AVX512_VNNI_Usable; + /* Determine if AVX512_VPOPCNTDQ is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ)) + cpu_features->feature[index_arch_AVX512_VPOPCNTDQ_Usable] + |= bit_arch_AVX512_VPOPCNTDQ_Usable; } } } /* For _dl_runtime_resolve, set xsave_state_size to xsave area size + integer register save size and align it to 64 bytes. 
*/ - if (cpu_features->max_cpuid >= 0xd) + if (cpu_features->basic.max_cpuid >= 0xd) { unsigned int eax, ebx, ecx, edx; @@ -155,10 +237,8 @@ get_common_indices (struct cpu_features *cpu_features, cpu_features->xsave_state_full_size = xsave_state_full_size; - __cpuid_count (0xd, 1, eax, ebx, ecx, edx); - /* Check if XSAVEC is available. */ - if ((eax & (1 << 1)) != 0) + if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC)) { unsigned int xstate_comp_offsets[32]; unsigned int xstate_comp_sizes[32]; @@ -210,12 +290,25 @@ get_common_indices (struct cpu_features *cpu_features, } } +_Static_assert (((index_arch_Fast_Unaligned_Load + == index_arch_Fast_Unaligned_Copy) + && (index_arch_Fast_Unaligned_Load + == index_arch_Prefer_PMINUB_for_stringop) + && (index_arch_Fast_Unaligned_Load + == index_arch_Slow_SSE4_2) + && (index_arch_Fast_Unaligned_Load + == index_arch_Fast_Rep_String) + && (index_arch_Fast_Unaligned_Load + == index_arch_Fast_Copy_Backward)), + "Incorrect index_arch_Fast_Unaligned_Load"); + static inline void init_cpu_features (struct cpu_features *cpu_features) { unsigned int ebx, ecx, edx; unsigned int family = 0; unsigned int model = 0; + unsigned int stepping = 0; enum cpu_features_kind kind; #if !HAS_CPUID @@ -226,12 +319,12 @@ init_cpu_features (struct cpu_features *cpu_features) } #endif - __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx); + __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx); /* This spells out "GenuineIntel". */ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) { - unsigned int extended_model, stepping; + unsigned int extended_model; kind = arch_kind_intel; @@ -270,15 +363,6 @@ init_cpu_features (struct cpu_features *cpu_features) case 0x5d: /* Unaligned load versions are faster than SSSE3 on Silvermont. 
*/ -#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop -# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop -#endif -#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2 -# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2 -#endif -#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy -# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy -#endif cpu_features->feature[index_arch_Fast_Unaligned_Load] |= (bit_arch_Fast_Unaligned_Load | bit_arch_Fast_Unaligned_Copy @@ -301,15 +385,6 @@ init_cpu_features (struct cpu_features *cpu_features) case 0x2f: /* Rep string instructions, unaligned load, unaligned copy, and pminub are fast on Intel Core i3, i5 and i7. */ -#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load -# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load -#endif -#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop -# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop -#endif -#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy -# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy -#endif cpu_features->feature[index_arch_Fast_Rep_String] |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load @@ -353,7 +428,7 @@ init_cpu_features (struct cpu_features *cpu_features) /* This spells out "AuthenticAMD". 
*/ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) { - unsigned int extended_model, stepping; + unsigned int extended_model; kind = arch_kind_amd; @@ -375,9 +450,6 @@ init_cpu_features (struct cpu_features *cpu_features) if (family == 0x15) { -#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward -# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward -#endif /* "Excavator" */ if (model >= 0x60 && model <= 0x7f) { @@ -409,9 +481,10 @@ init_cpu_features (struct cpu_features *cpu_features) no_cpuid: #endif - cpu_features->family = family; - cpu_features->model = model; - cpu_features->kind = kind; + cpu_features->basic.kind = kind; + cpu_features->basic.family = family; + cpu_features->basic.model = model; + cpu_features->basic.stepping = stepping; #if HAVE_TUNABLES TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps)); @@ -432,7 +505,7 @@ no_cpuid: #ifdef __x86_64__ GLRO(dl_hwcap) = HWCAP_X86_64; - if (cpu_features->kind == arch_kind_intel) + if (cpu_features->basic.kind == arch_kind_intel) { const char *platform = NULL; diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h index fb22d7b9d6..4917182e99 100644 --- a/sysdeps/x86/cpu-features.h +++ b/sysdeps/x86/cpu-features.h @@ -18,108 +18,58 @@ #ifndef cpu_features_h #define cpu_features_h -#define bit_arch_Fast_Rep_String (1 << 0) -#define bit_arch_Fast_Copy_Backward (1 << 1) -#define bit_arch_Slow_BSF (1 << 2) -#define bit_arch_Fast_Unaligned_Load (1 << 4) -#define bit_arch_Prefer_PMINUB_for_stringop (1 << 5) -#define bit_arch_AVX_Usable (1 << 6) -#define bit_arch_FMA_Usable (1 << 7) -#define bit_arch_FMA4_Usable (1 << 8) -#define bit_arch_Slow_SSE4_2 (1 << 9) -#define bit_arch_AVX2_Usable (1 << 10) -#define bit_arch_AVX_Fast_Unaligned_Load (1 << 11) -#define bit_arch_AVX512F_Usable (1 << 12) -#define bit_arch_AVX512DQ_Usable (1 << 13) -#define bit_arch_I586 (1 << 14) -#define bit_arch_I686 (1 << 15) -#define 
bit_arch_Prefer_MAP_32BIT_EXEC (1 << 16) -#define bit_arch_Prefer_No_VZEROUPPER (1 << 17) -#define bit_arch_Fast_Unaligned_Copy (1 << 18) -#define bit_arch_Prefer_ERMS (1 << 19) -#define bit_arch_Prefer_No_AVX512 (1 << 20) -#define bit_arch_MathVec_Prefer_No_AVX512 (1 << 21) -#define bit_arch_XSAVEC_Usable (1 << 22) -#define bit_arch_Prefer_FSRM (1 << 23) - -/* CPUID Feature flags. */ - -/* COMMON_CPUID_INDEX_1. */ -#define bit_cpu_CX8 (1 << 8) -#define bit_cpu_CMOV (1 << 15) -#define bit_cpu_SSE (1 << 25) -#define bit_cpu_SSE2 (1 << 26) -#define bit_cpu_SSSE3 (1 << 9) -#define bit_cpu_SSE4_1 (1 << 19) -#define bit_cpu_SSE4_2 (1 << 20) -#define bit_cpu_OSXSAVE (1 << 27) -#define bit_cpu_AVX (1 << 28) -#define bit_cpu_POPCOUNT (1 << 23) -#define bit_cpu_FMA (1 << 12) -#define bit_cpu_FMA4 (1 << 16) -#define bit_cpu_HTT (1 << 28) -#define bit_cpu_LZCNT (1 << 5) -#define bit_cpu_MOVBE (1 << 22) -#define bit_cpu_POPCNT (1 << 23) - -/* COMMON_CPUID_INDEX_7. */ -#define bit_cpu_BMI1 (1 << 3) -#define bit_cpu_BMI2 (1 << 8) -#define bit_cpu_ERMS (1 << 9) -#define bit_cpu_RTM (1 << 11) -#define bit_cpu_AVX2 (1 << 5) -#define bit_cpu_AVX512F (1 << 16) -#define bit_cpu_AVX512DQ (1 << 17) -#define bit_cpu_AVX512PF (1 << 26) -#define bit_cpu_AVX512ER (1 << 27) -#define bit_cpu_AVX512CD (1 << 28) -#define bit_cpu_AVX512BW (1 << 30) -#define bit_cpu_AVX512VL (1u << 31) -#define bit_cpu_IBT (1u << 20) -#define bit_cpu_SHSTK (1u << 7) -#define bit_cpu_FSRM (1 << 4) - -/* XCR0 Feature flags. */ -#define bit_XMM_state (1 << 1) -#define bit_YMM_state (1 << 2) -#define bit_Opmask_state (1 << 5) -#define bit_ZMM0_15_state (1 << 6) -#define bit_ZMM16_31_state (1 << 7) +enum +{ + /* The integer bit array index for the first set of internal feature + bits. */ + FEATURE_INDEX_1 = 0, + FEATURE_INDEX_2, + /* The current maximum size of the feature integer bit array. */ + FEATURE_INDEX_MAX +}; -/* The integer bit array index for the first set of internal feature bits. 
*/ -#define FEATURE_INDEX_1 0 +enum +{ + COMMON_CPUID_INDEX_1 = 0, + COMMON_CPUID_INDEX_7, + COMMON_CPUID_INDEX_80000001, + COMMON_CPUID_INDEX_D_ECX_1, + COMMON_CPUID_INDEX_80000007, + COMMON_CPUID_INDEX_80000008, + /* Keep the following line at the end. */ + COMMON_CPUID_INDEX_MAX +}; -/* The current maximum size of the feature integer bit array. */ -#define FEATURE_INDEX_MAX 1 +struct cpuid_registers +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; +}; -enum - { - COMMON_CPUID_INDEX_1 = 0, - COMMON_CPUID_INDEX_7, - COMMON_CPUID_INDEX_80000001, - /* Keep the following line at the end. */ - COMMON_CPUID_INDEX_MAX - }; +enum cpu_features_kind +{ + arch_kind_unknown = 0, + arch_kind_intel, + arch_kind_amd, + arch_kind_other +}; -struct cpu_features +struct cpu_features_basic { - enum cpu_features_kind - { - arch_kind_unknown = 0, - arch_kind_intel, - arch_kind_amd, - arch_kind_other - } kind; + enum cpu_features_kind kind; int max_cpuid; - struct cpuid_registers - { - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - } cpuid[COMMON_CPUID_INDEX_MAX]; unsigned int family; unsigned int model; + unsigned int stepping; +}; + +struct cpu_features +{ + struct cpuid_registers cpuid[COMMON_CPUID_INDEX_MAX]; + unsigned int feature[FEATURE_INDEX_MAX]; + struct cpu_features_basic basic; /* The state size for XSAVEC or XSAVE. The type must be unsigned long int so that we use @@ -132,7 +82,6 @@ struct cpu_features GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC_Usable */ unsigned int xsave_state_full_size; - unsigned int feature[FEATURE_INDEX_MAX]; /* Data cache size for use in memory and string routines, typically L1 size. */ unsigned long int data_cache_size; @@ -148,112 +97,838 @@ struct cpu_features extern const struct cpu_features *__get_cpu_features (void) __attribute__ ((const)); -# if defined (_LIBC) && !IS_IN (nonlib) -/* Unused for x86. 
*/ -# define INIT_ARCH() -# define __get_cpu_features() (&GLRO(dl_x86_cpu_features)) -# endif - - /* Only used directly in cpu-features.c. */ # define CPU_FEATURES_CPU_P(ptr, name) \ ((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0) # define CPU_FEATURES_ARCH_P(ptr, name) \ ((ptr->feature[index_arch_##name] & (bit_arch_##name)) != 0) -/* HAS_* evaluates to true if we may use the feature at runtime. */ -# define HAS_CPU_FEATURE(name) \ - CPU_FEATURES_CPU_P (__get_cpu_features (), name) +/* HAS_CPU_FEATURE evaluates to true if CPU supports the feature. */ +#define HAS_CPU_FEATURE(name) \ + CPU_FEATURES_CPU_P (__get_cpu_features (), name) +/* HAS_ARCH_FEATURE evaluates to true if we may use the feature at + runtime. */ # define HAS_ARCH_F |
