author     H.J. Lu <hjl.tools@gmail.com>  2018-12-03 05:54:43 -0800
committer  H.J. Lu <hjl.tools@gmail.com>  2018-12-03 05:54:56 -0800
commit     c22e4c2a1431c5e77bf4288d35bf7629f2f093aa (patch)
tree       666ae70088b899ad87a250a08b3be7a24bbc0157
parent     7b36d26b22d147ffc347f427f9fd584700578a94 (diff)
x86: Extend CPUID support in struct cpu_features
Extend CPUID support for all feature bits from CPUID.  Add a new macro,
CPU_FEATURE_USABLE, which can be used to check if a feature is usable at
run-time, instead of HAS_CPU_FEATURE and HAS_ARCH_FEATURE.

Add COMMON_CPUID_INDEX_D_ECX_1, COMMON_CPUID_INDEX_80000007 and
COMMON_CPUID_INDEX_80000008 to check CPU feature bits in them.

Tested on i686 and x86-64 as well as using build-many-glibcs.py with
x86 targets.

        * sysdeps/x86/cacheinfo.c (intel_check_word): Updated for
        cpu_features_basic.
        (__cache_sysconf): Likewise.
        (init_cacheinfo): Likewise.
        * sysdeps/x86/cpu-features.c (get_extended_indices): Also
        populate COMMON_CPUID_INDEX_80000007 and
        COMMON_CPUID_INDEX_80000008.
        (get_common_indices): Also populate COMMON_CPUID_INDEX_D_ECX_1.
        Use CPU_FEATURES_CPU_P (cpu_features, XSAVEC) to check if
        XSAVEC is available.  Set the bit_arch_XXX_Usable bits.
        (init_cpu_features): Use _Static_assert on
        index_arch_Fast_Unaligned_Load.
        __get_cpuid_registers and __get_arch_feature.  Updated for
        cpu_features_basic.  Set stepping in cpu_features.
        * sysdeps/x86/cpu-features.h: (FEATURE_INDEX_1): Changed to enum.
        (FEATURE_INDEX_2): New.
        (FEATURE_INDEX_MAX): Changed to enum.
        (COMMON_CPUID_INDEX_D_ECX_1): New.
        (COMMON_CPUID_INDEX_80000007): Likewise.
        (COMMON_CPUID_INDEX_80000008): Likewise.
        (cpuid_registers): Likewise.
        (cpu_features_basic): Likewise.
        (CPU_FEATURE_USABLE): Likewise.
        (bit_arch_XXX_Usable): Likewise.
        (cpu_features): Use cpuid_registers and cpu_features_basic.
        (bit_arch_XXX): Rewritten.
        (bit_cpu_XXX): Likewise.
        (index_cpu_XXX): Likewise.
        (reg_XXX): Likewise.
        * sysdeps/x86/tst-get-cpu-features.c: Include <stdio.h> and
        <support/check.h>.
        (CHECK_CPU_FEATURE): New.
        (CHECK_CPU_FEATURE_USABLE): Likewise.
        (cpu_kinds): Likewise.
        (do_test): Print vendor, family, model and stepping.  Check
        HAS_CPU_FEATURE and CPU_FEATURE_USABLE.
        (TEST_FUNCTION): Removed.
        Include <support/test-driver.c> instead of
        "../../test-skeleton.c".
        * sysdeps/x86_64/multiarch/sched_cpucount.c (__sched_cpucount):
        Check POPCNT instead of POPCOUNT.
        * sysdeps/x86_64/multiarch/test-multiarch.c (do_test): Likewise.
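As a quick orientation before the diff: a minimal sketch of the call-site
difference the new macro is meant to create.  select_memcpy, memcpy_avx2 and
memcpy_sse2 are hypothetical names, and <cpu-features.h> is glibc's internal
sysdeps header, so a snippet like this only compiles inside the glibc tree:

    #include <cpu-features.h>   /* Internal glibc header (sysdeps/x86).  */

    extern void *memcpy_avx2 (void *, const void *, unsigned long);
    extern void *memcpy_sse2 (void *, const void *, unsigned long);

    /* Hypothetical IFUNC-style selector.  CPU_FEATURE_USABLE is true
       only when the CPUID bit is set *and* the feature can really be
       used (e.g. the kernel enabled the matching XSAVE state), while
       HAS_CPU_FEATURE tests the raw CPUID bit alone.  */
    static __typeof (memcpy_avx2) *
    select_memcpy (void)
    {
      if (CPU_FEATURE_USABLE (AVX2))
        return memcpy_avx2;
      return memcpy_sse2;
    }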
-rw-r--r--  ChangeLog                                     45
-rw-r--r--  sysdeps/x86/cacheinfo.c                       20
-rw-r--r--  sysdeps/x86/cpu-features.c                   141
-rw-r--r--  sysdeps/x86/cpu-features.h                  1063
-rw-r--r--  sysdeps/x86/tst-get-cpu-features.c           264
-rw-r--r--  sysdeps/x86_64/multiarch/sched_cpucount.c      2
-rw-r--r--  sysdeps/x86_64/multiarch/test-multiarch.c      4
7 files changed, 1294 insertions(+), 245 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 030c567a77..a11a9c6961 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,48 @@
+2018-12-03 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86/cacheinfo.c (intel_check_word): Updated for
+ cpu_features_basic.
+ (__cache_sysconf): Likewise.
+ (init_cacheinfo): Likewise.
+ * sysdeps/x86/cpu-features.c (get_extended_indices): Also
+ populate COMMON_CPUID_INDEX_80000007 and
+ COMMON_CPUID_INDEX_80000008.
+ (get_common_indices): Also populate COMMON_CPUID_INDEX_D_ECX_1.
+ Use CPU_FEATURES_CPU_P (cpu_features, XSAVEC) to check if
+ XSAVEC is available. Set the bit_arch_XXX_Usable bits.
+ (init_cpu_features): Use _Static_assert on
+ index_arch_Fast_Unaligned_Load.
+ __get_cpuid_registers and __get_arch_feature. Updated for
+ cpu_features_basic. Set stepping in cpu_features.
+ * sysdeps/x86/cpu-features.h: (FEATURE_INDEX_1): Changed to enum.
+ (FEATURE_INDEX_2): New.
+ (FEATURE_INDEX_MAX): Changed to enum.
+ (COMMON_CPUID_INDEX_D_ECX_1): New.
+ (COMMON_CPUID_INDEX_80000007): Likewise.
+ (COMMON_CPUID_INDEX_80000008): Likewise.
+ (cpuid_registers): Likewise.
+ (cpu_features_basic): Likewise.
+ (CPU_FEATURE_USABLE): Likewise.
+ (bit_arch_XXX_Usable): Likewise.
+ (cpu_features): Use cpuid_registers and cpu_features_basic.
+ (bit_arch_XXX): Rewritten.
+ (bit_cpu_XXX): Likewise.
+ (index_cpu_XXX): Likewise.
+ (reg_XXX): Likewise.
+ * sysdeps/x86/tst-get-cpu-features.c: Include <stdio.h> and
+ <support/check.h>.
+ (CHECK_CPU_FEATURE): New.
+ (CHECK_CPU_FEATURE_USABLE): Likewise.
+ (cpu_kinds): Likewise.
+ (do_test): Print vendor, family, model and stepping. Check
+ HAS_CPU_FEATURE and CPU_FEATURE_USABLE.
+ (TEST_FUNCTION): Removed.
+ Include <support/test-driver.c> instead of
+ "../../test-skeleton.c".
+ * sysdeps/x86_64/multiarch/sched_cpucount.c (__sched_cpucount):
+ Check POPCNT instead of POPCOUNT.
+ * sysdeps/x86_64/multiarch/test-multiarch.c (do_test): Likewise.
+
2018-12-03 Samuel Thibault <samuel.thibault@ens-lyon.org>
* scripts/gen-as-const.py (main): Avoid emitting empty line when
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index b9444ddd52..58f0a3ccfb 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -205,8 +205,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
/* Intel reused this value. For family 15, model 6 it
specifies the 3rd level cache. Otherwise the 2nd
level cache. */
- unsigned int family = cpu_features->family;
- unsigned int model = cpu_features->model;
+ unsigned int family = cpu_features->basic.family;
+ unsigned int model = cpu_features->basic.model;
if (family == 15 && model == 6)
{
@@ -258,7 +258,7 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
static long int __attribute__ ((noinline))
handle_intel (int name, const struct cpu_features *cpu_features)
{
- unsigned int maxidx = cpu_features->max_cpuid;
+ unsigned int maxidx = cpu_features->basic.max_cpuid;
/* Return -1 for older CPUs. */
if (maxidx < 2)
@@ -443,10 +443,10 @@ __cache_sysconf (int name)
{
const struct cpu_features *cpu_features = __get_cpu_features ();
- if (cpu_features->kind == arch_kind_intel)
+ if (cpu_features->basic.kind == arch_kind_intel)
return handle_intel (name, cpu_features);
- if (cpu_features->kind == arch_kind_amd)
+ if (cpu_features->basic.kind == arch_kind_amd)
return handle_amd (name);
// XXX Fill in more vendors.
@@ -497,9 +497,9 @@ init_cacheinfo (void)
unsigned int level;
unsigned int threads = 0;
const struct cpu_features *cpu_features = __get_cpu_features ();
- int max_cpuid = cpu_features->max_cpuid;
+ int max_cpuid = cpu_features->basic.max_cpuid;
- if (cpu_features->kind == arch_kind_intel)
+ if (cpu_features->basic.kind == arch_kind_intel)
{
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
@@ -538,8 +538,8 @@ init_cacheinfo (void)
highest cache level. */
if (max_cpuid >= 4)
{
- unsigned int family = cpu_features->family;
- unsigned int model = cpu_features->model;
+ unsigned int family = cpu_features->basic.family;
+ unsigned int model = cpu_features->basic.model;
int i = 0;
@@ -700,7 +700,7 @@ intel_bug_no_cache_info:
shared += core;
}
}
- else if (cpu_features->kind == arch_kind_amd)
+ else if (cpu_features->basic.kind == arch_kind_amd)
{
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
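The cacheinfo.c hunks above are mechanical: the identification fields moved
into the nested basic member of struct cpu_features.  A self-contained sketch
of the new consumer-side pattern (the helper name is made up; the family 15 /
model 6 quirk is the one intel_check_word handles above):

    #include <cpu-features.h>   /* Internal glibc header; sketch only.  */

    /* Hypothetical helper: nonzero when the Intel family 15 / model 6
       reinterpretation of the cache descriptor applies.  Note that the
       fields are now reached through cpu_features->basic.  */
    static int
    needs_level3_reinterpretation (const struct cpu_features *cpu_features)
    {
      return (cpu_features->basic.kind == arch_kind_intel
              && cpu_features->basic.family == 15
              && cpu_features->basic.model == 6);
    }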
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 80b3054cf8..5f9eefd408 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -53,7 +53,18 @@ get_extended_indices (struct cpu_features *cpu_features)
cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
-
+ if (eax >= 0x80000007)
+ __cpuid (0x80000007,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].edx);
+ if (eax >= 0x80000008)
+ __cpuid (0x80000008,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].edx);
}
static void
@@ -79,13 +90,20 @@ get_common_indices (struct cpu_features *cpu_features,
}
}
- if (cpu_features->max_cpuid >= 7)
+ if (cpu_features->basic.max_cpuid >= 7)
__cpuid_count (7, 0,
cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+ if (cpu_features->basic.max_cpuid >= 0xd)
+ __cpuid_count (0xd, 1,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].edx);
+
/* Can we call xgetbv? */
if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
{
@@ -117,6 +135,18 @@ get_common_indices (struct cpu_features *cpu_features,
if (CPU_FEATURES_CPU_P (cpu_features, FMA))
cpu_features->feature[index_arch_FMA_Usable]
|= bit_arch_FMA_Usable;
+ /* Determine if VAES is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, VAES))
+ cpu_features->feature[index_arch_VAES_Usable]
+ |= bit_arch_VAES_Usable;
+ /* Determine if VPCLMULQDQ is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, VPCLMULQDQ))
+ cpu_features->feature[index_arch_VPCLMULQDQ_Usable]
+ |= bit_arch_VPCLMULQDQ_Usable;
+ /* Determine if XOP is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, XOP))
+ cpu_features->feature[index_arch_XOP_Usable]
+ |= bit_arch_XOP_Usable;
}
/* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
@@ -130,17 +160,69 @@ get_common_indices (struct cpu_features *cpu_features,
{
cpu_features->feature[index_arch_AVX512F_Usable]
|= bit_arch_AVX512F_Usable;
+ /* Determine if AVX512CD is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512CD))
+ cpu_features->feature[index_arch_AVX512CD_Usable]
+ |= bit_arch_AVX512CD_Usable;
+ /* Determine if AVX512ER is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
+ cpu_features->feature[index_arch_AVX512ER_Usable]
+ |= bit_arch_AVX512ER_Usable;
+ /* Determine if AVX512PF is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF))
+ cpu_features->feature[index_arch_AVX512PF_Usable]
+ |= bit_arch_AVX512PF_Usable;
+ /* Determine if AVX512VL is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512VL))
+ cpu_features->feature[index_arch_AVX512VL_Usable]
+ |= bit_arch_AVX512VL_Usable;
/* Determine if AVX512DQ is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
cpu_features->feature[index_arch_AVX512DQ_Usable]
|= bit_arch_AVX512DQ_Usable;
+ /* Determine if AVX512BW is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW))
+ cpu_features->feature[index_arch_AVX512BW_Usable]
+ |= bit_arch_AVX512BW_Usable;
+ /* Determine if AVX512_4FMAPS is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4FMAPS))
+ cpu_features->feature[index_arch_AVX512_4FMAPS_Usable]
+ |= bit_arch_AVX512_4FMAPS_Usable;
+ /* Determine if AVX512_4VNNIW is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4VNNIW))
+ cpu_features->feature[index_arch_AVX512_4VNNIW_Usable]
+ |= bit_arch_AVX512_4VNNIW_Usable;
+ /* Determine if AVX512_BITALG is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BITALG))
+ cpu_features->feature[index_arch_AVX512_BITALG_Usable]
+ |= bit_arch_AVX512_BITALG_Usable;
+ /* Determine if AVX512_IFMA is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_IFMA))
+ cpu_features->feature[index_arch_AVX512_IFMA_Usable]
+ |= bit_arch_AVX512_IFMA_Usable;
+ /* Determine if AVX512_VBMI is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI))
+ cpu_features->feature[index_arch_AVX512_VBMI_Usable]
+ |= bit_arch_AVX512_VBMI_Usable;
+ /* Determine if AVX512_VBMI2 is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI2))
+ cpu_features->feature[index_arch_AVX512_VBMI2_Usable]
+ |= bit_arch_AVX512_VBMI2_Usable;
+ /* Determine if AVX512_VNNI is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VNNI))
+ cpu_features->feature[index_arch_AVX512_VNNI_Usable]
+ |= bit_arch_AVX512_VNNI_Usable;
+ /* Determine if AVX512_VPOPCNTDQ is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ))
+ cpu_features->feature[index_arch_AVX512_VPOPCNTDQ_Usable]
+ |= bit_arch_AVX512_VPOPCNTDQ_Usable;
}
}
}
/* For _dl_runtime_resolve, set xsave_state_size to xsave area
size + integer register save size and align it to 64 bytes. */
- if (cpu_features->max_cpuid >= 0xd)
+ if (cpu_features->basic.max_cpuid >= 0xd)
{
unsigned int eax, ebx, ecx, edx;
@@ -155,10 +237,8 @@ get_common_indices (struct cpu_features *cpu_features,
cpu_features->xsave_state_full_size
= xsave_state_full_size;
- __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
-
/* Check if XSAVEC is available. */
- if ((eax & (1 << 1)) != 0)
+ if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
{
unsigned int xstate_comp_offsets[32];
unsigned int xstate_comp_sizes[32];
@@ -210,12 +290,25 @@ get_common_indices (struct cpu_features *cpu_features,
}
}
+_Static_assert (((index_arch_Fast_Unaligned_Load
+ == index_arch_Fast_Unaligned_Copy)
+ && (index_arch_Fast_Unaligned_Load
+ == index_arch_Prefer_PMINUB_for_stringop)
+ && (index_arch_Fast_Unaligned_Load
+ == index_arch_Slow_SSE4_2)
+ && (index_arch_Fast_Unaligned_Load
+ == index_arch_Fast_Rep_String)
+ && (index_arch_Fast_Unaligned_Load
+ == index_arch_Fast_Copy_Backward)),
+ "Incorrect index_arch_Fast_Unaligned_Load");
+
static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
unsigned int ebx, ecx, edx;
unsigned int family = 0;
unsigned int model = 0;
+ unsigned int stepping = 0;
enum cpu_features_kind kind;
#if !HAS_CPUID
@@ -226,12 +319,12 @@ init_cpu_features (struct cpu_features *cpu_features)
}
#endif
- __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx);
+ __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
/* This spells out "GenuineIntel". */
if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
{
- unsigned int extended_model, stepping;
+ unsigned int extended_model;
kind = arch_kind_intel;
@@ -270,15 +363,6 @@ init_cpu_features (struct cpu_features *cpu_features)
case 0x5d:
/* Unaligned load versions are faster than SSSE3
on Silvermont. */
-#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
-# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
-#endif
-#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
-# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
-#endif
-#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
-# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
-#endif
cpu_features->feature[index_arch_Fast_Unaligned_Load]
|= (bit_arch_Fast_Unaligned_Load
| bit_arch_Fast_Unaligned_Copy
@@ -301,15 +385,6 @@ init_cpu_features (struct cpu_features *cpu_features)
case 0x2f:
/* Rep string instructions, unaligned load, unaligned copy,
and pminub are fast on Intel Core i3, i5 and i7. */
-#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
-# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
-#endif
-#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
-# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
-#endif
-#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
-# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
-#endif
cpu_features->feature[index_arch_Fast_Rep_String]
|= (bit_arch_Fast_Rep_String
| bit_arch_Fast_Unaligned_Load
@@ -353,7 +428,7 @@ init_cpu_features (struct cpu_features *cpu_features)
/* This spells out "AuthenticAMD". */
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
{
- unsigned int extended_model, stepping;
+ unsigned int extended_model;
kind = arch_kind_amd;
@@ -375,9 +450,6 @@ init_cpu_features (struct cpu_features *cpu_features)
if (family == 0x15)
{
-#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
-# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
-#endif
/* "Excavator" */
if (model >= 0x60 && model <= 0x7f)
{
@@ -409,9 +481,10 @@ init_cpu_features (struct cpu_features *cpu_features)
no_cpuid:
#endif
- cpu_features->family = family;
- cpu_features->model = model;
- cpu_features->kind = kind;
+ cpu_features->basic.kind = kind;
+ cpu_features->basic.family = family;
+ cpu_features->basic.model = model;
+ cpu_features->basic.stepping = stepping;
#if HAVE_TUNABLES
TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
@@ -432,7 +505,7 @@ no_cpuid:
#ifdef __x86_64__
GLRO(dl_hwcap) = HWCAP_X86_64;
- if (cpu_features->kind == arch_kind_intel)
+ if (cpu_features->basic.kind == arch_kind_intel)
{
const char *platform = NULL;
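The *_Usable derivation in the hunks above rests on more than CPUID: a feature
such as AVX512F is only marked usable once OSXSAVE is set and XGETBV confirms
the kernel saves the opmask and ZMM state components.  A standalone sketch of
that OS-level check, reusing the XCR0 bit names from the pre-patch header
(real code must verify OSXSAVE first, since xgetbv faults where unsupported):

    #include <stdio.h>

    /* XCR0 state-component bits, as defined in the pre-patch header.  */
    #define bit_XMM_state      (1 << 1)
    #define bit_YMM_state      (1 << 2)
    #define bit_Opmask_state   (1 << 5)
    #define bit_ZMM0_15_state  (1 << 6)
    #define bit_ZMM16_31_state (1 << 7)

    /* Read XCR0 with XGETBV; only safe once CPUID reports OSXSAVE.  */
    static unsigned long long
    read_xcr0 (void)
    {
      unsigned int lo, hi;
      __asm__ ("xgetbv" : "=a" (lo), "=d" (hi) : "c" (0));
      return ((unsigned long long) hi << 32) | lo;
    }

    int
    main (void)
    {
      /* AVX512F may be marked usable only if the OS saves the opmask,
         all ZMM, and the underlying YMM/XMM state components.  */
      unsigned long long mask = (bit_Opmask_state | bit_ZMM0_15_state
                                 | bit_ZMM16_31_state | bit_YMM_state
                                 | bit_XMM_state);
      unsigned long long xcr0 = read_xcr0 ();
      printf ("AVX-512 state enabled by OS: %s\n",
              (xcr0 & mask) == mask ? "yes" : "no");
      return 0;
    }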
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index fb22d7b9d6..4917182e99 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -18,108 +18,58 @@
#ifndef cpu_features_h
#define cpu_features_h
-#define bit_arch_Fast_Rep_String (1 << 0)
-#define bit_arch_Fast_Copy_Backward (1 << 1)
-#define bit_arch_Slow_BSF (1 << 2)
-#define bit_arch_Fast_Unaligned_Load (1 << 4)
-#define bit_arch_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_arch_AVX_Usable (1 << 6)
-#define bit_arch_FMA_Usable (1 << 7)
-#define bit_arch_FMA4_Usable (1 << 8)
-#define bit_arch_Slow_SSE4_2 (1 << 9)
-#define bit_arch_AVX2_Usable (1 << 10)
-#define bit_arch_AVX_Fast_Unaligned_Load (1 << 11)
-#define bit_arch_AVX512F_Usable (1 << 12)
-#define bit_arch_AVX512DQ_Usable (1 << 13)
-#define bit_arch_I586 (1 << 14)
-#define bit_arch_I686 (1 << 15)
-#define bit_arch_Prefer_MAP_32BIT_EXEC (1 << 16)
-#define bit_arch_Prefer_No_VZEROUPPER (1 << 17)
-#define bit_arch_Fast_Unaligned_Copy (1 << 18)
-#define bit_arch_Prefer_ERMS (1 << 19)
-#define bit_arch_Prefer_No_AVX512 (1 << 20)
-#define bit_arch_MathVec_Prefer_No_AVX512 (1 << 21)
-#define bit_arch_XSAVEC_Usable (1 << 22)
-#define bit_arch_Prefer_FSRM (1 << 23)
-
-/* CPUID Feature flags. */
-
-/* COMMON_CPUID_INDEX_1. */
-#define bit_cpu_CX8 (1 << 8)
-#define bit_cpu_CMOV (1 << 15)
-#define bit_cpu_SSE (1 << 25)
-#define bit_cpu_SSE2 (1 << 26)
-#define bit_cpu_SSSE3 (1 << 9)
-#define bit_cpu_SSE4_1 (1 << 19)
-#define bit_cpu_SSE4_2 (1 << 20)
-#define bit_cpu_OSXSAVE (1 << 27)
-#define bit_cpu_AVX (1 << 28)
-#define bit_cpu_POPCOUNT (1 << 23)
-#define bit_cpu_FMA (1 << 12)
-#define bit_cpu_FMA4 (1 << 16)
-#define bit_cpu_HTT (1 << 28)
-#define bit_cpu_LZCNT (1 << 5)
-#define bit_cpu_MOVBE (1 << 22)
-#define bit_cpu_POPCNT (1 << 23)
-
-/* COMMON_CPUID_INDEX_7. */
-#define bit_cpu_BMI1 (1 << 3)
-#define bit_cpu_BMI2 (1 << 8)
-#define bit_cpu_ERMS (1 << 9)
-#define bit_cpu_RTM (1 << 11)
-#define bit_cpu_AVX2 (1 << 5)
-#define bit_cpu_AVX512F (1 << 16)
-#define bit_cpu_AVX512DQ (1 << 17)
-#define bit_cpu_AVX512PF (1 << 26)
-#define bit_cpu_AVX512ER (1 << 27)
-#define bit_cpu_AVX512CD (1 << 28)
-#define bit_cpu_AVX512BW (1 << 30)
-#define bit_cpu_AVX512VL (1u << 31)
-#define bit_cpu_IBT (1u << 20)
-#define bit_cpu_SHSTK (1u << 7)
-#define bit_cpu_FSRM (1 << 4)
-
-/* XCR0 Feature flags. */
-#define bit_XMM_state (1 << 1)
-#define bit_YMM_state (1 << 2)
-#define bit_Opmask_state (1 << 5)
-#define bit_ZMM0_15_state (1 << 6)
-#define bit_ZMM16_31_state (1 << 7)
+enum
+{
+ /* The integer bit array index for the first set of internal feature
+ bits. */
+ FEATURE_INDEX_1 = 0,
+ FEATURE_INDEX_2,
+ /* The current maximum size of the feature integer bit array. */
+ FEATURE_INDEX_MAX
+};
-/* The integer bit array index for the first set of internal feature bits. */
-#define FEATURE_INDEX_1 0
+enum
+{
+ COMMON_CPUID_INDEX_1 = 0,
+ COMMON_CPUID_INDEX_7,
+ COMMON_CPUID_INDEX_80000001,
+ COMMON_CPUID_INDEX_D_ECX_1,
+ COMMON_CPUID_INDEX_80000007,
+ COMMON_CPUID_INDEX_80000008,
+ /* Keep the following line at the end. */
+ COMMON_CPUID_INDEX_MAX
+};
-/* The current maximum size of the feature integer bit array. */
-#define FEATURE_INDEX_MAX 1
+struct cpuid_registers
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+};
-enum
- {
- COMMON_CPUID_INDEX_1 = 0,
- COMMON_CPUID_INDEX_7,
- COMMON_CPUID_INDEX_80000001,
- /* Keep the following line at the end. */
- COMMON_CPUID_INDEX_MAX
- };
+enum cpu_features_kind
+{
+ arch_kind_unknown = 0,
+ arch_kind_intel,
+ arch_kind_amd,
+ arch_kind_other
+};
-struct cpu_features
+struct cpu_features_basic
{
- enum cpu_features_kind
- {
- arch_kind_unknown = 0,
- arch_kind_intel,
- arch_kind_amd,
- arch_kind_other
- } kind;
+ enum cpu_features_kind kind;
int max_cpuid;
- struct cpuid_registers
- {
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- } cpuid[COMMON_CPUID_INDEX_MAX];
unsigned int family;
unsigned int model;
+ unsigned int stepping;
+};
+
+struct cpu_features
+{
+ struct cpuid_registers cpuid[COMMON_CPUID_INDEX_MAX];
+ unsigned int feature[FEATURE_INDEX_MAX];
+ struct cpu_features_basic basic;
/* The state size for XSAVEC or XSAVE. The type must be unsigned long
int so that we use
@@ -132,7 +82,6 @@ struct cpu_features
GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC_Usable
*/
unsigned int xsave_state_full_size;
- unsigned int feature[FEATURE_INDEX_MAX];
/* Data cache size for use in memory and string routines, typically
L1 size. */
unsigned long int data_cache_size;
@@ -148,112 +97,838 @@ struct cpu_features
extern const struct cpu_features *__get_cpu_features (void)
__attribute__ ((const));
-# if defined (_LIBC) && !IS_IN (nonlib)
-/* Unused for x86. */
-# define INIT_ARCH()
-# define __get_cpu_features() (&GLRO(dl_x86_cpu_features))
-# endif
-
-
/* Only used directly in cpu-features.c. */
# define CPU_FEATURES_CPU_P(ptr, name) \
((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
# define CPU_FEATURES_ARCH_P(ptr, name) \
((ptr->feature[index_arch_##name] & (bit_arch_##name)) != 0)
-/* HAS_* evaluates to true if we may use the feature at runtime. */
-# define HAS_CPU_FEATURE(name) \
- CPU_FEATURES_CPU_P (__get_cpu_features (), name)
+/* HAS_CPU_FEATURE evaluates to true if CPU supports the feature. */
+#define HAS_CPU_FEATURE(name) \
+ CPU_FEATURES_CPU_P (__get_cpu_features (), name)
+/* HAS_ARCH_FEATURE evaluates to true if we may use the feature at
+ runtime. */
+# define HAS_ARCH_FEATURE(name) \
+  CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
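The listing breaks off before the new CPU_FEATURE_USABLE definition itself.
Judging from the bit_arch_XXX_Usable bits set in cpu-features.c above, it
plausibly just forwards to the arch-feature check, roughly (an assumption,
since the definition falls outside the quoted hunk):

    /* Assumed shape of the new macro: a feature is "usable" once
       init_cpu_features has set its derived <name>_Usable arch bit.  */
    # define CPU_FEATURE_USABLE(name) \
      HAS_ARCH_FEATURE (name##_Usable)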