From 107e6a3c2212ba7a3a4ec7cae8d82d73f7c95d0b Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Mon, 29 Jun 2020 16:36:08 -0700 Subject: x86: Support usable check for all CPU features Support usable check for all CPU features with the following changes: 1. Change struct cpu_features to struct cpuid_features { struct cpuid_registers cpuid; struct cpuid_registers usable; }; struct cpu_features { struct cpu_features_basic basic; struct cpuid_features features[COMMON_CPUID_INDEX_MAX]; unsigned int preferred[PREFERRED_FEATURE_INDEX_MAX]; ... }; so that there is a usable bit for each cpuid bit. 2. After the cpuid bits have been initialized, copy the known bits to the usable bits. EAX/EBX from INDEX_1 and EAX from INDEX_7 aren't used for CPU feature detection. 3. Clear the usable bits which require OS support. 4. If the feature is supported by OS, copy its cpuid bit to its usable bit. 5. Replace HAS_CPU_FEATURE and CPU_FEATURES_CPU_P with CPU_FEATURE_USABLE and CPU_FEATURE_USABLE_P to check if a feature is usable. 6. Add DEPR_FPU_CS_DS for INDEX_7_EBX_13. 7. Unset MPX feature since it has been deprecated. The results are 1. If the feature is known and doesn't require OS support, its usable bit is copied from the cpuid bit. 2. Otherwise, its usable bit is copied from the cpuid bit only if the feature is known to be supported by OS. 3. CPU_FEATURE_USABLE/CPU_FEATURE_USABLE_P are used to check if the feature can be used. 4. HAS_CPU_FEATURE/CPU_FEATURE_CPU_P are used to check if CPU supports the feature. 
--- sysdeps/i386/fpu/fclrexcpt.c | 2 +- sysdeps/i386/fpu/fedisblxcpt.c | 2 +- sysdeps/i386/fpu/feenablxcpt.c | 2 +- sysdeps/i386/fpu/fegetenv.c | 2 +- sysdeps/i386/fpu/fegetmode.c | 2 +- sysdeps/i386/fpu/feholdexcpt.c | 2 +- sysdeps/i386/fpu/fesetenv.c | 2 +- sysdeps/i386/fpu/fesetmode.c | 2 +- sysdeps/i386/fpu/fesetround.c | 2 +- sysdeps/i386/fpu/feupdateenv.c | 2 +- sysdeps/i386/fpu/fgetexcptflg.c | 2 +- sysdeps/i386/fpu/fsetexcptflg.c | 2 +- sysdeps/i386/fpu/ftestexcept.c | 2 +- sysdeps/i386/i686/fpu/multiarch/s_cosf.c | 2 +- sysdeps/i386/i686/fpu/multiarch/s_sincosf.c | 2 +- sysdeps/i386/i686/fpu/multiarch/s_sinf.c | 2 +- sysdeps/i386/i686/multiarch/ifunc-impl-list.c | 152 +++---- sysdeps/i386/i686/multiarch/ifunc-memmove.h | 2 +- sysdeps/i386/i686/multiarch/ifunc-memset.h | 2 +- sysdeps/i386/i686/multiarch/ifunc-sse2-bsf.h | 2 +- sysdeps/i386/i686/multiarch/ifunc-sse2-ssse3.h | 4 +- sysdeps/i386/i686/multiarch/ifunc-sse2.h | 2 +- sysdeps/i386/i686/multiarch/ifunc-sse4_2.h | 2 +- sysdeps/i386/i686/multiarch/ifunc-ssse3-sse4_2.h | 4 +- sysdeps/i386/i686/multiarch/s_fma.c | 2 +- sysdeps/i386/i686/multiarch/s_fmaf.c | 2 +- sysdeps/i386/i686/multiarch/wcscpy.c | 2 +- sysdeps/i386/setfpucw.c | 2 +- sysdeps/unix/sysv/linux/x86/elision-conf.c | 2 +- sysdeps/x86/cacheinfo.c | 12 +- sysdeps/x86/cpu-features.c | 436 ++++++++++++--------- sysdeps/x86/cpu-features.h | 258 ++++++------ sysdeps/x86/cpu-tunables.c | 168 +++----- sysdeps/x86/dl-cet.c | 4 +- sysdeps/x86/tst-get-cpu-features.c | 122 ++++++ sysdeps/x86_64/Makefile | 6 +- sysdeps/x86_64/dl-machine.h | 6 +- sysdeps/x86_64/fpu/math-tests-arch.h | 6 +- sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h | 8 +- sysdeps/x86_64/fpu/multiarch/ifunc-fma.h | 4 +- sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h | 6 +- sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h | 4 +- .../x86_64/fpu/multiarch/ifunc-mathvec-avx512.h | 4 +- .../x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h | 2 +- sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h | 2 +- 
sysdeps/x86_64/fpu/multiarch/s_fma.c | 4 +- sysdeps/x86_64/fpu/multiarch/s_fmaf.c | 4 +- sysdeps/x86_64/multiarch/ifunc-avx2.h | 2 +- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 228 +++++------ sysdeps/x86_64/multiarch/ifunc-memcmp.h | 8 +- sysdeps/x86_64/multiarch/ifunc-memmove.h | 10 +- sysdeps/x86_64/multiarch/ifunc-memset.h | 10 +- sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 2 +- sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 6 +- sysdeps/x86_64/multiarch/ifunc-strcpy.h | 4 +- sysdeps/x86_64/multiarch/ifunc-wmemset.h | 4 +- sysdeps/x86_64/multiarch/sched_cpucount.c | 2 +- sysdeps/x86_64/multiarch/strchr.c | 2 +- sysdeps/x86_64/multiarch/strcmp.c | 4 +- sysdeps/x86_64/multiarch/strncmp.c | 6 +- sysdeps/x86_64/multiarch/test-multiarch.c | 24 +- sysdeps/x86_64/multiarch/wcscpy.c | 2 +- sysdeps/x86_64/multiarch/wcsnlen.c | 4 +- 63 files changed, 854 insertions(+), 732 deletions(-) (limited to 'sysdeps') diff --git a/sysdeps/i386/fpu/fclrexcpt.c b/sysdeps/i386/fpu/fclrexcpt.c index 7bf7dd0a8a..7dc357f2d6 100644 --- a/sysdeps/i386/fpu/fclrexcpt.c +++ b/sysdeps/i386/fpu/fclrexcpt.c @@ -41,7 +41,7 @@ __feclearexcept (int excepts) __asm__ ("fldenv %0" : : "m" (*&temp)); /* If the CPU supports SSE, we clear the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int xnew_exc; diff --git a/sysdeps/i386/fpu/fedisblxcpt.c b/sysdeps/i386/fpu/fedisblxcpt.c index 0e518f7f3d..5399bc1f25 100644 --- a/sysdeps/i386/fpu/fedisblxcpt.c +++ b/sysdeps/i386/fpu/fedisblxcpt.c @@ -38,7 +38,7 @@ fedisableexcept (int excepts) __asm__ ("fldcw %0" : : "m" (*&new_exc)); /* If the CPU supports SSE we set the MXCSR as well. 
*/ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int xnew_exc; diff --git a/sysdeps/i386/fpu/feenablxcpt.c b/sysdeps/i386/fpu/feenablxcpt.c index b1f70815b1..b9d7e65668 100644 --- a/sysdeps/i386/fpu/feenablxcpt.c +++ b/sysdeps/i386/fpu/feenablxcpt.c @@ -38,7 +38,7 @@ feenableexcept (int excepts) __asm__ ("fldcw %0" : : "m" (*&new_exc)); /* If the CPU supports SSE we set the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int xnew_exc; diff --git a/sysdeps/i386/fpu/fegetenv.c b/sysdeps/i386/fpu/fegetenv.c index cb6ef35ac4..637bc85454 100644 --- a/sysdeps/i386/fpu/fegetenv.c +++ b/sysdeps/i386/fpu/fegetenv.c @@ -31,7 +31,7 @@ __fegetenv (fenv_t *envp) would block all exceptions. */ __asm__ ("fldenv %0" : : "m" (*envp)); - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) __asm__ ("stmxcsr %0" : "=m" (envp->__eip)); /* Success. */ diff --git a/sysdeps/i386/fpu/fegetmode.c b/sysdeps/i386/fpu/fegetmode.c index e14768976c..e5154eab02 100644 --- a/sysdeps/i386/fpu/fegetmode.c +++ b/sysdeps/i386/fpu/fegetmode.c @@ -26,7 +26,7 @@ int fegetmode (femode_t *modep) { _FPU_GETCW (modep->__control_word); - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) __asm__ ("stmxcsr %0" : "=m" (modep->__mxcsr)); return 0; } diff --git a/sysdeps/i386/fpu/feholdexcpt.c b/sysdeps/i386/fpu/feholdexcpt.c index ad25339b4e..8d2d0ee275 100644 --- a/sysdeps/i386/fpu/feholdexcpt.c +++ b/sysdeps/i386/fpu/feholdexcpt.c @@ -30,7 +30,7 @@ __feholdexcept (fenv_t *envp) __asm__ volatile ("fnstenv %0; fnclex" : "=m" (*envp)); /* If the CPU supports SSE we set the MXCSR as well. 
*/ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int xwork; diff --git a/sysdeps/i386/fpu/fesetenv.c b/sysdeps/i386/fpu/fesetenv.c index 5ec7bd6126..cd9afeae28 100644 --- a/sysdeps/i386/fpu/fesetenv.c +++ b/sysdeps/i386/fpu/fesetenv.c @@ -79,7 +79,7 @@ __fesetenv (const fenv_t *envp) __asm__ ("fldenv %0" : : "m" (temp)); - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int mxcsr; __asm__ ("stmxcsr %0" : "=m" (mxcsr)); diff --git a/sysdeps/i386/fpu/fesetmode.c b/sysdeps/i386/fpu/fesetmode.c index 4563da0901..e3b30657b1 100644 --- a/sysdeps/i386/fpu/fesetmode.c +++ b/sysdeps/i386/fpu/fesetmode.c @@ -35,7 +35,7 @@ fesetmode (const femode_t *modep) else cw = modep->__control_word; _FPU_SETCW (cw); - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int mxcsr; __asm__ ("stmxcsr %0" : "=m" (mxcsr)); diff --git a/sysdeps/i386/fpu/fesetround.c b/sysdeps/i386/fpu/fesetround.c index 18320a646b..5c3fd34cd4 100644 --- a/sysdeps/i386/fpu/fesetround.c +++ b/sysdeps/i386/fpu/fesetround.c @@ -37,7 +37,7 @@ __fesetround (int round) __asm__ ("fldcw %0" : : "m" (*&cw)); /* If the CPU supports SSE we set the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int xcw; diff --git a/sysdeps/i386/fpu/feupdateenv.c b/sysdeps/i386/fpu/feupdateenv.c index 7387831dec..ef7132e4f0 100644 --- a/sysdeps/i386/fpu/feupdateenv.c +++ b/sysdeps/i386/fpu/feupdateenv.c @@ -32,7 +32,7 @@ __feupdateenv (const fenv_t *envp) __asm__ ("fnstsw %0" : "=m" (*&temp)); /* If the CPU supports SSE we test the MXCSR as well. 
*/ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) __asm__ ("stmxcsr %0" : "=m" (*&xtemp)); temp = (temp | xtemp) & FE_ALL_EXCEPT; diff --git a/sysdeps/i386/fpu/fgetexcptflg.c b/sysdeps/i386/fpu/fgetexcptflg.c index 82b2aa53de..2c32c83636 100644 --- a/sysdeps/i386/fpu/fgetexcptflg.c +++ b/sysdeps/i386/fpu/fgetexcptflg.c @@ -34,7 +34,7 @@ __fegetexceptflag (fexcept_t *flagp, int excepts) *flagp = temp & excepts & FE_ALL_EXCEPT; /* If the CPU supports SSE, we clear the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int sse_exc; diff --git a/sysdeps/i386/fpu/fsetexcptflg.c b/sysdeps/i386/fpu/fsetexcptflg.c index dc257b8077..02a1bd526d 100644 --- a/sysdeps/i386/fpu/fsetexcptflg.c +++ b/sysdeps/i386/fpu/fsetexcptflg.c @@ -41,7 +41,7 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts) __asm__ ("fldenv %0" : : "m" (*&temp)); /* If the CPU supports SSE, we set the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int xnew_exc; diff --git a/sysdeps/i386/fpu/ftestexcept.c b/sysdeps/i386/fpu/ftestexcept.c index 9c22689ca5..a00c44e6db 100644 --- a/sysdeps/i386/fpu/ftestexcept.c +++ b/sysdeps/i386/fpu/ftestexcept.c @@ -32,7 +32,7 @@ fetestexcept (int excepts) __asm__ ("fnstsw %0" : "=a" (temp)); /* If the CPU supports SSE we test the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) __asm__ ("stmxcsr %0" : "=m" (*&xtemp)); return (temp | xtemp) & excepts & FE_ALL_EXCEPT; diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c b/sysdeps/i386/i686/fpu/multiarch/s_cosf.c index 8da7d4bd66..9cd14a103b 100644 --- a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c +++ b/sysdeps/i386/i686/fpu/multiarch/s_cosf.c @@ -23,7 +23,7 @@ extern float __cosf_sse2 (float); extern float __cosf_ia32 (float); -libm_ifunc (__cosf, HAS_CPU_FEATURE (SSE2) ? __cosf_sse2 : __cosf_ia32); +libm_ifunc (__cosf, CPU_FEATURE_USABLE (SSE2) ? 
__cosf_sse2 : __cosf_ia32); libm_alias_float (__cos, cos); #define COSF __cosf_ia32 diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c b/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c index 06d094dced..9b479142d0 100644 --- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c +++ b/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c @@ -24,7 +24,7 @@ extern void __sincosf_sse2 (float, float *, float *); extern void __sincosf_ia32 (float, float *, float *); libm_ifunc (__sincosf, - HAS_CPU_FEATURE (SSE2) ? __sincosf_sse2 : __sincosf_ia32); + CPU_FEATURE_USABLE (SSE2) ? __sincosf_sse2 : __sincosf_ia32); libm_alias_float (__sincos, sincos); #define SINCOSF __sincosf_ia32 diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c b/sysdeps/i386/i686/fpu/multiarch/s_sinf.c index abd355ebac..84977e63e8 100644 --- a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c +++ b/sysdeps/i386/i686/fpu/multiarch/s_sinf.c @@ -23,7 +23,7 @@ extern float __sinf_sse2 (float); extern float __sinf_ia32 (float); -libm_ifunc (__sinf, HAS_CPU_FEATURE (SSE2) ? __sinf_sse2 : __sinf_ia32); +libm_ifunc (__sinf, CPU_FEATURE_USABLE (SSE2) ? __sinf_sse2 : __sinf_ia32); libm_alias_float (__sin, sin); #define SINF __sinf_ia32 #include diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c index 23774fbe8a..89afdc0326 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c @@ -38,35 +38,35 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/i386/i686/multiarch/bcopy.S. 
*/ IFUNC_IMPL (i, name, bcopy, - IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, bcopy, CPU_FEATURE_USABLE (SSSE3), __bcopy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, bcopy, CPU_FEATURE_USABLE (SSSE3), __bcopy_ssse3) - IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, bcopy, CPU_FEATURE_USABLE (SSE2), __bcopy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32)) /* Support sysdeps/i386/i686/multiarch/bzero.S. */ IFUNC_IMPL (i, name, bzero, - IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, bzero, CPU_FEATURE_USABLE (SSE2), __bzero_sse2_rep) - IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, bzero, CPU_FEATURE_USABLE (SSE2), __bzero_sse2) IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32)) /* Support sysdeps/i386/i686/multiarch/memchr.S. */ IFUNC_IMPL (i, name, memchr, - IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, memchr, CPU_FEATURE_USABLE (SSE2), __memchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, memchr, CPU_FEATURE_USABLE (SSE2), __memchr_sse2) IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_ia32)) /* Support sysdeps/i386/i686/multiarch/memcmp.S. */ IFUNC_IMPL (i, name, memcmp, - IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_2), + IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSE4_2), __memcmp_sse4_2) - IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSSE3), __memcmp_ssse3) IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32)) @@ -74,13 +74,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/i386/i686/multiarch/memmove_chk.S. 
*/ IFUNC_IMPL (i, name, __memmove_chk, IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __memmove_chk_ssse3_rep) IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __memmove_chk_ssse3) IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_CPU_FEATURE (SSE2), + CPU_FEATURE_USABLE (SSE2), __memmove_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __memmove_chk, 1, __memmove_chk_ia32)) @@ -88,19 +88,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/i386/i686/multiarch/memmove.S. */ IFUNC_IMPL (i, name, memmove, - IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSSE3), __memmove_ssse3_rep) - IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSSE3), __memmove_ssse3) - IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSE2), __memmove_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32)) /* Support sysdeps/i386/i686/multiarch/memrchr.S. */ IFUNC_IMPL (i, name, memrchr, - IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, memrchr, CPU_FEATURE_USABLE (SSE2), __memrchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, memrchr, CPU_FEATURE_USABLE (SSE2), __memrchr_sse2) IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_ia32)) @@ -108,10 +108,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/i386/i686/multiarch/memset_chk.S. 
*/ IFUNC_IMPL (i, name, __memset_chk, IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_CPU_FEATURE (SSE2), + CPU_FEATURE_USABLE (SSE2), __memset_chk_sse2_rep) IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_CPU_FEATURE (SSE2), + CPU_FEATURE_USABLE (SSE2), __memset_chk_sse2) IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_ia32)) @@ -119,102 +119,102 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/i386/i686/multiarch/memset.S. */ IFUNC_IMPL (i, name, memset, - IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, memset, CPU_FEATURE_USABLE (SSE2), __memset_sse2_rep) - IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, memset, CPU_FEATURE_USABLE (SSE2), __memset_sse2) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32)) /* Support sysdeps/i386/i686/multiarch/rawmemchr.S. */ IFUNC_IMPL (i, name, rawmemchr, - IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, rawmemchr, CPU_FEATURE_USABLE (SSE2), __rawmemchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, rawmemchr, CPU_FEATURE_USABLE (SSE2), __rawmemchr_sse2) IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_ia32)) /* Support sysdeps/i386/i686/multiarch/stpncpy.S. */ IFUNC_IMPL (i, name, stpncpy, - IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, stpncpy, CPU_FEATURE_USABLE (SSSE3), __stpncpy_ssse3) - IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, stpncpy, CPU_FEATURE_USABLE (SSE2), __stpncpy_sse2) IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_ia32)) /* Support sysdeps/i386/i686/multiarch/stpcpy.S. 
*/ IFUNC_IMPL (i, name, stpcpy, - IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, stpcpy, CPU_FEATURE_USABLE (SSSE3), __stpcpy_ssse3) - IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, stpcpy, CPU_FEATURE_USABLE (SSE2), __stpcpy_sse2) IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/strcasecmp.S. */ IFUNC_IMPL (i, name, strcasecmp, IFUNC_IMPL_ADD (array, i, strcasecmp, - HAS_CPU_FEATURE (SSE4_2), + CPU_FEATURE_USABLE (SSE4_2), __strcasecmp_sse4_2) IFUNC_IMPL_ADD (array, i, strcasecmp, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __strcasecmp_ssse3) IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ia32)) /* Support sysdeps/i386/i686/multiarch/strcasecmp_l.S. */ IFUNC_IMPL (i, name, strcasecmp_l, IFUNC_IMPL_ADD (array, i, strcasecmp_l, - HAS_CPU_FEATURE (SSE4_2), + CPU_FEATURE_USABLE (SSE4_2), __strcasecmp_l_sse4_2) IFUNC_IMPL_ADD (array, i, strcasecmp_l, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __strcasecmp_l_ssse3) IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, __strcasecmp_l_ia32)) /* Support sysdeps/i386/i686/multiarch/strcat.S. */ IFUNC_IMPL (i, name, strcat, - IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (SSSE3), __strcat_ssse3) - IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (SSE2), __strcat_sse2) IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_ia32)) /* Support sysdeps/i386/i686/multiarch/strchr.S. 
*/ IFUNC_IMPL (i, name, strchr, - IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strchr, CPU_FEATURE_USABLE (SSE2), __strchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strchr, CPU_FEATURE_USABLE (SSE2), __strchr_sse2) IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_ia32)) /* Support sysdeps/i386/i686/multiarch/strcmp.S. */ IFUNC_IMPL (i, name, strcmp, - IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2), + IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2), __strcmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSSE3), __strcmp_ssse3) IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_ia32)) /* Support sysdeps/i386/i686/multiarch/strcpy.S. */ IFUNC_IMPL (i, name, strcpy, - IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (SSSE3), __strcpy_ssse3) - IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (SSE2), __strcpy_sse2) IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/strcspn.S. */ IFUNC_IMPL (i, name, strcspn, - IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2), + IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), __strcspn_sse42) IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_ia32)) /* Support sysdeps/i386/i686/multiarch/strncase.S. 
*/ IFUNC_IMPL (i, name, strncasecmp, IFUNC_IMPL_ADD (array, i, strncasecmp, - HAS_CPU_FEATURE (SSE4_2), + CPU_FEATURE_USABLE (SSE4_2), __strncasecmp_sse4_2) IFUNC_IMPL_ADD (array, i, strncasecmp, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __strncasecmp_ssse3) IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ia32)) @@ -222,91 +222,91 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/i386/i686/multiarch/strncase_l.S. */ IFUNC_IMPL (i, name, strncasecmp_l, IFUNC_IMPL_ADD (array, i, strncasecmp_l, - HAS_CPU_FEATURE (SSE4_2), + CPU_FEATURE_USABLE (SSE4_2), __strncasecmp_l_sse4_2) IFUNC_IMPL_ADD (array, i, strncasecmp_l, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __strncasecmp_l_ssse3) IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, __strncasecmp_l_ia32)) /* Support sysdeps/i386/i686/multiarch/strncat.S. */ IFUNC_IMPL (i, name, strncat, - IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (SSSE3), __strncat_ssse3) - IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (SSE2), __strncat_sse2) IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_ia32)) /* Support sysdeps/i386/i686/multiarch/strncpy.S. */ IFUNC_IMPL (i, name, strncpy, - IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (SSSE3), __strncpy_ssse3) - IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (SSE2), __strncpy_sse2) IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_ia32)) /* Support sysdeps/i386/i686/multiarch/strnlen.S. 
*/ IFUNC_IMPL (i, name, strnlen, - IFUNC_IMPL_ADD (array, i, strnlen, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strnlen, CPU_FEATURE_USABLE (SSE2), __strnlen_sse2) IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_ia32)) /* Support sysdeps/i386/i686/multiarch/strpbrk.S. */ IFUNC_IMPL (i, name, strpbrk, - IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2), + IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), __strpbrk_sse42) IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_ia32)) /* Support sysdeps/i386/i686/multiarch/strrchr.S. */ IFUNC_IMPL (i, name, strrchr, - IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strrchr, CPU_FEATURE_USABLE (SSE2), __strrchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strrchr, CPU_FEATURE_USABLE (SSE2), __strrchr_sse2) IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_ia32)) /* Support sysdeps/i386/i686/multiarch/strspn.S. */ IFUNC_IMPL (i, name, strspn, - IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2), + IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), __strspn_sse42) IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_ia32)) /* Support sysdeps/i386/i686/multiarch/wcschr.S. */ IFUNC_IMPL (i, name, wcschr, - IFUNC_IMPL_ADD (array, i, wcschr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, wcschr, CPU_FEATURE_USABLE (SSE2), __wcschr_sse2) IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_ia32)) /* Support sysdeps/i386/i686/multiarch/wcscmp.S. */ IFUNC_IMPL (i, name, wcscmp, - IFUNC_IMPL_ADD (array, i, wcscmp, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, wcscmp, CPU_FEATURE_USABLE (SSE2), __wcscmp_sse2) IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_ia32)) /* Support sysdeps/i386/i686/multiarch/wcscpy.S. 
*/ IFUNC_IMPL (i, name, wcscpy, - IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3), __wcscpy_ssse3) IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_ia32)) /* Support sysdeps/i386/i686/multiarch/wcslen.S. */ IFUNC_IMPL (i, name, wcslen, - IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, wcslen, CPU_FEATURE_USABLE (SSE2), __wcslen_sse2) IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32)) /* Support sysdeps/i386/i686/multiarch/wcsrchr.S. */ IFUNC_IMPL (i, name, wcsrchr, - IFUNC_IMPL_ADD (array, i, wcsrchr, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, wcsrchr, CPU_FEATURE_USABLE (SSE2), __wcsrchr_sse2) IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_ia32)) /* Support sysdeps/i386/i686/multiarch/wmemcmp.S. */ IFUNC_IMPL (i, name, wmemcmp, - IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_2), + IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSE4_2), __wmemcmp_sse4_2) - IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSSE3), __wmemcmp_ssse3) IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_ia32)) @@ -314,64 +314,64 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */ IFUNC_IMPL (i, name, __memcpy_chk, IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __memcpy_chk_ssse3_rep) IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __memcpy_chk_ssse3) IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_CPU_FEATURE (SSE2), + CPU_FEATURE_USABLE (SSE2), __memcpy_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, __memcpy_chk_ia32)) /* Support sysdeps/i386/i686/multiarch/memcpy.S. 
*/ IFUNC_IMPL (i, name, memcpy, - IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3), __memcpy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3), __memcpy_ssse3) - IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSE2), __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */ IFUNC_IMPL (i, name, __mempcpy_chk, IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __mempcpy_chk_ssse3_rep) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_CPU_FEATURE (SSSE3), + CPU_FEATURE_USABLE (SSSE3), __mempcpy_chk_ssse3) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_CPU_FEATURE (SSE2), + CPU_FEATURE_USABLE (SSE2), __mempcpy_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1, __mempcpy_chk_ia32)) /* Support sysdeps/i386/i686/multiarch/mempcpy.S. */ IFUNC_IMPL (i, name, mempcpy, - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3), __mempcpy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3), __mempcpy_ssse3) - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSE2), __mempcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/strlen.S. 
*/ IFUNC_IMPL (i, name, strlen, - IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strlen, CPU_FEATURE_USABLE (SSE2), __strlen_sse2_bsf) - IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2), + IFUNC_IMPL_ADD (array, i, strlen, CPU_FEATURE_USABLE (SSE2), __strlen_sse2) IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_ia32)) /* Support sysdeps/i386/i686/multiarch/strncmp.S. */ IFUNC_IMPL (i, name, strncmp, - IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2), + IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2), __strncmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3), + IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSSE3), __strncmp_ssse3) IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_ia32)) #endif diff --git a/sysdeps/i386/i686/multiarch/ifunc-memmove.h b/sysdeps/i386/i686/multiarch/ifunc-memmove.h index a590048d1d..c05cb6dd4f 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-memmove.h +++ b/sysdeps/i386/i686/multiarch/ifunc-memmove.h @@ -33,7 +33,7 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); - if (CPU_FEATURES_CPU_P (cpu_features, SSSE3)) + if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) { if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Rep_String)) return OPTIMIZE (ssse3_rep); diff --git a/sysdeps/i386/i686/multiarch/ifunc-memset.h b/sysdeps/i386/i686/multiarch/ifunc-memset.h index 14199c30fd..bead331a9d 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-memset.h +++ b/sysdeps/i386/i686/multiarch/ifunc-memset.h @@ -28,7 +28,7 @@ IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (CPU_FEATURES_CPU_P (cpu_features, SSE2)) + if (CPU_FEATURE_USABLE_P (cpu_features, SSE2)) { if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Rep_String)) return OPTIMIZE (sse2_rep); diff --git a/sysdeps/i386/i686/multiarch/ifunc-sse2-bsf.h b/sysdeps/i386/i686/multiarch/ifunc-sse2-bsf.h index 
8b6fa6447d..0d302a3dcd 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-sse2-bsf.h +++ b/sysdeps/i386/i686/multiarch/ifunc-sse2-bsf.h @@ -28,7 +28,7 @@ IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (CPU_FEATURES_CPU_P (cpu_features, SSE2)) + if (CPU_FEATURE_USABLE_P (cpu_features, SSE2)) { if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF)) return OPTIMIZE (sse2); diff --git a/sysdeps/i386/i686/multiarch/ifunc-sse2-ssse3.h b/sysdeps/i386/i686/multiarch/ifunc-sse2-ssse3.h index 77b615e40d..c10ca4a9df 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-sse2-ssse3.h +++ b/sysdeps/i386/i686/multiarch/ifunc-sse2-ssse3.h @@ -29,11 +29,11 @@ IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (CPU_FEATURES_CPU_P (cpu_features, SSE2) + if (CPU_FEATURE_USABLE_P (cpu_features, SSE2) && CPU_FEATURES_ARCH_P (cpu_features, Fast_Rep_String)) return OPTIMIZE (sse2); - if (CPU_FEATURES_CPU_P (cpu_features, SSSE3)) + if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) return OPTIMIZE (ssse3); return OPTIMIZE (ia32); diff --git a/sysdeps/i386/i686/multiarch/ifunc-sse2.h b/sysdeps/i386/i686/multiarch/ifunc-sse2.h index c0dd85e2bb..58794a2806 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-sse2.h +++ b/sysdeps/i386/i686/multiarch/ifunc-sse2.h @@ -27,7 +27,7 @@ IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (CPU_FEATURES_CPU_P (cpu_features, SSE2)) + if (CPU_FEATURE_USABLE_P (cpu_features, SSE2)) return OPTIMIZE (sse2); return OPTIMIZE (ia32); diff --git a/sysdeps/i386/i686/multiarch/ifunc-sse4_2.h b/sysdeps/i386/i686/multiarch/ifunc-sse4_2.h index b9b06d5996..014be1d5f7 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-sse4_2.h +++ b/sysdeps/i386/i686/multiarch/ifunc-sse4_2.h @@ -27,7 +27,7 @@ IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2)) + if (CPU_FEATURE_USABLE_P 
(cpu_features, SSE4_2)) return OPTIMIZE (sse42); return OPTIMIZE (ia32); diff --git a/sysdeps/i386/i686/multiarch/ifunc-ssse3-sse4_2.h b/sysdeps/i386/i686/multiarch/ifunc-ssse3-sse4_2.h index b4074f3f8f..39bfea986d 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-ssse3-sse4_2.h +++ b/sysdeps/i386/i686/multiarch/ifunc-ssse3-sse4_2.h @@ -29,10 +29,10 @@ IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2)) + if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) return OPTIMIZE (sse4_2); - if (CPU_FEATURES_CPU_P (cpu_features, SSSE3)) + if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) return OPTIMIZE (ssse3); return OPTIMIZE (ia32); diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c index 90f649f52a..0729853e21 100644 --- a/sysdeps/i386/i686/multiarch/s_fma.c +++ b/sysdeps/i386/i686/multiarch/s_fma.c @@ -27,7 +27,7 @@ extern double __fma_ia32 (double x, double y, double z) attribute_hidden; extern double __fma_fma (double x, double y, double z) attribute_hidden; libm_ifunc (__fma, - HAS_ARCH_FEATURE (FMA_Usable) ? __fma_fma : __fma_ia32); + CPU_FEATURE_USABLE (FMA) ? __fma_fma : __fma_ia32); libm_alias_double (__fma, fma) #define __fma __fma_ia32 diff --git a/sysdeps/i386/i686/multiarch/s_fmaf.c b/sysdeps/i386/i686/multiarch/s_fmaf.c index 27757eca9d..20f965c342 100644 --- a/sysdeps/i386/i686/multiarch/s_fmaf.c +++ b/sysdeps/i386/i686/multiarch/s_fmaf.c @@ -27,7 +27,7 @@ extern float __fmaf_ia32 (float x, float y, float z) attribute_hidden; extern float __fmaf_fma (float x, float y, float z) attribute_hidden; libm_ifunc (__fmaf, - HAS_ARCH_FEATURE (FMA_Usable) ? __fmaf_fma : __fmaf_ia32); + CPU_FEATURE_USABLE (FMA) ? 
__fmaf_fma : __fmaf_ia32); libm_alias_float (__fma, fma) #define __fmaf __fmaf_ia32 diff --git a/sysdeps/i386/i686/multiarch/wcscpy.c b/sysdeps/i386/i686/multiarch/wcscpy.c index 51347d70f5..f0038bc4a2 100644 --- a/sysdeps/i386/i686/multiarch/wcscpy.c +++ b/sysdeps/i386/i686/multiarch/wcscpy.c @@ -34,7 +34,7 @@ IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (CPU_FEATURES_CPU_P (cpu_features, SSSE3)) + if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) return OPTIMIZE (ssse3); return OPTIMIZE (ia32); diff --git a/sysdeps/i386/setfpucw.c b/sysdeps/i386/setfpucw.c index 68f5b2e86c..c640a72cc2 100644 --- a/sysdeps/i386/setfpucw.c +++ b/sysdeps/i386/setfpucw.c @@ -39,7 +39,7 @@ __setfpucw (fpu_control_t set) __asm__ ("fldcw %0" : : "m" (*&cw)); /* If the CPU supports SSE, we set the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) + if (CPU_FEATURE_USABLE (SSE)) { unsigned int xnew_exc; diff --git a/sysdeps/unix/sysv/linux/x86/elision-conf.c b/sysdeps/unix/sysv/linux/x86/elision-conf.c index b38b4250e8..ecdb0378e3 100644 --- a/sysdeps/unix/sysv/linux/x86/elision-conf.c +++ b/sysdeps/unix/sysv/linux/x86/elision-conf.c @@ -63,7 +63,7 @@ do_set_elision_enable (int32_t elision_enable) if __libc_enable_secure isn't enabled since elision_enable will be set according to the default, which is disabled. */ if (elision_enable == 1) - __pthread_force_elision = HAS_CPU_FEATURE (RTM) ? 1 : 0; + __pthread_force_elision = CPU_FEATURE_USABLE (RTM) ? 1 : 0; } /* The pthread->elision_enable tunable is 0 or 1 indicating that elision diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c index 5366a37ea0..217c21c34f 100644 --- a/sysdeps/x86/cacheinfo.c +++ b/sysdeps/x86/cacheinfo.c @@ -583,7 +583,7 @@ get_common_cache_info (long int *shared_ptr, unsigned int *threads_ptr, /* A value of 0 for the HTT bit indicates there is only a single logical processor. 
*/ - if (HAS_CPU_FEATURE (HTT)) + if (CPU_FEATURE_USABLE (HTT)) { /* Figure out the number of logical threads that share the highest cache level. */ @@ -732,7 +732,7 @@ intel_bug_no_cache_info: /* Assume that all logical threads share the highest cache level. */ threads - = ((cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx + = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx >> 16) & 0xff); } @@ -867,14 +867,14 @@ init_cacheinfo (void) unsigned int minimum_rep_movsb_threshold; /* NB: The default REP MOVSB threshold is 2048 * (VEC_SIZE / 16). */ unsigned int rep_movsb_threshold; - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) - && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) + && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512)) { rep_movsb_threshold = 2048 * (64 / 16); minimum_rep_movsb_threshold = 64 * 8; } - else if (CPU_FEATURES_ARCH_P (cpu_features, - AVX_Fast_Unaligned_Load)) + else if (CPU_FEATURE_PREFERRED_P (cpu_features, + AVX_Fast_Unaligned_Load)) { rep_movsb_threshold = 2048 * (32 / 16); minimum_rep_movsb_threshold = 32 * 8; diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index c7673a2eb9..4c24ba7c31 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -42,73 +42,109 @@ extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *) #endif static void -get_extended_indices (struct cpu_features *cpu_features) +update_usable (struct cpu_features *cpu_features) { - unsigned int eax, ebx, ecx, edx; - __cpuid (0x80000000, eax, ebx, ecx, edx); - if (eax >= 0x80000001) - __cpuid (0x80000001, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); - if (eax >= 0x80000007) - __cpuid (0x80000007, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].eax, - 
cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].edx); - if (eax >= 0x80000008) - __cpuid (0x80000008, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].eax, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].edx); -} - -static void -get_common_indices (struct cpu_features *cpu_features, - unsigned int *family, unsigned int *model, - unsigned int *extended_model, unsigned int *stepping) -{ - if (family) - { - unsigned int eax; - __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); - cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax; - *family = (eax >> 8) & 0x0f; - *model = (eax >> 4) & 0x0f; - *extended_model = (eax >> 12) & 0xf0; - *stepping = eax & 0x0f; - if (*family == 0x0f) - { - *family += (eax >> 20) & 0xff; - *model += *extended_model; - } - } - - if (cpu_features->basic.max_cpuid >= 7) - { - __cpuid_count (7, 0, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); - __cpuid_count (7, 1, - cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].eax, - cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].edx); - } - - if (cpu_features->basic.max_cpuid >= 0xd) - __cpuid_count (0xd, 1, - cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].eax, - cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].edx); + /* Before COMMON_CPUID_INDEX_80000001, copy the cpuid array elements to + the usable 
array. */ + unsigned int i; + for (i = 0; i < COMMON_CPUID_INDEX_80000001; i++) + cpu_features->features[i].usable = cpu_features->features[i].cpuid; + + /* Before COMMON_CPUID_INDEX_80000001, clear the unknown usable bits + and the always zero bits. */ + CPU_FEATURE_UNSET (cpu_features, INDEX_1_ECX_16); + CPU_FEATURE_UNSET (cpu_features, INDEX_1_ECX_31); + CPU_FEATURE_UNSET (cpu_features, INDEX_1_EDX_10); + CPU_FEATURE_UNSET (cpu_features, INDEX_1_EDX_20); + CPU_FEATURE_UNSET (cpu_features, INDEX_1_EDX_30); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EBX_6); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EBX_22); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_13); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_15); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_16); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_23); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_24); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_26); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_0); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_1); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_5); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_6); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_7); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_9); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_11); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_12); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_13); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_17); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_19); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_21); + CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_23); + + /* EAX/EBX from COMMON_CPUID_INDEX_1 and EAX from COMMON_CPUID_INDEX_7 + aren't used for CPU feature detection. */ + cpu_features->features[COMMON_CPUID_INDEX_1].usable.eax = 0; + cpu_features->features[COMMON_CPUID_INDEX_1].usable.ebx = 0; + cpu_features->features[COMMON_CPUID_INDEX_7].usable.eax = 0; + + /* Starting from COMMON_CPUID_INDEX_80000001, copy the cpuid bits to + usable bits. 
*/ + CPU_FEATURE_SET_USABLE (cpu_features, LAHF64_SAHF64); + CPU_FEATURE_SET_USABLE (cpu_features, SVM); + CPU_FEATURE_SET_USABLE (cpu_features, LZCNT); + CPU_FEATURE_SET_USABLE (cpu_features, SSE4A); + CPU_FEATURE_SET_USABLE (cpu_features, PREFETCHW); + CPU_FEATURE_SET_USABLE (cpu_features, XOP); + CPU_FEATURE_SET_USABLE (cpu_features, LWP); + CPU_FEATURE_SET_USABLE (cpu_features, FMA4); + CPU_FEATURE_SET_USABLE (cpu_features, TBM); + CPU_FEATURE_SET_USABLE (cpu_features, SYSCALL_SYSRET); + CPU_FEATURE_SET_USABLE (cpu_features, NX); + CPU_FEATURE_SET_USABLE (cpu_features, PAGE1GB); + CPU_FEATURE_SET_USABLE (cpu_features, RDTSCP); + CPU_FEATURE_SET_USABLE (cpu_features, LM); + CPU_FEATURE_SET_USABLE (cpu_features, XSAVEOPT); + CPU_FEATURE_SET_USABLE (cpu_features, XSAVEC); + CPU_FEATURE_SET_USABLE (cpu_features, XGETBV_ECX_1); + CPU_FEATURE_SET_USABLE (cpu_features, XSAVES); + CPU_FEATURE_SET_USABLE (cpu_features, XFD); + CPU_FEATURE_SET_USABLE (cpu_features, INVARIANT_TSC); + CPU_FEATURE_SET_USABLE (cpu_features, WBNOINVD); + CPU_FEATURE_SET_USABLE (cpu_features, AVX512_BF16); + + /* MPX has been deprecated. */ + CPU_FEATURE_UNSET (cpu_features, MPX); + + /* Clear the usable bits which require OS support. 
*/ + CPU_FEATURE_UNSET (cpu_features, FMA); + CPU_FEATURE_UNSET (cpu_features, AVX); + CPU_FEATURE_UNSET (cpu_features, F16C); + CPU_FEATURE_UNSET (cpu_features, AVX2); + CPU_FEATURE_UNSET (cpu_features, AVX512F); + CPU_FEATURE_UNSET (cpu_features, AVX512DQ); + CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA); + CPU_FEATURE_UNSET (cpu_features, AVX512PF); + CPU_FEATURE_UNSET (cpu_features, AVX512ER); + CPU_FEATURE_UNSET (cpu_features, AVX512CD); + CPU_FEATURE_UNSET (cpu_features, AVX512BW); + CPU_FEATURE_UNSET (cpu_features, AVX512VL); + CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI); + CPU_FEATURE_UNSET (cpu_features, PKU); + CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2); + CPU_FEATURE_UNSET (cpu_features, VAES); + CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ); + CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI); + CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG); + CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ); + CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW); + CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS); + CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT); + CPU_FEATURE_UNSET (cpu_features, AMX_BF16); + CPU_FEATURE_UNSET (cpu_features, AMX_TILE); + CPU_FEATURE_UNSET (cpu_features, AMX_INT8); + CPU_FEATURE_UNSET (cpu_features, XOP); + CPU_FEATURE_UNSET (cpu_features, FMA4); + CPU_FEATURE_UNSET (cpu_features, XSAVEC); + CPU_FEATURE_UNSET (cpu_features, XFD); + CPU_FEATURE_UNSET (cpu_features, AVX512_BF16); /* Can we call xgetbv? */ if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) @@ -123,40 +159,28 @@ get_common_indices (struct cpu_features *cpu_features, /* Determine if AVX is usable. */ if (CPU_FEATURES_CPU_P (cpu_features, AVX)) { - cpu_features->usable[index_arch_AVX_Usable] - |= bit_arch_AVX_Usable; + CPU_FEATURE_SET (cpu_features, AVX); /* The following features depend on AVX being usable. */ /* Determine if AVX2 is usable. 
*/ if (CPU_FEATURES_CPU_P (cpu_features, AVX2)) - { - cpu_features->usable[index_arch_AVX2_Usable] - |= bit_arch_AVX2_Usable; - - /* Unaligned load with 256-bit AVX registers are faster on - Intel/AMD processors with AVX2. */ - cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] - |= bit_arch_AVX_Fast_Unaligned_Load; - } + { + CPU_FEATURE_SET (cpu_features, AVX2); + + /* Unaligned load with 256-bit AVX registers are faster + on Intel/AMD processors with AVX2. */ + cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] + |= bit_arch_AVX_Fast_Unaligned_Load; + } /* Determine if FMA is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, FMA)) - cpu_features->usable[index_arch_FMA_Usable] - |= bit_arch_FMA_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, FMA); /* Determine if VAES is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, VAES)) - cpu_features->usable[index_arch_VAES_Usable] - |= bit_arch_VAES_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, VAES); /* Determine if VPCLMULQDQ is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, VPCLMULQDQ)) - cpu_features->usable[index_arch_VPCLMULQDQ_Usable] - |= bit_arch_VPCLMULQDQ_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, VPCLMULQDQ); /* Determine if XOP is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, XOP)) - cpu_features->usable[index_arch_XOP_Usable] - |= bit_arch_XOP_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, XOP); /* Determine if F16C is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, F16C)) - cpu_features->usable[index_arch_F16C_Usable] - |= bit_arch_F16C_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, F16C); } /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and @@ -168,73 +192,41 @@ get_common_indices (struct cpu_features *cpu_features, /* Determine if AVX512F is usable. */ if (CPU_FEATURES_CPU_P (cpu_features, AVX512F)) { - cpu_features->usable[index_arch_AVX512F_Usable] - |= bit_arch_AVX512F_Usable; + CPU_FEATURE_SET (cpu_features, AVX512F); /* Determine if AVX512CD is usable. 
*/ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512CD)) - cpu_features->usable[index_arch_AVX512CD_Usable] - |= bit_arch_AVX512CD_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512CD); /* Determine if AVX512ER is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER)) - cpu_features->usable[index_arch_AVX512ER_Usable] - |= bit_arch_AVX512ER_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512ER); /* Determine if AVX512PF is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF)) - cpu_features->usable[index_arch_AVX512PF_Usable] - |= bit_arch_AVX512PF_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512PF); /* Determine if AVX512VL is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512VL)) - cpu_features->usable[index_arch_AVX512VL_Usable] - |= bit_arch_AVX512VL_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512VL); /* Determine if AVX512DQ is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ)) - cpu_features->usable[index_arch_AVX512DQ_Usable] - |= bit_arch_AVX512DQ_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512DQ); /* Determine if AVX512BW is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW)) - cpu_features->usable[index_arch_AVX512BW_Usable] - |= bit_arch_AVX512BW_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512BW); /* Determine if AVX512_4FMAPS is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4FMAPS)) - cpu_features->usable[index_arch_AVX512_4FMAPS_Usable] - |= bit_arch_AVX512_4FMAPS_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512_4FMAPS); /* Determine if AVX512_4VNNIW is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4VNNIW)) - cpu_features->usable[index_arch_AVX512_4VNNIW_Usable] - |= bit_arch_AVX512_4VNNIW_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512_4VNNIW); /* Determine if AVX512_BITALG is usable. 
*/ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BITALG)) - cpu_features->usable[index_arch_AVX512_BITALG_Usable] - |= bit_arch_AVX512_BITALG_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512_BITALG); /* Determine if AVX512_IFMA is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512_IFMA)) - cpu_features->usable[index_arch_AVX512_IFMA_Usable] - |= bit_arch_AVX512_IFMA_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512_IFMA); /* Determine if AVX512_VBMI is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI)) - cpu_features->usable[index_arch_AVX512_VBMI_Usable] - |= bit_arch_AVX512_VBMI_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VBMI); /* Determine if AVX512_VBMI2 is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI2)) - cpu_features->usable[index_arch_AVX512_VBMI2_Usable] - |= bit_arch_AVX512_VBMI2_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VBMI2); /* Determine if is AVX512_VNNI usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VNNI)) - cpu_features->usable[index_arch_AVX512_VNNI_Usable] - |= bit_arch_AVX512_VNNI_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VNNI); /* Determine if AVX512_VPOPCNTDQ is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ)) - cpu_features->usable[index_arch_AVX512_VPOPCNTDQ_Usable] - |= bit_arch_AVX512_VPOPCNTDQ_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, + AVX512_VPOPCNTDQ); /* Determine if AVX512_VP2INTERSECT is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, - AVX512_VP2INTERSECT)) - cpu_features->usable[index_arch_AVX512_VP2INTERSECT_Usable] - |= bit_arch_AVX512_VP2INTERSECT_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, + AVX512_VP2INTERSECT); /* Determine if AVX512_BF16 is usable. 
*/ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BF16)) - cpu_features->usable[index_arch_AVX512_BF16_Usable] - |= bit_arch_AVX512_BF16_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AVX512_BF16); } } } @@ -244,19 +236,17 @@ get_common_indices (struct cpu_features *cpu_features, == (bit_XTILECFG_state | bit_XTILEDATA_state)) { /* Determine if AMX_BF16 is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AMX_BF16)) - cpu_features->usable[index_arch_AMX_BF16_Usable] - |= bit_arch_AMX_BF16_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AMX_BF16); /* Determine if AMX_TILE is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AMX_TILE)) - cpu_features->usable[index_arch_AMX_TILE_Usable] - |= bit_arch_AMX_TILE_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AMX_TILE); /* Determine if AMX_INT8 is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AMX_INT8)) - cpu_features->usable[index_arch_AMX_INT8_Usable] - |= bit_arch_AMX_INT8_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, AMX_INT8); } + + /* XFD is usable only when OSXSAVE is enabled. */ + CPU_FEATURE_SET_USABLE (cpu_features, XFD); + /* For _dl_runtime_resolve, set xsave_state_size to xsave area size + integer register save size and align it to 64 bytes. */ if (cpu_features->basic.max_cpuid >= 0xd) @@ -318,8 +308,7 @@ get_common_indices (struct cpu_features *cpu_features, { cpu_features->xsave_state_size = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); - cpu_features->usable[index_arch_XSAVEC_Usable] - |= bit_arch_XSAVEC_Usable; + CPU_FEATURE_SET (cpu_features, XSAVEC); } } } @@ -328,8 +317,79 @@ get_common_indices (struct cpu_features *cpu_features, /* Determine if PKU is usable. 
*/ if (CPU_FEATURES_CPU_P (cpu_features, OSPKE)) - cpu_features->usable[index_arch_PKU_Usable] - |= bit_arch_PKU_Usable; + CPU_FEATURE_SET (cpu_features, PKU); +} + +static void +get_extended_indices (struct cpu_features *cpu_features) +{ + unsigned int eax, ebx, ecx, edx; + __cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax >= 0x80000001) + __cpuid (0x80000001, + cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.eax, + cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ebx, + cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ecx, + cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.edx); + if (eax >= 0x80000007) + __cpuid (0x80000007, + cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.eax, + cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ebx, + cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ecx, + cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.edx); + if (eax >= 0x80000008) + __cpuid (0x80000008, + cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.eax, + cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ebx, + cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ecx, + cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.edx); +} + +static void +get_common_indices (struct cpu_features *cpu_features, + unsigned int *family, unsigned int *model, + unsigned int *extended_model, unsigned int *stepping) +{ + if (family) + { + unsigned int eax; + __cpuid (1, eax, + cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx, + cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx, + cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.edx); + cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.eax = eax; + *family = (eax >> 8) & 0x0f; + *model = (eax >> 4) & 0x0f; + *extended_model = (eax >> 12) & 0xf0; + *stepping = eax & 0x0f; + if (*family == 0x0f) + { + *family += (eax >> 20) & 0xff; + *model += *extended_model; + } + } + + if (cpu_features->basic.max_cpuid >= 7) + { + 
__cpuid_count (7, 0, + cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.eax, + cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ebx, + cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ecx, + cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.edx); + __cpuid_count (7, 1, + cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.eax, + cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ebx, + cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ecx, + cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.edx); + } + + if (cpu_features->basic.max_cpuid >= 0xd) + __cpuid_count (0xd, 1, + cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.eax, + cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ebx, + cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ecx, + cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.edx); + } _Static_assert (((index_arch_Fast_Unaligned_Load @@ -353,8 +413,6 @@ init_cpu_features (struct cpu_features *cpu_features) unsigned int stepping = 0; enum cpu_features_kind kind; - cpu_features->usable_p = cpu_features->usable; - #if !HAS_CPUID if (__get_cpuid_max (0, 0) == 0) { @@ -377,6 +435,8 @@ init_cpu_features (struct cpu_features *cpu_features) get_extended_indices (cpu_features); + update_usable (cpu_features); + if (family == 0x06) { model += extended_model; @@ -473,7 +533,7 @@ init_cpu_features (struct cpu_features *cpu_features) with stepping >= 4) to avoid TSX on kernels that weren't updated with the latest microcode package (which disables broken feature by default). 
*/ - cpu_features->cpuid[index_cpu_RTM].reg_RTM &= ~bit_cpu_RTM; + CPU_FEATURE_UNSET (cpu_features, RTM); break; } } @@ -502,15 +562,15 @@ init_cpu_features (struct cpu_features *cpu_features) get_extended_indices (cpu_features); - ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; + update_usable (cpu_features); - if (HAS_ARCH_FEATURE (AVX_Usable)) + ecx = cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx; + + if (CPU_FEATURE_USABLE_P (cpu_features, AVX)) { /* Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and FMA4 requires AVX, determine if FMA4 is usable here. */ - if (CPU_FEATURES_CPU_P (cpu_features, FMA4)) - cpu_features->usable[index_arch_FMA4_Usable] - |= bit_arch_FMA4_Usable; + CPU_FEATURE_SET_USABLE (cpu_features, FMA4); } if (family == 0x15) @@ -541,13 +601,15 @@ init_cpu_features (struct cpu_features *cpu_features) get_extended_indices (cpu_features); + update_usable (cpu_features); + model += extended_model; if (family == 0x6) { if (model == 0xf || model == 0x19) { - cpu_features->usable[index_arch_AVX_Usable] - &= ~(bit_arch_AVX_Usable | bit_arch_AVX2_Usable); + CPU_FEATURE_UNSET (cpu_features, AVX); + CPU_FEATURE_UNSET (cpu_features, AVX2); cpu_features->preferred[index_arch_Slow_SSE4_2] |= bit_arch_Slow_SSE4_2; @@ -560,8 +622,8 @@ init_cpu_features (struct cpu_features *cpu_features) { if (model == 0x1b) { - cpu_features->usable[index_arch_AVX_Usable] - &= ~(bit_arch_AVX_Usable | bit_arch_AVX2_Usable); + CPU_FEATURE_UNSET (cpu_features, AVX); + CPU_FEATURE_UNSET (cpu_features, AVX2); cpu_features->preferred[index_arch_Slow_SSE4_2] |= bit_arch_Slow_SSE4_2; @@ -571,8 +633,8 @@ init_cpu_features (struct cpu_features *cpu_features) } else if (model == 0x3b) { - cpu_features->usable[index_arch_AVX_Usable] - &= ~(bit_arch_AVX_Usable | bit_arch_AVX2_Usable); + CPU_FEATURE_UNSET (cpu_features, AVX); + CPU_FEATURE_UNSET (cpu_features, AVX2); cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] &= ~bit_arch_AVX_Fast_Unaligned_Load; @@ 
-583,6 +645,7 @@ init_cpu_features (struct cpu_features *cpu_features) { kind = arch_kind_other; get_common_indices (cpu_features, NULL, NULL, NULL, NULL); + update_usable (cpu_features); } /* Support i586 if CX8 is available. */ @@ -629,31 +692,30 @@ no_cpuid: { const char *platform = NULL; - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) - && CPU_FEATURES_CPU_P (cpu_features, AVX512CD)) + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD)) { - if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER)) + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER)) { - if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF)) + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF)) platform = "xeon_phi"; } else { - if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW) - && CPU_FEATURES_CPU_P (cpu_features, AVX512DQ) - && CPU_FEATURES_CPU_P (cpu_features, AVX512VL)) + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) + && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ) + && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)) GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1; } } if (platform == NULL - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) - && CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) - && CPU_FEATURES_CPU_P (cpu_features, BMI1) - && CPU_FEATURES_CPU_P (cpu_features, BMI2) - && CPU_FEATURES_CPU_P (cpu_features, LZCNT) - && CPU_FEATURES_CPU_P (cpu_features, MOVBE) - && CPU_FEATURES_CPU_P (cpu_features, POPCNT)) + && CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURE_USABLE_P (cpu_features, FMA) + && CPU_FEATURE_USABLE_P (cpu_