 sysdeps/x86_64/memmove.S                                 |  32
 sysdeps/x86_64/multiarch/ifunc-impl-list.c               | 467
 sysdeps/x86_64/multiarch/ifunc-memmove.h                 |  47
 sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S    |  15
 sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S  |   3
 sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S |  11
 sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S   |  16
 sysdeps/x86_64/multiarch/memmove-shlib-compat.h          |  26
 sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S   |  36
 sysdeps/x86_64/multiarch/memmove-ssse3.S                 |   4
 sysdeps/x86_64/multiarch/rtld-memmove.S                  |  18
 11 files changed, 403 insertions(+), 272 deletions(-)
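The ifunc-impl-list.c hunks below replace plain IFUNC_IMPL_ADD entries with X86_IFUNC_IMPL_ADD_V{2,3,4} wrappers so each list entry is tied to the x86-64 ISA level it requires. A minimal sketch of the idea, assuming a MINIMUM_X86_ISA_LEVEL-style macro supplied by the build and simplified macro bodies (illustrative only, not the exact glibc definitions): an entry whose ISA level is below the configured minimum can never be selected at run time, so its wrapper expands to nothing and the implementation drops out of the list.

/* Illustrative sketch: keep a list entry only if its ISA level is still
   reachable given the minimum level the library is built for.
   MINIMUM_X86_ISA_LEVEL is assumed to come from the build
   (1 = baseline x86-64, 2 = x86-64-v2, 3 = x86-64-v3, 4 = x86-64-v4).  */
#if MINIMUM_X86_ISA_LEVEL <= 2
# define X86_IFUNC_IMPL_ADD_V2(array, i, func, test, impl) \
    IFUNC_IMPL_ADD (array, i, func, test, impl)
#else
# define X86_IFUNC_IMPL_ADD_V2(array, i, func, test, impl) /* compiled out */
#endif
#if MINIMUM_X86_ISA_LEVEL <= 3
# define X86_IFUNC_IMPL_ADD_V3(array, i, func, test, impl) \
    IFUNC_IMPL_ADD (array, i, func, test, impl)
#else
# define X86_IFUNC_IMPL_ADD_V3(array, i, func, test, impl)
#endif
#if MINIMUM_X86_ISA_LEVEL <= 4
# define X86_IFUNC_IMPL_ADD_V4(array, i, func, test, impl) \
    IFUNC_IMPL_ADD (array, i, func, test, impl)
#else
# define X86_IFUNC_IMPL_ADD_V4(array, i, func, test, impl)
#endif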
diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S
index 78e8d974d9..19527690eb 100644
--- a/sysdeps/x86_64/memmove.S
+++ b/sysdeps/x86_64/memmove.S
@@ -16,17 +16,6 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>. */
 
-#include <sysdep.h>
-
-#define VEC_SIZE 16
-#define VEC(i) xmm##i
-#define PREFETCHNT prefetchnta
-#define VMOVNT movntdq
-/* Use movups and movaps for smaller code sizes. */
-#define VMOVU movups
-#define VMOVA movaps
-#define MOV_SIZE 3
-#define SECTION(p) p
 
 #ifdef USE_MULTIARCH
 # if !IS_IN (libc)
@@ -42,12 +31,18 @@
 #if !defined USE_MULTIARCH || !IS_IN (libc)
 # define MEMPCPY_SYMBOL(p,s) __mempcpy
 #endif
-#ifndef MEMMOVE_SYMBOL
-# define MEMMOVE_CHK_SYMBOL(p,s) p
-# define MEMMOVE_SYMBOL(p,s) memmove
-#endif
 
-#include "multiarch/memmove-vec-unaligned-erms.S"
+#define MEMMOVE_CHK_SYMBOL(p,s) p
+#define MEMMOVE_SYMBOL(p,s) memmove
+
+
+#define DEFAULT_IMPL_V1 "multiarch/memmove-sse2-unaligned-erms.S"
+#define DEFAULT_IMPL_V3 "multiarch/memmove-avx-unaligned-erms.S"
+#define DEFAULT_IMPL_V4 "multiarch/memmove-evex-unaligned-erms.S"
+
+#include "isa-default-impl.h"
+
+weak_alias (__mempcpy, mempcpy)
 
 #ifndef USE_MULTIARCH
 libc_hidden_builtin_def (memmove)
@@ -59,13 +54,10 @@ libc_hidden_def (__mempcpy)
 weak_alias (__mempcpy, mempcpy)
 libc_hidden_builtin_def (mempcpy)
 
+
 # if defined SHARED && IS_IN (libc)
 # undef memcpy
 # include <shlib-compat.h>
 versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
-
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
-compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
-# endif
 # endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index b84acfead2..7858aa316f 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -101,84 +101,96 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 #ifdef SHARED
   /* Support sysdeps/x86_64/multiarch/memmove_chk.c. */
   IFUNC_IMPL (i, name, __memmove_chk,
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          CPU_FEATURE_USABLE (AVX512F),
-          __memmove_chk_avx512_no_vzeroupper)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memmove_chk_avx512_unaligned)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memmove_chk_avx512_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          CPU_FEATURE_USABLE (AVX),
-          __memmove_chk_avx_unaligned)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          CPU_FEATURE_USABLE (AVX),
-          __memmove_chk_avx_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __memmove_chk_avx_unaligned_rtm)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __memmove_chk_avx_unaligned_erms_rtm)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memmove_chk_evex_unaligned)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memmove_chk_evex_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-          CPU_FEATURE_USABLE (SSSE3),
-          __memmove_chk_ssse3)
       IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
-          __memmove_chk_sse2_unaligned)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
-          __memmove_chk_sse2_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
-          __memmove_chk_erms))
+          __memmove_chk_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+          CPU_FEATURE_USABLE (AVX512F),
+          __memmove_chk_avx512_no_vzeroupper)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memmove_chk_avx512_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memmove_chk_avx512_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memmove_chk_evex_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memmove_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memmove_chk_evex_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
+          CPU_FEATURE_USABLE (AVX),
+          __memmove_chk_avx_unaligned)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
+          CPU_FEATURE_USABLE (AVX),
+          __memmove_chk_avx_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __memmove_chk_avx_unaligned_rtm)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __memmove_chk_avx_unaligned_erms_rtm)
+      /* By V3 we assume fast aligned copy. */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, __memmove_chk,
+          CPU_FEATURE_USABLE (SSSE3),
+          __memmove_chk_ssse3)
+      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+         implementation is also used at ISA level 2 (SSSE3 is too
+         optimized around aligned copy to be better as general
+         purpose memmove). */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, __memmove_chk, 1,
+          __memmove_chk_sse2_unaligned)
+      X86_IFUNC_IMPL_ADD_V2 (array, i, __memmove_chk, 1,
+          __memmove_chk_sse2_unaligned_erms))
 #endif
 
   /* Support sysdeps/x86_64/multiarch/memmove.c. */
   IFUNC_IMPL (i, name, memmove,
-      IFUNC_IMPL_ADD (array, i, memmove,
-          CPU_FEATURE_USABLE (AVX),
-          __memmove_avx_unaligned)
-      IFUNC_IMPL_ADD (array, i, memmove,
-          CPU_FEATURE_USABLE (AVX),
-          __memmove_avx_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, memmove,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __memmove_avx_unaligned_rtm)
-      IFUNC_IMPL_ADD (array, i, memmove,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __memmove_avx_unaligned_erms_rtm)
-      IFUNC_IMPL_ADD (array, i, memmove,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memmove_evex_unaligned)
-      IFUNC_IMPL_ADD (array, i, memmove,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memmove_evex_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, memmove,
-          CPU_FEATURE_USABLE (AVX512F),
-          __memmove_avx512_no_vzeroupper)
-      IFUNC_IMPL_ADD (array, i, memmove,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memmove_avx512_unaligned)
-      IFUNC_IMPL_ADD (array, i, memmove,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memmove_avx512_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSSE3),
-          __memmove_ssse3)
-      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_erms)
       IFUNC_IMPL_ADD (array, i, memmove, 1,
-          __memmove_sse2_unaligned)
-      IFUNC_IMPL_ADD (array, i, memmove, 1,
-          __memmove_sse2_unaligned_erms))
+          __memmove_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+          CPU_FEATURE_USABLE (AVX512F),
+          __memmove_avx512_no_vzeroupper)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memmove_avx512_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memmove_avx512_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memmove_evex_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memmove,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memmove_evex_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
+          CPU_FEATURE_USABLE (AVX),
+          __memmove_avx_unaligned)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
+          CPU_FEATURE_USABLE (AVX),
+          __memmove_avx_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __memmove_avx_unaligned_rtm)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __memmove_avx_unaligned_erms_rtm)
+      /* By V3 we assume fast aligned copy. */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, memmove,
+          CPU_FEATURE_USABLE (SSSE3),
+          __memmove_ssse3)
+      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+         implementation is also used at ISA level 2 (SSSE3 is too
+         optimized around aligned copy to be better as general
+         purpose memmove). */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, memmove, 1,
+          __memmove_sse2_unaligned)
+      X86_IFUNC_IMPL_ADD_V2 (array, i, memmove, 1,
+          __memmove_sse2_unaligned_erms))
 
   /* Support sysdeps/x86_64/multiarch/memrchr.c. */
   IFUNC_IMPL (i, name, memrchr,
@@ -832,165 +844,190 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 #ifdef SHARED
   /* Support sysdeps/x86_64/multiarch/memcpy_chk.c. */
   IFUNC_IMPL (i, name, __memcpy_chk,
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          CPU_FEATURE_USABLE (AVX512F),
-          __memcpy_chk_avx512_no_vzeroupper)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memcpy_chk_avx512_unaligned)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memcpy_chk_avx512_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          CPU_FEATURE_USABLE (AVX),
-          __memcpy_chk_avx_unaligned)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          CPU_FEATURE_USABLE (AVX),
-          __memcpy_chk_avx_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __memcpy_chk_avx_unaligned_rtm)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __memcpy_chk_avx_unaligned_erms_rtm)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memcpy_chk_evex_unaligned)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memcpy_chk_evex_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-          CPU_FEATURE_USABLE (SSSE3),
-          __memcpy_chk_ssse3)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
-          __memcpy_chk_sse2_unaligned)
-      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
-          __memcpy_chk_sse2_unaligned_erms)
       IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
-          __memcpy_chk_erms))
+          __memcpy_chk_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+          CPU_FEATURE_USABLE (AVX512F),
+          __memcpy_chk_avx512_no_vzeroupper)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memcpy_chk_avx512_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memcpy_chk_avx512_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memcpy_chk_evex_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __memcpy_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memcpy_chk_evex_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
+          CPU_FEATURE_USABLE (AVX),
+          __memcpy_chk_avx_unaligned)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
+          CPU_FEATURE_USABLE (AVX),
+          __memcpy_chk_avx_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __memcpy_chk_avx_unaligned_rtm)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __memcpy_chk_avx_unaligned_erms_rtm)
+      /* By V3 we assume fast aligned copy. */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, __memcpy_chk,
+          CPU_FEATURE_USABLE (SSSE3),
+          __memcpy_chk_ssse3)
+      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+         implementation is also used at ISA level 2 (SSSE3 is too
+         optimized around aligned copy to be better as general
+         purpose memmove). */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, __memcpy_chk, 1,
+          __memcpy_chk_sse2_unaligned)
+      X86_IFUNC_IMPL_ADD_V2 (array, i, __memcpy_chk, 1,
+          __memcpy_chk_sse2_unaligned_erms))
 #endif
 
   /* Support sysdeps/x86_64/multiarch/memcpy.c. */
   IFUNC_IMPL (i, name, memcpy,
-      IFUNC_IMPL_ADD (array, i, memcpy,
-          CPU_FEATURE_USABLE (AVX),
-          __memcpy_avx_unaligned)
-      IFUNC_IMPL_ADD (array, i, memcpy,
-          CPU_FEATURE_USABLE (AVX),
-          __memcpy_avx_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, memcpy,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __memcpy_avx_unaligned_rtm)
-      IFUNC_IMPL_ADD (array, i, memcpy,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __memcpy_avx_unaligned_erms_rtm)
-      IFUNC_IMPL_ADD (array, i, memcpy,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memcpy_evex_unaligned)
-      IFUNC_IMPL_ADD (array, i, memcpy,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memcpy_evex_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
-          __memcpy_ssse3)
-      IFUNC_IMPL_ADD (array, i, memcpy,
-          CPU_FEATURE_USABLE (AVX512F),
-          __memcpy_avx512_no_vzeroupper)
-      IFUNC_IMPL_ADD (array, i, memcpy,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memcpy_avx512_unaligned)
-      IFUNC_IMPL_ADD (array, i, memcpy,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __memcpy_avx512_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
       IFUNC_IMPL_ADD (array, i, memcpy, 1,
-          __memcpy_sse2_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_erms))
+          __memcpy_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+          CPU_FEATURE_USABLE (AVX512F),
+          __memcpy_avx512_no_vzeroupper)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memcpy_avx512_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memcpy_avx512_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memcpy_evex_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, memcpy,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __memcpy_evex_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
+          CPU_FEATURE_USABLE (AVX),
+          __memcpy_avx_unaligned)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
+          CPU_FEATURE_USABLE (AVX),
+          __memcpy_avx_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __memcpy_avx_unaligned_rtm)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __memcpy_avx_unaligned_erms_rtm)
+      /* By V3 we assume fast aligned copy. */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, memcpy,
+          CPU_FEATURE_USABLE (SSSE3),
+          __memcpy_ssse3)
+      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+         implementation is also used at ISA level 2 (SSSE3 is too
+         optimized around aligned copy to be better as general
+         purpose memmove). */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, memcpy, 1,
+          __memcpy_sse2_unaligned)
+      X86_IFUNC_IMPL_ADD_V2 (array, i, memcpy, 1,
+          __memcpy_sse2_unaligned_erms))
 
 #ifdef SHARED
   /* Support sysdeps/x86_64/multiarch/mempcpy_chk.c. */
   IFUNC_IMPL (i, name, __mempcpy_chk,
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          CPU_FEATURE_USABLE (AVX512F),
-          __mempcpy_chk_avx512_no_vzeroupper)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __mempcpy_chk_avx512_unaligned)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __mempcpy_chk_avx512_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          CPU_FEATURE_USABLE (AVX),
-          __mempcpy_chk_avx_unaligned)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          CPU_FEATURE_USABLE (AVX),
-          __mempcpy_chk_avx_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __mempcpy_chk_avx_unaligned_rtm)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __mempcpy_chk_avx_unaligned_erms_rtm)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __mempcpy_chk_evex_unaligned)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __mempcpy_chk_evex_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-          CPU_FEATURE_USABLE (SSSE3),
-          __mempcpy_chk_ssse3)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
-          __mempcpy_chk_sse2_unaligned)
       IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
-          __mempcpy_chk_sse2_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
-          __mempcpy_chk_erms))
+          __mempcpy_chk_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+          CPU_FEATURE_USABLE (AVX512F),
+          __mempcpy_chk_avx512_no_vzeroupper)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __mempcpy_chk_avx512_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __mempcpy_chk_avx512_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __mempcpy_chk_evex_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, __mempcpy_chk,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __mempcpy_chk_evex_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
+          CPU_FEATURE_USABLE (AVX),
+          __mempcpy_chk_avx_unaligned)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
+          CPU_FEATURE_USABLE (AVX),
+          __mempcpy_chk_avx_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __mempcpy_chk_avx_unaligned_rtm)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __mempcpy_chk_avx_unaligned_erms_rtm)
+      /* By V3 we assume fast aligned copy. */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, __mempcpy_chk,
+          CPU_FEATURE_USABLE (SSSE3),
+          __mempcpy_chk_ssse3)
+      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+         implementation is also used at ISA level 2 (SSSE3 is too
+         optimized around aligned copy to be better as general
+         purpose memmove). */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, __mempcpy_chk, 1,
+          __mempcpy_chk_sse2_unaligned)
+      X86_IFUNC_IMPL_ADD_V2 (array, i, __mempcpy_chk, 1,
+          __mempcpy_chk_sse2_unaligned_erms))
 #endif
 
   /* Support sysdeps/x86_64/multiarch/mempcpy.c. */
   IFUNC_IMPL (i, name, mempcpy,
-      IFUNC_IMPL_ADD (array, i, mempcpy,
-          CPU_FEATURE_USABLE (AVX512F),
-          __mempcpy_avx512_no_vzeroupper)
-      IFUNC_IMPL_ADD (array, i, mempcpy,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __mempcpy_avx512_unaligned)
-      IFUNC_IMPL_ADD (array, i, mempcpy,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __mempcpy_avx512_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, mempcpy,
-          CPU_FEATURE_USABLE (AVX),
-          __mempcpy_avx_unaligned)
-      IFUNC_IMPL_ADD (array, i, mempcpy,
-          CPU_FEATURE_USABLE (AVX),
-          __mempcpy_avx_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, mempcpy,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __mempcpy_avx_unaligned_rtm)
-      IFUNC_IMPL_ADD (array, i, mempcpy,
-          (CPU_FEATURE_USABLE (AVX)
-           && CPU_FEATURE_USABLE (RTM)),
-          __mempcpy_avx_unaligned_erms_rtm)
-      IFUNC_IMPL_ADD (array, i, mempcpy,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __mempcpy_evex_unaligned)
-      IFUNC_IMPL_ADD (array, i, mempcpy,
-          CPU_FEATURE_USABLE (AVX512VL),
-          __mempcpy_evex_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
-          __mempcpy_ssse3)
-      IFUNC_IMPL_ADD (array, i, mempcpy, 1,
-          __mempcpy_sse2_unaligned)
       IFUNC_IMPL_ADD (array, i, mempcpy, 1,
-          __mempcpy_sse2_unaligned_erms)
-      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_erms))
+          __mempcpy_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+          CPU_FEATURE_USABLE (AVX512F),
+          __mempcpy_avx512_no_vzeroupper)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __mempcpy_avx512_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __mempcpy_avx512_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __mempcpy_evex_unaligned)
+      X86_IFUNC_IMPL_ADD_V4 (array, i, mempcpy,
+          CPU_FEATURE_USABLE (AVX512VL),
+          __mempcpy_evex_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
+          CPU_FEATURE_USABLE (AVX),
+          __mempcpy_avx_unaligned)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
+          CPU_FEATURE_USABLE (AVX),
+          __mempcpy_avx_unaligned_erms)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __mempcpy_avx_unaligned_rtm)
+      X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
+          (CPU_FEATURE_USABLE (AVX)
+           && CPU_FEATURE_USABLE (RTM)),
+          __mempcpy_avx_unaligned_erms_rtm)
+      /* By V3 we assume fast aligned copy. */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, mempcpy,
+          CPU_FEATURE_USABLE (SSSE3),
+          __mempcpy_ssse3)
+      /* ISA V2 wrapper for SSE2 implementation because the SSE2
+         implementation is also used at ISA level 2 (SSSE3 is too
+         optimized around aligned copy to be better as general
+         purpose memmove). */
+      X86_IFUNC_IMPL_ADD_V2 (array, i, mempcpy, 1,
+          __mempcpy_sse2_unaligned)
+      X86_IFUNC_IMPL_ADD_V2 (array, i, mempcpy, 1,
+          __mempcpy_sse2_unaligned_erms))
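For the non-multiarch build, memmove.S above now picks its statically linked default through DEFAULT_IMPL_V1/V3/V4 and isa-default-impl.h. A rough sketch of what such a selection header could look like, again assuming a MINIMUM_X86_ISA_LEVEL-style macro from the build and using a computed #include; the names and structure here are illustrative, the real isa-default-impl.h differs in detail. The highest DEFAULT_IMPL_Vn at or below the build's minimum ISA level becomes the default implementation.

/* Illustrative isa-default-impl.h-style selector: include the best default
   implementation that the build's minimum ISA level guarantees is usable.
   DEFAULT_IMPL_V1/V3/V4 are set by the including file (memmove.S above).  */
#if MINIMUM_X86_ISA_LEVEL >= 4 && defined DEFAULT_IMPL_V4
# include DEFAULT_IMPL_V4
#elif MINIMUM_X86_ISA_LEVEL >= 3 && defined DEFAULT_IMPL_V3
# include DEFAULT_IMPL_V3
#else
# include DEFAULT_IMPL_V1
#endif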
