diff options
| author | H.J. Lu <hongjiu.lu@intel.com> | 2010-01-12 11:22:03 -0800 |
|---|---|---|
| committer | Ulrich Drepper <drepper@redhat.com> | 2010-01-12 11:22:03 -0800 |
| commit | 3af48cbdfaeb8bc389de1caeb33bc29811da80e8 (patch) | |
| tree | 94a209777ab8c7e24cff9e50660a4075e6338594 | |
| parent | 4bfc6ab9ae3b259caa8b12229f0c67b4b514e9cd (diff) | |
| download | glibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.tar.xz glibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.zip | |
Optimize 32bit memset/memcpy with SSE2/SSSE3.
36 files changed, 6336 insertions, 15 deletions
@@ -1,3 +1,65 @@ +2010-01-12 H.J. Lu <hongjiu.lu@intel.com> + + * sysdeps/i386/i686/bcopy.S: New file. + + * sysdeps/i386/i686/cacheinfo.c (__x86_64_data_cache_size): Define. + + * sysdeps/i386/i686/memcpy.S (__memcpy_chk): Use ENTRY_CHK + and END_CHK. + * sysdeps/i386/i686/memmove.S (__memmove_chk): Likewise. + * sysdeps/i386/i686/mempcpy.S (__mempcpy_chk): Likewise. + * sysdeps/i386/i686/memset.S (__memset_chk): Likewise. + + * sysdeps/i386/i686/memmove.S: Support USE_AS_BCOPY. + + * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add + bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 memmove-ssse3 + memcpy-ssse3-rep mempcpy-ssse3-rep memmove-ssse3-rep + bcopy-ssse3 bcopy-ssse3-rep memset-sse2-rep bzero-sse2-rep + * sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S: New file. + * sysdeps/i386/i686/multiarch/bcopy-ssse3.S: New file. + * sysdeps/i386/i686/multiarch/bcopy.S: New file. + * sysdeps/i386/i686/multiarch/bzero-sse2-rep.S: New file. + * sysdeps/i386/i686/multiarch/bzero-sse2.S: New file. + * sysdeps/i386/i686/multiarch/bzero.S: New file. + * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S: New file. + * sysdeps/i386/i686/multiarch/memcpy-ssse3.S: New file. + * sysdeps/i386/i686/multiarch/memcpy.S: New file. + * sysdeps/i386/i686/multiarch/memcpy_chk.S: New file. + * sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S: New file. + * sysdeps/i386/i686/multiarch/memmove-ssse3.S: New file. + * sysdeps/i386/i686/multiarch/memmove.S: New file. + * sysdeps/i386/i686/multiarch/memmove_chk.S: New file. + * sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S: New file. + * sysdeps/i386/i686/multiarch/mempcpy-ssse3.S: New file. + * sysdeps/i386/i686/multiarch/mempcpy.S: New file. + * sysdeps/i386/i686/multiarch/mempcpy_chk.S: New file. + * sysdeps/i386/i686/multiarch/memset-sse2-rep.S: New file. + * sysdeps/i386/i686/multiarch/memset-sse2.S: New file. + * sysdeps/i386/i686/multiarch/memset.S: New file. + * sysdeps/i386/i686/multiarch/memset_chk.S: New file. 
+ + * sysdeps/i386/sysdep.h (ENTRY_CHK): New. + (END_CHK): Likewise. + + * sysdeps/i386/i686/multiarch/ifunc-defines.sym: Add + FEATURE_OFFSET, FEATURE_SIZE and FEATURE_INDEX_1. + * sysdeps/x86_64/multiarch/ifunc-defines.sym: Likewise. + + * sysdeps/x86_64/cacheinfo.c (intel_02_cache_info): Add entries + for 0x0e and 0x80. + (__x86_64_data_cache_size): New. + (init_cacheinfo): Set __x86_64_data_cache_size. + + * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Turn + on bit_Fast_Rep_String for Intel Core i7. + + * sysdeps/x86_64/multiarch/init-arch.h (bit_Fast_Rep_String): New. + (index_Fast_Rep_String): Likewise. + (FEATURE_INDEX_1): Likewise. + (FEATURE_INDEX_MAX): Likewise. + (cpu_features): Add feature. + 2010-01-12 Ulrich Drepper <drepper@redhat.com> * conform/data/sys/select.h-data: Fix up for XPG7. diff --git a/sysdeps/i386/i686/bcopy.S b/sysdeps/i386/i686/bcopy.S new file mode 100644 index 0000000000..15ef9419a4 --- /dev/null +++ b/sysdeps/i386/i686/bcopy.S @@ -0,0 +1,3 @@ +#define USE_AS_BCOPY +#define memmove bcopy +#include <sysdeps/i386/i686/memmove.S> diff --git a/sysdeps/i386/i686/cacheinfo.c b/sysdeps/i386/i686/cacheinfo.c index 82e4cd223e..f8b7f521ca 100644 --- a/sysdeps/i386/i686/cacheinfo.c +++ b/sysdeps/i386/i686/cacheinfo.c @@ -1,3 +1,4 @@ +#define __x86_64_data_cache_size __x86_data_cache_size #define __x86_64_data_cache_size_half __x86_data_cache_size_half #define __x86_64_shared_cache_size __x86_shared_cache_size #define __x86_64_shared_cache_size_half __x86_shared_cache_size_half diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S index 0b2da1ea27..86ee082beb 100644 --- a/sysdeps/i386/i686/memcpy.S +++ b/sysdeps/i386/i686/memcpy.S @@ -32,11 +32,11 @@ .text #if defined PIC && !defined NOT_IN_libc -ENTRY (__memcpy_chk) +ENTRY_CHK (__memcpy_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memcpy_chk) +END_CHK (__memcpy_chk) #endif ENTRY (BP_SYM (memcpy)) ENTER diff --git 
a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S index b93b5c729f..981f14f4e0 100644 --- a/sysdeps/i386/i686/memmove.S +++ b/sysdeps/i386/i686/memmove.S @@ -26,18 +26,27 @@ #define PARMS LINKAGE+4 /* one spilled register */ #define RTN PARMS -#define DEST RTN+RTN_SIZE -#define SRC DEST+PTR_SIZE -#define LEN SRC+PTR_SIZE .text -#if defined PIC && !defined NOT_IN_libc -ENTRY (__memmove_chk) + +#ifdef USE_AS_BCOPY +# define SRC RTN+RTN_SIZE +# define DEST SRC+PTR_SIZE +# define LEN DEST+PTR_SIZE +#else +# define DEST RTN+RTN_SIZE +# define SRC DEST+PTR_SIZE +# define LEN SRC+PTR_SIZE + +# if defined PIC && !defined NOT_IN_libc +ENTRY_CHK (__memmove_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memmove_chk) +END_CHK (__memmove_chk) +# endif #endif + ENTRY (BP_SYM (memmove)) ENTER @@ -69,8 +78,10 @@ ENTRY (BP_SYM (memmove)) movsl movl %edx, %esi cfi_restore (esi) +#ifndef USE_AS_BCOPY movl DEST(%esp), %eax RETURN_BOUNDED_POINTER (DEST(%esp)) +#endif popl %edi cfi_adjust_cfa_offset (-4) @@ -101,8 +112,10 @@ ENTRY (BP_SYM (memmove)) movsl movl %edx, %esi cfi_restore (esi) +#ifndef USE_AS_BCOPY movl DEST(%esp), %eax RETURN_BOUNDED_POINTER (DEST(%esp)) +#endif cld popl %edi @@ -112,4 +125,6 @@ ENTRY (BP_SYM (memmove)) LEAVE RET_PTR END (BP_SYM (memmove)) +#ifndef USE_AS_BCOPY libc_hidden_builtin_def (memmove) +#endif diff --git a/sysdeps/i386/i686/mempcpy.S b/sysdeps/i386/i686/mempcpy.S index 6437e4a5d4..c10686fb3d 100644 --- a/sysdeps/i386/i686/mempcpy.S +++ b/sysdeps/i386/i686/mempcpy.S @@ -32,11 +32,11 @@ .text #if defined PIC && !defined NOT_IN_libc -ENTRY (__mempcpy_chk) +ENTRY_CHK (__mempcpy_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) jb HIDDEN_JUMPTARGET (__chk_fail) -END (__mempcpy_chk) +END_CHK (__mempcpy_chk) #endif ENTRY (BP_SYM (__mempcpy)) ENTER diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S index dfa1aa7019..b343af7b64 100644 --- a/sysdeps/i386/i686/memset.S +++ 
b/sysdeps/i386/i686/memset.S @@ -40,11 +40,11 @@ .text #if defined PIC && !defined NOT_IN_libc && !BZERO_P -ENTRY (__memset_chk) +ENTRY_CHK (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memset_chk) +END_CHK (__memset_chk) #endif ENTRY (BP_SYM (memset)) ENTER diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index e1553b284e..fbad9ae734 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -4,6 +4,10 @@ gen-as-const-headers += ifunc-defines.sym endif ifeq ($(subdir),string) +sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ + memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \ + memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \ + memset-sse2-rep bzero-sse2-rep ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c CFLAGS-strcspn-c.c += -msse4 diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S new file mode 100644 index 0000000000..cbc8b420e8 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S @@ -0,0 +1,4 @@ +#define USE_AS_MEMMOVE +#define USE_AS_BCOPY +#define MEMCPY __bcopy_ssse3_rep +#include "memcpy-ssse3-rep.S" diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3.S new file mode 100644 index 0000000000..36aac44b9c --- /dev/null +++ b/sysdeps/i386/i686/multiarch/bcopy-ssse3.S @@ -0,0 +1,4 @@ +#define USE_AS_MEMMOVE +#define USE_AS_BCOPY +#define MEMCPY __bcopy_ssse3 +#include "memcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S new file mode 100644 index 0000000000..8671bf684e --- /dev/null +++ b/sysdeps/i386/i686/multiarch/bcopy.S @@ -0,0 +1,89 @@ +/* Multiple versions of bcopy + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by Intel Corporation. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <init-arch.h> + +/* Define multiple versions only for the definition in libc. */ +#ifndef NOT_IN_libc +# ifdef SHARED + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits + .globl __i686.get_pc_thunk.bx + .hidden __i686.get_pc_thunk.bx + .p2align 4 + .type __i686.get_pc_thunk.bx,@function +__i686.get_pc_thunk.bx: + movl (%esp), %ebx + ret + + .text +ENTRY(bcopy) + .type bcopy, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __bcopy_ia32@GOTOFF(%ebx), %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __bcopy_ssse3@GOTOFF(%ebx), %eax + testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __bcopy_ssse3_rep@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + ret +END(bcopy) +# else + .text +ENTRY(bcopy) + .type bcopy, @gnu_indirect_function + cmpl $0, KIND_OFFSET+__cpu_features + jne 1f + call __init_cpu_features +1: leal 
__bcopy_ia32, %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features + jz 2f + leal __bcopy_ssse3, %eax + testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features + jz 2f + leal __bcopy_ssse3_rep, %eax +2: ret +END(bcopy) +# endif + +# undef ENTRY +# define ENTRY(name) \ + .type __bcopy_ia32, @function; \ + .p2align 4; \ + __bcopy_ia32: cfi_startproc; \ + CALL_MCOUNT +# undef END +# define END(name) \ + cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32 + +#endif + +#include "../bcopy.S" diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S new file mode 100644 index 0000000000..507b288bb3 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S @@ -0,0 +1,3 @@ +#define USE_AS_BZERO +#define __memset_sse2_rep __bzero_sse2_rep +#include "memset-sse2-rep.S" diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2.S b/sysdeps/i386/i686/multiarch/bzero-sse2.S new file mode 100644 index 0000000000..8d04512e4e --- /dev/null +++ b/sysdeps/i386/i686/multiarch/bzero-sse2.S @@ -0,0 +1,3 @@ +#define USE_AS_BZERO +#define __memset_sse2 __bzero_sse2 +#include "memset-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S new file mode 100644 index 0000000000..8c740a42dc --- /dev/null +++ b/sysdeps/i386/i686/multiarch/bzero.S @@ -0,0 +1,97 @@ +/* Multiple versions of bzero + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <init-arch.h> + +/* Define multiple versions only for the definition in libc. */ +#ifndef NOT_IN_libc +# ifdef SHARED + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits + .globl __i686.get_pc_thunk.bx + .hidden __i686.get_pc_thunk.bx + .p2align 4 + .type __i686.get_pc_thunk.bx,@function +__i686.get_pc_thunk.bx: + movl (%esp), %ebx + ret + + .text +ENTRY(__bzero) + .type __bzero, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __bzero_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __bzero_sse2@GOTOFF(%ebx), %eax + testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __bzero_sse2_rep@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + ret +END(__bzero) +# else + .text |
