aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2010-01-12 11:22:03 -0800
committerUlrich Drepper <drepper@redhat.com>2010-01-12 11:22:03 -0800
commit3af48cbdfaeb8bc389de1caeb33bc29811da80e8 (patch)
tree94a209777ab8c7e24cff9e50660a4075e6338594
parent4bfc6ab9ae3b259caa8b12229f0c67b4b514e9cd (diff)
downloadglibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.tar.xz
glibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.zip
Optimize 32bit memset/memcpy with SSE2/SSSE3.
-rw-r--r--ChangeLog62
-rw-r--r--sysdeps/i386/i686/bcopy.S3
-rw-r--r--sysdeps/i386/i686/cacheinfo.c1
-rw-r--r--sysdeps/i386/i686/memcpy.S4
-rw-r--r--sysdeps/i386/i686/memmove.S27
-rw-r--r--sysdeps/i386/i686/mempcpy.S4
-rw-r--r--sysdeps/i386/i686/memset.S4
-rw-r--r--sysdeps/i386/i686/multiarch/Makefile4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy.S89
-rw-r--r--sysdeps/i386/i686/multiarch/bzero-sse2-rep.S3
-rw-r--r--sysdeps/i386/i686/multiarch/bzero-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/bzero.S97
-rw-r--r--sysdeps/i386/i686/multiarch/ifunc-defines.sym3
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S1785
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy-ssse3.S1737
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy.S90
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy_chk.S64
-rw-r--r--sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S4
-rw-r--r--sysdeps/i386/i686/multiarch/memmove-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/memmove.S114
-rw-r--r--sysdeps/i386/i686/multiarch/memmove_chk.S112
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S4
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy.S93
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy_chk.S64
-rw-r--r--sysdeps/i386/i686/multiarch/memset-sse2-rep.S821
-rw-r--r--sysdeps/i386/i686/multiarch/memset-sse2.S867
-rw-r--r--sysdeps/i386/i686/multiarch/memset.S112
-rw-r--r--sysdeps/i386/i686/multiarch/memset_chk.S116
-rw-r--r--sysdeps/i386/sysdep.h3
-rw-r--r--sysdeps/x86_64/cacheinfo.c10
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-defines.sym3
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.c18
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.h14
36 files changed, 6336 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog
index 5efd65fcc5..126f7589a3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,65 @@
+2010-01-12 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/i386/i686/bcopy.S: New file.
+
+ * sysdeps/i386/i686/cacheinfo.c (__x86_64_data_cache_size): Define.
+
+ * sysdeps/i386/i686/memcpy.S (__memcpy_chk): Use ENTRY_CHK
+ and END_CHK.
+ * sysdeps/i386/i686/memmove.S (__memmove_chk): Likewise.
+ * sysdeps/i386/i686/mempcpy.S (__mempcpy_chk): Likewise.
+ * sysdeps/i386/i686/memset.S (__memset_chk): Likewise.
+
+ * sysdeps/i386/i686/memmove.S: Support USE_AS_BCOPY.
+
+ * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
+ bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 memmove-ssse3
+ memcpy-ssse3-rep mempcpy-ssse3-rep memmove-ssse3-rep
+ bcopy-ssse3 bcopy-ssse3-rep memset-sse2-rep bzero-sse2-rep.
+ * sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S: New file.
+ * sysdeps/i386/i686/multiarch/bcopy-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/bcopy.S: New file.
+ * sysdeps/i386/i686/multiarch/bzero-sse2-rep.S: New file.
+ * sysdeps/i386/i686/multiarch/bzero-sse2.S: New file.
+ * sysdeps/i386/i686/multiarch/bzero.S: New file.
+ * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S: New file.
+ * sysdeps/i386/i686/multiarch/memcpy-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/memcpy.S: New file.
+ * sysdeps/i386/i686/multiarch/memcpy_chk.S: New file.
+ * sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S: New file.
+ * sysdeps/i386/i686/multiarch/memmove-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/memmove.S: New file.
+ * sysdeps/i386/i686/multiarch/memmove_chk.S: New file.
+ * sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S: New file.
+ * sysdeps/i386/i686/multiarch/mempcpy-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/mempcpy.S: New file.
+ * sysdeps/i386/i686/multiarch/mempcpy_chk.S: New file.
+ * sysdeps/i386/i686/multiarch/memset-sse2-rep.S: New file.
+ * sysdeps/i386/i686/multiarch/memset-sse2.S: New file.
+ * sysdeps/i386/i686/multiarch/memset.S: New file.
+ * sysdeps/i386/i686/multiarch/memset_chk.S: New file.
+
+ * sysdeps/i386/sysdep.h (ENTRY_CHK): New.
+ (END_CHK): Likewise.
+
+ * sysdeps/i386/i686/multiarch/ifunc-defines.sym: Add
+ FEATURE_OFFSET, FEATURE_SIZE and FEATURE_INDEX_1.
+ * sysdeps/x86_64/multiarch/ifunc-defines.sym: Likewise.
+
+ * sysdeps/x86_64/cacheinfo.c (intel_02_cache_info): Add entries
+ for 0x0e and 0x80.
+ (__x86_64_data_cache_size): New.
+ (init_cacheinfo): Set __x86_64_data_cache_size.
+
+ * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Turn
+ on bit_Fast_Rep_String for Intel Core i7.
+
+ * sysdeps/x86_64/multiarch/init-arch.h (bit_Fast_Rep_String): New.
+ (index_Fast_Rep_String): Likewise.
+ (FEATURE_INDEX_1): Likewise.
+ (FEATURE_INDEX_MAX): Likewise.
+ (cpu_features): Add feature.
+
2010-01-12 Ulrich Drepper <drepper@redhat.com>
* conform/data/sys/select.h-data: Fix up for XPG7.
diff --git a/sysdeps/i386/i686/bcopy.S b/sysdeps/i386/i686/bcopy.S
new file mode 100644
index 0000000000..15ef9419a4
--- /dev/null
+++ b/sysdeps/i386/i686/bcopy.S
@@ -0,0 +1,3 @@
+#define USE_AS_BCOPY /* memmove.S then takes SRC before DEST and skips loading DEST into %eax as a return value.  */
+#define memmove bcopy /* Every `memmove' token in the included file, including the entry symbol, becomes `bcopy'.  */
+#include <sysdeps/i386/i686/memmove.S> /* Reuse the i686 memmove body as bcopy.  */
diff --git a/sysdeps/i386/i686/cacheinfo.c b/sysdeps/i386/i686/cacheinfo.c
index 82e4cd223e..f8b7f521ca 100644
--- a/sysdeps/i386/i686/cacheinfo.c
+++ b/sysdeps/i386/i686/cacheinfo.c
@@ -1,3 +1,4 @@
+#define __x86_64_data_cache_size __x86_data_cache_size
#define __x86_64_data_cache_size_half __x86_data_cache_size_half
#define __x86_64_shared_cache_size __x86_shared_cache_size
#define __x86_64_shared_cache_size_half __x86_shared_cache_size_half
diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S
index 0b2da1ea27..86ee082beb 100644
--- a/sysdeps/i386/i686/memcpy.S
+++ b/sysdeps/i386/i686/memcpy.S
@@ -32,11 +32,11 @@
.text
#if defined PIC && !defined NOT_IN_libc
-ENTRY (__memcpy_chk)
+ENTRY_CHK (__memcpy_chk)
movl 12(%esp), %eax
cmpl %eax, 16(%esp)
jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__memcpy_chk)
+END_CHK (__memcpy_chk)
#endif
ENTRY (BP_SYM (memcpy))
ENTER
diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
index b93b5c729f..981f14f4e0 100644
--- a/sysdeps/i386/i686/memmove.S
+++ b/sysdeps/i386/i686/memmove.S
@@ -26,18 +26,27 @@
#define PARMS LINKAGE+4 /* one spilled register */
#define RTN PARMS
-#define DEST RTN+RTN_SIZE
-#define SRC DEST+PTR_SIZE
-#define LEN SRC+PTR_SIZE
.text
-#if defined PIC && !defined NOT_IN_libc
-ENTRY (__memmove_chk)
+
+#ifdef USE_AS_BCOPY
+# define SRC RTN+RTN_SIZE
+# define DEST SRC+PTR_SIZE
+# define LEN DEST+PTR_SIZE
+#else
+# define DEST RTN+RTN_SIZE
+# define SRC DEST+PTR_SIZE
+# define LEN SRC+PTR_SIZE
+
+# if defined PIC && !defined NOT_IN_libc
+ENTRY_CHK (__memmove_chk)
movl 12(%esp), %eax
cmpl %eax, 16(%esp)
jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__memmove_chk)
+END_CHK (__memmove_chk)
+# endif
#endif
+
ENTRY (BP_SYM (memmove))
ENTER
@@ -69,8 +78,10 @@ ENTRY (BP_SYM (memmove))
movsl
movl %edx, %esi
cfi_restore (esi)
+#ifndef USE_AS_BCOPY
movl DEST(%esp), %eax
RETURN_BOUNDED_POINTER (DEST(%esp))
+#endif
popl %edi
cfi_adjust_cfa_offset (-4)
@@ -101,8 +112,10 @@ ENTRY (BP_SYM (memmove))
movsl
movl %edx, %esi
cfi_restore (esi)
+#ifndef USE_AS_BCOPY
movl DEST(%esp), %eax
RETURN_BOUNDED_POINTER (DEST(%esp))
+#endif
cld
popl %edi
@@ -112,4 +125,6 @@ ENTRY (BP_SYM (memmove))
LEAVE
RET_PTR
END (BP_SYM (memmove))
+#ifndef USE_AS_BCOPY
libc_hidden_builtin_def (memmove)
+#endif
diff --git a/sysdeps/i386/i686/mempcpy.S b/sysdeps/i386/i686/mempcpy.S
index 6437e4a5d4..c10686fb3d 100644
--- a/sysdeps/i386/i686/mempcpy.S
+++ b/sysdeps/i386/i686/mempcpy.S
@@ -32,11 +32,11 @@
.text
#if defined PIC && !defined NOT_IN_libc
-ENTRY (__mempcpy_chk)
+ENTRY_CHK (__mempcpy_chk)
movl 12(%esp), %eax
cmpl %eax, 16(%esp)
jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__mempcpy_chk)
+END_CHK (__mempcpy_chk)
#endif
ENTRY (BP_SYM (__mempcpy))
ENTER
diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
index dfa1aa7019..b343af7b64 100644
--- a/sysdeps/i386/i686/memset.S
+++ b/sysdeps/i386/i686/memset.S
@@ -40,11 +40,11 @@
.text
#if defined PIC && !defined NOT_IN_libc && !BZERO_P
-ENTRY (__memset_chk)
+ENTRY_CHK (__memset_chk)
movl 12(%esp), %eax
cmpl %eax, 16(%esp)
jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__memset_chk)
+END_CHK (__memset_chk)
#endif
ENTRY (BP_SYM (memset))
ENTER
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index e1553b284e..fbad9ae734 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -4,6 +4,10 @@ gen-as-const-headers += ifunc-defines.sym
endif
ifeq ($(subdir),string)
+sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
+ memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
+ memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
+ memset-sse2-rep bzero-sse2-rep
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-strcspn-c.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S
new file mode 100644
index 0000000000..cbc8b420e8
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE /* Read by the included file (not visible here); presumably selects overlap-safe copying -- confirm.  */
+#define USE_AS_BCOPY /* Read by the included file; presumably selects bcopy's (src, dest, len) argument order -- confirm.  */
+#define MEMCPY __bcopy_ssse3_rep /* Symbol the bcopy selector picks when both the SSSE3 and Fast_Rep_String bits test non-zero.  */
+#include "memcpy-ssse3-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3.S
new file mode 100644
index 0000000000..36aac44b9c
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bcopy-ssse3.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE /* Read by the included file (not visible here); presumably selects overlap-safe copying -- confirm.  */
+#define USE_AS_BCOPY /* Read by the included file; presumably selects bcopy's (src, dest, len) argument order -- confirm.  */
+#define MEMCPY __bcopy_ssse3 /* Symbol the bcopy selector picks when the SSSE3 bit tests non-zero but Fast_Rep_String does not.  */
+#include "memcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S
new file mode 100644
index 0000000000..8671bf684e
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bcopy.S
@@ -0,0 +1,89 @@
+/* Multiple versions of bcopy
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits /* Own .gnu.linkonce.t.* section; presumably so duplicate copies of the thunk are merged at link time -- confirm.  */
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx: /* PC thunk: hands the caller's return address back in %ebx.  */
+ movl (%esp), %ebx /* On entry (%esp) is the return address pushed by the `call'.  */
+ ret
+
+ .text
+ENTRY(bcopy) /* IFUNC selector (SHARED build): returns in %eax the address of the bcopy variant to use.  */
+ .type bcopy, @gnu_indirect_function /* Mark the symbol as an indirect function rather than an ordinary one.  */
+ pushl %ebx /* %ebx is overwritten below to address data; restored at label 2.  */
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx /* %ebx = address of the following instruction.  */
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx /* %ebx becomes the base for the @GOTOFF operands below.  */
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) /* Field at KIND_OFFSET non-zero: skip the init call.  */
+ jne 1f
+ call __init_cpu_features
+1: leal __bcopy_ia32@GOTOFF(%ebx), %eax /* Default choice.  */
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) /* SSSE3 bit clear: keep the default.  */
+ jz 2f
+ leal __bcopy_ssse3@GOTOFF(%ebx), %eax /* SSSE3 bit set: switch to the SSSE3 variant.  */
+ testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) /* Fast_Rep_String bit clear: keep __bcopy_ssse3.  */
+ jz 2f
+ leal __bcopy_ssse3_rep@GOTOFF(%ebx), %eax /* Both bits set: switch to the SSSE3 "rep" variant.  */
+2: popl %ebx /* Restore the caller's %ebx; %eax already holds the result.  */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ ret
+END(bcopy)
+# else
+ .text
+ENTRY(bcopy) /* IFUNC selector (non-SHARED build): same decision chain, using absolute symbol addresses.  */
+ .type bcopy, @gnu_indirect_function /* Mark the symbol as an indirect function rather than an ordinary one.  */
+ cmpl $0, KIND_OFFSET+__cpu_features /* Field at KIND_OFFSET non-zero: skip the init call.  */
+ jne 1f
+ call __init_cpu_features
+1: leal __bcopy_ia32, %eax /* Default choice.  */
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features /* SSSE3 bit clear: keep the default.  */
+ jz 2f
+ leal __bcopy_ssse3, %eax /* SSSE3 bit set: switch to the SSSE3 variant.  */
+ testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features /* Fast_Rep_String bit clear: keep __bcopy_ssse3.  */
+ jz 2f
+ leal __bcopy_ssse3_rep, %eax /* Both bits set: switch to the SSSE3 "rep" variant.  */
+2: ret /* %eax holds the selected address.  */
+END(bcopy)
+# endif
+
+# undef ENTRY /* From here on ENTRY ignores its argument and always opens __bcopy_ia32, so the ../bcopy.S included at the end of this file is emitted under that name.  */
+# define ENTRY(name) \
+ .type __bcopy_ia32, @function; \
+ .p2align 4; \
+ __bcopy_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END /* Matching END: also ignores its argument and closes __bcopy_ia32.  */
+# define END(name) \
+ cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32
+
+#endif
+
+#include "../bcopy.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S
new file mode 100644
index 0000000000..507b288bb3
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S
@@ -0,0 +1,3 @@
+#define USE_AS_BZERO /* Read by the included file (not visible here); presumably selects bzero's (ptr, len) interface -- confirm.  */
+#define __memset_sse2_rep __bzero_sse2_rep /* Every `__memset_sse2_rep' token in the included file becomes `__bzero_sse2_rep'.  */
+#include "memset-sse2-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2.S b/sysdeps/i386/i686/multiarch/bzero-sse2.S
new file mode 100644
index 0000000000..8d04512e4e
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bzero-sse2.S
@@ -0,0 +1,3 @@
+#define USE_AS_BZERO /* Read by the included file (not visible here); presumably selects bzero's (ptr, len) interface -- confirm.  */
+#define __memset_sse2 __bzero_sse2 /* Every `__memset_sse2' token in the included file becomes `__bzero_sse2'.  */
+#include "memset-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S
new file mode 100644
index 0000000000..8c740a42dc
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bzero.S
@@ -0,0 +1,97 @@
+/* Multiple versions of bzero
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits /* Own .gnu.linkonce.t.* section; presumably so duplicate copies of the thunk are merged at link time -- confirm.  */
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx: /* PC thunk: hands the caller's return address back in %ebx.  */
+ movl (%esp), %ebx /* On entry (%esp) is the return address pushed by the `call'.  */
+ ret
+
+ .text
+ENTRY(__bzero) /* IFUNC selector (SHARED build): returns in %eax the address of the bzero variant to use.  */
+ .type __bzero, @gnu_indirect_function /* Mark the symbol as an indirect function rather than an ordinary one.  */
+ pushl %ebx /* %ebx is overwritten below to address data; restored at label 2.  */
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx /* %ebx = address of the following instruction.  */
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx /* %ebx becomes the base for the @GOTOFF operands below.  */
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) /* Field at KIND_OFFSET non-zero: skip the init call.  */
+ jne 1f
+ call __init_cpu_features
+1: leal __bzero_ia32@GOTOFF(%ebx), %eax /* Default choice.  */
+ testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) /* SSE2 bit clear: keep the default.  */
+ jz 2f
+ leal __bzero_sse2@GOTOFF(%ebx), %eax /* SSE2 bit set: switch to the SSE2 variant.  */
+ testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) /* Fast_Rep_String bit clear: keep __bzero_sse2.  */
+ jz 2f
+ leal __bzero_sse2_rep@GOTOFF(%ebx), %eax /* Both bits set: switch to the SSE2 "rep" variant.  */
+2: popl %ebx /* Restore the caller's %ebx; %eax already holds the result.  */
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ ret
+END(__bzero)
+# else
+ .text