diff options
| author | Liubov Dmitrieva <liubov.dmitrieva@intel.com> | 2011-08-04 15:33:38 -0400 |
|---|---|---|
| committer | Ulrich Drepper <drepper@gmail.com> | 2011-08-04 15:33:38 -0400 |
| commit | 5fa16e9b016b34788b9a48b5ab9752a583bb987c (patch) | |
| tree | e62092078eefe8f18b9491d98cff56f244332669 | |
| parent | 8c1a459f9a64abee69c154c8a0e5ab9be86256e4 (diff) | |
| download | glibc-5fa16e9b016b34788b9a48b5ab9752a583bb987c.tar.xz glibc-5fa16e9b016b34788b9a48b5ab9752a583bb987c.zip | |
Improve x86-32 strcat functions with SSE2/SSSE3
| -rw-r--r-- | ChangeLog | 26 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/Makefile | 3 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/strcat-sse2.S | 1244 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/strcat-ssse3.S | 573 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/strcat.S | 131 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/strcpy-ssse3.S | 547 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/strlen-sse2.S | 114 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/strncat-c.c | 8 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/strncat-sse2.S | 4 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/strncat-ssse3.S | 4 | ||||
| -rw-r--r-- | sysdeps/i386/i686/multiarch/strncat.S | 3 |
11 files changed, 2340 insertions, 317 deletions
@@ -1,3 +1,29 @@ +2011-07-22 Liubov Dmitrieva <liubov.dmitrieva@intel.com> + + * sysdeps/i386/i686/multiarch/strcat-sse2.S: Update. + Fix overflow bug in strncat. + * sysdeps/i386/i686/multiarch/strcpy-ssse3.S: Likewise. + + * string/test-strncat.c: Update. + Add new tests for checking overflow bugs. + +2011-07-15 Liubov Dmitrieva <liubov.dmitrieva@intel.com> + + * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add + strcat-ssse3 strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c. + * sysdeps/i386/i686/multiarch/strcat.S: New file. + * sysdeps/i386/i686/multiarch/strcat-c.c: New file. + * sysdeps/i386/i686/multiarch/strcat-sse2.S: New file. + * sysdeps/i386/i686/multiarch/strcat-ssse3.S: New file. + * sysdeps/i386/i686/multiarch/strncat.S: New file. + * sysdeps/i386/i686/multiarch/strncat-sse2.S: New file. + * sysdeps/i386/i686/multiarch/strncat-ssse3.S: New file. + + * sysdeps/i386/i686/multiarch/strcpy-ssse3.S + (USE_AS_STRCAT): Define. + Add strcat and strncat support. + * sysdeps/i386/i686/multiarch/strlen-sse2.S: Likewise. + 2011-07-25 Andreas Schwab <schwab@redhat.com> * sysdeps/i386/i486/bits/string.h (__strncat_g): Correctly handle diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 4bae699caf..83b2818a9d 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -12,7 +12,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \ strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \ strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \ - strncpy-sse2 stpcpy-sse2 stpncpy-sse2 + strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \ + strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c CFLAGS-varshift.c += -msse4 diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S new file mode 100644 index 0000000000..b692036cec --- /dev/null +++ b/sysdeps/i386/i686/multiarch/strcat-sse2.S @@ -0,0 +1,1244 @@ +/* strcat with SSE2 + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + + +#ifndef NOT_IN_libc + +# include <sysdep.h> + + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# ifdef SHARED +# define JMPTBL(I, B) I - B + +/* Load an entry in a jump table into ECX and branch to it. TABLE is a + jump table with relative offsets. INDEX is a register contains the + index into the jump table. SCALE is the scale of INDEX. */ + +# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ + /* We first load PC into ECX. */ \ + call __i686.get_pc_thunk.cx; \ + /* Get the address of the jump table. */ \ + addl $(TABLE - .), %ecx; \ + /* Get the entry and convert the relative offset to the \ + absolute address. */ \ + addl (%ecx,INDEX,SCALE), %ecx; \ + /* We loaded the jump table and adjuested ECX. Go. */ \ + jmp *%ecx +# else +# define JMPTBL(I, B) I + +/* Branch to an entry in a jump table. TABLE is a jump table with + absolute offsets. INDEX is a register contains the index into the + jump table. SCALE is the scale of INDEX. */ + +# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ + jmp *TABLE(,INDEX,SCALE) +# endif + +# ifndef STRCAT +# define STRCAT __strcat_sse2 +# endif + +# define PARMS 4 +# define STR1 PARMS+4 +# define STR2 STR1+4 + +# ifdef USE_AS_STRNCAT +# define LEN STR2+8 +# define STR3 STR1+4 +# else +# define STR3 STR1 +# endif + +# define USE_AS_STRCAT +# ifdef USE_AS_STRNCAT +# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi); +# else +# define RETURN POP(%esi); ret; CFI_PUSH(%esi); +# endif + +.text +ENTRY (STRCAT) + PUSH (%esi) + mov STR1(%esp), %eax + mov STR2(%esp), %esi +# ifdef USE_AS_STRNCAT + PUSH (%ebx) + movl LEN(%esp), %ebx + test %ebx, %ebx + jz L(ExitZero) +# endif + cmpb $0, (%esi) + mov %esi, %ecx + mov %eax, %edx + jz L(ExitZero) + + and $63, %ecx + and $63, %edx + cmp $32, %ecx + ja L(StrlenCore7_1) + cmp $48, %edx + ja L(alignment_prolog) + + pxor %xmm0, %xmm0 + pxor %xmm4, %xmm4 + pxor %xmm7, %xmm7 + movdqu (%eax), %xmm1 + movdqu (%esi), %xmm5 + pcmpeqb %xmm1, %xmm0 + movdqu 16(%esi), %xmm6 + pmovmskb %xmm0, %ecx + pcmpeqb %xmm5, %xmm4 + pcmpeqb %xmm6, %xmm7 + test %ecx, %ecx + jnz L(exit_less16_) + mov %eax, %ecx + and $-16, %eax + jmp L(loop_prolog) + +L(alignment_prolog): + pxor %xmm0, %xmm0 + pxor %xmm4, %xmm4 + mov %edx, %ecx + pxor %xmm7, %xmm7 + and $15, %ecx + and $-16, %eax + pcmpeqb (%eax), %xmm0 + movdqu (%esi), %xmm5 + movdqu 16(%esi), %xmm6 + pmovmskb %xmm0, %edx + pcmpeqb %xmm5, %xmm4 + shr %cl, %edx + pcmpeqb %xmm6, %xmm7 + test %edx, %edx + jnz L(exit_less16) + add %eax, %ecx + + pxor %xmm0, %xmm0 +L(loop_prolog): + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + .p2align 4 +L(align16_loop): + pcmpeqb 16(%eax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16) + + pcmpeqb 32(%eax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32) + + pcmpeqb 48(%eax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48) + + pcmpeqb 64(%eax), %xmm3 + pmovmskb %xmm3, %edx + lea 64(%eax), %eax + test %edx, %edx + jz L(align16_loop) + bsf %edx, %edx + add %edx, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit16): + bsf %edx, %edx + lea 16(%eax, %edx), %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit32): + bsf %edx, %edx + lea 32(%eax, %edx), %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit48): + bsf %edx, %edx + lea 48(%eax, %edx), %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_less16): + bsf %edx, %edx + add %ecx, %eax + add %edx, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_less16_): + bsf %ecx, %ecx + add %ecx, %eax + + .p2align 4 +L(StartStrcpyPart): + pmovmskb %xmm4, %edx +# ifdef USE_AS_STRNCAT + cmp $16, %ebx + jbe L(CopyFrom1To16BytesTail1Case2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16BytesTail1) + + movdqu %xmm5, (%eax) + pmovmskb %xmm7, %edx +# ifdef USE_AS_STRNCAT + cmp $32, %ebx + jbe L(CopyFrom1To32Bytes1Case2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To32Bytes1) + + mov %esi, %ecx + and $-16, %esi + and $15, %ecx + pxor %xmm0, %xmm0 +# ifdef USE_AS_STRNCAT + add %ecx, %ebx +# endif + sub %ecx, %eax + jmp L(Unalign16Both) + +L(StrlenCore7_1): + mov %eax, %ecx + pxor %xmm0, %xmm0 + and $15, %ecx + and $-16, %eax + pcmpeqb (%eax), %xmm0 + pmovmskb %xmm0, %edx + shr %cl, %edx + test %edx, %edx + jnz L(exit_less16_1) + add %eax, %ecx + + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + + .p2align 4 +L(align16_loop_1): + pcmpeqb 16(%eax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16_1) + + pcmpeqb 32(%eax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32_1) + + pcmpeqb 48(%eax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48_1) + + pcmpeqb 64(%eax), %xmm3 + pmovmskb %xmm3, %edx + lea 64(%eax), %eax + test %edx, %edx + jz L(align16_loop_1) + bsf %edx, %edx + add %edx, %eax + jmp L(StartStrcpyPart_1) + + .p2align 4 +L(exit16_1): + bsf %edx, %edx + lea 16(%eax, %edx), %eax + jmp L(StartStrcpyPart_1) + + .p2align 4 +L(exit32_1): + bsf %edx, %edx + lea 32(%eax, %edx), %eax + jmp L(StartStrcpyPart_1) + + .p2align 4 +L(exit48_1): + bsf %edx, %edx + lea 48(%eax, %edx), %eax + jmp L(StartStrcpyPart_1) + + .p2align 4 +L(exit_less16_1): + bsf %edx, %edx + add %ecx, %eax + add %edx, %eax + + .p2align 4 +L(StartStrcpyPart_1): + mov %esi, %ecx + and $15, %ecx + and $-16, %esi + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + +# ifdef USE_AS_STRNCAT + cmp $48, %ebx + ja L(BigN) +# endif + pcmpeqb (%esi), %xmm1 +# ifdef USE_AS_STRNCAT + add %ecx, %ebx +# endif + pmovmskb %xmm1, %edx + shr %cl, %edx +# ifdef USE_AS_STRNCAT + cmp $16, %ebx + jbe L(CopyFrom1To16BytesTailCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16BytesTail) + + pcmpeqb 16(%esi), %xmm0 + pmovmskb %xmm0, %edx +# ifdef USE_AS_STRNCAT + cmp $32, %ebx + jbe L(CopyFrom1To32BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To32Bytes) + + movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ + movdqu %xmm1, (%eax) + sub %ecx, %eax + + .p2align 4 +L(Unalign16Both): + mov $16, %ecx + movdqa (%esi, %ecx), %xmm1 + movaps 16(%esi, %ecx), %xmm2 + movdqu %xmm1, (%eax, %ecx) + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $48, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) +L(Unalign16BothBigN): + movaps 16(%esi, %ecx), %xmm3 + movdqu %xmm2, (%eax, %ecx) + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movaps 16(%esi, %ecx), %xmm4 + movdqu %xmm3, (%eax, %ecx) + pcmpeqb %xmm4, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movaps 16(%esi, %ecx), %xmm1 + movdqu %xmm4, (%eax, %ecx) + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movaps 16(%esi, %ecx), %xmm2 + movdqu %xmm1, (%eax, %ecx) + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movaps 16(%esi, %ecx), %xmm3 + movdqu %xmm2, (%eax, %ecx) + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movdqu %xmm3, (%eax, %ecx) + mov %esi, %edx + lea 16(%esi, %ecx), %esi + and $-0x40, %esi + sub %esi, %edx + sub %edx, %eax +# ifdef USE_AS_STRNCAT + lea 128(%ebx, %edx), %ebx +# endif + movaps (%esi), %xmm2 + movaps %xmm2, %xmm4 + movaps 16(%esi), %xmm5 + movaps 32(%esi), %xmm3 + movaps %xmm3, %xmm6 + movaps 48(%esi), %xmm7 + pminub %xmm5, %xmm2 + pminub %xmm7, %xmm3 + pminub %xmm2, %xmm3 + pcmpeqb %xmm0, %xmm3 + pmovmskb %xmm3, %edx +# ifdef USE_AS_STRNCAT + sub $64, %ebx + jbe L(UnalignedLeaveCase2OrCase3) +# endif + test %edx, %edx + jnz L(Unaligned64Leave) + + .p2align 4 +L(Unaligned64Loop_start): + add $64, %eax + add $64, %esi + movdqu %xmm4, -64(%eax) + movaps (%esi), %xmm2 + movdqa %xmm2, %xmm4 + movdqu %xmm5, -48(%eax) + movaps 16(%esi), %xmm5 + pminub %xmm5, %xmm2 + movaps 32(%esi), %xmm3 + movdqu %xmm6, -32(%eax) + movaps %xmm3, %xmm6 + movdqu %xmm7, -16(%eax) + movaps 48(%esi), %xmm7 + pminub %xmm7, %xmm3 + pminub %xmm2, %xmm3 + pcmpeqb %xmm0, %xmm3 + pmovmskb %xmm3, %edx +# ifdef USE_AS_STRNCAT + sub $64, %ebx + jbe L(UnalignedLeaveCase2OrCase3) +# endif + test %edx, %edx + jz L(Unaligned64Loop_start) + +L(Unaligned64Leave): + pxor %xmm1, %xmm1 + + pcmpeqb %xmm4, %xmm0 + pcmpeqb %xmm5, %xmm1 + pmovmskb %xmm0, %edx + pmovmskb %xmm1, %ecx + test %edx, %edx + jnz L(CopyFrom1To16BytesUnaligned_0) + test %ecx, %ecx + jnz L(CopyFrom1To16BytesUnaligned_16) + + pcmpeqb %xmm6, %xmm0 + pcmpeqb %xmm7, %xmm1 + pmovmskb %xmm0, %edx + pmovmskb %xmm1, %ecx + test %edx, %edx + jnz L(CopyFrom1To16BytesUnaligned_32) + + bsf %ecx, %edx + movdqu %xmm4, (%eax) + movdqu %xmm5, 16(%eax) + movdqu %xmm6, 32(%eax) + add $48, %esi + add $48, %eax + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + +# ifdef USE_AS_STRNCAT + .p2align 4 +L(BigN): + pcmpeqb (%esi), %xmm1 + pmovmskb %xmm1, %edx + shr %cl, %edx + test %edx, %edx + jnz L(CopyFrom1To16BytesTail) + + pcmpeqb 16(%esi), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(CopyFrom1To32Bytes) + + movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ + movdqu %xmm1, (%eax) + sub %ecx, %eax + sub $48, %ebx + add %ecx, %ebx + + mov $16, %ecx + movdqa (%esi, %ecx), %xmm1 + movaps 16(%esi, %ecx), %xmm2 + movdqu %xmm1, (%eax, %ecx) + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + jmp L(Unalign16BothBigN) +# endif + +/*------------end of main part-------------------------------*/ + +/* Case1 */ + .p2align 4 +L(CopyFrom1To16Bytes): + add %ecx, %eax + add %ecx, %esi + bsf %edx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To16BytesTail): + add %ecx, %esi + bsf %edx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To32Bytes1): + add $16, %esi + add $16, %eax +L(CopyFrom1To16BytesTail1): + bsf %edx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To32Bytes): + bsf %edx, %edx + add %ecx, %esi + add $16, %edx + sub %ecx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To16BytesUnaligned_0): + bsf %edx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To16BytesUnaligned_16): + bsf %ecx, %edx + movdqu %xmm4, (%eax) + add $16, %esi + add $16, %eax + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To16BytesUnaligned_32): + bsf %edx, %edx + movdqu %xmm4, (%eax) + movdqu %xmm5, 16(%eax) + add $32, %esi + add $32, %eax + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + +# ifdef USE_AS_STRNCAT + + .p2align 4 +L(CopyFrom1To16BytesExit): + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + +/* Case2 */ + + .p2align 4 +L(CopyFrom1To16BytesCase2): + add $16, %ebx + add %ecx, %eax + add %ecx, %esi + bsf %edx, %edx + cmp %ebx, %edx + jb L(CopyFrom1To16BytesExit) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + + .p2align 4 +L(CopyFrom1To32BytesCase2): + sub %ecx, %ebx + add %ecx, %esi + bsf %edx, %edx + add $16, %edx + sub %ecx, %edx + cmp %ebx, %edx + jb L(CopyFrom1To16BytesExit) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + +L(CopyFrom1To16BytesTailCase2): + sub %ecx, %ebx + add %ecx, %esi + bsf %edx, %edx + cmp %ebx, %edx + jb L(CopyFrom1To16BytesExit) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + +L(CopyFrom1To16BytesTail1Case2): + bsf %edx, %edx + cmp %ebx, %edx + jb L(CopyFrom1To16BytesExit) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + +/* Case2 or Case3, Case3 */ + + .p2align 4 +L(CopyFrom1To16BytesCase2OrCase3): + test %edx, %edx + jnz L(CopyFrom1To16BytesCase2) +L(CopyFrom1To16BytesCase3): + add $16, %ebx + add %ecx, %eax + add %ecx, %esi + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + + .p2align 4 +L(CopyFrom1To32BytesCase2OrCase3): + test %edx, %edx + jnz L(CopyFrom1To32BytesCase2) + sub %ecx, %ebx + add %ecx, %esi + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + + .p2align 4 +L(CopyFrom1To16BytesTailCase2OrCase3): + test %edx, %edx + jnz L(CopyFrom1To16BytesTailCase2) + sub %ecx, %ebx + add %ecx, %esi + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + + .p2align 4 +L(CopyFrom1To32Bytes1Case2OrCase3): + add $16, %eax + add $16, %esi + sub $16, %ebx +L(CopyFrom1To16BytesTail1Case2OrCase3): + test %edx, %edx + jnz L(CopyFrom1To16BytesTail1Case2) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + +# endif + +# ifdef USE_AS_STRNCAT + .p2align 4 +L(StrncatExit0): + movb %bh, (%eax) + mov STR3(%esp), %eax + RETURN +# endif + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit1): + movb %bh, 1(%eax) +# endif +L(Exit1): +# ifdef USE_AS_STRNCAT + movb (%esi), %dh +# endif + movb %dh, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit2): + movb %bh, 2(%eax) +# endif +L(Exit2): + movw (%esi), %dx + movw %dx, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit3): + movb %bh, 3(%eax) +# endif +L(Exit3): + movw (%esi), %cx + movw %cx, (%eax) +# ifdef USE_AS_STRNCAT + movb 2(%esi), %dh +# endif + movb %dh, 2(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit4): + movb %bh, 4(%eax) +# endif +L(Exit4): + movl (%esi), %edx + movl %edx, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit5): + movb %bh, 5(%eax) +# endif +L(Exit5): + movl (%esi), %ecx +# ifdef USE_AS_STRNCAT + movb 4(%esi), %dh +# endif + movb %dh, 4(%eax)< |
