diff options
| author | Ondřej Bílka <neleai@seznam.cz> | 2013-10-08 15:46:48 +0200 |
|---|---|---|
| committer | Ondřej Bílka <neleai@seznam.cz> | 2013-10-08 15:46:48 +0200 |
| commit | e7044ea76bd95f8adc0eab0b2bdcab7f51055b48 (patch) | |
| tree | 262f397226e64df368b266a681622e7e25c30e5a | |
| parent | 41500766f71fd072b6b6a9e4603fb7f85bddcfe2 (diff) | |
| download | glibc-e7044ea76bd95f8adc0eab0b2bdcab7f51055b48.tar.xz glibc-e7044ea76bd95f8adc0eab0b2bdcab7f51055b48.zip | |
Use p2align instead of ALIGN
| -rw-r--r-- | ChangeLog | 13 | ||||
| -rw-r--r-- | sysdeps/x86_64/memset.S | 10 | ||||
| -rw-r--r-- | sysdeps/x86_64/multiarch/memcmp-sse4.S | 84 | ||||
| -rw-r--r-- | sysdeps/x86_64/multiarch/memcmp-ssse3.S | 126 | ||||
| -rw-r--r-- | sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S | 10 | ||||
| -rw-r--r-- | sysdeps/x86_64/multiarch/memcpy-ssse3-back.S | 86 | ||||
| -rw-r--r-- | sysdeps/x86_64/multiarch/memcpy-ssse3.S | 254 | ||||
| -rw-r--r-- | sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S | 9 | ||||
| -rw-r--r-- | sysdeps/x86_64/strchr.S | 15 | ||||
| -rw-r--r-- | sysdeps/x86_64/strrchr.S | 17 |
10 files changed, 301 insertions, 323 deletions
@@ -1,3 +1,16 @@ +2013-10-08 Ondřej Bílka <neleai@seznam.cz> + + * sysdeps/x86_64/memset.S (ALIGN): Macro removed. + Use .p2align directive instead, throughout. + * sysdeps/x86_64/multiarch/memcmp-sse4.S: Likewise. + * sysdeps/x86_64/multiarch/memcmp-ssse3.S: Likewise. + * sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: Likewise. + * sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise. + * sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise. + * sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S: Likewise. + * sysdeps/x86_64/strchr.S: Likewise. + * sysdeps/x86_64/strrchr.S: Likewise. + 2013-10-08 Siddhesh Poyarekar <siddhesh@redhat.com> * sysdeps/ieee754/dbl-64/e_pow.c: Fix code formatting. diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S index 6c69f4b442..9b1de89d98 100644 --- a/sysdeps/x86_64/memset.S +++ b/sysdeps/x86_64/memset.S @@ -19,10 +19,6 @@ #include <sysdep.h> -#ifndef ALIGN -# define ALIGN(n) .p2align n -#endif - .text #if !defined NOT_IN_libc ENTRY(__bzero) @@ -71,12 +67,12 @@ L(entry_from_bzero): L(return): rep ret - ALIGN (4) + .p2align 4 L(between_32_64_bytes): movdqu %xmm8, 16(%rdi) movdqu %xmm8, -32(%rdi,%rdx) ret - ALIGN (4) + .p2align 4 L(loop_start): leaq 64(%rdi), %rcx movdqu %xmm8, (%rdi) @@ -92,7 +88,7 @@ L(loop_start): andq $-64, %rdx cmpq %rdx, %rcx je L(return) - ALIGN (4) + .p2align 4 L(loop): movdqa %xmm8, (%rcx) movdqa %xmm8, 16(%rcx) diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S index 1ed4200f4c..d7b147e5ce 100644 --- a/sysdeps/x86_64/multiarch/memcmp-sse4.S +++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S @@ -25,10 +25,6 @@ # define MEMCMP __memcmp_sse4_1 # endif -# ifndef ALIGN -# define ALIGN(n) .p2align n -# endif - # define JMPTBL(I, B) (I - B) # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ @@ -60,7 +56,7 @@ ENTRY (MEMCMP) BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(firstbyte): movzbl (%rdi), %eax movzbl 
(%rsi), %ecx @@ -68,7 +64,7 @@ L(firstbyte): ret # endif - ALIGN (4) + .p2align 4 L(79bytesormore): movdqu (%rsi), %xmm1 movdqu (%rdi), %xmm2 @@ -316,7 +312,7 @@ L(less32bytesin256): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(512bytesormore): # ifdef DATA_CACHE_SIZE_HALF mov $DATA_CACHE_SIZE_HALF, %R8_LP @@ -329,7 +325,7 @@ L(512bytesormore): cmp %r8, %rdx ja L(L2_L3_cache_unaglined) sub $64, %rdx - ALIGN (4) + .p2align 4 L(64bytesormore_loop): movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 @@ -361,7 +357,7 @@ L(64bytesormore_loop): L(L2_L3_cache_unaglined): sub $64, %rdx - ALIGN (4) + .p2align 4 L(L2_L3_unaligned_128bytes_loop): prefetchnta 0x1c0(%rdi) prefetchnta 0x1c0(%rsi) @@ -396,7 +392,7 @@ L(L2_L3_unaligned_128bytes_loop): /* * This case is for machines which are sensitive for unaligned instructions. */ - ALIGN (4) + .p2align 4 L(2aligned): cmp $128, %rdx ja L(128bytesormorein2aligned) @@ -444,7 +440,7 @@ L(less32bytesin64in2alinged): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(128bytesormorein2aligned): cmp $512, %rdx ja L(512bytesormorein2aligned) @@ -519,7 +515,7 @@ L(less32bytesin128in2aligned): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(256bytesormorein2aligned): sub $256, %rdx @@ -632,7 +628,7 @@ L(less32bytesin256in2alinged): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(512bytesormorein2aligned): # ifdef DATA_CACHE_SIZE_HALF mov $DATA_CACHE_SIZE_HALF, %R8_LP @@ -646,7 +642,7 @@ L(512bytesormorein2aligned): ja L(L2_L3_cache_aglined) sub $64, %rdx - ALIGN (4) + .p2align 4 L(64bytesormore_loopin2aligned): movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 @@ -678,7 +674,7 @@ L(64bytesormore_loopin2aligned): L(L2_L3_cache_aglined): sub $64, %rdx - ALIGN (4) + .p2align 4 L(L2_L3_aligned_128bytes_loop): prefetchnta 0x1c0(%rdi) prefetchnta 0x1c0(%rsi) @@ -711,7 +707,7 @@ 
L(L2_L3_aligned_128bytes_loop): BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(64bytesormore_loop_end): add $16, %rdi add $16, %rsi @@ -806,7 +802,7 @@ L(8bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(12bytes): mov -12(%rdi), %rax mov -12(%rsi), %rcx @@ -827,7 +823,7 @@ L(0bytes): # ifndef USE_AS_WMEMCMP /* unreal case for wmemcmp */ - ALIGN (4) + .p2align 4 L(65bytes): movdqu -65(%rdi), %xmm1 movdqu -65(%rsi), %xmm2 @@ -864,7 +860,7 @@ L(9bytes): sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(13bytes): mov -13(%rdi), %rax mov -13(%rsi), %rcx @@ -877,7 +873,7 @@ L(13bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(5bytes): mov -5(%rdi), %eax mov -5(%rsi), %ecx @@ -888,7 +884,7 @@ L(5bytes): sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(66bytes): movdqu -66(%rdi), %xmm1 movdqu -66(%rsi), %xmm2 @@ -929,7 +925,7 @@ L(10bytes): sub %ecx, %eax ret - ALIGN (4) + .p2align 4 L(14bytes): mov -14(%rdi), %rax mov -14(%rsi), %rcx @@ -942,7 +938,7 @@ L(14bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(6bytes): mov -6(%rdi), %eax mov -6(%rsi), %ecx @@ -958,7 +954,7 @@ L(2bytes): sub %ecx, %eax ret - ALIGN (4) + .p2align 4 L(67bytes): movdqu -67(%rdi), %xmm2 movdqu -67(%rsi), %xmm1 @@ -997,7 +993,7 @@ L(11bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(15bytes): mov -15(%rdi), %rax mov -15(%rsi), %rcx @@ -1010,7 +1006,7 @@ L(15bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(7bytes): mov -7(%rdi), %eax mov -7(%rsi), %ecx @@ -1023,7 +1019,7 @@ L(7bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(3bytes): movzwl -3(%rdi), %eax movzwl -3(%rsi), %ecx @@ -1036,7 +1032,7 @@ L(1bytes): ret # endif - ALIGN (4) + .p2align 4 L(68bytes): movdqu -68(%rdi), %xmm2 movdqu -68(%rsi), %xmm1 @@ -1079,7 +1075,7 @@ L(20bytes): # ifndef USE_AS_WMEMCMP /* unreal cases for wmemcmp */ - ALIGN (4) + .p2align 4 L(69bytes): movdqu -69(%rsi), %xmm1 movdqu -69(%rdi), %xmm2 @@ -1115,7 +1111,7 @@ L(21bytes): xor %eax, %eax ret - ALIGN (4) + 
.p2align 4 L(70bytes): movdqu -70(%rsi), %xmm1 movdqu -70(%rdi), %xmm2 @@ -1151,7 +1147,7 @@ L(22bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(71bytes): movdqu -71(%rsi), %xmm1 movdqu -71(%rdi), %xmm2 @@ -1188,7 +1184,7 @@ L(23bytes): ret # endif - ALIGN (4) + .p2align 4 L(72bytes): movdqu -72(%rsi), %xmm1 movdqu -72(%rdi), %xmm2 @@ -1227,7 +1223,7 @@ L(24bytes): # ifndef USE_AS_WMEMCMP /* unreal cases for wmemcmp */ - ALIGN (4) + .p2align 4 L(73bytes): movdqu -73(%rsi), %xmm1 movdqu -73(%rdi), %xmm2 @@ -1265,7 +1261,7 @@ L(25bytes): sub %ecx, %eax ret - ALIGN (4) + .p2align 4 L(74bytes): movdqu -74(%rsi), %xmm1 movdqu -74(%rdi), %xmm2 @@ -1302,7 +1298,7 @@ L(26bytes): movzwl -2(%rsi), %ecx jmp L(diffin2bytes) - ALIGN (4) + .p2align 4 L(75bytes): movdqu -75(%rsi), %xmm1 movdqu -75(%rdi), %xmm2 @@ -1342,7 +1338,7 @@ L(27bytes): xor %eax, %eax ret # endif - ALIGN (4) + .p2align 4 L(76bytes): movdqu -76(%rsi), %xmm1 movdqu -76(%rdi), %xmm2 @@ -1388,7 +1384,7 @@ L(28bytes): # ifndef USE_AS_WMEMCMP /* unreal cases for wmemcmp */ - ALIGN (4) + .p2align 4 L(77bytes): movdqu -77(%rsi), %xmm1 movdqu -77(%rdi), %xmm2 @@ -1430,7 +1426,7 @@ L(29bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(78bytes): movdqu -78(%rsi), %xmm1 movdqu -78(%rdi), %xmm2 @@ -1470,7 +1466,7 @@ L(30bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(79bytes): movdqu -79(%rsi), %xmm1 movdqu -79(%rdi), %xmm2 @@ -1510,7 +1506,7 @@ L(31bytes): xor %eax, %eax ret # endif - ALIGN (4) + .p2align 4 L(64bytes): movdqu -64(%rdi), %xmm2 movdqu -64(%rsi), %xmm1 @@ -1548,7 +1544,7 @@ L(32bytes): /* * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block. 
*/ - ALIGN (3) + .p2align 3 L(less16bytes): movsbq %dl, %rdx mov (%rsi, %rdx), %rcx @@ -1585,7 +1581,7 @@ L(diffin2bytes): sub %ecx, %eax ret - ALIGN (4) + .p2align 4 L(end): and $0xff, %eax and $0xff, %ecx @@ -1599,7 +1595,7 @@ L(end): neg %eax ret - ALIGN (4) + .p2align 4 L(nequal_bigger): ret @@ -1611,7 +1607,7 @@ L(unreal_case): END (MEMCMP) .section .rodata.sse4.1,"a",@progbits - ALIGN (3) + .p2align 3 # ifndef USE_AS_WMEMCMP L(table_64bytes): .int JMPTBL (L(0bytes), L(table_64bytes)) diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S index e319df926e..e04f918dff 100644 --- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S +++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S @@ -25,10 +25,6 @@ # define MEMCMP __memcmp_ssse3 # endif -# ifndef ALIGN -# define ALIGN(n) .p2align n -# endif - /* Warning! wmemcmp has to use SIGNED comparison for elements. memcmp has to use UNSIGNED comparison for elemnts. @@ -50,7 +46,7 @@ ENTRY (MEMCMP) add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 /* ECX >= 32. 
*/ L(48bytesormore): movdqu (%rdi), %xmm3 @@ -90,7 +86,7 @@ L(48bytesormore): je L(shr_6) jmp L(shr_7) - ALIGN (2) + .p2align 2 L(next_unaligned_table): cmp $8, %edx je L(shr_8) @@ -117,7 +113,7 @@ L(next_unaligned_table): jmp L(shr_12) # endif - ALIGN (4) + .p2align 4 L(shr_0): cmp $80, %rcx lea -48(%rcx), %rcx @@ -137,7 +133,7 @@ L(shr_0): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_0_gobble): movdqa (%rsi), %xmm0 xor %eax, %eax @@ -180,7 +176,7 @@ L(next): # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(shr_1): cmp $80, %rcx lea -48(%rcx), %rcx @@ -207,7 +203,7 @@ L(shr_1): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_1_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -258,7 +254,7 @@ L(shr_1_gobble_next): jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_2): cmp $80, %rcx lea -48(%rcx), %rcx @@ -285,7 +281,7 @@ L(shr_2): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_2_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -335,7 +331,7 @@ L(shr_2_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_3): cmp $80, %rcx lea -48(%rcx), %rcx @@ -362,7 +358,7 @@ L(shr_3): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_3_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -414,7 +410,7 @@ L(shr_3_gobble_next): # endif - ALIGN (4) + .p2align 4 L(shr_4): cmp $80, %rcx lea -48(%rcx), %rcx @@ -441,7 +437,7 @@ L(shr_4): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_4_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -493,7 +489,7 @@ L(shr_4_gobble_next): # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(shr_5): cmp $80, %rcx lea -48(%rcx), %rcx @@ -520,7 +516,7 @@ L(shr_5): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_5_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -570,7 +566,7 @@ L(shr_5_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_6): cmp $80, %rcx lea -48(%rcx), %rcx @@ -597,7 +593,7 @@ L(shr_6): 
add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_6_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -647,7 +643,7 @@ L(shr_6_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_7): cmp $80, %rcx lea -48(%rcx), %rcx @@ -674,7 +670,7 @@ L(shr_7): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_7_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -726,7 +722,7 @@ L(shr_7_gobble_next): # endif - ALIGN (4) + .p2align 4 L(shr_8): cmp $80, %rcx lea -48(%rcx), %rcx @@ -753,7 +749,7 @@ L(shr_8): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_8_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -805,7 +801,7 @@ L(shr_8_gobble_next): # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(shr_9): cmp $80, %rcx lea -48(%rcx), %rcx @@ -832,7 +828,7 @@ L(shr_9): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_9_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -882,7 +878,7 @@ L(shr_9_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_10): cmp $80, %rcx lea -48(%rcx), %rcx @@ -909,7 +905,7 @@ L(shr_10): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_10_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -959,7 +955,7 @@ L(shr_10_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_11): cmp $80, %rcx lea -48(%rcx), %rcx @@ -986,7 +982,7 @@ L(shr_11): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_11_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1038,7 +1034,7 @@ L(shr_11_gobble_next): # endif - ALIGN (4) + .p2align 4 L(shr_12): cmp $80, %rcx lea -48(%rcx), %rcx @@ -1065,7 +1061,7 @@ L(shr_12): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_12_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1117,7 +1113,7 @@ L(shr_12_gobble_next): # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(shr_13): cmp $80, %rcx lea -48(%rcx), %rcx @@ -1144,7 +1140,7 @@ L(shr_13): add %rcx, %rdi jmp L(less48bytes) 
- ALIGN (4) + .p2align 4 L(shr_13_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1194,7 +1190,7 @@ L(shr_13_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_14): cmp $80, %rcx lea -48(%rcx), %rcx @@ -1221,7 +1217,7 @@ L(shr_14): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_14_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1271,7 +1267,7 @@ L(shr_14_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_15): cmp $80, %rcx lea -48(%rcx), %rcx @@ -1298,7 +1294,7 @@ L(shr_15): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_15_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1348,7 +1344,7 @@ L(shr_15_gobble_next): add %rcx, %rdi jmp L(less48bytes) # endif - ALIGN (4) + .p2align 4 L(exit): pmovmskb %xmm1, %r8d sub $0xffff, %r8d @@ -1389,56 +1385,56 @@ L(less16bytes): sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte16): movzbl -16(%rdi), %eax movzbl -16(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte17): movzbl -15(%rdi), %eax movzbl -15(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte18): movzbl -14(%rdi), %eax movzbl -14(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte19): movzbl -13(%rdi), %eax movzbl -13(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte20): movzbl -12(%rdi), %eax movzbl -12(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte21): movzbl -11(%rdi), %eax movzbl -11(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte22): movzbl -10(%rdi), %eax movzbl -10(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(next_24_bytes): lea 8(%rdi), %rdi lea 8(%rsi), %rsi @@ -1479,14 +1475,14 @@ L(next_24_bytes): jne L(find_diff) ret - ALIGN (4) + .p2align 4 L(second_double_word): mov -12(%rdi), %eax cmp -12(%rsi), %eax jne L(find_diff) ret - ALIGN (4) + .p2align 4 L(next_two_double_words): and $15, %dh jz L(fourth_double_word) @@ -1495,7 +1491,7 @@ L(next_two_double_words): jne 
L(find_diff) ret - ALIGN (4) + .p2align 4 L(fourth_double_word): mov -4(%rdi), %eax cmp -4(%rsi), %eax @@ -1503,7 +1499,7 @@ L(fourth_double_word): ret # endif - ALIGN (4) + .p2align 4 L(less48bytes): cmp $8, %ecx |
