/* memcmp with SSE2.
Copyright (C) 2017-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <isa-level.h>
/* Build this whenever MINIMUM_X86_ISA_LEVEL <= 2: there is no separate
V2 implementation, so ISA V2 builds need this one as well. */
#if ISA_SHOULD_BUILD (2)
#include <sysdep.h>
/* Build-time configuration.
   MEMCMP, USE_AS_WMEMCMP and USE_AS_MEMCMPEQ are tested below but never
   defined in this part of the file, so they are expected to be set (or
   left unset) before this point.  */

/* Name passed to ENTRY below.  Defaults to __memcmp_sse2 when no name
   has been chosen already; after this block MEMCMP is always
   defined.  */
# ifndef MEMCMP
# define MEMCMP __memcmp_sse2
# endif
/* Element width.  USE_AS_WMEMCMP compares 4-byte elements with
   pcmpeqd; otherwise single bytes are compared with pcmpeqb.
   CHAR_SIZE is the element size in bytes.  */
# ifdef USE_AS_WMEMCMP
# define PCMPEQ pcmpeqd
# define CHAR_SIZE 4
# define SIZE_OFFSET (0)
# else
# define PCMPEQ pcmpeqb
# define CHAR_SIZE 1
# endif
/* CHECK_CMP (x, y) expands to `subl x, y' for USE_AS_MEMCMPEQ and to
   `cmpl x, y' otherwise.
   SIZE_OFFSET is 0 for the USE_AS_WMEMCMP and USE_AS_MEMCMPEQ builds
   and CHAR_PER_VEC * 2 for the remaining build.  CHAR_PER_VEC is only
   defined further down; that is fine because a macro body is expanded
   where the macro is used, not where it is defined.  */
# ifdef USE_AS_MEMCMPEQ
# define SIZE_OFFSET (0)
# define CHECK_CMP(x, y) subl x, y
# else
# ifndef SIZE_OFFSET
# define SIZE_OFFSET (CHAR_PER_VEC * 2)
# endif
# define CHECK_CMP(x, y) cmpl x, y
# endif
/* VEC_SIZE is the vector width in bytes; CHAR_PER_VEC is the number of
   CHAR_SIZE-byte elements that fit in one vector.  */
# define VEC_SIZE 16
# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
.text
ENTRY(MEMCMP)
# ifdef __ILP32__
/* Clear the upper 32 bits. */
movl %edx, %edx
# endif
# ifdef USE_AS_WMEMCMP
/* Use 0xffff to test for mismatches on pmovmskb bitmask. Store
in ecx for code size. This is preferable to using `incw` as
it avoids partial register stalls on older hardware (pre
SnB). */
movl $0xffff, %ecx
# endif
cmpq $CHAR_PER_VEC, %rdx
ja L(more_1x_vec)
# ifdef USE_AS_WMEMCMP
/* Saves a byte of code, keeping the fall-through path n = [2, 4]
in the initial cache line. */
decl %edx
jle L(cmp_0_1)
movq (%rsi), %xmm0
movq (%rdi), %xmm1
PCMPEQ %xmm0, %xmm1
pmovmskb %xmm1, %eax
subl %ecx, %eax
jnz L(ret_nonzero_vec_start_0)
movq -4(%rsi, %rdx, CHAR_SIZE), %xmm0
movq -4(%rdi, %rdx, CHAR_SIZE), %xmm1
PCMPEQ %xmm0, %xmm1
pmovmskb %xmm1, %eax
subl %ecx, %eax
jnz L(ret_nonzero_vec_end_0_adj)
# else
cmpl $8, %edx
ja L(cmp_9_16)
cmpl $4, %edx
jb L(cmp_0_3)
# ifdef USE_AS_MEMCMPEQ
movl (%rsi), %eax
subl (%rdi), %eax
movl -4(%rsi, %rdx), %esi
subl -4(%rdi, %rdx), %esi
orl %esi, %eax
ret
# else
/* Combine comparisons for lo and hi 4-byte comparisons. */
movl -4(%rsi, %rdx), %ecx
movl -4(%rdi, %rdx), <