/* Optimized memcpy implementation for PowerPC A2.
Copyright (C) 2010-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <rtld-global-offsets.h>
#define PREFETCH_AHEAD 4 /* number of SRC cache lines to prefetch ahead */
#define ZERO_AHEAD 2 /* number of DST cache lines to zero ahead */
.machine a2
EALIGN (memcpy, 5, 0)
CALL_MCOUNT
dcbt 0,r4 /* Prefetch ONE SRC cacheline */
cmplwi cr1,r5,16 /* is size < 16 ? */
mr r6,r3 /* Copy dest reg to r6; */
blt+ cr1,L(shortcopy)
/* Big copy (16 bytes or more)
Figure out how far to the nearest quadword boundary, or if we are
on one already.
r3 - return value (always)
r4 - current source addr
r5 - copy length
r6 - current dest addr
*/
neg r8,r3 /* LS 4 bits = # bytes to 16-byte dest bdry */
clrlwi r8,r8,32-4 /* keep only the low 4 bits: # bytes to 16-byte bdry */
sub r7,r4,r3 /* compute offset to src from dest */
cmplwi cr0,r8,0 /* Were we aligned on a 16 byte bdy? */
beq+ L(dst_aligned)
/* Destination is not aligned on quadword boundary. Get us to one.
r3 - return value (always)
r4 - current source addr
r5 - copy length
r6 - current dest addr
r7 - offset to src from dest
r8 - number of bytes to quadword boundary
*/
mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */
subf r5,r8,r5 /* adjust remaining len */
bf cr7*4+3,1f
lbzx r0,r7,r6 /* copy 1 byte addr */
stb r0,0(r6)
addi r6,r6,1
1:
bf cr7*4+2,2f
lhzx r0,r7,r6 /*