aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/generic
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2024-11-26 16:15:25 +0800
committer H.J. Lu <hjl.tools@gmail.com> 2024-12-04 04:28:15 +0800
commit 1c4cebb84b9e33aea9a90adfadb031d1f1eba927 (patch)
tree b8c59eb6388c942da858d8a1881e739c17493d73 /sysdeps/generic
parent f43eb2cf30fdff39bda1c2018246d4badabbc576 (diff)
download glibc-1c4cebb84b9e33aea9a90adfadb031d1f1eba927.tar.xz
glibc-1c4cebb84b9e33aea9a90adfadb031d1f1eba927.zip
malloc: Optimize small memory clearing for calloc
Add calloc-clear-memory.h to clear memory blocks of up to 36 bytes (72 bytes on 64-bit targets) for calloc. Use repeated stores with 1 branch, instead of up to 3 branches. On x86-64, this is faster than memset, since calling memset needs 1 indirect branch, 1 broadcast, and up to 4 branches. Signed-off-by: H.J. Lu <hjl.tools@gmail.com> Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Diffstat (limited to 'sysdeps/generic')
-rw-r--r-- sysdeps/generic/calloc-clear-memory.h 49
1 files changed, 49 insertions, 0 deletions
diff --git a/sysdeps/generic/calloc-clear-memory.h b/sysdeps/generic/calloc-clear-memory.h
new file mode 100644
index 0000000000..1f9d70d267
--- /dev/null
+++ b/sysdeps/generic/calloc-clear-memory.h
@@ -0,0 +1,49 @@
+/* Clear a block of memory for calloc. Generic version.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+static __always_inline void *
+clear_memory (INTERNAL_SIZE_T *d, unsigned long clearsize)
+{
+  /* Zero the CLEARSIZE-byte block at D and return D.  Up to 9
+     INTERNAL_SIZE_T words are cleared with inline stores, more go to
+     memset.  Contents have an odd number of words, minimally 3.  */
+  unsigned long nwords = clearsize / sizeof (INTERNAL_SIZE_T);
+
+  if (nwords > 9)
+    return memset (d, 0, clearsize);
+
+  /* NB: The VRP pass in GCC 14.2 will optimize this check out.  */
+  if (nwords < 3)
+    __builtin_unreachable ();
+
+  /* Words 0-2 and the last two; the branch fills the middle for 6-9.  */
+  d[0] = 0;
+  d[1] = 0;
+  d[2] = 0;
+  d[nwords - 2] = 0;
+  d[nwords - 1] = 0;
+  if (nwords > 5)
+    {
+      d[3] = 0;
+      d[4] = 0;
+      d[nwords - 4] = 0;
+      d[nwords - 3] = 0;
+    }
+
+  return d;
+}