aarch64: Add half-width versions of AdvSIMD f32 libmvec routines

Compilers may emit calls to 'half-width' routines (two-lane single-precision variants). These have been added in the form of wrappers around the full-width versions: the two-lane input is duplicated into both halves of a full-width vector, and the low half of the result is returned. This will perform poorly when one lane triggers the special-case handler, as there will be a redundant call to the scalar version; however, this is expected to be rare at -Ofast.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
author: Joe Ramsay <Joe.Ramsay@arm.com> 2023-12-19 16:44:01 +0000
committer: Szabolcs Nagy <szabolcs.nagy@arm.com> 2023-12-20 08:41:25 +0000
commit: cc0d77ba944cd4ce46c5f0e6d426af3057962ca5 (patch)
tree: 840c09b10bcb0ad4f733e8cb4bce2acbd92e5945 /sysdeps/aarch64/fpu/v_math.h
parent: 3150cc0c9019bf9da841419f86dda8e7f26d676d (diff)
download: glibc-cc0d77ba944cd4ce46c5f0e6d426af3057962ca5.tar.xz glibc-cc0d77ba944cd4ce46c5f0e6d426af3057962ca5.zip
1 files changed, 15 insertions, 0 deletions
diff --git a/sysdeps/aarch64/fpu/v_math.h b/sysdeps/aarch64/fpu/v_math.h
index d286eb81b3..e8ac0e2332 100644
--- a/sysdeps/aarch64/fpu/v_math.h
+++ b/sysdeps/aarch64/fpu/v_math.h
@@ -29,6 +29,21 @@
 #define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
 #define V_NAME_D2(fun) _ZGVnN2vv_##fun
 
+#include "advsimd_f32_protos.h"
+
+#define HALF_WIDTH_ALIAS_F1(fun)                                              \
+  float32x2_t VPCS_ATTR _ZGVnN2v_##fun##f (float32x2_t x)                     \
+  {                                                                           \
+    return vget_low_f32 (_ZGVnN4v_##fun##f (vcombine_f32 (x, x)));            \
+  }
+
+#define HALF_WIDTH_ALIAS_F2(fun)                                              \
+  float32x2_t VPCS_ATTR _ZGVnN2vv_##fun##f (float32x2_t x, float32x2_t y)     \
+  {                                                                           \
+    return vget_low_f32 (                                                     \
+	_ZGVnN4vv_##fun##f (vcombine_f32 (x, x), vcombine_f32 (y, y)));       \
+  }
+
 /* Shorthand helpers for declaring constants.  */
 #define V2(X) { X, X }
 #define V4(X) { X, X, X, X }
author	Joe Ramsay <Joe.Ramsay@arm.com>	2023-12-19 16:44:01 +0000
committer	Szabolcs Nagy <szabolcs.nagy@arm.com>	2023-12-20 08:41:25 +0000
commit	cc0d77ba944cd4ce46c5f0e6d426af3057962ca5 (patch)
tree	840c09b10bcb0ad4f733e8cb4bce2acbd92e5945 /sysdeps/aarch64/fpu/v_math.h
parent	3150cc0c9019bf9da841419f86dda8e7f26d676d (diff)
download	glibc-cc0d77ba944cd4ce46c5f0e6d426af3057962ca5.tar.xz glibc-cc0d77ba944cd4ce46c5f0e6d426af3057962ca5.zip