diff options
| author | Paul E. Murphy <murphyp@linux.vnet.ibm.com> | 2020-04-07 16:20:55 -0500 |
|---|---|---|
| committer | Paul E. Murphy <murphyp@linux.ibm.com> | 2020-11-30 09:56:14 -0600 |
| commit | 33fc34521de970153344cfe1bfa9ce6da7a6efea (patch) | |
| tree | e327104762da05a3354a89a35122fe26bdb8c439 | |
| parent | cc5d5852c65eddf92368b0845e1374bd443316e7 (diff) | |
| download | glibc-33fc34521de970153344cfe1bfa9ce6da7a6efea.tar.xz glibc-33fc34521de970153344cfe1bfa9ce6da7a6efea.zip | |
powerpc64le: ifunc select *f128 routines in multiarch mode
Programmatically generate simple wrappers for interesting libm *f128
objects. Selected functions are transcendental functions or
those with trivial compiler builtins. This can result in a 2-3x
speedup (e.g. logf128 and expf128).
A second set of implementation files is generated which include
the first implementation encountered along the search path. This
usually works, except when a wrapper is overridden and the makefile
search order slightly diverges from include order. Likewise,
wrapper object files are created for each generated file. These
hold the ifunc selection routines which export the ABI.
Next, several shared headers are intercepted to control renaming of
symbols. Asm function redirects are used first, and sometimes macro
renames if the former is impractical.
Notably, if the requested machine supports hardware IEEE128 (i.e. POWER9
and newer), this ifunc machinery is disabled. Likewise, existing
ifunc support for float128 is consolidated into this (e.g. sqrtf128
and fmaf128).
Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
16 files changed, 817 insertions, 197 deletions
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile index 8747b02127..767805b510 100644 --- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile @@ -1,10 +1,202 @@ ifeq ($(subdir),math) -libm-sysdep_routines += s_fmaf128-ppc64 s_fmaf128-power9 \ - w_sqrtf128-power9 w_sqrtf128-ppc64le -CFLAGS-s_fmaf128-ppc64.c += $(type-float128-CFLAGS) $(no-gnu-attribute-CFLAGS) -CFLAGS-s_fmaf128-power9.c += $(type-float128-CFLAGS) -mcpu=power9 $(no-gnu-attribute-CFLAGS) +# +# Only enable ifunc _Float128 support if the baseline cpu support +# is older than power9. +ifneq (yes,$(libc-submachine-power9)) +do_f128_multiarch = yes +endif + +# +# This is an ugly, but contained, mechanism to provide hardware optimized +# _Float128 and ldouble == ieee128 optimized routines for P9 and beyond +# hardware. At a very high level, we rely on ASM renames, and rarely +# macro renames to build two sets of _Float128 ABI, one with _power8 (the +# baseline powerpc64le cpu) and _power9 (the first powerpc64le cpu to introduce +# hardware support for _Float128). +# +# At a high level, we compile 3 files for each object file. +# 1. The baseline soft-float128, unsuffixed objects $(object).$(sfx) +# The symbols contained in these files is suffixed by _power8. +# 2. The hard-float128, power9, suffixed objects $(object)-power9.$(sfx). +# The symbols contained in these files is suffixed by _power9. +# 3. The IFUNC wrapper object to export ABI, $(object)-ifunc.$(sfx) +# This glues the above together and implements the ABI. +# +# 2 & 3 are automatically generated by Makefile rule. Placing the exported +# ABI into a separate file allows reuse of existing aliasing macros +# with minimal hassle. 
+# +# +# If the float128 ABI is expanded, and a new ifunc wrappers are desired, +# the following lists how to map new symbols from the shared headers into +# their local overrides here: +# +# float128_private.h +# +# is used to rename the ldouble == ieee128 object files. This takes +# it a step further and redirects symbols to a local name. This supports +# nearly all files in sysdeps/ieee754/float128, but not all _Float128 +# objects. However, this is only meant to be used internally to support +# compilation of ldbl-128 into float128. +# +# math-type-macros-float128.h +# +# renames symbols which are generated via shared templated in math/. +# +# math_private.h +# +# provides internal declarations for common macros and functions which +# are called from within libm. Note, float128_private.h duplicates +# some of these declarations as these headers are generally not included +# in the same translation unit. +# +# The above is supported by several header files as described below: +# +# float128-ifunc.h +# +# provides support for generating the IFUNC objects in part 3 above. +# This header is only included with wrapper functions. +# +# float128-ifunc-macros.h +# +# disables all first-order float128 aliasing macros used in libm, +# and libm wrappers around libc-symbols.h. +# +# float128-ifunc-redirect-macros.h +# +# provides macros which implement the appending of the suffix to +# symbols what have been selected. +# +# float128-ifunc-redirects.h +# +# provides ASM redirects for symbols which are redirected in the +# private copy of math.h used by glibc, but not declared by math_private.h +# +# float128-ifunc-redirects-mp.h +# +# provides ASM redirects which are used by math_private.h (the -mp suffix) +# and the interposer float128_private.h discussed late. +# +# Notably, this enforces a slightly different mechanism for machine specific +# overrides. Optimizations for all targets must all be reachable from the same +# file. 
See the history to fmaf128 or sqrtf128 to understand how this looks +# in practice. +# +ifeq ($(do_f128_multiarch),yes) + +gen-libm-f128-ifunc-routines = \ + e_acosf128 e_acoshf128 e_asinf128 e_atan2f128 e_atanhf128 e_coshf128 \ + e_expf128 e_fmodf128 e_hypotf128 e_j0f128 e_j1f128 e_jnf128 \ + e_lgammaf128_r e_logf128 e_log10f128 e_powf128 e_remainderf128 \ + e_sinhf128 e_sqrtf128 e_gammaf128_r e_ilogbf128 k_tanf128 s_asinhf128 \ + s_atanf128 s_cbrtf128 s_ceilf128 s_cosf128 s_erff128 s_expm1f128 \ + s_fabsf128 s_floorf128 s_log1pf128 s_logbf128 \ + s_rintf128 s_scalblnf128 s_sinf128 s_tanf128 \ + s_tanhf128 s_truncf128 s_remquof128 e_log2f128 \ + s_roundf128 s_nearbyintf128 s_sincosf128 s_fmaf128 s_lrintf128 \ + s_llrintf128 s_lroundf128 s_llroundf128 e_exp10f128 \ + m_modff128 m_scalbnf128 m_frexpf128 m_ldexpf128 x2y2m1f128 \ + gamma_productf128 lgamma_negf128 lgamma_productf128 s_roundevenf128 \ + cargf128 conjf128 cimagf128 crealf128 cabsf128 e_scalbf128 s_cacosf128 \ + s_cacoshf128 s_ccosf128 s_ccoshf128 s_casinf128 s_csinf128 \ + s_casinhf128 k_casinhf128 s_csinhf128 k_casinhf128 s_csinhf128 \ + s_catanhf128 s_catanf128 s_ctanf128 s_ctanhf128 s_cexpf128 s_clogf128 \ + s_cprojf128 s_csqrtf128 s_cpowf128 s_clog10f128 s_fdimf128 \ + s_fmaxf128 s_fminf128 w_ilogbf128 w_llogbf128 \ + w_log1pf128 w_scalblnf128 w_acosf128 \ + w_acoshf128 w_asinf128 w_atan2f128 w_atanhf128 w_coshf128 w_exp10f128 \ + w_exp2f128 w_fmodf128 w_hypotf128 w_j0f128 w_j1f128 w_jnf128 \ + w_logf128 w_log10f128 w_log2f128 w_powf128 w_remainderf128 \ + w_scalbf128 w_sinhf128 w_sqrtf128 w_tgammaf128 w_lgammaf128 \ + w_lgammaf128_r w_expf128 e_exp2f128 \ + k_sinf128 k_cosf128 k_sincosf128 e_rem_pio2f128 + + +f128-march-routines-p9 = $(addsuffix -power9,$(gen-libm-f128-ifunc-routines)) +f128-march-routines-ifunc = $(addsuffix -ifunc,$(gen-libm-f128-ifunc-routines)) +f128-march-routines = $(f128-march-routines-p9) $(f128-march-routines-ifunc) +f128-march-cpus = power9 + +libm-routines += 
$(f128-march-routines) +generated += $(f128-march-routines) + +CFLAGS-float128-ifunc.c += $(type-float128-CFLAGS) $(no-gnu-attribute-CFLAGS) + +# Copy special CFLAGS for some functions +CFLAGS-m_modff128-power9.c += -fsignaling-nans + +# Generate ifunc wrapper files and target specific wrappers around +# each routine above. Note, m_%.c files are fixed up to include +# s_%.c files. This is an artifact of the makefile rules which allow +# some files to be compiled for libc and libm. +$(objpfx)gen-float128-ifuncs.stmp: Makefile + $(make-target-directory) + for gcall in $(gen-libm-f128-ifunc-routines); do \ + ifile="$${gcall}"; \ + if [ $${gcall##m_} != $${gcall} ]; then \ + ifile="s_$${gcall##m_}"; \ + fi; \ + for cpu in $(f128-march-cpus); do \ + file=$(objpfx)$${gcall}-$${cpu}.c; \ + { \ + echo "#include <$${ifile}.c>"; \ + } > $${file}; \ + done; \ + name="$${gcall##?_}"; \ + pfx="$${gcall%%_*}"; \ + R=""; \ + r=""; \ + if [ $${gcall##m_} != $${gcall} ]; then \ + pfx="s"; \ + fi; \ + if [ $${#pfx} != 1 ]; then \ + pfx=""; \ + else \ + pfx="_$${pfx}"; \ + fi; \ + if [ $${name%%_r} != $${name} ]; then \ + R="_R"; \ + r="_r"; \ + name="$${name%%_r}"; \ + fi; \ + name="$${name%%f128}"; \ + decl="DECL_ALIAS$${pfx}_$${name}$${r}"; \ + compat="GEN_COMPAT$${pfx}_$${name}$${r}"; \ + declc="DECL_ALIAS$${R}$${pfx}"; \ + { \ + echo "#include <float128-ifunc.h>"; \ + echo "#ifndef $${decl}"; \ + echo "# define $${decl}(f) $${declc} (f)"; \ + echo "#endif"; \ + echo "#ifndef $${compat}"; \ + echo "# define $${compat}(f)"; \ + echo "#endif"; \ + echo "$${decl} ($${name});"; \ + echo "$${compat} ($${name});"; \ + } > $(objpfx)$${gcall}-ifunc.c; \ + done; \ + echo > $(@) + +$(foreach f,$(f128-march-routines),$(objpfx)$(f).c): $(objpfx)gen-float128-ifuncs.stmp + +enable-f128-ifunc-CFLAGS = -D_F128_ENABLE_IFUNC $(no-gnu-attributes-CFLAGS) $(type-float128-CFLAGS) + +# Enable IFUNC on baseline (power8) implementations +include $(o-iterator) +define o-iterator-doit +$(foreach 
f,$(gen-libm-f128-ifunc-routines),$(objpfx)$(f)$(o)): sysdep-CFLAGS += -D_F128_ENABLE_IFUNC +endef +object-suffixes-left := $(all-object-suffixes) +include $(o-iterator) + +# Likewise, but for power9. +include $(o-iterator) +define o-iterator-doit +$(foreach f,$(f128-march-routines-p9),$(objpfx)$(f)$(o)): sysdep-CFLAGS += $$(enable-f128-ifunc-CFLAGS) -mcpu=power9 +endef +object-suffixes-left := $(all-object-suffixes) +include $(o-iterator) -CFLAGS-w_sqrtf128-ppc64le.c += $(type-float128-CFLAGS) $(no-gnu-attribute-CFLAGS) -CFLAGS-w_sqrtf128-power9.c += $(type-float128-CFLAGS) -mcpu=power9 $(no-gnu-attribute-CFLAGS) +endif # do_f128_multiarch endif diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-macros.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-macros.h new file mode 100644 index 0000000000..15e7c96fd3 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-macros.h @@ -0,0 +1,56 @@ +/* _Float128 aliasing macro support for ifunc generation on PPC. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _FLOAT128_IFUNC_MACROS_PPC64LE +#define _FLOAT128_IFUNC_MACROS_PPC64LE 1 + +/* Bring in the various alias-providing headers, and disable + _Float128 related macros. 
This prevents exporting any ABI + from _Float128 implementation objects. */ +#include <libm-alias-float128.h> +#include <libm-alias-finite.h> + +#undef libm_alias_float128_r +#undef libm_alias_finite +#undef libm_alias_exclusive_ldouble +#undef libm_alias_float128_other_r_ldbl +#undef declare_mgen_finite_alias +#undef declare_mgen_alias +#undef declare_mgen_alias_r + +#define libm_alias_finite(from, to) +#define libm_alias_float128_r(from, to, r) +#define libm_alias_exclusive_ldouble(from, to) +#define libm_alias_float128_other_r_ldbl(from, to, r) +#define declare_mgen_finite_alias(from, to) +#define declare_mgen_alias(from, to) +#define declare_mgen_alias_r(from, to) + +/* Likewise, disable hidden symbol support. This is not needed + for the implementation objects as the redirects already give + us this support. This also means any non-_Float128 headers + which provide hidden_def's should be included prior to this + header (e.g fenv.h). */ +#undef libm_hidden_def +#define libm_hidden_def(func) +#undef libm_hidden_proto +#define libm_hidden_proto(f) + +#include <float128-ifunc-redirect-macros.h> + +#endif /* _FLOAT128_IFUNC_MACROS_PPC64LE */ diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirect-macros.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirect-macros.h new file mode 100644 index 0000000000..baf5633aae --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirect-macros.h @@ -0,0 +1,53 @@ +/* _Float128 aliasing macro support for ifunc generation on PPC. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _FLOAT128_IFUNC_REDIRECT_MACROS_PPC64LE +#define _FLOAT128_IFUNC_REDIRECT_MACROS_PPC64LE 1 + +/* Define the redirection macros used throughout most of the IFUNC headers. + The variant is inferred via compiler options. + + F128_REDIR_PFX_R(function, destination_prefix, reentrant_suffix) + Redirect function, optionally suffixed by reentrant_suffix, to a function + named destination_prefix ## function ## variant ## reentrant_suffix. + + F128_SFX_APPEND(sym) + Append the the multiarch variant specific suffix to the sym. sym is not + expanded. This is sym ## variant. + + F128_REDIR_R(func, reentrant_suffix) + Redirect func to a function named function ## variant ## reentrant_suffix + + F128_REDIR(function) + Convience wrapper for F128_REDIR_R where function does not require + a suffix argument. 
+ +*/ +#ifndef _ARCH_PWR9 +#define F128_REDIR_PFX_R(func, pfx, r) \ + extern __typeof(func ## r) func ## r __asm( #pfx #func "_power8" #r ); +#define F128_SFX_APPEND(x) x ## _power8 +#else +#define F128_REDIR_PFX_R(func, pfx, r) \ + extern __typeof(func ## r) func ## r __asm( #pfx #func "_power9" #r ); +#define F128_SFX_APPEND(x) x ## _power9 +#endif +#define F128_REDIR_R(func, r) F128_REDIR_PFX_R (func, , r) +#define F128_REDIR(func) F128_REDIR_R (func, ) + +#endif /*_FLOAT128_IFUNC_REDIRECT_MACROS_PPC64LE */ diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects-mp.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects-mp.h new file mode 100644 index 0000000000..3c8b6f1291 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects-mp.h @@ -0,0 +1,64 @@ +/* _Float128 multiarch redirects shared with math_private.h + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#ifndef _FLOAT128_IFUNC_REDIRECTS_MP_H +#define _FLOAT128_IFUNC_REDIRECTS_MP_H 1 + +#include <float128-ifunc-redirect-macros.h> + +F128_REDIR (__ieee754_acosf128) +F128_REDIR (__ieee754_acoshf128) +F128_REDIR (__ieee754_asinf128) +F128_REDIR (__ieee754_atan2f128) +F128_REDIR (__ieee754_atanhf128) +F128_REDIR (__ieee754_coshf128) +F128_REDIR (__ieee754_expf128) +F128_REDIR (__ieee754_exp10f128) +F128_REDIR (__ieee754_exp2f128) +F128_REDIR (__ieee754_fmodf128) +F128_REDIR (__ieee754_gammaf128) +F128_REDIR_R (__ieee754_gammaf128, _r) +F128_REDIR (__ieee754_hypotf128) +F128_REDIR (__ieee754_j0f128) +F128_REDIR (__ieee754_j1f128) +F128_REDIR (__ieee754_jnf128) +F128_REDIR (__ieee754_lgammaf128) +F128_REDIR_R (__ieee754_lgammaf128, _r) +F128_REDIR (__ieee754_logf128) +F128_REDIR (__ieee754_log10f128) +F128_REDIR (__ieee754_log2f128) +F128_REDIR (__ieee754_powf128) +F128_REDIR (__ieee754_remainderf128) +F128_REDIR (__ieee754_sinhf128) +F128_REDIR (__ieee754_sqrtf128) +F128_REDIR (__ieee754_y0f128) +F128_REDIR (__ieee754_y1f128) +F128_REDIR (__ieee754_ynf128) +F128_REDIR (__ieee754_scalbf128) +F128_REDIR (__ieee754_ilogbf128) +F128_REDIR (__ieee754_rem_pio2f128) +F128_REDIR (__kernel_sinf128) +F128_REDIR (__kernel_cosf128) +F128_REDIR (__kernel_tanf128) +F128_REDIR (__kernel_sincosf128) +F128_REDIR (__kernel_rem_pio2f128) +F128_REDIR (__x2y2m1f128) +F128_REDIR (__gamma_productf128) +F128_REDIR (__lgamma_negf128) + +#endif /*_FLOAT128_IFUNC_REDIRECTS_MP_H */ diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h new file mode 100644 index 0000000000..a0f035965c --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h @@ -0,0 +1,43 @@ +/* _Float128 redirects for ppc64le multiarch env. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _FLOAT128_IFUNC_REDIRECTS +#define _FLOAT128_IFUNC_REDIRECTS 1 + +#include <float128-ifunc-macros.h> + +F128_REDIR_PFX_R (sqrtf128, __,); +F128_REDIR_PFX_R (rintf128, __,); +F128_REDIR_PFX_R (ceilf128, __,); +F128_REDIR_PFX_R (floorf128, __,); +F128_REDIR_PFX_R (truncf128, __,); +F128_REDIR_PFX_R (roundf128, __,); +F128_REDIR_PFX_R (fabsf128, __,); + +extern __typeof (ldexpf128) F128_SFX_APPEND (__ldexpf128); + +#define __ldexpf128 F128_SFX_APPEND (__ldexpf128) + +/* libm_hidden_proto is disabled by the time we reach here. + Ensure some internally called functions are still called + without going through the PLT. Note, this code is only + included when building libm. */ +hidden_proto (__fpclassifyf128) +hidden_proto (__issignalingf128) + +#endif /* _FLOAT128_IFUNC_REDIRECTS */ diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc.h new file mode 100644 index 0000000000..73099e0341 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc.h @@ -0,0 +1,166 @@ +/* _Float128 ifunc ABI/ifunc generation macros. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + |
