diff options
| author | H.J. Lu <hjl.tools@gmail.com> | 2024-02-26 06:37:03 -0800 |
|---|---|---|
| committer | H.J. Lu <hjl.tools@gmail.com> | 2024-02-28 09:02:56 -0800 |
| commit | 0aac205a814a8511e98d02b91a8dc908f1c53cde (patch) | |
| tree | e9ad8dfa06abd2f6d9b521bb95e9b7cfaa0d7de7 | |
| parent | e6350be7e9cae8f71c96c1f06eab61b9acb227c8 (diff) | |
| download | glibc-0aac205a814a8511e98d02b91a8dc908f1c53cde.tar.xz glibc-0aac205a814a8511e98d02b91a8dc908f1c53cde.zip | |
x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers
Compiler generates the following instruction sequence for GNU2 dynamic
TLS access:
leaq tls_var@TLSDESC(%rip), %rax
call *tls_var@TLSCALL(%rax)
or
leal tls_var@TLSDESC(%ebx), %eax
call *tls_var@TLSCALL(%eax)
CALL instruction is transparent to compiler which assumes all registers,
except for EFLAGS and RAX/EAX, are unchanged after CALL. When
_dl_tlsdesc_dynamic is called, it calls __tls_get_addr on the slow
path. __tls_get_addr is a normal function which doesn't preserve any
caller-saved registers. _dl_tlsdesc_dynamic saved and restored integer
caller-saved registers, but didn't preserve any other caller-saved
registers. Add _dl_tlsdesc_dynamic IFUNC functions for FNSAVE, FXSAVE,
XSAVE and XSAVEC to save and restore all caller-saved registers. This
fixes BZ #31372.
Add GLRO(dl_x86_64_runtime_resolve) with GLRO(dl_x86_tlsdesc_dynamic)
to optimize elf_machine_runtime_setup.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
| -rw-r--r-- | elf/Makefile | 18 | ||||
| -rw-r--r-- | elf/tst-gnu2-tls2.c | 122 | ||||
| -rw-r--r-- | elf/tst-gnu2-tls2.h | 36 | ||||
| -rw-r--r-- | elf/tst-gnu2-tls2mod0.c | 31 | ||||
| -rw-r--r-- | elf/tst-gnu2-tls2mod1.c | 31 | ||||
| -rw-r--r-- | elf/tst-gnu2-tls2mod2.c | 31 | ||||
| -rw-r--r-- | sysdeps/i386/dl-machine.h | 2 | ||||
| -rw-r--r-- | sysdeps/i386/dl-tlsdesc-dynamic.h | 190 | ||||
| -rw-r--r-- | sysdeps/i386/dl-tlsdesc.S | 115 | ||||
| -rw-r--r-- | sysdeps/x86/Makefile | 7 | ||||
| -rw-r--r-- | sysdeps/x86/cpu-features.c | 56 | ||||
| -rw-r--r-- | sysdeps/x86/dl-procinfo.c | 16 | ||||
| -rw-r--r-- | sysdeps/x86/features-offsets.sym (renamed from sysdeps/x86_64/features-offsets.sym) | 2 | ||||
| -rw-r--r-- | sysdeps/x86/sysdep.h | 6 | ||||
| -rw-r--r-- | sysdeps/x86/tst-gnu2-tls2.c | 20 | ||||
| -rw-r--r-- | sysdeps/x86_64/Makefile | 2 | ||||
| -rw-r--r-- | sysdeps/x86_64/dl-machine.h | 19 | ||||
| -rw-r--r-- | sysdeps/x86_64/dl-procinfo.c | 16 | ||||
| -rw-r--r-- | sysdeps/x86_64/dl-tlsdesc-dynamic.h | 166 | ||||
| -rw-r--r-- | sysdeps/x86_64/dl-tlsdesc.S | 108 | ||||
| -rw-r--r-- | sysdeps/x86_64/dl-trampoline-save.h | 34 | ||||
| -rw-r--r-- | sysdeps/x86_64/dl-trampoline-state.h | 51 | ||||
| -rw-r--r-- | sysdeps/x86_64/dl-trampoline.S | 20 | ||||
| -rw-r--r-- | sysdeps/x86_64/dl-trampoline.h | 34 |
24 files changed, 920 insertions, 213 deletions
diff --git a/elf/Makefile b/elf/Makefile index 36c04baf02..02dc476e27 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -424,6 +424,7 @@ tests += \ tst-glibc-hwcaps-prepend \ tst-global1 \ tst-global2 \ + tst-gnu2-tls2 \ tst-initfinilazyfail \ tst-initorder \ tst-initorder2 \ @@ -846,6 +847,9 @@ modules-names += \ tst-filterobj-flt \ tst-finilazyfailmod \ tst-globalmod2 \ + tst-gnu2-tls2mod0 \ + tst-gnu2-tls2mod1 \ + tst-gnu2-tls2mod2 \ tst-initlazyfailmod \ tst-initorder2a \ tst-initorder2b \ @@ -3044,8 +3048,22 @@ $(objpfx)tst-tlsgap.out: \ $(objpfx)tst-tlsgap-mod0.so \ $(objpfx)tst-tlsgap-mod1.so \ $(objpfx)tst-tlsgap-mod2.so + +$(objpfx)tst-gnu2-tls2: $(shared-thread-library) +$(objpfx)tst-gnu2-tls2.out: \ + $(objpfx)tst-gnu2-tls2mod0.so \ + $(objpfx)tst-gnu2-tls2mod1.so \ + $(objpfx)tst-gnu2-tls2mod2.so + ifeq (yes,$(have-mtls-dialect-gnu2)) +# This test fails if dl_tlsdesc_dynamic doesn't preserve all caller-saved +# registers. See https://sourceware.org/bugzilla/show_bug.cgi?id=31372 +test-xfail-tst-gnu2-tls2 = yes + CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2 CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2 CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2 endif diff --git a/elf/tst-gnu2-tls2.c b/elf/tst-gnu2-tls2.c new file mode 100644 index 0000000000..7ac04d7f33 --- /dev/null +++ b/elf/tst-gnu2-tls2.c @@ -0,0 +1,122 @@ +/* Test TLSDESC relocation. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <dlfcn.h> +#include <pthread.h> +#include <support/xdlfcn.h> +#include <support/xthread.h> +#include <support/check.h> +#include <support/test-driver.h> +#include "tst-gnu2-tls2.h" + +#ifndef IS_SUPPORTED +# define IS_SUPPORTED() true +#endif + +/* An architecture can define it to clobber caller-saved registers in + malloc below to verify that the implicit TLSDESC call won't change + caller-saved registers. */ +#ifndef PREPARE_MALLOC +# define PREPARE_MALLOC() +#endif + +extern void * __libc_malloc (size_t); + +size_t malloc_counter = 0; + +void * +malloc (size_t n) +{ + PREPARE_MALLOC (); + malloc_counter++; + return __libc_malloc (n); +} + +static void *mod[3]; +#ifndef MOD +# define MOD(i) "tst-gnu2-tls2mod" #i ".so" +#endif +static const char *modname[3] = { MOD(0), MOD(1), MOD(2) }; +#undef MOD + +static void +open_mod (int i) +{ + mod[i] = xdlopen (modname[i], RTLD_LAZY); + printf ("open %s\n", modname[i]); +} + +static void +close_mod (int i) +{ + xdlclose (mod[i]); + mod[i] = NULL; + printf ("close %s\n", modname[i]); +} + +static void +access_mod (int i, const char *sym) +{ + struct tls var = { -1, -1, -1, -1 }; + struct tls *(*f) (struct tls *) = xdlsym (mod[i], sym); + /* Check that our malloc is called. */ + malloc_counter = 0; + struct tls *p = f (&var); + TEST_VERIFY (malloc_counter != 0); + printf ("access %s: %s() = %p\n", modname[i], sym, p); + TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0); + ++(p->a); +} + +static void * +start (void *arg) +{ + /* The DTV generation is at the last dlopen of mod0 and the + entry for mod1 is NULL. */ + + open_mod (1); /* Reuse modid of mod1. Uses dynamic TLS. */ + + /* Force the slow path in GNU2 TLS descriptor call. */ + access_mod (1, "apply_tls"); + + return arg; +} + +static int +do_test (void) +{ + if (!IS_SUPPORTED ()) + return EXIT_UNSUPPORTED; + + open_mod (0); + open_mod (1); + open_mod (2); + close_mod (0); + close_mod (1); /* Create modid gap at mod1. */ + open_mod (0); /* Reuse modid of mod0, bump generation count. */ + + /* Create a thread where DTV of mod1 is NULL. */ + pthread_t t = xpthread_create (NULL, start, NULL); + xpthread_join (t); + return 0; +} + +#include <support/test-driver.c> diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h new file mode 100644 index 0000000000..77964a57a3 --- /dev/null +++ b/elf/tst-gnu2-tls2.h @@ -0,0 +1,36 @@ +/* Test TLSDESC relocation. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +struct tls +{ + int64_t a, b, c, d; +}; + +extern struct tls *apply_tls (struct tls *); + +/* An architecture can define them to verify that clobber caller-saved + registers aren't changed by the implicit TLSDESC call. */ +#ifndef BEFORE_TLSDESC_CALL +# define BEFORE_TLSDESC_CALL() +#endif + +#ifndef AFTER_TLSDESC_CALL +# define AFTER_TLSDESC_CALL() +#endif diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c new file mode 100644 index 0000000000..45556a0e17 --- /dev/null +++ b/elf/tst-gnu2-tls2mod0.c @@ -0,0 +1,31 @@ +/* DSO used by tst-gnu2-tls2. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "tst-gnu2-tls2.h" + +__thread struct tls tls_var0 __attribute__ ((visibility ("hidden"))); + +struct tls * +apply_tls (struct tls *p) +{ + BEFORE_TLSDESC_CALL (); + tls_var0 = *p; + struct tls *ret = &tls_var0; + AFTER_TLSDESC_CALL (); + return ret; +} diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c new file mode 100644 index 0000000000..e10b9dbc0a --- /dev/null +++ b/elf/tst-gnu2-tls2mod1.c @@ -0,0 +1,31 @@ +/* DSO used by tst-gnu2-tls2. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "tst-gnu2-tls2.h" + +__thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden"))); + +struct tls * +apply_tls (struct tls *p) +{ + BEFORE_TLSDESC_CALL (); + tls_var1[1] = *p; + struct tls *ret = &tls_var1[1]; + AFTER_TLSDESC_CALL (); + return ret; +} diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c new file mode 100644 index 0000000000..141af51e55 --- /dev/null +++ b/elf/tst-gnu2-tls2mod2.c @@ -0,0 +1,31 @@ +/* DSO used by tst-gnu2-tls2. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "tst-gnu2-tls2.h" + +__thread struct tls tls_var2 __attribute__ ((visibility ("hidden"))); + +struct tls * +apply_tls (struct tls *p) +{ + BEFORE_TLSDESC_CALL (); + tls_var2 = *p; + struct tls *ret = &tls_var2; + AFTER_TLSDESC_CALL (); + return ret; +} diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h index fc1ef96587..50d74fe6e9 100644 --- a/sysdeps/i386/dl-machine.h +++ b/sysdeps/i386/dl-machine.h @@ -347,7 +347,7 @@ and creates an unsatisfiable circular dependency.\n", { td->arg = _dl_make_tlsdesc_dynamic (sym_map, sym->st_value + (ElfW(Word))td->arg); - td->entry = _dl_tlsdesc_dynamic; + td->entry = GLRO(dl_x86_tlsdesc_dynamic); } else # endif diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h new file mode 100644 index 0000000000..3627028577 --- /dev/null +++ b/sysdeps/i386/dl-tlsdesc-dynamic.h @@ -0,0 +1,190 @@ +/* Thread-local storage handling in the ELF dynamic linker. i386 version. + Copyright (C) 2004-2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#undef REGISTER_SAVE_AREA + +#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 +# error STATE_SAVE_ALIGNMENT must be multiple of 16 +#endif + +#if DL_RUNTIME_RESOLVE_REALIGN_STACK +# ifdef USE_FNSAVE +# error USE_FNSAVE shouldn't be defined +# endif +# ifdef USE_FXSAVE +/* Use fxsave to save all registers. */ +# define REGISTER_SAVE_AREA 512 +# endif +#else +# ifdef USE_FNSAVE +/* Use fnsave to save x87 FPU stack registers. */ +# define REGISTER_SAVE_AREA 108 +# else +# ifndef USE_FXSAVE +# error USE_FXSAVE must be defined +# endif +/* Use fxsave to save all registers. Add 12 bytes to align the stack + to 16 bytes. */ +# define REGISTER_SAVE_AREA (512 + 12) +# endif +#endif + + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,@function + + /* This function is used for symbols that need dynamic TLS. + + %eax points to the TLS descriptor, such that 0(%eax) points to + _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct + tlsdesc_dynamic_arg object. It must return in %eax the offset + between the thread pointer and the object denoted by the + argument, without clobbering any registers. + + The assembly code that follows is a rendition of the following + C code, hand-optimized a little bit. + +ptrdiff_t +__attribute__ ((__regparm__ (1))) +_dl_tlsdesc_dynamic (struct tlsdesc *tdp) +{ + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); + if (__builtin_expect (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; +} +*/ + cfi_startproc + .align 16 +_dl_tlsdesc_dynamic: + /* Like all TLS resolvers, preserve call-clobbered registers. + We need two scratch regs anyway. */ + subl $32, %esp + cfi_adjust_cfa_offset (32) + movl %ecx, 20(%esp) + movl %edx, 24(%esp) + movl TLSDESC_ARG(%eax), %eax + movl %gs:DTV_OFFSET, %edx + movl TLSDESC_GEN_COUNT(%eax), %ecx + cmpl (%edx), %ecx + ja 2f + movl TLSDESC_MODID(%eax), %ecx + movl (%edx,%ecx,8), %edx + cmpl $-1, %edx + je 2f + movl TLSDESC_MODOFF(%eax), %eax + addl %edx, %eax +1: + movl 20(%esp), %ecx + subl %gs:0, %eax + movl 24(%esp), %edx + addl $32, %esp + cfi_adjust_cfa_offset (-32) + ret + .p2align 4,,7 +2: + cfi_adjust_cfa_offset (32) +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + movl %ebx, -28(%esp) + movl %esp, %ebx + cfi_def_cfa_register(%ebx) + and $-STATE_SAVE_ALIGNMENT, %esp +#endif +#ifdef REGISTER_SAVE_AREA + subl $REGISTER_SAVE_AREA, %esp +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +# endif +#else +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true +# endif + /* Allocate stack space of the required size to save the state. */ + LOAD_PIC_REG (cx) + subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp +#endif +#ifdef USE_FNSAVE + fnsave (%esp) +#elif defined USE_FXSAVE + fxsave (%esp) +#else + /* Save the argument for ___tls_get_addr in EAX. */ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + /* Clear the XSAVE Header. */ +# ifdef USE_XSAVE + movl %edx, (512)(%esp) + movl %edx, (512 + 4 * 1)(%esp) + movl %edx, (512 + 4 * 2)(%esp) + movl %edx, (512 + 4 * 3)(%esp) +# endif + movl %edx, (512 + 4 * 4)(%esp) + movl %edx, (512 + 4 * 5)(%esp) + movl %edx, (512 + 4 * 6)(%esp) + movl %edx, (512 + 4 * 7)(%esp) + movl %edx, (512 + 4 * 8)(%esp) + movl %edx, (512 + 4 * 9)(%esp) + movl %edx, (512 + 4 * 10)(%esp) + movl %edx, (512 + 4 * 11)(%esp) + movl %edx, (512 + 4 * 12)(%esp) + movl %edx, (512 + 4 * 13)(%esp) + movl %edx, (512 + 4 * 14)(%esp) + movl %edx, (512 + 4 * 15)(%esp) +# ifdef USE_XSAVE + xsave (%esp) +# else + xsavec (%esp) +# endif + /* Restore the argument for ___tls_get_addr in EAX. */ + movl %ecx, %eax +#endif + call HIDDEN_JUMPTARGET (___tls_get_addr) + /* Get register content back. */ +#ifdef USE_FNSAVE + frstor (%esp) +#elif defined USE_FXSAVE + fxrstor (%esp) +#else + /* Save and retore ___tls_get_addr return value stored in EAX. */ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + xrstor (%esp) + movl %ecx, %eax +#endif +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + mov %ebx, %esp + cfi_def_cfa_register(%esp) + movl -28(%esp), %ebx + cfi_restore(%ebx) +#else + addl $REGISTER_SAVE_AREA, %esp + cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) +#endif + jmp 1b + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + +#undef STATE_SAVE_ALIGNMENT diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S index 90d93caa0c..f002feee56 100644 --- a/sysdeps/i386/dl-tlsdesc.S +++ b/sysdeps/i386/dl-tlsdesc.S |
