aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormengqinggang <mengqinggang@loongson.cn>2024-05-08 10:06:15 +0800
committercaiyinyu <caiyinyu@loongson.cn>2024-05-15 10:31:53 +0800
commit1dbf2bef7934cee9829d875f11968d6ff1fee77f (patch)
treed9e2a0c544e5a7468bfc5149613155f2d2d4efdf
parentf942a732d37a96217ef828116ebe64a644db18d7 (diff)
downloadglibc-1dbf2bef7934cee9829d875f11968d6ff1fee77f.tar.xz
glibc-1dbf2bef7934cee9829d875f11968d6ff1fee77f.zip
LoongArch: Add support for TLS Descriptors
This is mostly based on AArch64 and RISC-V implementation. Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations. For _dl_tlsdesc_dynamic function slow path, temporarily save and restore all vector registers.
-rw-r--r--config.h.in3
-rw-r--r--elf/elf.h2
-rw-r--r--sysdeps/loongarch/Makefile34
-rw-r--r--sysdeps/loongarch/configure33
-rw-r--r--sysdeps/loongarch/configure.ac16
-rw-r--r--sysdeps/loongarch/dl-machine.h52
-rw-r--r--sysdeps/loongarch/dl-tls.h9
-rw-r--r--sysdeps/loongarch/dl-tlsdesc.S436
-rw-r--r--sysdeps/loongarch/dl-tlsdesc.h49
-rw-r--r--sysdeps/loongarch/linkmap.h3
-rw-r--r--sysdeps/loongarch/preconfigure1
-rw-r--r--sysdeps/loongarch/sys/asm.h1
-rw-r--r--sysdeps/loongarch/sys/regdef.h1
-rw-r--r--sysdeps/loongarch/tlsdesc.c39
-rw-r--r--sysdeps/loongarch/tlsdesc.sym28
-rw-r--r--sysdeps/loongarch/tst-gnu2-tls2.h377
16 files changed, 1076 insertions, 8 deletions
diff --git a/config.h.in b/config.h.in
index c4cc7d3b9a..9a83b774fa 100644
--- a/config.h.in
+++ b/config.h.in
@@ -141,6 +141,9 @@
/* LOONGARCH floating-point ABI for ld.so. */
#undef LOONGARCH_ABI_FRLEN
+/* Define if the compiler supports vector instructions. */
+#undef HAVE_LOONGARCH_VEC_COM
+
/* Define whether ARM used hard-float and support VFPvX-D32. */
#undef HAVE_ARM_PCS_VFP_D32
diff --git a/elf/elf.h b/elf/elf.h
index 55b2e87860..682bce5a94 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -4241,6 +4241,8 @@ enum
#define R_LARCH_TLS_TPREL32 10
#define R_LARCH_TLS_TPREL64 11
#define R_LARCH_IRELATIVE 12
+#define R_LARCH_TLS_DESC32 13
+#define R_LARCH_TLS_DESC64 14
/* Reserved for future relocs that the dynamic linker must understand. */
diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
index 446bda6563..a4ee915eff 100644
--- a/sysdeps/loongarch/Makefile
+++ b/sysdeps/loongarch/Makefile
@@ -1,5 +1,7 @@
ifeq ($(subdir),misc)
-sysdep_headers += sys/asm.h
+sysdep_headers += \
+ sys/asm.h \
+ # sysdep_headers
tests += \
tst-hwcap-tunables \
@@ -9,21 +11,45 @@ tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
endif
ifeq ($(subdir),elf)
-gen-as-const-headers += dl-link.sym
+sysdep-dl-routines += \
+ dl-tlsdesc \
+ tlsdesc \
+ # sysdep-dl-routines
+
+gen-as-const-headers += \
+ dl-link.sym \
+ # gen-as-const-headers
+endif
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += \
+ tlsdesc.sym \
+ # gen-as-const-headers
endif
ifeq ($(subdir),elf)
sysdep-dl-routines += \
dl-get-cpu-features \
# sysdep-dl-routines
+
+# Prevent the compiler from using LSX in the TLS descriptor tests; otherwise
+# storing into a 16B TLS variable may clobber FP/vector registers and prevent
+# us from checking their contents.
+CFLAGS-tst-gnu2-tls2mod0.c += -mno-lsx
+CFLAGS-tst-gnu2-tls2mod1.c += -mno-lsx
+CFLAGS-tst-gnu2-tls2mod2.c += -mno-lsx
endif
# LoongArch's assembler also needs to know about PIC as it changes the
# definition of some assembler macros.
-ASFLAGS-.os += $(pic-ccflag)
+ASFLAGS-.os += \
+ $(pic-ccflag) \
+ # ASFLAGS-.os
# All the objects in lib*_nonshared.a need to be compiled with medium code
# model or large applications may fail to link.
ifeq (yes,$(have-cmodel-medium))
-CFLAGS-.oS += -mcmodel=medium
+CFLAGS-.oS += \
+ -mcmodel=medium \
+ # CFLAGS-.oS
endif
diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure
index 30b60d1983..731f79438f 100644
--- a/sysdeps/loongarch/configure
+++ b/sysdeps/loongarch/configure
@@ -110,3 +110,36 @@ if test $libc_cv_loongarch_vec_asm = no; then
as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5
fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for vector support in compiler" >&5
+printf %s "checking for vector support in compiler... " >&6; }
+if test ${libc_cv_loongarch_vec_com+y}
+then :
+ printf %s "(cached) " >&6
+else $as_nop
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+void foo (void)
+{
+ asm volatile ("vldi \$vr0, 1" ::: "\$vr0");
+ asm volatile ("xvldi \$xr0, 1" ::: "\$xr0");
+}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+ libc_cv_loongarch_vec_com=yes
+else $as_nop
+ libc_cv_loongarch_vec_com=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_com" >&5
+printf "%s\n" "$libc_cv_loongarch_vec_com" >&6; }
+if test "$libc_cv_loongarch_vec_com" = yes ;
+then
+ printf "%s\n" "#define HAVE_LOONGARCH_VEC_COM 1" >>confdefs.h
+
+fi
diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac
index 28a8ae5486..24fee55dcc 100644
--- a/sysdeps/loongarch/configure.ac
+++ b/sysdeps/loongarch/configure.ac
@@ -65,3 +65,19 @@ rm -f conftest*])
if test $libc_cv_loongarch_vec_asm = no; then
AC_MSG_ERROR([binutils version is too old, use 2.41 or newer version])
fi
+
+AC_CACHE_CHECK([for vector support in compiler],
+ libc_cv_loongarch_vec_com, [
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+void foo (void)
+{
+ asm volatile ("vldi $vr0, 1" ::: "$vr0");
+ asm volatile ("xvldi $xr0, 1" ::: "$xr0");
+}
+]])],
+ [libc_cv_loongarch_vec_com=yes],
+ [libc_cv_loongarch_vec_com=no])])
+if test "$libc_cv_loongarch_vec_com" = yes ;
+then
+ AC_DEFINE(HAVE_LOONGARCH_VEC_COM)
+fi
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index 6baf0e600a..ab6f1da7c0 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -25,7 +25,7 @@
#include <entry.h>
#include <elf/elf.h>
#include <sys/asm.h>
-#include <dl-tls.h>
+#include <dl-tlsdesc.h>
#include <dl-static-tls.h>
#include <dl-machine-rel.h>
@@ -206,6 +206,36 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
*addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
break;
+ case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
+ {
+ struct tlsdesc volatile *td = (struct tlsdesc volatile *)addr_field;
+ if (sym == NULL)
+ {
+ td->arg = (void*)reloc->r_addend;
+ td->entry = _dl_tlsdesc_undefweak;
+ }
+ else
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (!TRY_STATIC_TLS (map, sym_map))
+ {
+ td->arg = _dl_make_tlsdesc_dynamic (sym_map,
+ sym->st_value + reloc->r_addend);
+ td->entry = _dl_tlsdesc_dynamic;
+ }
+ else
+# endif
+ {
+ td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
+ + reloc->r_addend);
+ td->entry = _dl_tlsdesc_return;
+ }
+ }
+ break;
+ }
+
case R_LARCH_COPY:
{
if (sym == NULL)
@@ -274,6 +304,26 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
else
*reloc_addr = map->l_mach.plt;
}
+ else if (__glibc_likely (r_type == R_LARCH_TLS_DESC64)
+ || __glibc_likely (r_type == R_LARCH_TLS_DESC32))
+ {
+ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+ const ElfW (Sym) *sym = &symtab[symndx];
+ const struct r_found_version *version = NULL;
+
+ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+ {
+ const ElfW (Half) *vernum = (const void *)D_PTR (map,
+ l_info[VERSYMIDX (DT_VERSYM)]);
+ version = &map->l_versions[vernum[symndx] & 0x7fff];
+ }
+
+ /* Always initialize TLS descriptors completely, because lazy
+ initialization requires synchronization at every TLS access. */
+ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+ skip_ifunc);
+ }
else
_dl_reloc_bad_type (map, r_type, 1);
}
diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
index 29924b866d..de593c002d 100644
--- a/sysdeps/loongarch/dl-tls.h
+++ b/sysdeps/loongarch/dl-tls.h
@@ -16,6 +16,9 @@
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
+#ifndef _DL_TLS_H
+#define _DL_TLS_H
+
/* Type used for the representation of TLS information in the GOT. */
typedef struct
{
@@ -23,6 +26,8 @@ typedef struct
unsigned long int ti_offset;
} tls_index;
+extern void *__tls_get_addr (tls_index *ti);
+
/* The thread pointer points to the first static TLS block. */
#define TLS_TP_OFFSET 0
@@ -37,10 +42,10 @@ typedef struct
/* Compute the value for a DTPREL reloc. */
#define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
-extern void *__tls_get_addr (tls_index *ti);
-
#define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
#define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
/* Value used for dtv entries for which the allocation is delayed. */
#define TLS_DTV_UNALLOCATED ((void *) -1l)
+
+#endif
diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
new file mode 100644
index 0000000000..15d5fa1c42
--- /dev/null
+++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -0,0 +1,436 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+ LoongArch version.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+ .text
+
+ /* Return the thread pointer offset of a symbol in the static TLS
+ block: the value is read from the descriptor's arg word (tdp->arg)
+ and is the same for all threads. In: a0 = tdp. Out: a0 = offset.
+ Prototype: _dl_tlsdesc_return (tlsdesc *); */
+ .hidden _dl_tlsdesc_return
+ .global _dl_tlsdesc_return
+ .type _dl_tlsdesc_return,%function
+ cfi_startproc
+ .align 2
+_dl_tlsdesc_return:
+ REG_L a0, a0, TLSDESC_ARG /* a0 = tdp->arg; named offset, not a literal 8 */
+ RET
+ cfi_endproc
+ .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+ /* Handler for undefined weak TLS symbols.
+ Prototype:
+ _dl_tlsdesc_undefweak (tlsdesc *);
+
+ The arg word of the descriptor (tdp->arg) contains the addend.
+ Return the addend minus the thread pointer. This ensures
+ that when the caller adds on the thread pointer it gets back
+ the addend. In: a0 = tdp. Out: a0 = addend - tp. */
+ .hidden _dl_tlsdesc_undefweak
+ .global _dl_tlsdesc_undefweak
+ .type _dl_tlsdesc_undefweak,%function
+ cfi_startproc
+ .align 2
+_dl_tlsdesc_undefweak:
+ REG_L a0, a0, TLSDESC_ARG /* a0 = tdp->arg (the addend) */
+ sub.d a0, a0, tp /* a0 = addend - tp */
+ RET
+ cfi_endproc
+ .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+
+#ifdef SHARED
+
+#define FRAME_SIZE (-((-14 * SZREG) & ALMASK))
+#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK))
+#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK))
+#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK))
+
+ /* Handler for dynamic TLS symbols.
+ Prototype:
+ _dl_tlsdesc_dynamic (tlsdesc *) ;
+
+ The second word of the descriptor points to a
+ tlsdesc_dynamic_arg structure.
+
+ Returns the offset between the thread pointer and the
+ object referenced by the argument.
+
+ ptrdiff_t
+ _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+ {
+ struct tlsdesc_dynamic_arg *td = tdp->arg;
+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - TCBHEAD_DTV);
+ if (__glibc_likely (td->gen_count <= dtv[0].counter
+ && (dtv[td->tlsinfo.ti_module].pointer.val
+ != TLS_DTV_UNALLOCATED),
+ 1))
+ return dtv[td->tlsinfo.ti_module].pointer.val
+ + td->tlsinfo.ti_offset
+ - __thread_pointer;
+
+ return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+ } */
+ .hidden _dl_tlsdesc_dynamic
+ .global _dl_tlsdesc_dynamic
+ .type _dl_tlsdesc_dynamic,%function
+ cfi_startproc
+ .align 2
+_dl_tlsdesc_dynamic:
+ /* Save just enough registers to support the fast path; if we fall
+ into the slow path we will save additional registers. */
+ ADDI sp, sp, -24
+ REG_S t0, sp, 0
+ REG_S t1, sp, 8
+ REG_S t2, sp, 16
+
+/* Runtime Storage Layout of Thread-Local Storage
+ TP points to the start of the TLS block.
+
+ dtv
+Low address TCB ----------------> dtv0(counter)
+ TP --> static_block0 <----- dtv1
+ static_block1 <----- dtv2
+ static_block2 <----- dtv3
+ dynamic_block0 <----- dtv4
+High address dynamic_block1 <----- dtv5 */
+
+ REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */
+ REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */
+ REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
+ REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
+ /* If dtv[0].counter < td->gen_count, goto slow path. */
+ bltu t2, t1, .Lslow
+
+ REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
+ /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
+ slli.d t1, t1, 4
+ add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */
+ REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
+ li.d t2, TLS_DTV_UNALLOCATED
+ /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
+ goto slow path. */
+ beq t1, t2, .Lslow
+
+ REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
+ /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
+ add.d a0, t1, t2
+.Lret:
+ sub.d a0, a0, tp
+ REG_L t0, sp, 0
+ REG_L t1, sp, 8
+ REG_L t2, sp, 16
+ ADDI sp, sp, 24
+ RET
+
+.Lslow:
+ /* This is the slow path. We need to call __tls_get_addr(), which
+ means we need to save and restore all the registers that the
+ callee will trash. */
+
+ /* Save the remaining registers that we must treat as caller save. */
+ ADDI sp, sp, -FRAME_SIZE
+ REG_S ra, sp, 0 * SZREG
+ REG_S a1, sp, 1 * SZREG
+ REG_S a2, sp, 2 * SZREG
+ REG_S a3, sp, 3 * SZREG
+ REG_S a4, sp, 4 * SZREG
+ REG_S a5, sp, 5 * SZREG
+ REG_S a6, sp, 6 * SZREG
+ REG_S a7, sp, 7 * SZREG
+ REG_S t3, sp, 8 * SZREG
+ REG_S t4, sp, 9 * SZREG
+ REG_S t5, sp, 10 * SZREG
+ REG_S t6, sp, 11 * SZREG
+ REG_S t7, sp, 12 * SZREG
+ REG_S t8, sp, 13 * SZREG
+
+#ifndef __loongarch_soft_float
+
+ /* Save fcsr0 register.
+ Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
+ of some fields in fcsr0. */
+ ADDI sp, sp, -SZFCSREG
+ movfcsr2gr t0, fcsr0
+ st.w t0, sp, 0
+
+ /* Whether LASX is supported. Test into t1 so t0 keeps the hwcap word. */
+ la.global t0, _rtld_global_ro
+ REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
+ andi t1, t0, HWCAP_LOONGARCH_LASX
+ beqz t1, .Llsx
+
+ /* Save 256-bit vector registers.
+ FIXME: Without vector ABI, save all vector registers. */
+ ADDI sp, sp, -FRAME_SIZE_LASX
+ xvst xr0, sp, 0*SZXREG
+ xvst xr1, sp, 1*SZXREG
+ xvst xr2, sp, 2*SZXREG
+ xvst xr3, sp, 3*SZXREG
+ xvst xr4, sp, 4*SZXREG
+ xvst xr5, sp, 5*SZXREG
+ xvst xr6, sp, 6*SZXREG
+ xvst xr7, sp, 7*SZXREG
+ xvst xr8, sp, 8*SZXREG
+ xvst xr9, sp, 9*SZXREG
+ xvst xr10, sp, 10*SZXREG
+ xvst xr11, sp, 11*SZXREG
+ xvst xr12, sp, 12*SZXREG
+ xvst xr13, sp, 13*SZXREG
+ xvst xr14, sp, 14*SZXREG
+ xvst xr15, sp, 15*SZXREG
+ xvst xr16, sp, 16*SZXREG
+ xvst xr17, sp, 17*SZXREG
+ xvst xr18, sp, 18*SZXREG
+ xvst xr19, sp, 19*SZXREG
+ xvst xr20, sp, 20*SZXREG
+ xvst xr21, sp, 21*SZXREG
+ xvst xr22, sp, 22*SZXREG
+ xvst xr23, sp, 23*SZXREG
+ xvst xr24, sp, 24*SZXREG
+ xvst xr25, sp, 25*SZXREG
+ xvst xr26, sp, 26*SZXREG
+ xvst xr27, sp, 27*SZXREG
+ xvst xr28, sp, 28*SZXREG
+ xvst xr29, sp, 29*SZXREG
+ xvst xr30, sp, 30*SZXREG
+ xvst xr31, sp, 31*SZXREG
+ b .Ltga
+
+.Llsx:
+ /* Whether LSX is supported (t0 still holds the full hwcap word). */
+ andi t0, t0, HWCAP_LOONGARCH_LSX
+ beqz t0, .Lfloat
+
+ /* Save 128-bit vector registers. */
+ ADDI sp, sp, -FRAME_SIZE_LSX
+ vst vr0, sp, 0*SZVREG
+ vst vr1, sp, 1*SZVREG
+ vst vr2, sp, 2*SZVREG
+ vst vr3, sp, 3*SZVREG
+ vst vr4, sp, 4*SZVREG
+ vst vr5, sp, 5*SZVREG
+ vst vr6, sp, 6*SZVREG
+ vst vr7, sp, 7*SZVREG
+ vst vr8, sp, 8*SZVREG
+ vst vr9, sp, 9*SZVREG
+ vst vr10, sp, 10*SZVREG
+ vst vr11, sp, 11*SZVREG
+ vst vr12, sp, 12*SZVREG
+ vst vr13, sp, 13*SZVREG
+ vst vr14, sp, 14*SZVREG
+ vst vr15, sp, 15*SZVREG
+ vst vr16, sp, 16*SZVREG
+ vst vr17, sp, 17*SZVREG
+ vst vr18, sp, 18*SZVREG
+ vst vr19, sp, 19*SZVREG
+ vst vr20, sp, 20*SZVREG
+ vst vr21, sp, 21*SZVREG
+ vst vr22, sp, 22*SZVREG
+ vst vr23, sp, 23*SZVREG
+ vst vr24, sp, 24*SZVREG
+ vst vr25, sp, 25*SZVREG
+ vst vr26, sp, 26*SZVREG
+ vst vr27, sp, 27*SZVREG
+ vst vr28, sp, 28*SZVREG
+ vst vr29, sp, 29*SZVREG
+ vst vr30, sp, 30*SZVREG
+ vst vr31, sp, 31*SZVREG
+ b .Ltga
+
+.Lfloat:
+ /* Save float registers. */
+ ADDI sp, sp, -FRAME_SIZE_FLOAT
+ FREG_S fa0, sp, 0*SZFREG
+ FREG_S fa1, sp, 1*SZFREG
+ FREG_S fa2, sp, 2*SZFREG
+ FREG_S fa3, sp, 3*SZFREG
+ FREG_S fa4, sp, 4*SZFREG
+ FREG_S fa5, sp, 5*SZFREG
+ FREG_S fa6, sp, 6*SZFREG
+ FREG_S fa7, sp, 7*SZFREG
+ FREG_S ft0, sp, 8*SZFREG
+ FREG_S ft1, sp, 9*SZFREG
+ FREG_S ft2, sp, 10*SZFREG
+ FREG_S ft3, sp, 11*SZFREG
+ FREG_S ft4, sp, 12*SZFREG
+ FREG_S ft5, sp, 13*SZFREG
+ FREG_S ft6, sp, 14*SZFREG
+ FREG_S ft7, sp, 15*SZFREG
+ FREG_S ft8, sp, 16*SZFREG
+ FREG_S ft9, sp, 17*SZFREG
+ FREG_S ft10, sp, 18*SZFREG
+ FREG_S ft11, sp, 19*SZFREG
+ FREG_S ft12, sp, 20*SZFREG
+ FREG_S ft13, sp, 21*SZFREG
+ FREG_S ft14, sp, 22*SZFREG
+ FREG_S ft15, sp, 23*SZFREG
+
+#endif /* #ifndef __loongarch_soft_float */
+
+.Ltga:
+ bl HIDDEN_JUMPTARGET(__tls_get_addr)
+ ADDI a0, a0, -TLS_DTV_OFFSET
+
+#ifndef __loongarch_soft_float
+
+ la.global t0, _rtld_global_ro
+ REG_L t0, t0, GLRO_DL_HWCAP_OFFSET
+ andi t1, t0, HWCAP_LOONGARCH_LASX /* Same test as on the save side; t0 keeps hwcap. */
+ beqz t1, .Llsx1
+
+ /* Restore 256-bit vector registers. */
+ xvld xr0, sp, 0*SZXREG
+ xvld xr1, sp, 1*SZXREG
+ xvld xr2, sp, 2*SZXREG
+ xvld xr3, sp, 3*SZXREG
+ xvld xr4, sp, 4*SZXREG
+ xvld xr5, sp, 5*SZXREG
+ xvld xr6, sp, 6*SZXREG
+ xvld xr7, sp, 7*SZXREG
+ xvld xr8, sp, 8*SZXREG
+ xvld xr9, sp, 9*SZXREG
+ xvld xr10, sp, 10*SZXREG
+ xvld xr11, sp, 11*SZXREG
+ xvld xr12, sp, 12*SZXREG
+ xvld xr13, sp, 13*SZXREG
+ xvld xr14, sp, 14*SZXREG
+ xvld xr15, sp, 15*SZXREG
+ xvld xr16, sp, 16*SZXREG
+ xvld xr17, sp, 17*SZXREG
+ xvld xr18, sp, 18*SZXREG
+ xvld xr19, sp, 19*SZXREG
+ xvld xr20, sp, 20*SZXREG
+ xvld xr21, sp, 21*SZXREG
+ xvld xr22, sp, 22*SZXREG
+ xvld xr23, sp, 23*SZXREG
+ xvld xr24, sp, 24*SZXREG
+ xvld xr25, sp, 25*SZXREG
+ xvld xr26, sp, 26*SZXREG
+ xvld xr27, sp, 27*SZXREG
+ xvld xr28, sp, 28*SZXREG
+ xvld xr29, sp, 29*SZXREG
+ xvld xr30, sp, 30*SZXREG
+ xvld xr31, sp, 31*SZXREG
+ ADDI sp, sp, FRAME_SIZE_LASX
+ b .Lfcsr
+
+.Llsx1:
+ andi t0, t0, HWCAP_LOONGARCH_LSX /* t0 = hwcap word reloaded above (was uninitialized s0). */
+ beqz t0, .Lfloat1
+
+ /* Restore 128-bit vector registers. */
+ vld vr0, sp, 0*SZVREG
+ vld vr1, sp, 1*SZVREG
+ vld vr2, sp, 2*SZVREG
+ vld vr3, sp, 3*SZVREG
+ vld vr4, sp, 4*SZVREG
+ vld vr5, sp, 5*SZVREG
+ vld vr6, sp, 6*SZVREG
+ vld vr7, sp, 7*SZVREG
+ vld vr8, sp, 8*SZVREG
+ vld vr9, sp, 9*SZVREG
+ vld vr10, sp, 10*SZVREG