[PATCH v3] LoongArch: Add support for TLS Descriptors

Adhemerval Zanella Netto adhemerval.zanella@linaro.org
Wed Mar 13 18:14:00 GMT 2024



On 13/03/24 14:28, Xi Ruoyao wrote:
> Hi Qinggang and Adhemerval,
> 
> I've built and tested Glibc with this patch,
> https://sourceware.org/pipermail/libc-alpha/2024-March/155297.html, and
> a change to enable the gnu2 tests:
> 
> diff --git a/sysdeps/loongarch/preconfigure
> b/sysdeps/loongarch/preconfigure
> index dfc7ecfd9e..0d1e9ed8df 100644
> --- a/sysdeps/loongarch/preconfigure
> +++ b/sysdeps/loongarch/preconfigure
> @@ -43,6 +43,7 @@ loongarch*)
>  
>  
>      base_machine=loongarch
> +    mtls_descriptor=desc
>      ;;
>  esac
> 
> with a patched GCC
> (https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647597.html, and
> some changes I've suggested in this thread).
> 
> The gnu2 tests have passed.
> 
> There are two failures which seem unrelated though:
> 
> FAIL: misc/tst-preadvwritev64v2
> FAIL: misc/tst-preadvwritev2

Is this loongarch related or does it happen on any architecture?

> 
> It looks like they are caused by the recent addition of RWF_NOAPPEND in
> the mainline Linux kernel.
> 
> On Fri, 2024-03-08 at 15:43 +0800, mengqinggang wrote:
>> This is mostly based on AArch64 and RISC-V implementation.
>>
>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>
>> In the slow path of _dl_tlsdesc_dynamic, temporarily save and restore
>> all vector registers.
>> ---
>> Changes v2 -> v3: 
>> - Remove _dl_tlsdesc_return_lasx, _dl_tlsdesc_return_lsx. 
>>   Provide only one _dl_tlsdesc_dynamic implementation and check the 
>>   required save/restore of vector register based on hwcap value.
>> - Other details mentioned by Adhemerval Zanella Netto, H.J. Lu and caiyinyu.
>>
>> Changes v1 -> v2: 
>> - Fix vr24-vr31, xr24-xr31 typo.
>> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic. 
>>
>> v2 link: https://sourceware.org/pipermail/libc-alpha/2024-February/155068.html
>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>>
>>  elf/elf.h                                     |   2 +
>>  sysdeps/loongarch/Makefile                    |   6 +
>>  sysdeps/loongarch/dl-machine.h                |  52 ++-
>>  sysdeps/loongarch/dl-tls.h                    |   9 +-
>>  sysdeps/loongarch/dl-tlsdesc.S                | 417 ++++++++++++++++++
>>  sysdeps/loongarch/dl-tlsdesc.h                |  49 ++
>>  sysdeps/loongarch/linkmap.h                   |   3 +-
>>  sysdeps/loongarch/sys/asm.h                   |   1 +
>>  sysdeps/loongarch/sys/regdef.h                |   1 +
>>  sysdeps/loongarch/tlsdesc.c                   |  39 ++
>>  sysdeps/loongarch/tlsdesc.sym                 |  28 ++
>>  .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>>  12 files changed, 605 insertions(+), 4 deletions(-)
>>  create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>>  create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>>  create mode 100644 sysdeps/loongarch/tlsdesc.c
>>  create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>
>> diff --git a/elf/elf.h b/elf/elf.h
>> index f2206e5c06..eec24ea049 100644
>> --- a/elf/elf.h
>> +++ b/elf/elf.h
>> @@ -4237,6 +4237,8 @@ enum
>>  #define R_LARCH_TLS_TPREL32	10
>>  #define R_LARCH_TLS_TPREL64	11
>>  #define R_LARCH_IRELATIVE	12
>> +#define R_LARCH_TLS_DESC32	13
>> +#define R_LARCH_TLS_DESC64	14
>>  
>>  /* Reserved for future relocs that the dynamic linker must understand.  */
>>  
>> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>> index 43d2f583cd..181389e787 100644
>> --- a/sysdeps/loongarch/Makefile
>> +++ b/sysdeps/loongarch/Makefile
>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>>  endif
>>  
>>  ifeq ($(subdir),elf)
>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>>  gen-as-const-headers += dl-link.sym
>>  endif
>>  
>> +ifeq ($(subdir),csu)
>> +gen-as-const-headers += tlsdesc.sym
>> +endif
>> +
>> +
>>  # LoongArch's assembler also needs to know about PIC as it changes the
>>  # definition of some assembler macros.
>>  ASFLAGS-.os += $(pic-ccflag)
>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>> index ab81b82d95..0e22337183 100644
>> --- a/sysdeps/loongarch/dl-machine.h
>> +++ b/sysdeps/loongarch/dl-machine.h
>> @@ -25,7 +25,7 @@
>>  #include <entry.h>
>>  #include <elf/elf.h>
>>  #include <sys/asm.h>
>> -#include <dl-tls.h>
>> +#include <dl-tlsdesc.h>
>>  #include <dl-static-tls.h>
>>  #include <dl-machine-rel.h>
>>  
>> @@ -187,6 +187,36 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>>        *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>>        break;
>>  
>> +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>> +      {
>> +	struct tlsdesc volatile *td = (struct tlsdesc volatile *)addr_field;
>> +	if (sym == NULL)
>> +	  {
>> +	    td->arg = (void*)reloc->r_addend;
>> +	    td->entry = _dl_tlsdesc_undefweak;
>> +	  }
>> +	else
>> +	  {
>> +# ifndef SHARED
>> +	    CHECK_STATIC_TLS (map, sym_map);
>> +# else
>> +	    if (!TRY_STATIC_TLS (map, sym_map))
>> +	      {
>> +		td->arg = _dl_make_tlsdesc_dynamic (sym_map,
>> +			      sym->st_value + reloc->r_addend);
>> +		td->entry = _dl_tlsdesc_dynamic;
>> +	      }
>> +	    else
>> +# endif
>> +	      {
>> +		td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>> +			    + reloc->r_addend);
>> +		td->entry = _dl_tlsdesc_return;
>> +	      }
>> +	  }
>> +	break;
>> +      }
>> +
>>      case R_LARCH_COPY:
>>        {
>>  	  if (sym == NULL)
>> @@ -255,6 +285,26 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>>        else
>>  	*reloc_addr = map->l_mach.plt;
>>      }
>> +  else if (__glibc_likely (r_type == R_LARCH_TLS_DESC64)
>> +	    || __glibc_likely (r_type == R_LARCH_TLS_DESC32))
>> +    {
>> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>> +      const ElfW (Sym) *sym = &symtab[symndx];
>> +      const struct r_found_version *version = NULL;
>> +
>> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>> +	{
>> +	  const ElfW (Half) *vernum = (const void *)D_PTR (map,
>> +					  l_info[VERSYMIDX (DT_VERSYM)]);
>> +	  version = &map->l_versions[vernum[symndx] & 0x7fff];
>> +	}
>> +
>> +      /* Always initialize TLS descriptors completely, because lazy
>> +	 initialization requires synchronization at every TLS access.  */
>> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>> +			skip_ifunc);
>> +    }
>>    else
>>      _dl_reloc_bad_type (map, r_type, 1);
>>  }
>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>> index 29924b866d..de593c002d 100644
>> --- a/sysdeps/loongarch/dl-tls.h
>> +++ b/sysdeps/loongarch/dl-tls.h
>> @@ -16,6 +16,9 @@
>>     License along with the GNU C Library.  If not, see
>>     <https://www.gnu.org/licenses/>.  */
>>  
>> +#ifndef _DL_TLS_H
>> +#define _DL_TLS_H
>> +
>>  /* Type used for the representation of TLS information in the GOT.  */
>>  typedef struct
>>  {
>> @@ -23,6 +26,8 @@ typedef struct
>>    unsigned long int ti_offset;
>>  } tls_index;
>>  
>> +extern void *__tls_get_addr (tls_index *ti);
>> +
>>  /* The thread pointer points to the first static TLS block.  */
>>  #define TLS_TP_OFFSET 0
>>  
>> @@ -37,10 +42,10 @@ typedef struct
>>  /* Compute the value for a DTPREL reloc.  */
>>  #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>  
>> -extern void *__tls_get_addr (tls_index *ti);
>> -
>>  #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>>  #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>  
>>  /* Value used for dtv entries for which the allocation is delayed.  */
>>  #define TLS_DTV_UNALLOCATED ((void *) -1l)
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>> new file mode 100644
>> index 0000000000..34028e988b
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>> @@ -0,0 +1,417 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> +   LoongArch version.
>> +   Copyright (C) 2011-2024 Free Software Foundation, Inc.
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <sysdep.h>
>> +#include <tls.h>
>> +#include "tlsdesc.h"
>> +
>> +	.text
>> +
>> +	/* Return the thread pointer offset of a symbol in the static TLS
>> +	   block, as stored in the descriptor; it is the same in all threads.
>> +	   Prototype:
>> +	   _dl_tlsdesc_return (tlsdesc *);  */
>> +	.hidden _dl_tlsdesc_return
>> +	.global	_dl_tlsdesc_return
>> +	.type	_dl_tlsdesc_return,%function
>> +	cfi_startproc
>> +	.align 2
>> +_dl_tlsdesc_return:
>> +	REG_L  a0, a0, 8	/* a0 = tdp->arg, the second word of the descriptor */
>> +	RET
>> +	cfi_endproc
>> +	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
>> +
>> +	/* Handler for undefined weak TLS symbols.
>> +	   Prototype:
>> +	   _dl_tlsdesc_undefweak (tlsdesc *);
>> +
>> +	   The second word of the descriptor contains the addend.
>> +	   Return the addend minus the thread pointer. This ensures
>> +	   that when the caller adds on the thread pointer it gets back
>> +	   the addend.  */
>> +	.hidden _dl_tlsdesc_undefweak
>> +	.global	_dl_tlsdesc_undefweak
>> +	.type	_dl_tlsdesc_undefweak,%function
>> +	cfi_startproc
>> +	.align  2
>> +_dl_tlsdesc_undefweak:
>> +	REG_L	a0, a0, 8	/* a0 = addend (second word of the descriptor) */
>> +	sub.d	a0, a0, tp	/* a0 = addend - thread pointer */
>> +	RET
>> +	cfi_endproc
>> +	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>> +
>> +
>> +#ifdef SHARED
>> +
>> +#define FRAME_SIZE	  (-((-13 * SZREG) & ALMASK))
>> +#define FRAME_SIZE_LSX	  (-((-32 * SZVREG) & ALMASK))
>> +#define FRAME_SIZE_LASX	  (-((-32 * SZXREG) & ALMASK))
>> +#define FRAME_SIZE_FLOAT  (-((-24 * SZFREG) & ALMASK))
>> +
>> +	/* Handler for dynamic TLS symbols.
>> +	   Prototype:
>> +	   _dl_tlsdesc_dynamic (tlsdesc *) ;
>> +
>> +	   The second word of the descriptor points to a
>> +	   tlsdesc_dynamic_arg structure.
>> +
>> +	   Returns the offset between the thread pointer and the
>> +	   object referenced by the argument.
>> +
>> +	   ptrdiff_t
>> +	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>> +	   {
>> +	     struct tlsdesc_dynamic_arg *td = tdp->arg;
>> +	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>> +	     if (__glibc_likely (td->gen_count <= dtv[0].counter
>> +		&& (dtv[td->tlsinfo.ti_module].pointer.val
>> +		    != TLS_DTV_UNALLOCATED),
>> +		1))
>> +	       return dtv[td->tlsinfo.ti_module].pointer.val
>> +		+ td->tlsinfo.ti_offset
>> +		- __thread_pointer;
>> +
>> +	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>> +	   }  */
>> +	.hidden _dl_tlsdesc_dynamic
>> +	.global	_dl_tlsdesc_dynamic
>> +	.type	_dl_tlsdesc_dynamic,%function
>> +	cfi_startproc
>> +	.align 2
>> +_dl_tlsdesc_dynamic:
>> +	/* Save just enough registers to support fast path, if we fall
>> +	   into slow path we will save additional registers.  */
>> +	ADDI	sp, sp,-24
>> +	REG_S	t0, sp, 0
>> +	REG_S	t1, sp, 8
>> +	REG_S	t2, sp, 16
>> +
>> +	REG_L	t0, tp, -SIZE_OF_DTV  /* dtv(t0) = tp + TCBHEAD_DTV dtv start */
>> +	REG_L	a0, a0, TLSDESC_ARG	  /* td(a0) = tdp->arg */
>> +	REG_L	t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
>> +	REG_L	t2, t0, DTV_COUNTER	  /* t2 = dtv[0].counter */
>> +	bltu	t2, t1, .Lslow
>> +
>> +	REG_L	t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
>> +	slli.d	t1, t1, 3 + 1 /* sizeof(dtv_t) == sizeof(void*) * 2 */
>> +	add.d	t1, t1, t0    /* t1 = dtv + ti_module * sizeof(dtv_t) */
>> +	REG_L	t1, t1, 0     /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
>> +	li.d	t2, TLS_DTV_UNALLOCATED
>> +	beq	t1, t2, .Lslow
>> +	REG_L	t2, a0, TLSDESC_MODOFF	/* t2 = td->tlsinfo.ti_offset */
>> +	/* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
>> +	add.d	a0, t1, t2
>> +.Lret:
>> +	sub.d	a0, a0, tp
>> +	REG_L	t0, sp, 0
>> +	REG_L	t1, sp, 8
>> +	REG_L	t2, sp, 16
>> +	ADDI	sp, sp, 24
>> +	RET
>> +
>> +.Lslow:
>> +	/* This is the slow path. We need to call __tls_get_addr() which
>> +	   means we need to save and restore all the registers that the
>> +	   callee will trash.  */
>> +
>> +	/* Save the remaining caller-saved registers.  NOTE(review): t3 is never saved - confirm.  */
>> +	ADDI	sp, sp, -FRAME_SIZE
>> +	REG_S	ra, sp, 0 * SZREG
>> +	REG_S	a1, sp, 1 * SZREG
>> +	REG_S	a2, sp, 2 * SZREG
>> +	REG_S	a3, sp, 3 * SZREG
>> +	REG_S	a4, sp, 4 * SZREG
>> +	REG_S	a5, sp, 5 * SZREG
>> +	REG_S	a6, sp, 6 * SZREG
>> +	REG_S	a7, sp, 7 * SZREG
>> +	REG_S	t4, sp, 8 * SZREG
>> +	REG_S	t5, sp, 9 * SZREG
>> +	REG_S	t6, sp, 10 * SZREG
>> +	REG_S	t7, sp, 11 * SZREG
>> +	REG_S	t8, sp, 12 * SZREG
>> +
>> +#ifndef __loongarch_soft_float
>> +
>> +	/* Save fcsr0 register.
>> +	   Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
>> +	   of some fields in fcsr0.  */
>> +	ADDI	sp, sp, -SZFCSREG
>> +	movfcsr2gr  t0, fcsr0
>> +	st.w	t0, sp, 0
>> +
>> +	/* Test for LASX; keep the hwcap word in t0 for the LSX test.  */
>> +	la.global   t0, _rtld_global_ro
>> +	REG_L	t0, t0, GLRO_DL_HWCAP_OFFSET
>> +	andi	t1, t0, HWCAP_LOONGARCH_LASX
>> +	beqz	t1, .Llsx
>> +
>> +	/* Save 256-bit vector registers.
>> +	   FIXME: Without vector ABI, save all vector registers.  */
>> +	ADDI	sp, sp, -FRAME_SIZE_LASX
>> +	xvst	xr0, sp, 0*SZXREG
>> +	xvst	xr1, sp, 1*SZXREG
>> +	xvst	xr2, sp, 2*SZXREG
>> +	xvst	xr3, sp, 3*SZXREG
>> +	xvst	xr4, sp, 4*SZXREG
>> +	xvst	xr5, sp, 5*SZXREG
>> +	xvst	xr6, sp, 6*SZXREG
>> +	xvst	xr7, sp, 7*SZXREG
>> +	xvst	xr8, sp, 8*SZXREG
>> +	xvst	xr9, sp, 9*SZXREG
>> +	xvst	xr10, sp, 10*SZXREG
>> +	xvst	xr11, sp, 11*SZXREG
>> +	xvst	xr12, sp, 12*SZXREG
>> +	xvst	xr13, sp, 13*SZXREG
>> +	xvst	xr14, sp, 14*SZXREG
>> +	xvst	xr15, sp, 15*SZXREG
>> +	xvst	xr16, sp, 16*SZXREG
>> +	xvst	xr17, sp, 17*SZXREG
>> +	xvst	xr18, sp, 18*SZXREG
>> +	xvst	xr19, sp, 19*SZXREG
>> +	xvst	xr20, sp, 20*SZXREG
>> +	xvst	xr21, sp, 21*SZXREG
>> +	xvst	xr22, sp, 22*SZXREG
>> +	xvst	xr23, sp, 23*SZXREG
>> +	xvst	xr24, sp, 24*SZXREG
>> +	xvst	xr25, sp, 25*SZXREG
>> +	xvst	xr26, sp, 26*SZXREG
>> +	xvst	xr27, sp, 27*SZXREG
>> +	xvst	xr28, sp, 28*SZXREG
>> +	xvst	xr29, sp, 29*SZXREG
>> +	xvst	xr30, sp, 30*SZXREG
>> +	xvst	xr31, sp, 31*SZXREG
>> +	b	    .Ltga
>> +
>> +.Llsx:
>> +	/* Test for LSX using the hwcap word still held in t0.  */
>> +	andi	t1, t0, HWCAP_LOONGARCH_LSX
>> +	beqz	t1, .Lfloat
>> +
>> +	/* Save 128-bit vector registers.  */
>> +	ADDI	sp, sp, -FRAME_SIZE_LSX
>> +	vst	vr0, sp, 0*SZVREG
>> +	vst	vr1, sp, 1*SZVREG
>> +	vst	vr2, sp, 2*SZVREG
>> +	vst	vr3, sp, 3*SZVREG
>> +	vst	vr4, sp, 4*SZVREG
>> +	vst	vr5, sp, 5*SZVREG
>> +	vst	vr6, sp, 6*SZVREG
>> +	vst	vr7, sp, 7*SZVREG
>> +	vst	vr8, sp, 8*SZVREG
>> +	vst	vr9, sp, 9*SZVREG
>> +	vst	vr10, sp, 10*SZVREG
>> +	vst	vr11, sp, 11*SZVREG
>> +	vst	vr12, sp, 12*SZVREG
>> +	vst	vr13, sp, 13*SZVREG
>> +	vst	vr14, sp, 14*SZVREG
>> +	vst	vr15, sp, 15*SZVREG
>> +	vst	vr16, sp, 16*SZVREG
>> +	vst	vr17, sp, 17*SZVREG
>> +	vst	vr18, sp, 18*SZVREG
>> +	vst	vr19, sp, 19*SZVREG
>> +	vst	vr20, sp, 20*SZVREG
>> +	vst	vr21, sp, 21*SZVREG
>> +	vst	vr22, sp, 22*SZVREG
>> +	vst	vr23, sp, 23*SZVREG
>> +	vst	vr24, sp, 24*SZVREG
>> +	vst	vr25, sp, 25*SZVREG
>> +	vst	vr26, sp, 26*SZVREG
>> +	vst	vr27, sp, 27*SZVREG
>> +	vst	vr28, sp, 28*SZVREG
>> +	vst	vr29, sp, 29*SZVREG
>> +	vst	vr30, sp, 30*SZVREG
>> +	vst	vr31, sp, 31*SZVREG
>> +	b	    .Ltga
>> +
>> +.Lfloat:
>> +	/* Save float registers.  */
>> +	ADDI	sp, sp, -FRAME_SIZE_FLOAT
>> +	FREG_S	fa0, sp, 0*SZFREG
>> +	FREG_S	fa1, sp, 1*SZFREG
>> +	FREG_S	fa2, sp, 2*SZFREG
>> +	FREG_S	fa3, sp, 3*SZFREG
>> +	FREG_S	fa4, sp, 4*SZFREG
>> +	FREG_S	fa5, sp, 5*SZFREG
>> +	FREG_S	fa6, sp, 6*SZFREG
>> +	FREG_S	fa7, sp, 7*SZFREG
>> +	FREG_S	ft0, sp, 8*SZFREG
>> +	FREG_S	ft1, sp, 9*SZFREG
>> +	FREG_S	ft2, sp, 10*SZFREG
>> +	FREG_S	ft3, sp, 11*SZFREG
>> +	FREG_S	ft4, sp, 12*SZFREG
>> +	FREG_S	ft5, sp, 13*SZFREG
>> +	FREG_S	ft6, sp, 14*SZFREG
>> +	FREG_S	ft7, sp, 15*SZFREG
>> +	FREG_S	ft8, sp, 16*SZFREG
>> +	FREG_S	ft9, sp, 17*SZFREG
>> +	FREG_S	ft10, sp, 18*SZFREG
>> +	FREG_S	ft11, sp, 19*SZFREG
>> +	FREG_S	ft12, sp, 20*SZFREG
>> +	FREG_S	ft13, sp, 21*SZFREG
>> +	FREG_S	ft14, sp, 22*SZFREG
>> +	FREG_S	ft15, sp, 23*SZFREG
>> +
>> +#endif /* #ifndef __loongarch_soft_float */
>> +
>> +.Ltga:
>> +	bl	__tls_get_addr	/* a0 = td, which is also &td->tlsinfo */
>> +	ADDI	a0, a0, -TLS_DTV_OFFSET
>> +
>> +#ifndef __loongarch_soft_float
>> +
>> +	la.global   t0, _rtld_global_ro
>> +	REG_L	t0, t0, GLRO_DL_HWCAP_OFFSET
>> +	andi	t1, t0, HWCAP_LOONGARCH_LASX	/* t0 keeps hwcap for .Llsx1 */
>> +	beqz	t1, .Llsx1
>> +
>> +	/* Restore 256-bit vector registers.  */
>> +	xvld	xr0, sp, 0*SZXREG
>> +	xvld	xr1, sp, 1*SZXREG
>> +	xvld	xr2, sp, 2*SZXREG
>> +	xvld	xr3, sp, 3*SZXREG
>> +	xvld	xr4, sp, 4*SZXREG
>> +	xvld	xr5, sp, 5*SZXREG
>> +	xvld	xr6, sp, 6*SZXREG
>> +	xvld	xr7, sp, 7*SZXREG
>> +	xvld	xr8, sp, 8*SZXREG
>> +	xvld	xr9, sp, 9*SZXREG
>> +	xvld	xr10, sp, 10*SZXREG
>> +	xvld	xr11, sp, 11*SZXREG
>> +	xvld	xr12, sp, 12*SZXREG
>> +	xvld	xr13, sp, 13*SZXREG
>> +	xvld	xr14, sp, 14*SZXREG
>> +	xvld	xr15, sp, 15*SZXREG
>> +	xvld	xr16, sp, 16*SZXREG
>> +	xvld	xr17, sp, 17*SZXREG
>> +	xvld	xr18, sp, 18*SZXREG
>> +	xvld	xr19, sp, 19*SZXREG
>> +	xvld	xr20, sp, 20*SZXREG
>> +	xvld	xr21, sp, 21*SZXREG
>> +	xvld	xr22, sp, 22*SZXREG
>> +	xvld	xr23, sp, 23*SZXREG
>> +	xvld	xr24, sp, 24*SZXREG
>> +	xvld	xr25, sp, 25*SZXREG
>> +	xvld	xr26, sp, 26*SZXREG
>> +	xvld	xr27, sp, 27*SZXREG
>> +	xvld	xr28, sp, 28*SZXREG
>> +	xvld	xr29, sp, 29*SZXREG
>> +	xvld	xr30, sp, 30*SZXREG
>> +	xvld	xr31, sp, 31*SZXREG
>> +	ADDI	sp, sp, FRAME_SIZE_LASX
>> +	b .Lfcsr
>> +
>> +.Llsx1:
>> +	andi	t1, t0, HWCAP_LOONGARCH_LSX
>> +	beqz	t1, .Lfloat1
>> +
>> +	/* Restore 128-bit vector registers.  */
>> +	vld	vr0, sp, 0*SZVREG
>> +	vld	vr1, sp, 1*SZVREG
>> +	vld	vr2, sp, 2*SZVREG
>> +	vld	vr3, sp, 3*SZVREG
>> +	vld	vr4, sp, 4*SZVREG
>> +	vld	vr5, sp, 5*SZVREG
>> +	vld	vr6, sp, 6*SZVREG
>> +	vld	vr7, sp, 7*SZVREG
>> +	vld	vr8, sp, 8*SZVREG
>> +	vld	vr9, sp, 9*SZVREG
>> +	vld	vr10, sp, 10*SZVREG
>> +	vld	vr11, sp, 11*SZVREG
>> +	vld	vr12, sp, 12*SZVREG
>> +	vld	vr13, sp, 13*SZVREG
>> +	vld	vr14, sp, 14*SZVREG
>> +	vld	vr15, sp, 15*SZVREG
>> +	vld	vr16, sp, 16*SZVREG
>> +	vld	vr17, sp, 17*SZVREG
>> +	vld	vr18, sp, 18*SZVREG
>> +	vld	vr19, sp, 19*SZVREG
>> +	vld	vr20, sp, 20*SZVREG
>> +	vld	vr21, sp, 21*SZVREG
>> +	vld	vr22, sp, 22*SZVREG
>> +	vld	vr23, sp, 23*SZVREG
>> +	vld	vr24, sp, 24*SZVREG
>> +	vld	vr25, sp, 25*SZVREG
>> +	vld	vr26, sp, 26*SZVREG
>> +	vld	vr27, sp, 27*SZVREG
>> +	vld	vr28, sp, 28*SZVREG
>> +	vld	vr29, sp, 29*SZVREG
>> +	vld	vr30, sp, 30*SZVREG
>> +	vld	vr31, sp, 31*SZVREG
>> +	ADDI	sp, sp, FRAME_SIZE_LSX
>> +	b	    .Lfcsr
>> +
>> +.Lfloat1:
>> +	/* Restore float registers.  */
>> +	FREG_L	fa0, sp, 0*SZFREG
>> +	FREG_L	fa1, sp, 1*SZFREG
>> +	FREG_L	fa2, sp, 2*SZFREG
>> +	FREG_L	fa3, sp, 3*SZFREG
>> +	FREG_L	fa4, sp, 4*SZFREG
>> +	FREG_L	fa5, sp, 5*SZFREG
>> +	FREG_L	fa6, sp, 6*SZFREG
>> +	FREG_L	fa7, sp, 7*SZFREG
>> +	FREG_L	ft0, sp, 8*SZFREG
>> +	FREG_L	ft1, sp, 9*SZFREG
>> +	FREG_L	ft2, sp, 10*SZFREG
>> +	FREG_L	ft3, sp, 11*SZFREG
>> +	FREG_L	ft4, sp, 12*SZFREG
>> +	FREG_L	ft5, sp, 13*SZFREG
>> +	FREG_L	ft6, sp, 14*SZFREG
>> +	FREG_L	ft7, sp, 15*SZFREG
>> +	FREG_L	ft8, sp, 16*SZFREG
>> +	FREG_L	ft9, sp, 17*SZFREG
>> +	FREG_L	ft10, sp, 18*SZFREG
>> +	FREG_L	ft11, sp, 19*SZFREG
>> +	FREG_L	ft12, sp, 20*SZFREG
>> +	FREG_L	ft13, sp, 21*SZFREG
>> +	FREG_L	ft14, sp, 22*SZFREG
>> +	FREG_L	ft15, sp, 23*SZFREG
>> +	ADDI	sp, sp, FRAME_SIZE_FLOAT
>> +
>> +.Lfcsr:
>> +	/* Restore fcsr0 register.  */
>> +	ld.w	t0, sp, 0
>> +	movgr2fcsr  fcsr0, t0
>> +	ADDI	sp, sp, SZFCSREG
>> +
>> +#endif /* #ifndef __loongarch_soft_float */
>> +
>> +	REG_L	ra, sp, 0 * SZREG
>> +	REG_L	a1, sp, 1 * SZREG
>> +	REG_L	a2, sp, 2 * SZREG
>> +	REG_L	a3, sp, 3 * SZREG
>> +	REG_L	a4, sp, 4 * SZREG
>> +	REG_L	a5, sp, 5 * SZREG
>> +	REG_L	a6, sp, 6 * SZREG
>> +	REG_L	a7, sp, 7 * SZREG
>> +	REG_L	t4, sp, 8 * SZREG
>> +	REG_L	t5, sp, 9 * SZREG
>> +	REG_L	t6, sp, 10 * SZREG
>> +	REG_L	t7, sp, 11 * SZREG
>> +	REG_L	t8, sp, 12 * SZREG
>> +	ADDI	sp, sp, FRAME_SIZE
>> +
>> +	b	.Lret
>> +	cfi_endproc
>> +	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>> +
>> +#endif /* #ifdef SHARED */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>> new file mode 100644
>> index 0000000000..7444dac520
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>> @@ -0,0 +1,49 @@
>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>> +   LoongArch version.
>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#ifndef _DL_TLSDESC_H
>> +#define _DL_TLSDESC_H
>> +
>> +#include <dl-tls.h>
>> +
>> +/* Type used to represent a TLS descriptor in the GOT.  */
>> +struct tlsdesc
>> +{
>> +  ptrdiff_t (*entry) (struct tlsdesc *);  /* Resolver installed by the relocation code.  */
>> +  void *arg;  /* Value or pointer consumed by that resolver.  */
>> +};
>> +
>> +/* Type used as the argument in a TLS descriptor for a symbol that
>> +   needs dynamic TLS offsets.  */
>> +struct tlsdesc_dynamic_arg
>> +{
>> +  tls_index tlsinfo;  /* Module and offset of the referenced object.  */
>> +  size_t gen_count;  /* Compared with dtv[0].counter by _dl_tlsdesc_dynamic.  */
>> +};
>> +
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>> +
>> +#ifdef SHARED
>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>> +#endif
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>> index 4d8737ee7f..833dc9eb82 100644
>> --- a/sysdeps/loongarch/linkmap.h
>> +++ b/sysdeps/loongarch/linkmap.h
>> @@ -18,5 +18,6 @@
>>  
>>  struct link_map_machine
>>  {
>> -  ElfW (Addr) plt; /* Address of .plt.  */
>> +  ElfW (Addr) plt;	/* Address of .plt.  */
>> +  void *tlsdesc_table;	/* Address of TLS descriptor hash table.  */
>>  };
>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>> index 51521a7eb4..23c1d12914 100644
>> --- a/sysdeps/loongarch/sys/asm.h
>> +++ b/sysdeps/loongarch/sys/asm.h
>> @@ -25,6 +25,7 @@
>>  /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>>  #define SZREG 8
>>  #define SZFREG 8
>> +#define SZFCSREG 4
>>  #define SZVREG 16
>>  #define SZXREG 32
>>  #define REG_L ld.d
>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>> index f61ee25b25..80ce3e9c00 100644
>> --- a/sysdeps/loongarch/sys/regdef.h
>> +++ b/sysdeps/loongarch/sys/regdef.h
>> @@ -97,6 +97,7 @@
>>  #define fcc5 $fcc5
>>  #define fcc6 $fcc6
>>  #define fcc7 $fcc7
>> +#define fcsr0 $fcsr0
>>  
>>  #define vr0 $vr0
>>  #define vr1 $vr1
>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>> new file mode 100644
>> index 0000000000..4a3d5d22ef
>> --- /dev/null
>> +++ b/sysdeps/loongarch/tlsdesc.c
>> @@ -0,0 +1,39 @@
>> +/* Manage TLS descriptors.  LoongArch64 version.
>> +
>> +   Copyright (C) 2011-2024 Free Software Foundation, Inc.
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <ldsodefs.h>
>> +#include <tls.h>
>> +#include <dl-tlsdesc.h>
>> +#include <dl-unmap-segments.h>
>> +#include <tlsdeschtab.h>
>> +
>> +/* Unmap MAP's segments and, in a SHARED build, also delete the TLS
>> +   descriptor hash table attached to MAP, if there is one.  */
>> +
>> +void
>> +_dl_unmap (struct link_map *map)
>> +{
>> +  _dl_unmap_segments (map);
>> +
>> +#ifdef SHARED
>> +  if (map->l_mach.tlsdesc_table)
>> +    htab_delete (map->l_mach.tlsdesc_table);
>> +#endif
>> +}
>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
>> new file mode 100644
>> index 0000000000..a0b945e449
>> --- /dev/null
>> +++ b/sysdeps/loongarch/tlsdesc.sym
>> @@ -0,0 +1,28 @@
>> +#include <stddef.h>
>> +#include <sysdep.h>
>> +#include <tls.h>
>> +#include <link.h>
>> +#include <dl-tlsdesc.h>
>> +
>> +#define SHARED 1
>> +
>> +#include <ldsodefs.h>
>> +
>> +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name)
>> +
>> +--
>> +
>> +-- Abuse tls.h macros to derive offsets relative to the thread register.
>> +
>> +TLSDESC_ARG		offsetof(struct tlsdesc, arg)
>> +TLSDESC_GEN_COUNT	offsetof(struct tlsdesc_dynamic_arg, gen_count)
>> +TLSDESC_MODID		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
>> +TLSDESC_MODOFF		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
>> +TCBHEAD_DTV		offsetof(tcbhead_t, dtv)
>> +DTV_COUNTER		offsetof(dtv_t, counter)
>> +TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
>> +TLS_DTV_OFFSET		TLS_DTV_OFFSET
>> +SIZE_OF_DTV		sizeof(tcbhead_t)
>> +GLRO_DL_HWCAP_OFFSET    GLRO_offsetof (dl_hwcap)
>> +HWCAP_LOONGARCH_LSX	HWCAP_LOONGARCH_LSX
>> +HWCAP_LOONGARCH_LASX	HWCAP_LOONGARCH_LASX
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>> index 547b1c1b7f..ec32e6d13f 100644
>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>> @@ -5,3 +5,5 @@ libc.so: calloc
>>  libc.so: free
>>  libc.so: malloc
>>  libc.so: realloc
>> +# The dynamic loader needs __tls_get_addr for TLS.
>> +ld.so: __tls_get_addr
> 


More information about the Libc-alpha mailing list