[PATCH v2] LoongArch: Add support for TLS Descriptors

Adhemerval Zanella Netto adhemerval.zanella@linaro.org
Fri Mar 8 14:10:18 GMT 2024



On 08/03/24 04:53, mengqinggang wrote:
> Thanks a lot for the review! A new v3 version patch has been sent.
> https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html
> 
> 
> Some replies are inline below.
> 
> 

From the gcc enablement patch it seems that you are using the aarch64
ABI naming, -mtls-dialect={desc,trad}.  So I would suggest checking that
there is no regression with my patch to enable TLS descriptors for
-mtls-dialect=desc [1].

You might also extend the testing to check for incorrect save/restore of
vector registers, as I did for the arm32 ones [2] (check
the sysdeps/arm/tst-gnu2-tls2.h).

Also, I think this patch should be pushed only after gcc enablement
is installed.


[1] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-4-adhemerval.zanella@linaro.org/
[2] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-3-adhemerval.zanella@linaro.org/

> 在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道:
>>
>> On 28/02/24 22:43, mengqinggang wrote:
>>> This is mostly based on AArch64 and RISC-V implementation.
>>>
>>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>>
>>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
>>> all vector registers.
>>> ---
>>> Changes v1 -> v2:
>>> - Fix vr24-vr31, xr24-xr31 typo.
>>> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
>>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>>>
>>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>> Patch looks ok, some comments below.
>>
>>>   elf/elf.h                                     |   2 +
>>>   sysdeps/loongarch/Makefile                    |   6 +
>>>   sysdeps/loongarch/dl-link.sym                 |   1 +
>>>   sysdeps/loongarch/dl-machine.h                |  60 ++-
>>>   sysdeps/loongarch/dl-tls.h                    |   9 +-
>>>   sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
>>>   sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
>>>   sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
>>>   sysdeps/loongarch/linkmap.h                   |   1 +
>>>   sysdeps/loongarch/sys/asm.h                   |   1 +
>>>   sysdeps/loongarch/sys/regdef.h                |   1 +
>>>   sysdeps/loongarch/tlsdesc.c                   |  39 ++
>>>   sysdeps/loongarch/tlsdesc.sym                 |  19 +
>>>   .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>>>   14 files changed, 625 insertions(+), 3 deletions(-)
>>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>>>   create mode 100644 sysdeps/loongarch/tlsdesc.c
>>>   create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>>
>>> diff --git a/elf/elf.h b/elf/elf.h
>>> index f2206e5c06..eec24ea049 100644
>>> --- a/elf/elf.h
>>> +++ b/elf/elf.h
>>> @@ -4237,6 +4237,8 @@ enum
>>>   #define R_LARCH_TLS_TPREL32    10
>>>   #define R_LARCH_TLS_TPREL64    11
>>>   #define R_LARCH_IRELATIVE    12
>>> +#define R_LARCH_TLS_DESC32    13
>>> +#define R_LARCH_TLS_DESC64    14
>>>     /* Reserved for future relocs that the dynamic linker must understand.  */
>>>   diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>>> index 43d2f583cd..181389e787 100644
>>> --- a/sysdeps/loongarch/Makefile
>>> +++ b/sysdeps/loongarch/Makefile
>>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>>>   endif
>>>     ifeq ($(subdir),elf)
>>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>>>   gen-as-const-headers += dl-link.sym
>>>   endif
>>>   +ifeq ($(subdir),csu)
>>> +gen-as-const-headers += tlsdesc.sym
>>> +endif
>>> +
>>> +
>>>   # LoongArch's assembler also needs to know about PIC as it changes the
>>>   # definition of some assembler macros.
>>>   ASFLAGS-.os += $(pic-ccflag)
>>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
>>> index b534968e30..fd81ef37d5 100644
>>> --- a/sysdeps/loongarch/dl-link.sym
>>> +++ b/sysdeps/loongarch/dl-link.sym
>>> @@ -1,6 +1,7 @@
>>>   #include <stddef.h>
>>>   #include <sysdep.h>
>>>   #include <link.h>
>>> +#include <dl-tlsdesc.h>
>>>     DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
>>>   DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
>>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>>> index ab81b82d95..8ca6c224f6 100644
>>> --- a/sysdeps/loongarch/dl-machine.h
>>> +++ b/sysdeps/loongarch/dl-machine.h
>>> @@ -25,7 +25,7 @@
>>>   #include <entry.h>
>>>   #include <elf/elf.h>
>>>   #include <sys/asm.h>
>>> -#include <dl-tls.h>
>>> +#include <dl-tlsdesc.h>
>>>   #include <dl-static-tls.h>
>>>   #include <dl-machine-rel.h>
>>>   @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>>>         *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>>>         break;
>>>   +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>>> +      {
>>> +    struct tlsdesc volatile *td =
>>> +        (struct tlsdesc volatile *)addr_field;
>>> +    if (! sym)
>>> +      {
>>> +        td->arg = (void*)reloc->r_addend;
>>> +        td->entry = _dl_tlsdesc_undefweak;
>>> +      }
>>> +    else
>>> +      {
>>> +# ifndef SHARED
>>> +        CHECK_STATIC_TLS (map, sym_map);
>>> +# else
>>> +        if (!TRY_STATIC_TLS (map, sym_map))
>>> +          {
>>> +        td->arg = _dl_make_tlsdesc_dynamic
>>> +          (sym_map, sym->st_value + reloc->r_addend);
>>> +# if !defined __loongarch_soft_float
>>> +        if (SUPPORT_LASX)
>>> +          td->entry = _dl_tlsdesc_dynamic_lasx;
>>> +        else
>>> +        if (SUPPORT_LSX)
>>> +          td->entry = _dl_tlsdesc_dynamic_lsx;
>>> +        else
>>> +# endif
>>> +          td->entry = _dl_tlsdesc_dynamic;
>>> +          }
>>> +        else
>>> +# endif
>>> +          {
>>> +        td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>>> +                + reloc->r_addend);
>>> +        td->entry = _dl_tlsdesc_return;
>>> +          }
>>> +      }
>>> +    break;
>>> +      }
>>> +
>>>       case R_LARCH_COPY:
>>>         {
>>>         if (sym == NULL)
>>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>>>         else
>>>       *reloc_addr = map->l_mach.plt;
>>>       }
>>> +  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
>> Use __glibc_likely here.
>>
>>> +    {
>>> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>>> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>>> +      const ElfW (Sym) *sym = &symtab[symndx];
>>> +      const struct r_found_version *version = NULL;
>>> +
>>> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>>> +    {
>>> +      const ElfW (Half) *vernum =
>>> +        (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
>>> +      version = &map->l_versions[vernum[symndx] & 0x7fff];
>>> +    }
>>> +
>>> +      /* Always initialize TLS descriptors completely, because lazy
>>> +     initialization requires synchronization at every TLS access.  */
>>> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>>> +            skip_ifunc);
>>> +    }
>>>     else
>>>       _dl_reloc_bad_type (map, r_type, 1);
>>>   }
>>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>>> index 29924b866d..de593c002d 100644
>>> --- a/sysdeps/loongarch/dl-tls.h
>>> +++ b/sysdeps/loongarch/dl-tls.h
>>> @@ -16,6 +16,9 @@
>>>      License along with the GNU C Library.  If not, see
>>>      <https://www.gnu.org/licenses/>.  */
>>>   +#ifndef _DL_TLS_H
>>> +#define _DL_TLS_H
>>> +
>>>   /* Type used for the representation of TLS information in the GOT.  */
>>>   typedef struct
>>>   {
>>> @@ -23,6 +26,8 @@ typedef struct
>>>     unsigned long int ti_offset;
>>>   } tls_index;
>>>   +extern void *__tls_get_addr (tls_index *ti);
>>> +
>>>   /* The thread pointer points to the first static TLS block.  */
>>>   #define TLS_TP_OFFSET 0
>>>   @@ -37,10 +42,10 @@ typedef struct
>>>   /* Compute the value for a DTPREL reloc.  */
>>>   #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>>   -extern void *__tls_get_addr (tls_index *ti);
>>> -
>> Why move the function prototype?
> 
> 
> It was probably moved just to take it out of the middle of a bunch of macros.
> 
> 
>>
>>>   #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>>>   #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>>     /* Value used for dtv entries for which the allocation is delayed.  */
>>>   #define TLS_DTV_UNALLOCATED ((void *) -1l)
>>> +
>>> +#endif
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> new file mode 100644
>>> index 0000000000..0d8c9bb991
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> @@ -0,0 +1,341 @@
>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>> +   LoongArch version.
>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> Update Copyright years to 2024.
>>
>>> +
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#ifdef USE_LASX
>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
>>> +#elif defined USE_LSX
>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
>>> +#elif !defined __loongarch_soft_float
>>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
>>> +#else
>>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
>>> +#endif
>> I don't have a strong opinion, but another option that might be simpler is
>> to provide only one _dl_tlsdesc_dynamic implementation and check the
>> required save/restore of vector registers based on the hwcap value.
> 
> 
> The v3 patch provides only one  _dl_tlsdesc_dynamic implementation.
> 
> 
>>> +
>>> +#ifdef SHARED
>>> +    /* Handler for dynamic TLS symbols.
>>> +       Prototype:
>>> +       _dl_tlsdesc_dynamic (tlsdesc *) ;
>>> +
>>> +       The second word of the descriptor points to a
>>> +       tlsdesc_dynamic_arg structure.
>>> +
>>> +       Returns the offset between the thread pointer and the
>>> +       object referenced by the argument.
>>> +
>>> +       ptrdiff_t
>>> +       __attribute__ ((__regparm__ (1)))
>> Does this attribute really make sense for loongarch?
> 
> 
> This line has been deleted.
> 
> 
>>
>>> +       _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>>> +       {
>>> +         struct tlsdesc_dynamic_arg *td = tdp->arg;
>>> +         dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>>> +         if (__builtin_expect (td->gen_count <= dtv[0].counter
>> Use __glibc_likely (the expected value here is 1) or just remove the __builtin_expect for clarity.
>>
>>> +        && (dtv[td->tlsinfo.ti_module].pointer.val
>>> +            != TLS_DTV_UNALLOCATED),
>>> +        1))
>>> +           return dtv[td->tlsinfo.ti_module].pointer.val
>>> +        + td->tlsinfo.ti_offset
>>> +        - __thread_pointer;
>>> +
>>> +         return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>>> +       }
>>> +     */
>>> +    .hidden _dl_tlsdesc_dynamic
>>> +    .global    _dl_tlsdesc_dynamic
>>> +    .type    _dl_tlsdesc_dynamic,%function
>>> +    cfi_startproc
>>> +    .align 2
>>> +_dl_tlsdesc_dynamic:
>>> +    /* Save just enough registers to support fast path, if we fall
>>> +       into slow path we will save additional registers.  */
>>> +    ADDI    sp, sp,-24
>>> +    REG_S    t0, sp, 0
>>> +    REG_S    t1, sp, 8
>>> +    REG_S    t2, sp, 16
>>> +
>>> +    REG_L    t0, tp, -SIZE_OF_DTV      # dtv(t0) = tp + TCBHEAD_DTV dtv start
>>> +    REG_L    a0, a0, TLSDESC_ARG      # td(a0) = tdp->arg
>>> +    REG_L    t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
>>> +    REG_L    t2, t0, DTV_COUNTER      # t2 = dtv[0].counter
>>> +    bltu    t2, t1, Lslow
>>> +
>>> +    REG_L    t1, a0, TLSDESC_MODID      # t1 = td->tlsinfo.ti_module
>>> +    slli.d    t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
>>> +    add.d    t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
>>> +    REG_L    t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
>>> +    li.d    t2, TLS_DTV_UNALLOCATED
>>> +    beq    t1, t2, Lslow
>>> +    REG_L    t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
>>> +    # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
>>> +    add.d    a0, t1, t2
>>> +Lret:
>>> +    sub.d    a0, a0, tp
>>> +    REG_L    t0, sp, 0
>>> +    REG_L    t1, sp, 8
>>> +    REG_L    t2, sp, 16
>>> +    ADDI    sp, sp, 24
>>> +    RET
>>> +
>>> +Lslow:
>>> +    /* This is the slow path. We need to call __tls_get_addr() which
>>> +       means we need to save and restore all the register that the
>>> +       callee will trash.  */
>>> +
>>> +    /* Save the remaining registers that we must treat as caller save.  */
>>> +    ADDI    sp, sp, -FRAME_SIZE
>>> +    REG_S    ra, sp, 0 * SZREG
>>> +    REG_S    a1, sp, 1 * SZREG
>>> +    REG_S    a2, sp, 2 * SZREG
>>> +    REG_S    a3, sp, 3 * SZREG
>>> +    REG_S    a4, sp, 4 * SZREG
>>> +    REG_S    a5, sp, 5 * SZREG
>>> +    REG_S    a6, sp, 6 * SZREG
>>> +    REG_S    a7, sp, 7 * SZREG
>>> +    REG_S    t4, sp, 8 * SZREG
>>> +    REG_S    t5, sp, 9 * SZREG
>>> +    REG_S    t6, sp, 10 * SZREG
>>> +    REG_S    t7, sp, 11 * SZREG
>>> +    REG_S    t8, sp, 12 * SZREG
>>> +
>>> +#ifdef USE_LASX
>>> +    xvst    xr0, sp, 13*SZREG + 0*SZXREG
>>> +    xvst    xr1, sp, 13*SZREG + 1*SZXREG
>>> +    xvst    xr2, sp, 13*SZREG + 2*SZXREG
>>> +    xvst    xr3, sp, 13*SZREG + 3*SZXREG
>>> +    xvst    xr4, sp, 13*SZREG + 4*SZXREG
>>> +    xvst    xr5, sp, 13*SZREG + 5*SZXREG
>>> +    xvst    xr6, sp, 13*SZREG + 6*SZXREG
>>> +    xvst    xr7, sp, 13*SZREG + 7*SZXREG
>>> +    xvst    xr8, sp, 13*SZREG + 8*SZXREG
>>> +    xvst    xr9, sp, 13*SZREG + 9*SZXREG
>>> +    xvst    xr10, sp, 13*SZREG + 10*SZXREG
>>> +    xvst    xr11, sp, 13*SZREG + 11*SZXREG
>>> +    xvst    xr12, sp, 13*SZREG + 12*SZXREG
>>> +    xvst    xr13, sp, 13*SZREG + 13*SZXREG
>>> +    xvst    xr14, sp, 13*SZREG + 14*SZXREG
>>> +    xvst    xr15, sp, 13*SZREG + 15*SZXREG
>>> +    xvst    xr16, sp, 13*SZREG + 16*SZXREG
>>> +    xvst    xr17, sp, 13*SZREG + 17*SZXREG
>>> +    xvst    xr18, sp, 13*SZREG + 18*SZXREG
>>> +    xvst    xr19, sp, 13*SZREG + 19*SZXREG
>>> +    xvst    xr20, sp, 13*SZREG + 20*SZXREG
>>> +    xvst    xr21, sp, 13*SZREG + 21*SZXREG
>>> +    xvst    xr22, sp, 13*SZREG + 22*SZXREG
>>> +    xvst    xr23, sp, 13*SZREG + 23*SZXREG
>>> +    xvst    xr24, sp, 13*SZREG + 24*SZXREG
>>> +    xvst    xr25, sp, 13*SZREG + 25*SZXREG
>>> +    xvst    xr26, sp, 13*SZREG + 26*SZXREG
>>> +    xvst    xr27, sp, 13*SZREG + 27*SZXREG
>>> +    xvst    xr28, sp, 13*SZREG + 28*SZXREG
>>> +    xvst    xr29, sp, 13*SZREG + 29*SZXREG
>>> +    xvst    xr30, sp, 13*SZREG + 30*SZXREG
>>> +    xvst    xr31, sp, 13*SZREG + 31*SZXREG
>>> +    # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>> +    # some fields in fcsr0
>>> +    movfcsr2gr  t0, fcsr0
>>> +    REG_S        t0, sp, 32*SZXREG
>>> +#elif defined USE_LSX
>>> +    vst    vr0, sp, 13*SZREG + 0*SZVREG
>>> +    vst    vr1, sp, 13*SZREG + 1*SZVREG
>>> +    vst    vr2, sp, 13*SZREG + 2*SZVREG
>>> +    vst    vr3, sp, 13*SZREG + 3*SZVREG
>>> +    vst    vr4, sp, 13*SZREG + 4*SZVREG
>>> +    vst    vr5, sp, 13*SZREG + 5*SZVREG
>>> +    vst    vr6, sp, 13*SZREG + 6*SZVREG
>>> +    vst    vr7, sp, 13*SZREG + 7*SZVREG
>>> +    vst    vr8, sp, 13*SZREG + 8*SZVREG
>>> +    vst    vr9, sp, 13*SZREG + 9*SZVREG
>>> +    vst    vr10, sp, 13*SZREG + 10*SZVREG
>>> +    vst    vr11, sp, 13*SZREG + 11*SZVREG
>>> +    vst    vr12, sp, 13*SZREG + 12*SZVREG
>>> +    vst    vr13, sp, 13*SZREG + 13*SZVREG
>>> +    vst    vr14, sp, 13*SZREG + 14*SZVREG
>>> +    vst    vr15, sp, 13*SZREG + 15*SZVREG
>>> +    vst    vr16, sp, 13*SZREG + 16*SZVREG
>>> +    vst    vr17, sp, 13*SZREG + 17*SZVREG
>>> +    vst    vr18, sp, 13*SZREG + 18*SZVREG
>>> +    vst    vr19, sp, 13*SZREG + 19*SZVREG
>>> +    vst    vr20, sp, 13*SZREG + 20*SZVREG
>>> +    vst    vr21, sp, 13*SZREG + 21*SZVREG
>>> +    vst    vr22, sp, 13*SZREG + 22*SZVREG
>>> +    vst    vr23, sp, 13*SZREG + 23*SZVREG
>>> +    vst    vr24, sp, 13*SZREG + 24*SZVREG
>>> +    vst    vr25, sp, 13*SZREG + 25*SZVREG
>>> +    vst    vr26, sp, 13*SZREG + 26*SZVREG
>>> +    vst    vr27, sp, 13*SZREG + 27*SZVREG
>>> +    vst    vr28, sp, 13*SZREG + 28*SZVREG
>>> +    vst    vr29, sp, 13*SZREG + 29*SZVREG
>>> +    vst    vr30, sp, 13*SZREG + 30*SZVREG
>>> +    vst    vr31, sp, 13*SZREG + 31*SZVREG
>>> +    # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>> +    # some fields in fcsr0
>>> +    movfcsr2gr  t0, fcsr0
>>> +    REG_S        t0, sp, 32*SZVREG
>>> +#elif !defined __loongarch_soft_float
>>> +    FREG_S    fa0, sp, 13*SZREG + 0*SZFREG
>>> +    FREG_S    fa1, sp, 13*SZREG + 1*SZFREG
>>> +    FREG_S    fa2, sp, 13*SZREG + 2*SZFREG
>>> +    FREG_S    fa3, sp, 13*SZREG + 3*SZFREG
>>> +    FREG_S    fa4, sp, 13*SZREG + 4*SZFREG
>>> +    FREG_S    fa5, sp, 13*SZREG + 5*SZFREG
>>> +    FREG_S    fa6, sp, 13*SZREG + 6*SZFREG
>>> +    FREG_S    fa7, sp, 13*SZREG + 7*SZFREG
>>> +    FREG_S    ft0, sp, 13*SZREG + 8*SZFREG
>>> +    FREG_S    ft1, sp, 13*SZREG + 9*SZFREG
>>> +    FREG_S    ft2, sp, 13*SZREG + 10*SZFREG
>>> +    FREG_S    ft3, sp, 13*SZREG + 11*SZFREG
>>> +    FREG_S    ft4, sp, 13*SZREG + 12*SZFREG
>>> +    FREG_S    ft5, sp, 13*SZREG + 13*SZFREG
>>> +    FREG_S    ft6, sp, 13*SZREG + 14*SZFREG
>>> +    FREG_S    ft7, sp, 13*SZREG + 15*SZFREG
>>> +    FREG_S    ft8, sp, 13*SZREG + 16*SZFREG
>>> +    FREG_S    ft9, sp, 13*SZREG + 17*SZFREG
>>> +    FREG_S    ft10, sp, 13*SZREG + 18*SZFREG
>>> +    FREG_S    ft11, sp, 13*SZREG + 19*SZFREG
>>> +    FREG_S    ft12, sp, 13*SZREG + 20*SZFREG
>>> +    FREG_S    ft13, sp, 13*SZREG + 21*SZFREG
>>> +    FREG_S    ft14, sp, 13*SZREG + 22*SZFREG
>>> +    FREG_S    ft15, sp, 13*SZREG + 23*SZFREG
>>> +    # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>> +    # some fields in fcsr0
>>> +    movfcsr2gr  t0, fcsr0
>>> +    REG_S        t0, sp, 24*SZFREG
>>> +#endif /* #ifdef USE_LASX  */
>>> +
>>> +    bl    __tls_get_addr
>>> +    ADDI    a0, a0, -TLS_DTV_OFFSET
>>> +
>>> +    REG_L    ra, sp, 0
>>> +    REG_L    a1, sp, 1 * 8
>>> +    REG_L    a2, sp, 2 * 8
>>> +    REG_L    a3, sp, 3 * 8
>>> +    REG_L    a4, sp, 4 * 8
>>> +    REG_L    a5, sp, 5 * 8
>>> +    REG_L    a6, sp, 6 * 8
>>> +    REG_L    a7, sp, 7 * 8
>>> +    REG_L    t4, sp, 8 * 8
>>> +    REG_L    t5, sp, 9 * 8
>>> +    REG_L    t6, sp, 10 * 8
>>> +    REG_L    t7, sp, 11 * 8
>>> +    REG_L    t8, sp, 12 * 8
>>> +
>>> +#ifdef USE_LASX
>>> +    xvld    xr0, sp, 13*SZREG + 0*SZXREG
>>> +    xvld    xr1, sp, 13*SZREG + 1*SZXREG
>>> +    xvld    xr2, sp, 13*SZREG + 2*SZXREG
>>> +    xvld    xr3, sp, 13*SZREG + 3*SZXREG
>>> +    xvld    xr4, sp, 13*SZREG + 4*SZXREG
>>> +    xvld    xr5, sp, 13*SZREG + 5*SZXREG
>>> +    xvld    xr6, sp, 13*SZREG + 6*SZXREG
>>> +    xvld    xr7, sp, 13*SZREG + 7*SZXREG
>>> +    xvld    xr8, sp, 13*SZREG + 8*SZXREG
>>> +    xvld    xr9, sp, 13*SZREG + 9*SZXREG
>>> +    xvld    xr10, sp, 13*SZREG + 10*SZXREG
>>> +    xvld    xr11, sp, 13*SZREG + 11*SZXREG
>>> +    xvld    xr12, sp, 13*SZREG + 12*SZXREG
>>> +    xvld    xr13, sp, 13*SZREG + 13*SZXREG
>>> +    xvld    xr14, sp, 13*SZREG + 14*SZXREG
>>> +    xvld    xr15, sp, 13*SZREG + 15*SZXREG
>>> +    xvld    xr16, sp, 13*SZREG + 16*SZXREG
>>> +    xvld    xr17, sp, 13*SZREG + 17*SZXREG
>>> +    xvld    xr18, sp, 13*SZREG + 18*SZXREG
>>> +    xvld    xr19, sp, 13*SZREG + 19*SZXREG
>>> +    xvld    xr20, sp, 13*SZREG + 20*SZXREG
>>> +    xvld    xr21, sp, 13*SZREG + 21*SZXREG
>>> +    xvld    xr22, sp, 13*SZREG + 22*SZXREG
>>> +    xvld    xr23, sp, 13*SZREG + 23*SZXREG
>>> +    xvld    xr24, sp, 13*SZREG + 24*SZXREG
>>> +    xvld    xr25, sp, 13*SZREG + 25*SZXREG
>>> +    xvld    xr26, sp, 13*SZREG + 26*SZXREG
>>> +    xvld    xr27, sp, 13*SZREG + 27*SZXREG
>>> +    xvld    xr28, sp, 13*SZREG + 28*SZXREG
>>> +    xvld    xr29, sp, 13*SZREG + 29*SZXREG
>>> +    xvld    xr30, sp, 13*SZREG + 30*SZXREG
>>> +    xvld    xr31, sp, 13*SZREG + 31*SZXREG
>>> +    REG_L    t0, sp, 32*SZXREG
>>> +    movgr2fcsr  fcsr0, t0
>>> +#elif defined USE_LSX
>>> +    vld    vr0, sp, 13*SZREG + 0*SZVREG
>>> +    vld    vr1, sp, 13*SZREG + 1*SZVREG
>>> +    vld    vr2, sp, 13*SZREG + 2*SZVREG
>>> +    vld    vr3, sp, 13*SZREG + 3*SZVREG
>>> +    vld    vr4, sp, 13*SZREG + 4*SZVREG
>>> +    vld    vr5, sp, 13*SZREG + 5*SZVREG
>>> +    vld    vr6, sp, 13*SZREG + 6*SZVREG
>>> +    vld    vr7, sp, 13*SZREG + 7*SZVREG
>>> +    vld    vr8, sp, 13*SZREG + 8*SZVREG
>>> +    vld    vr9, sp, 13*SZREG + 9*SZVREG
>>> +    vld    vr10, sp, 13*SZREG + 10*SZVREG
>>> +    vld    vr11, sp, 13*SZREG + 11*SZVREG
>>> +    vld    vr12, sp, 13*SZREG + 12*SZVREG
>>> +    vld    vr13, sp, 13*SZREG + 13*SZVREG
>>> +    vld    vr14, sp, 13*SZREG + 14*SZVREG
>>> +    vld    vr15, sp, 13*SZREG + 15*SZVREG
>>> +    vld    vr16, sp, 13*SZREG + 16*SZVREG
>>> +    vld    vr17, sp, 13*SZREG + 17*SZVREG
>>> +    vld    vr18, sp, 13*SZREG + 18*SZVREG
>>> +    vld    vr19, sp, 13*SZREG + 19*SZVREG
>>> +    vld    vr20, sp, 13*SZREG + 20*SZVREG
>>> +    vld    vr21, sp, 13*SZREG + 21*SZVREG
>>> +    vld    vr22, sp, 13*SZREG + 22*SZVREG
>>> +    vld    vr23, sp, 13*SZREG + 23*SZVREG
>>> +    vld    vr24, sp, 13*SZREG + 24*SZVREG
>>> +    vld    vr25, sp, 13*SZREG + 25*SZVREG
>>> +    vld    vr26, sp, 13*SZREG + 26*SZVREG
>>> +    vld    vr27, sp, 13*SZREG + 27*SZVREG
>>> +    vld    vr28, sp, 13*SZREG + 28*SZVREG
>>> +    vld    vr29, sp, 13*SZREG + 29*SZVREG
>>> +    vld    vr30, sp, 13*SZREG + 30*SZVREG
>>> +    vld    vr31, sp, 13*SZREG + 31*SZVREG
>>> +    REG_L    t0, sp, 32*SZVREG
>>> +    movgr2fcsr  fcsr0, t0
>>> +#elif !defined __loongarch_soft_float
>>> +    FREG_L    fa0, sp, 13*SZREG + 0*SZFREG
>>> +    FREG_L    fa1, sp, 13*SZREG + 1*SZFREG
>>> +    FREG_L    fa2, sp, 13*SZREG + 2*SZFREG
>>> +    FREG_L    fa3, sp, 13*SZREG + 3*SZFREG
>>> +    FREG_L    fa4, sp, 13*SZREG + 4*SZFREG
>>> +    FREG_L    fa5, sp, 13*SZREG + 5*SZFREG
>>> +    FREG_L    fa6, sp, 13*SZREG + 6*SZFREG
>>> +    FREG_L    fa7, sp, 13*SZREG + 7*SZFREG
>>> +    FREG_L    ft0, sp, 13*SZREG + 8*SZFREG
>>> +    FREG_L    ft1, sp, 13*SZREG + 9*SZFREG
>>> +    FREG_L    ft2, sp, 13*SZREG + 10*SZFREG
>>> +    FREG_L    ft3, sp, 13*SZREG + 11*SZFREG
>>> +    FREG_L    ft4, sp, 13*SZREG + 12*SZFREG
>>> +    FREG_L    ft5, sp, 13*SZREG + 13*SZFREG
>>> +    FREG_L    ft6, sp, 13*SZREG + 14*SZFREG
>>> +    FREG_L    ft7, sp, 13*SZREG + 15*SZFREG
>>> +    FREG_L    ft8, sp, 13*SZREG + 16*SZFREG
>>> +    FREG_L    ft9, sp, 13*SZREG + 17*SZFREG
>>> +    FREG_L    ft10, sp, 13*SZREG + 18*SZFREG
>>> +    FREG_L    ft11, sp, 13*SZREG + 19*SZFREG
>>> +    FREG_L    ft12, sp, 13*SZREG + 20*SZFREG
>>> +    FREG_L    ft13, sp, 13*SZREG + 21*SZFREG
>>> +    FREG_L    ft14, sp, 13*SZREG + 22*SZFREG
>>> +    FREG_L    ft15, sp, 13*SZREG + 23*SZFREG
>>> +    REG_L    t0, sp, 24*SZFREG
>>> +    movgr2fcsr  fcsr0, t0
>>> +#endif /* #ifdef USE_LASX  */
>>> +
>>> +    ADDI    sp, sp, FRAME_SIZE
>>> +    b    Lret
>>> +    cfi_endproc
>>> +    .size    _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>>> +#endif /* #ifdef SHARED  */
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>>> new file mode 100644
>>> index 0000000000..4a17079169
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>>> @@ -0,0 +1,93 @@
>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>> +   LoongArch version.
>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> Update Copyright years to 2024.
>>
>>> +
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#include <sysdep.h>
>>> +#include <tls.h>
>>> +#include "tlsdesc.h"
>>> +
>>> +    .text
>>> +
>>> +    /* Compute the thread pointer offset for symbols in the static
>>> +       TLS block. The offset is the same for all threads.
>>> +       Prototype:
>>> +       _dl_tlsdesc_return (tlsdesc *);  */
>>> +    .hidden _dl_tlsdesc_return
>>> +    .global    _dl_tlsdesc_return
>>> +    .type    _dl_tlsdesc_return,%function
>>> +    cfi_startproc
>>> +    .align 2
>>> +_dl_tlsdesc_return:
>>> +    REG_L  a0, a0, 8
>>> +    RET
>>> +    cfi_endproc
>>> +    .size    _dl_tlsdesc_return, .-_dl_tlsdesc_return
>>> +
>>> +    /* Handler for undefined weak TLS symbols.
>>> +       Prototype:
>>> +       _dl_tlsdesc_undefweak (tlsdesc *);
>>> +
>>> +       The second word of the descriptor contains the addend.
>>> +       Return the addend minus the thread pointer. This ensures
>>> +       that when the caller adds on the thread pointer it gets back
>>> +       the addend.  */
>>> +    .hidden _dl_tlsdesc_undefweak
>>> +    .global    _dl_tlsdesc_undefweak
>>> +    .type    _dl_tlsdesc_undefweak,%function
>>> +    cfi_startproc
>>> +    .align  2
>>> +_dl_tlsdesc_undefweak:
>>> +    REG_L    a0, a0, 8
>>> +    sub.d    a0, a0, tp
>>> +    RET
>>> +    cfi_endproc
>>> +    .size    _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>>> +
>>> +
>>> +#ifdef SHARED
>>> +
>>> +#if !defined __loongarch_soft_float
>>> +
>>> +#define USE_LASX
>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>>> +#define Lret Lret_lasx
>>> +#define Lslow Lslow_lasx
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +#undef FRAME_SIZE
>>> +#undef USE_LASX
>>> +#undef _dl_tlsdesc_dynamic
>>> +#undef Lret
>>> +#undef Lslow
>>> +
>>> +#define USE_LSX
>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>>> +#define Lret Lret_lsx
>>> +#define Lslow Lslow_lsx
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +#undef FRAME_SIZE
>>> +#undef USE_LSX
>>> +#undef _dl_tlsdesc_dynamic
>>> +#undef Lret
>>> +#undef Lslow
>>> +
>>> +#endif
>>> +
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +
>>> +#endif /* #ifdef SHARED  */
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>>> new file mode 100644
>>> index 0000000000..988037a714
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>>> @@ -0,0 +1,53 @@
>>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>>> +   LoongArch version.
>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>> +
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#ifndef _DL_TLSDESC_H
>>> +#define _DL_TLSDESC_H
>>> +
>>> +#include <dl-tls.h>
>>> +
>>> +/* Type used to represent a TLS descriptor in the GOT.  */
>>> +struct tlsdesc
>>> +{
>>> +  ptrdiff_t (*entry) (struct tlsdesc *);
>>> +  void *arg;
>>> +};
>>> +
>>> +/* Type used as the argument in a TLS descriptor for a symbol that
>>> +   needs dynamic TLS offsets.  */
>>> +struct tlsdesc_dynamic_arg
>>> +{
>>> +  tls_index tlsinfo;
>>> +  size_t gen_count;
>>> +};
>>> +
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>>> +
>>> +# ifdef SHARED
>>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>>> +#if !defined __loongarch_soft_float
>> Minor style, usually for single tests we use '#ifndef' and add
>> attribute_hidden at the end of prototype.
>>
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
>>> +#endif
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>>> +#endif
>>> +
>>> +#endif
>>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>>> index 4d8737ee7f..9b1773634c 100644
>>> --- a/sysdeps/loongarch/linkmap.h
>>> +++ b/sysdeps/loongarch/linkmap.h
>>> @@ -19,4 +19,5 @@
>>>   struct link_map_machine
>>>   {
>>>     ElfW (Addr) plt; /* Address of .plt.  */
>>> +  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
>>>   };
>>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>>> index 51521a7eb4..23c1d12914 100644
>>> --- a/sysdeps/loongarch/sys/asm.h
>>> +++ b/sysdeps/loongarch/sys/asm.h
>>> @@ -25,6 +25,7 @@
>>>   /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>>>   #define SZREG 8
>>>   #define SZFREG 8
>>> +#define SZFCSREG 4
>>>   #define SZVREG 16
>>>   #define SZXREG 32
>>>   #define REG_L ld.d
>>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>>> index f61ee25b25..80ce3e9c00 100644
>>> --- a/sysdeps/loongarch/sys/regdef.h
>>> +++ b/sysdeps/loongarch/sys/regdef.h
>>> @@ -97,6 +97,7 @@
>>>   #define fcc5 $fcc5
>>>   #define fcc6 $fcc6
>>>   #define fcc7 $fcc7
>>> +#define fcsr0 $fcsr0
>>>     #define vr0 $vr0
>>>   #define vr1 $vr1
>>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>>> new file mode 100644
>>> index 0000000000..a357e7619f
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/tlsdesc.c
>>> @@ -0,0 +1,39 @@
>>> +/* Manage TLS descriptors.  AArch64 version.
>>> +
>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> Update Copyright years to 2024 and remove the 'AArch64'.
>>
>>
>>> +
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#include <ldsodefs.h>
>>> +#include <tls.h>
>>> +#include <dl-tlsdesc.h>
>>> +#include <dl-unmap-segments.h>
>>> +#include <tlsdeschtab.h>
>>> +
>>> +/* Unmap the dynamic object, but also release its TLS descriptor table
>>> +   if there is one.  */
>>> +
>>> +void
>>> +_dl_unmap (struct link_map *map)
>>> +{
>>> +  _dl_unmap_segments (map);
>>> +
>>> +#ifdef SHARED
>>> +  if (map->l_mach.tlsdesc_table)
>>> +    htab_delete (map->l_mach.tlsdesc_table);
>>> +#endif
>>> +}
>>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
>>> new file mode 100644
>>> index 0000000000..bcab218631
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/tlsdesc.sym
>>> @@ -0,0 +1,19 @@
>>> +#include <stddef.h>
>>> +#include <sysdep.h>
>>> +#include <tls.h>
>>> +#include <link.h>
>>> +#include <dl-tlsdesc.h>
>>> +
>>> +--
>>> +
>>> +-- Abuse tls.h macros to derive offsets relative to the thread register.
>>> +
>>> +TLSDESC_ARG        offsetof(struct tlsdesc, arg)
>>> +TLSDESC_GEN_COUNT    offsetof(struct tlsdesc_dynamic_arg, gen_count)
>>> +TLSDESC_MODID        offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
>>> +TLSDESC_MODOFF        offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
>>> +TCBHEAD_DTV        offsetof(tcbhead_t, dtv)
>>> +DTV_COUNTER        offsetof(dtv_t, counter)
>>> +TLS_DTV_UNALLOCATED    TLS_DTV_UNALLOCATED
>>> +TLS_DTV_OFFSET        TLS_DTV_OFFSET
>>> +SIZE_OF_DTV        sizeof(tcbhead_t)
>>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>> index 547b1c1b7f..ec32e6d13f 100644
>>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>> @@ -5,3 +5,5 @@ libc.so: calloc
>>>   libc.so: free
>>>   libc.so: malloc
>>>   libc.so: realloc
>>> +# The dynamic loader needs __tls_get_addr for TLS.
>>> +ld.so: __tls_get_addr
> 


More information about the Libc-alpha mailing list