[PATCH] LoongArch: Add support for dl_runtime_profile
Adhemerval Zanella Netto
adhemerval.zanella@linaro.org
Mon Jun 12 18:59:15 GMT 2023
On 07/06/23 06:51, caiyinyu wrote:
> This commit can fix the FAIL item: elf/tst-sprof-basic.
Some minor style comments below; the rest looks ok. Since you are the
arch-maintainer I think you can commit this if you are not seeing any
regression in your environment.
> ---
> sysdeps/loongarch/Makefile | 4 +
> sysdeps/loongarch/dl-link.sym | 14 +++
> sysdeps/loongarch/dl-machine.h | 30 ++++-
> sysdeps/loongarch/dl-trampoline.S | 175 +++++++++++++++++++++++++++++-
> sysdeps/loongarch/sys/asm.h | 1 +
> 5 files changed, 220 insertions(+), 4 deletions(-)
> create mode 100644 sysdeps/loongarch/dl-link.sym
>
> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
> index 1778fd1c88..43d2f583cd 100644
> --- a/sysdeps/loongarch/Makefile
> +++ b/sysdeps/loongarch/Makefile
> @@ -2,6 +2,10 @@ ifeq ($(subdir),misc)
> sysdep_headers += sys/asm.h
> endif
>
> +ifeq ($(subdir),elf)
> +gen-as-const-headers += dl-link.sym
> +endif
> +
> # LoongArch's assembler also needs to know about PIC as it changes the
> # definition of some assembler macros.
> ASFLAGS-.os += $(pic-ccflag)
> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
> new file mode 100644
> index 0000000000..868ab7c6eb
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-link.sym
> @@ -0,0 +1,14 @@
> +#include <stddef.h>
> +#include <sysdep.h>
> +#include <link.h>
> +
> +DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
> +DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
> +
> +DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg)
> +DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg)
> +DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra)
> +DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp)
> +
> +DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0)
> +DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1)
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index 1e07d124f9..e356a1cd99 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -270,10 +270,34 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
> /* If using PLTs, fill in the first two entries of .got.plt. */
> if (l->l_info[DT_JMPREL])
> {
> - extern void _dl_runtime_resolve (void)
> - __attribute__ ((visibility ("hidden")));
> + extern void _dl_runtime_resolve (void) attribute_hidden;
> + extern void _dl_runtime_profile (void) attribute_hidden;
> +
> ElfW (Addr) *gotplt = (ElfW (Addr) *) D_PTR (l, l_info[DT_PLTGOT]);
> - gotplt[0] = (ElfW (Addr)) & _dl_runtime_resolve;
> +
> + /* The got[0] entry contains the address of a function which gets
> + called to get the address of a so far unresolved function and
> + jump to it. The profiling extension of the dynamic linker allows
> + to intercept the calls to collect information. In this case we
> + don't store the address in the GOT so that all future calls also
> + end in this function. */
> + if ( __glibc_unlikely (profile))
No implicit check, use 'profile != 0' since profile is an int.
> + {
> + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile;
> +
> + if (GLRO(dl_profile) != NULL
> + && _dl_name_match_p (GLRO(dl_profile), l))
> + /* Say that we really want profiling and the timers are
> + started. */
> + GL(dl_profile_map) = l;
> + }
> + else
> + {
> + /* This function will get called to fix up the GOT entry
> + indicated by the offset on the stack, and then jump to
> + the resolved address. */
> + gotplt[0] = (ElfW (Addr)) & _dl_runtime_resolve;
> + }
> gotplt[1] = (ElfW (Addr)) l;
> }
> #endif
> diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S
> index c978e2ef63..ed9ec0901c 100644
> --- a/sysdeps/loongarch/dl-trampoline.S
> +++ b/sysdeps/loongarch/dl-trampoline.S
> @@ -19,6 +19,8 @@
> #include <sysdep.h>
> #include <sys/asm.h>
>
> +#include "dl-link.h"
> +
> /* Assembler veneer called from the PLT header code for lazy loading.
> The PLT header passes its own args in t0-t2. */
> #ifdef __loongarch_soft_float
> @@ -31,7 +33,6 @@ ENTRY (_dl_runtime_resolve)
>
> /* Save arguments to stack. */
> ADDI sp, sp, -FRAME_SIZE
> -
Spurious new line removal?
> REG_S ra, sp, 9*SZREG
> REG_S a0, sp, 1*SZREG
> REG_S a1, sp, 2*SZREG
> @@ -88,3 +89,175 @@ ENTRY (_dl_runtime_resolve)
> /* Invoke the callee. */
> jirl zero, t1, 0
> END (_dl_runtime_resolve)
> +
> +
> +ENTRY (_dl_runtime_profile)
> + /* LoongArch we get called with:
> + t0 linkr_map pointer
> + t1 the scaled offset stored in t0, which can be used
> + to calculate the offset of the current symbol in .rela.plt
> + t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function
> + t3 dl resolver entry point, no use in this function
> +
> + Stack frame layout:
> + [sp, #96] La_loongarch_regs
> + [sp, #48] La_loongarch_retval
> + [sp, #40] frame size return from pltenter
> + [sp, #32] dl_profile_call saved a1
> + [sp, #24] dl_profile_call saved a0
> + [sp, #16] T1
> + [sp, #0] ra, fp <- fp
> + */
> +
> +# define OFFSET_T1 16
> +# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8
> +# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16
> +# define OFFSET_RV OFFSET_FS + 8
> +# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
> +
> +# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK))
> +
> + /* Save arguments to stack. */
> + ADDI sp, sp, -SF_SIZE
> + REG_S ra, sp, 0
> + REG_S fp, sp, 8
> +
> + or fp, sp, zero
> +
> + REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
> + REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
> + REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
> + REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
> + REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
> + REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
> + REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
> + REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
> +
> +#ifndef __loongarch_soft_float
> + FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
> + FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
> + FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
> + FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
> + FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
> + FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
> + FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
> + FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
> +#endif
> +
> + /* Update .got.plt and obtain runtime address of callee. */
> + SLLI a1, t1, 1
> + or a0, t0, zero
> + ADD a1, a1, t1
> + or a2, ra, zero /* return addr */
> + ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */
> + ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */
> +
> + REG_S a0, fp, OFFSET_SAVED_CALL_A0
> + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
> +
> + la t2, _dl_profile_fixup
> + jirl ra, t2, 0
> +
> + REG_L t3, fp, OFFSET_FS
> + bge t3, zero, 1f
> +
> + /* Save the return. */
> + or t4, v0, zero
> +
> + /* Restore arguments from stack. */
> + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
> + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
> + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
> + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
> + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
> + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
> + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
> + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
> +
> +#ifndef __loongarch_soft_float
> + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
> + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
> + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
> + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
> + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
> + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
> + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
> + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
> +#endif
> +
> + REG_L ra, fp, 0
> + REG_L fp, fp, SZREG
> +
> + ADDI sp, sp, SF_SIZE
> + jirl zero, t4, 0
> +
> +1:
> + /* The new frame size is in t3. */
> + SUB sp, fp, t3
> + BSTRINS sp, zero, 3, 0
> +
> + REG_S a0, fp, OFFSET_T1
> +
> + or a0, sp, zero
> + ADDI a1, fp, SF_SIZE
> + or a2, t3, zero
> + la t5, memcpy
> + jirl ra, t5, 0
> +
> + REG_L t6, fp, OFFSET_T1
> +
> + /* Call the function. */
> + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG
> + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG
> + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG
> + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG
> + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG
> + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG
> + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG
> + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG
> +
> +#ifndef __loongarch_soft_float
> + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG
> + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG
> + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG
> + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG
> + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG
> + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG
> + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG
> + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG
> +#endif
> + jirl ra, t6, 0
> +
> + REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0
> + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG
> +
> +#ifndef __loongarch_soft_float
> + FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0
> + FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG
> +#endif
> +
> + /* Setup call to pltexit. */
> + REG_L a0, fp, OFFSET_SAVED_CALL_A0
> + REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG
> + ADDI a2, fp, OFFSET_RG
> + ADDI a3, fp, OFFSET_RV
> + la t7, _dl_audit_pltexit
> + jirl ra, t7, 0
> +
> + REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0
> + REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG
> +
> +#ifndef __loongarch_soft_float
> + FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0
> + FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG
> +#endif
> +
> + /* RA from within La_loongarch_reg. */
> + REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA
> + or sp, fp, zero
> + ADDI sp, sp, SF_SIZE
> + REG_S fp, fp, SZREG
> +
> + jirl zero, ra, 0
> +
> +END (_dl_runtime_profile)
> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
> index 3dca70a107..0bb430bb05 100644
> --- a/sysdeps/loongarch/sys/asm.h
> +++ b/sysdeps/loongarch/sys/asm.h
> @@ -31,6 +31,7 @@
> #define SLLI slli.d
> #define ADDI addi.d
> #define ADD add.d
> +#define SUB sub.d
> #define BSTRINS bstrins.d
> #define LI li.d
> #define FREG_L fld.d
More information about the Libc-alpha
mailing list