Bug 14538 - Complicated x86-64 elf_machine_dynamic/elf_machine_load_address
Summary: Complicated x86-64 elf_machine_dynamic/elf_machine_load_address
Status: RESOLVED FIXED
Alias: None
Product: glibc
Classification: Unclassified
Component: dynamic-link (show other bugs)
Version: 2.17
: P2 normal
Target Milestone: 2.17
Assignee: Not yet assigned to anyone
URL: http://sourceware.org/ml/libc-alpha/2...
Keywords:
Depends on:
Blocks:
 
Reported: 2012-09-01 05:00 UTC by H.J. Lu
Modified: 2014-06-17 04:47 UTC (History)
0 users

See Also:
Host:
Target: x86-64
Build:
Last reconfirmed:
fweimer: security-


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description H.J. Lu 2012-09-01 05:00:27 UTC
x86-64 has

/* Return the link-time address of _DYNAMIC.  Conveniently, this is the
   first element of the GOT.  This must be inlined in a function which
   uses global data.

   Takes no arguments.  The value returned is the expression &_DYNAMIC
   converted to ElfW(Addr); nothing else is computed here.  */
static inline ElfW(Addr) __attribute__ ((unused))
elf_machine_dynamic (void)
{
  ElfW(Addr) addr;

  /* This works because we have our GOT address available in the small PIC
     model.

     NOTE(review): in the compiler output quoted in this report the
     reference is emitted as _DYNAMIC@GOTPCREL(%rip), i.e. it goes through
     a GOT slot for _DYNAMIC.  Presumably that slot still holds the
     link-time value when this runs, which is why the result is the
     link-time rather than the run-time address -- confirm.  */
  addr = (ElfW(Addr)) &_DYNAMIC;

  return addr;
}

/* Return the run-time load address of the shared object.

   Takes no arguments.  The result is the RIP-relative address of
   _dl_start minus the word stored for _dl_start in .data.rel.ro; see the
   comment in the body for how that word is to be interpreted.  */
static inline ElfW(Addr) __attribute__ ((unused))
elf_machine_load_address (void)
{
  ElfW(Addr) addr;

  /* The easy way is just the same as on x86:
       leaq _dl_start, %0
       leaq _dl_start(%%rip), %1
       subq %0, %1
     but this does not work with binutils since we then have
     a R_X86_64_32S relocation in a shared lib.

     Instead we store the address of _dl_start in the data section
     and compare it with the current value that we can get via
     an RIP relative addressing mode.  Note that this is the address
     of _dl_start before any relocation performed at runtime.  In case
     the binary is prelinked the resulting "address" is actually a
     load offset which is zero if the binary was loaded at the address
     it is prelinked for.  */

  /* Line by line:
       lea   - put the RIP-relative address of _dl_start into %0.
       sub   - subtract the word found at local label 1 (referenced
	       forward as "1f", also RIP-relative) from %0.
       .section/.previous - switch to .data.rel.ro to emit that word and
	       then return to the section that was active before.
       1:    - the stored word itself, emitted with ASM_ADDR.  ASM_ADDR is
	       defined elsewhere; in the compiler output quoted in this
	       report it appears as ".quad".
     %0 is ADDR, an output-only register operand ("=r"), and there are no
     input operands.  The "cc" clobber is listed because `sub' modifies
     the condition flags.  */
  asm ("lea _dl_start(%%rip), %0\n\t"
       "sub 1f(%%rip), %0\n\t"
       ".section\t.data.rel.ro\n"
       "1:\t" ASM_ADDR " _dl_start\n\t"
       ".previous\n\t"
       : "=r" (addr) : : "cc");

  return addr;
}

while i386 has

/* Return the link-time address of _DYNAMIC.  Conveniently, this is the
   first element of the GOT, a special entry that is never relocated.

   Takes no arguments and returns element 0 of _GLOBAL_OFFSET_TABLE_ as an
   Elf32_Addr.  The function carries the `const' attribute in addition to
   `unused'; presumably that is acceptable because the entry it reads is
   never relocated, as stated above -- confirm.  */
static inline Elf32_Addr __attribute__ ((unused, const))
elf_machine_dynamic (void)
{
  /* This produces a GOTOFF reloc that resolves to zero at link time, so in
     fact just loads from the GOT register directly.  By doing it without
     an asm we can let the compiler choose any register.  */
  /* The table is declared locally, as a const array of Elf32_Addr, with
     attribute_hidden (a macro defined elsewhere).  */
  extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
  return _GLOBAL_OFFSET_TABLE_[0];
}

/* Return the run-time load address of the shared object.

   Takes no arguments.  The result is the address of _DYNAMIC as the
   compiler references it, minus the value returned by
   elf_machine_dynamic ().  */
static inline Elf32_Addr __attribute__ ((unused))
elf_machine_load_address (void)
{
  /* Compute the difference between the runtime address of _DYNAMIC as seen
     by a GOTOFF reference, and the link-time address found in the special
     unrelocated first GOT entry.  */
  /* `bygotoff' is only a local C name: the asm label binds it to the
     assembler symbol _DYNAMIC.  It is declared with attribute_hidden (a
     macro defined elsewhere).  */
  extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden;
  /* Run-time address minus link-time address.  */
  return (Elf32_Addr) &bygotoff - elf_machine_dynamic (); 
}

The i386 version is straightforward and easy to understand.
Comment 1 H.J. Lu 2012-09-01 05:11:21 UTC
The new version:

/* Return the link-time address of _DYNAMIC.  Conveniently, this is the
   first element of the GOT.  This must be inlined in a function which
   uses global data.

   Takes no arguments and returns element 0 of _GLOBAL_OFFSET_TABLE_ as an
   ElfW(Addr).  */
static inline ElfW(Addr) __attribute__ ((unused))
elf_machine_dynamic (void)
{
  /* This produces a PC32 reloc that is resolved at link time, so the first
     GOT entry is read with a single %rip-relative access (the generated
     code quoted in this report uses _GLOBAL_OFFSET_TABLE_(%rip) as a
     memory operand) rather than through a GOT register as on i386.  By
     doing it without an asm we can let the compiler choose any
     register.  */
  /* The table is declared locally, as a const array of ElfW(Addr), with
     attribute_hidden (a macro defined elsewhere).  */
  extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
  return _GLOBAL_OFFSET_TABLE_[0];
}


/* Return the run-time load address of the shared object.

   Takes no arguments.  The result is the address of _DYNAMIC as the
   compiler references it, minus the value returned by
   elf_machine_dynamic ().  */
static inline ElfW(Addr) __attribute__ ((unused))
elf_machine_load_address (void)
{
  /* Compute the difference between the runtime address of _DYNAMIC as seen
     by a PC32 reference, and the link-time address found in the special
     unrelocated first GOT entry.  */
  /* `bygotoff' is only a local C name, kept from the i386 version: the asm
     label binds it to the assembler symbol _DYNAMIC.  It is declared with
     attribute_hidden (a macro defined elsewhere).  In the generated code
     quoted in this report the address is formed with
     "leaq _DYNAMIC(%rip)" and the GOT entry is subtracted with
     "subq _GLOBAL_OFFSET_TABLE_(%rip)".  */
  extern ElfW(Dyn) bygotoff[] asm ("_DYNAMIC") attribute_hidden;
  /* Run-time address minus link-time address.  */
  return (ElfW(Addr)) &bygotoff - elf_machine_dynamic ();
}

It generates:

        movq    _DYNAMIC(%rip), %rax 
        movq    %rdx, start_time(%rip)
        leaq    _DYNAMIC(%rip), %rdx 
        movq    %rdx, %r13 
        subq    _GLOBAL_OFFSET_TABLE_(%rip), %r13 
        testq   %rax, %rax 
        movq    %rdx, 2472+_rtld_local(%rip)
        movq    %r13, 2456+_rtld_local(%rip)
        je      .L993

instead of

#APP
# 75 "../sysdeps/x86_64/dl-machine.h" 1
        lea _dl_start(%rip), %r13 
        sub 1f(%rip), %r13 
        .section        .data.rel.ro
1:      .quad _dl_start
        .previous
     
# 0 "" 2
#NO_APP
        movq    %rdx, start_time(%rip)
        movq    %r13, %rdx 
        addq    _DYNAMIC@GOTPCREL(%rip), %rdx 
        movq    %r13, 2456+_rtld_local(%rip)
        movq    (%rdx), %rax 
        movq    %rdx, 2472+_rtld_local(%rip)
        testq   %rax, %rax
        je      .L994

The new one avoids one load and eliminates one GOT entry.
Comment 2 H.J. Lu 2012-09-01 12:49:44 UTC
A patch is posted at

http://sourceware.org/ml/libc-alpha/2012-09/msg00009.html
Comment 3 H.J. Lu 2012-09-02 18:08:57 UTC
Fixed.