x86-64 has /* Return the link-time address of _DYNAMIC. Conveniently, this is the first element of the GOT. This must be inlined in a function which uses global data. */ static inline ElfW(Addr) __attribute__ ((unused)) elf_machine_dynamic (void) { ElfW(Addr) addr; /* This works because we have our GOT address available in the small PIC model. */ addr = (ElfW(Addr)) &_DYNAMIC; return addr; } /* Return the run-time load address of the shared object. */ static inline ElfW(Addr) __attribute__ ((unused)) elf_machine_load_address (void) { ElfW(Addr) addr; /* The easy way is just the same as on x86: leaq _dl_start, %0 leaq _dl_start(%%rip), %1 subq %0, %1 but this does not work with binutils since we then have a R_X86_64_32S relocation in a shared lib. Instead we store the address of _dl_start in the data section and compare it with the current value that we can get via an RIP relative addressing mode. Note that this is the address of _dl_start before any relocation performed at runtime. In case the binary is prelinked the resulting "address" is actually a load offset which is zero if the binary was loaded at the address it is prelinked for. */ asm ("lea _dl_start(%%rip), %0\n\t" "sub 1f(%%rip), %0\n\t" ".section\t.data.rel.ro\n" "1:\t" ASM_ADDR " _dl_start\n\t" ".previous\n\t" : "=r" (addr) : : "cc"); return addr; } while i386 has /* Return the link-time address of _DYNAMIC. Conveniently, this is the first element of the GOT, a special entry that is never relocated. */ static inline Elf32_Addr __attribute__ ((unused, const)) elf_machine_dynamic (void) { /* This produces a GOTOFF reloc that resolves to zero at link time, so in fact just loads from the GOT register directly. By doing it without an asm we can let the compiler choose any register. */ extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden; return _GLOBAL_OFFSET_TABLE_[0]; } /* Return the run-time load address of the shared object. 
*/ static inline Elf32_Addr __attribute__ ((unused)) elf_machine_load_address (void) { /* Compute the difference between the runtime address of _DYNAMIC as seen by a GOTOFF reference, and the link-time address found in the special unrelocated first GOT entry. */ extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden; return (Elf32_Addr) &bygotoff - elf_machine_dynamic (); } The i386 version is straightforward and easy to understand.
The new version: /* Return the link-time address of _DYNAMIC. Conveniently, this is the first element of the GOT. This must be inlined in a function which uses global data. */ static inline ElfW(Addr) __attribute__ ((unused)) elf_machine_dynamic (void) { /* This produces a PC32 reloc that resolves to zero at link time, so in fact just loads from the GOT register directly. By doing it without an asm we can let the compiler choose any register. */ extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden; return _GLOBAL_OFFSET_TABLE_[0]; } /* Return the run-time load address of the shared object. */ static inline ElfW(Addr) __attribute__ ((unused)) elf_machine_load_address (void) { /* Compute the difference between the runtime address of _DYNAMIC as seen by a PC32 reference, and the link-time address found in the special unrelocated first GOT entry. */ extern ElfW(Dyn) bygotoff[] asm ("_DYNAMIC") attribute_hidden; return (ElfW(Addr)) &bygotoff - elf_machine_dynamic (); } It generates: movq _DYNAMIC(%rip), %rax movq %rdx, start_time(%rip) leaq _DYNAMIC(%rip), %rdx movq %rdx, %r13 subq _GLOBAL_OFFSET_TABLE_(%rip), %r13 testq %rax, %rax movq %rdx, 2472+_rtld_local(%rip) movq %r13, 2456+_rtld_local(%rip) je .L993 instead of #APP # 75 "../sysdeps/x86_64/dl-machine.h" 1 lea _dl_start(%rip), %r13 sub 1f(%rip), %r13 .section .data.rel.ro 1: .quad _dl_start .previous # 0 "" 2 #NO_APP movq %rdx, start_time(%rip) movq %r13, %rdx addq _DYNAMIC@GOTPCREL(%rip), %rdx movq %r13, 2456+_rtld_local(%rip) movq (%rdx), %rax movq %rdx, 2472+_rtld_local(%rip) testq %rax, %rax je .L994 The new one avoids one load and eliminates one GOT entry.
A patch is posted at http://sourceware.org/ml/libc-alpha/2012-09/msg00009.html
Fixed.