This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[gold] PATCH: Support x32 GD->IE and GD->LE optimizations
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: binutils at sourceware dot org
- Cc: Ian Lance Taylor <iant at google dot com>
- Date: Fri, 27 Jan 2012 14:36:14 -0800
- Subject: [gold] PATCH: Support x32 GD->IE and GD->LE optimizations
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
Hi,
The x32 GD sequence doesn't have the 0x66 prefix, because the x32 IE/LE
sequence it is rewritten to uses movl %fs:0,%eax, which is one byte shorter
than movq %fs:0,%rax. This patch handles both forms. OK to install?
Thanks.
H.J.
--
2012-01-27 H.J. Lu <hongjiu.lu@intel.com>
* x86_64.cc (Relocate::tls_gd_to_ie): Support x32.
(Relocate::tls_gd_to_le): Likewise.
diff --git a/gold/x86_64.cc b/gold/x86_64.cc
index cfc6384..aafdd68 100644
--- a/gold/x86_64.cc
+++ b/gold/x86_64.cc
@@ -3533,19 +3533,37 @@ Target_x86_64<size>::Relocate::tls_gd_to_ie(
typename elfcpp::Elf_types<size>::Elf_Addr address,
section_size_type view_size)
{
- // .byte 0x66; leaq foo@tlsgd(%rip),%rdi;
- // .word 0x6666; rex64; call __tls_get_addr
- // ==> movq %fs:0,%rax; addq x@gottpoff(%rip),%rax
+ // For SIZE == 64:
+ // .byte 0x66; leaq foo@tlsgd(%rip),%rdi;
+ // .word 0x6666; rex64; call __tls_get_addr
+ // ==> movq %fs:0,%rax; addq x@gottpoff(%rip),%rax
+ // For SIZE == 32:
+ // leaq foo@tlsgd(%rip),%rdi;
+ // .word 0x6666; rex64; call __tls_get_addr
+ // ==> movl %fs:0,%eax; addq x@gottpoff(%rip),%rax
- tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size, -4);
tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size, 12);
-
- tls::check_tls(relinfo, relnum, rela.get_r_offset(),
- (memcmp(view - 4, "\x66\x48\x8d\x3d", 4) == 0));
tls::check_tls(relinfo, relnum, rela.get_r_offset(),
(memcmp(view + 4, "\x66\x66\x48\xe8", 4) == 0));
- memcpy(view - 4, "\x64\x48\x8b\x04\x25\0\0\0\0\x48\x03\x05\0\0\0\0", 16);
+ if (size == 64)
+ {
+ tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size,
+ -4);
+ tls::check_tls(relinfo, relnum, rela.get_r_offset(),
+ (memcmp(view - 4, "\x66\x48\x8d\x3d", 4) == 0));
+ memcpy(view - 4, "\x64\x48\x8b\x04\x25\0\0\0\0\x48\x03\x05\0\0\0\0",
+ 16);
+ }
+ else
+ {
+ tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size,
+ -3);
+ tls::check_tls(relinfo, relnum, rela.get_r_offset(),
+ (memcmp(view - 3, "\x48\x8d\x3d", 4) == 0));
+ memcpy(view - 3, "\x64\x8b\x04\x25\0\0\0\0\x48\x03\x05\0\0\0\0",
+ 15);
+ }
const elfcpp::Elf_Xword addend = rela.get_r_addend();
Relocate_functions<size, false>::pcrela32(view + 8, value, addend - 8,
@@ -3571,19 +3589,38 @@ Target_x86_64<size>::Relocate::tls_gd_to_le(
unsigned char* view,
section_size_type view_size)
{
- // .byte 0x66; leaq foo@tlsgd(%rip),%rdi;
- // .word 0x6666; rex64; call __tls_get_addr
- // ==> movq %fs:0,%rax; leaq x@tpoff(%rax),%rax
+ // For SIZE == 64:
+ // .byte 0x66; leaq foo@tlsgd(%rip),%rdi;
+ // .word 0x6666; rex64; call __tls_get_addr
+ // ==> movq %fs:0,%rax; leaq x@tpoff(%rax),%rax
+ // For SIZE == 32:
+ // leaq foo@tlsgd(%rip),%rdi;
+ // .word 0x6666; rex64; call __tls_get_addr
+ // ==> movl %fs:0,%eax; leaq x@tpoff(%rax),%rax
- tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size, -4);
tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size, 12);
-
tls::check_tls(relinfo, relnum, rela.get_r_offset(),
- (memcmp(view - 4, "\x66\x48\x8d\x3d", 4) == 0));
- tls::check_tls(relinfo, relnum, rela.get_r_offset(),
- (memcmp(view + 4, "\x66\x66\x48\xe8", 4) == 0));
+ (memcmp(view + 4, "\x66\x66\x48\xe8", 4) == 0));
+
+ if (size == 64)
+ {
+ tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size,
+ -4);
+ tls::check_tls(relinfo, relnum, rela.get_r_offset(),
+ (memcmp(view - 4, "\x66\x48\x8d\x3d", 4) == 0));
+ memcpy(view - 4, "\x64\x48\x8b\x04\x25\0\0\0\0\x48\x8d\x80\0\0\0\0",
+ 16);
+ }
+ else
+ {
+ tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size,
+ -3);
+ tls::check_tls(relinfo, relnum, rela.get_r_offset(),
+ (memcmp(view - 3, "\x48\x8d\x3d", 4) == 0));
- memcpy(view - 4, "\x64\x48\x8b\x04\x25\0\0\0\0\x48\x8d\x80\0\0\0\0", 16);
+ memcpy(view - 3, "\x64\x8b\x04\x25\0\0\0\0\x48\x8d\x80\0\0\0\0",
+ 15);
+ }
value -= tls_segment->memsz();
Relocate_functions<size, false>::rela32(view + 8, value, 0);