* x86_64.cc (can_convert_callq_to_direct): New function. (Target_x86_64::Scan::global): Check if an indirect call or jump via GOT can be converted to direct. (Target_x86_64::Relocate::relocate): Convert any indirect call or jump via GOT that can be converted to direct. * testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test. * testsuite/Makefile.in: Regenerate. * testsuite/x86_64_indirect_call_to_direct1.s: New file. * testsuite/x86_64_indirect_jump_to_direct1.s: New file. diff --git a/gold/testsuite/Makefile.am b/gold/testsuite/Makefile.am index bf222c3..797c6b0 100644 --- a/gold/testsuite/Makefile.am +++ b/gold/testsuite/Makefile.am @@ -1084,6 +1084,25 @@ x86_64_mov_to_lea13.stdout: x86_64_mov_to_lea13 x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14 $(TEST_OBJDUMP) -dw $< > $@ +check_SCRIPTS += x86_64_indirect_call_to_direct.sh +check_DATA += x86_64_indirect_call_to_direct1.stdout \ + x86_64_indirect_jump_to_direct1.stdout +MOSTLYCLEANFILES += x86_64_indirect_call_to_direct1 \ + x86_64_indirect_jump_to_direct1 + +x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s + $(TEST_AS) --64 -o $@ $< +x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld + gcctestdir/ld -o $@ $< +x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1 + $(TEST_OBJDUMP) -dw $< > $@ +x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s + $(TEST_AS) --64 -o $@ $< +x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld + gcctestdir/ld -o $@ $< +x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1 + $(TEST_OBJDUMP) -dw $< > $@ + check_SCRIPTS += x86_64_overflow_pc32.sh check_DATA += x86_64_overflow_pc32.err MOSTLYCLEANFILES += x86_64_overflow_pc32.err diff --git a/gold/testsuite/x86_64_indirect_call_to_direct1.s b/gold/testsuite/x86_64_indirect_call_to_direct1.s index e69de29..5ca2e38 100644 --- a/gold/testsuite/x86_64_indirect_call_to_direct1.s +++ 
b/gold/testsuite/x86_64_indirect_call_to_direct1.s @@ -0,0 +1,12 @@ + .text + .globl foo + .type foo, @function +foo: + ret + .size foo, .-foo + .globl main + .type main, @function +main: + call *foo@GOTPCREL(%rip) + ret + .size main, .-main diff --git a/gold/testsuite/x86_64_indirect_jump_to_direct1.s b/gold/testsuite/x86_64_indirect_jump_to_direct1.s index e69de29..b817e34 100644 --- a/gold/testsuite/x86_64_indirect_jump_to_direct1.s +++ b/gold/testsuite/x86_64_indirect_jump_to_direct1.s @@ -0,0 +1,11 @@ + .text + .globl foo + .type foo, @function +foo: + ret + .size foo, .-foo + .globl main + .type main, @function +main: + jmp *foo@GOTPCREL(%rip) + .size main, .-main diff --git a/gold/x86_64.cc b/gold/x86_64.cc index 81126ef..59b7de5 100644 --- a/gold/x86_64.cc +++ b/gold/x86_64.cc @@ -891,6 +891,21 @@ class Target_x86_64 : public Sized_target && strcmp(gsym->name(), "_DYNAMIC") != 0); } + // Check if relocation against this symbol is a candidate for + // conversion from + // (callq|jmpq) *foo@GOTPCREL(%rip) to + // nop + // (callq|jmpq) foo + static bool + can_convert_callq_to_direct(const Symbol* gsym) + { + gold_assert(gsym != NULL); + return (gsym->type() == elfcpp::STT_FUNC + && !gsym->is_undefined () + && !gsym->is_from_dynobj() + && !gsym->is_preemptible()); + } + // Adjust TLS relocation type based on the options and whether this // is a local symbol. static tls::Tls_optimization @@ -2931,17 +2946,31 @@ Target_x86_64::Scan::global(Symbol_table* symtab, // If we convert this from // mov foo@GOTPCREL(%rip), %reg // to lea foo(%rip), %reg. + // OR + // if we convert + // (callq|jmpq) *foo@GOTPCREL(%rip) to + // nop + // (callq|jmpq) foo // in Relocate::relocate, then there is nothing to do here. 
if ((r_type == elfcpp::R_X86_64_GOTPCREL || r_type == elfcpp::R_X86_64_GOTPCRELX || r_type == elfcpp::R_X86_64_REX_GOTPCRELX) && reloc.get_r_offset() >= 2 - && Target_x86_64::can_convert_mov_to_lea(gsym)) + && (Target_x86_64::can_convert_mov_to_lea(gsym) + || Target_x86_64::can_convert_callq_to_direct(gsym))) { section_size_type stype; const unsigned char* view = object->section_contents(data_shndx, &stype, true); - if (view[reloc.get_r_offset() - 2] == 0x8b) + if (Target_x86_64::can_convert_mov_to_lea(gsym) + && view[reloc.get_r_offset() - 2] == 0x8b) + break; + + // Opcode for call is 0xff 0x15 and opcode for jmp is 0xff 0x25 + if (Target_x86_64::can_convert_callq_to_direct(gsym) + && view[reloc.get_r_offset() - 2] == 0xff + && (view[reloc.get_r_offset() - 1] == 0x15 + || view[reloc.get_r_offset() - 1] == 0x25)) break; } @@ -3634,6 +3663,28 @@ Target_x86_64::Relocate::relocate( view[-2] = 0x8d; Reloc_funcs::pcrela32(view, object, psymval, addend, address); } + // Convert + // (callq|jmpq) *foo@GOTPCREL(%rip) to + // nop + // (callq|jmpq) foo + else if (rela.get_r_offset() >= 2 + && view[-2] == 0xff + && (view [-1] == 0x15 || view [-1] == 0x25) + && (gsym != NULL + && Target_x86_64::can_convert_callq_to_direct(gsym))) + { + // Insert the 1-byte nop, whose opcode is 0x90. This is needed + // because the indirect call(jump) is one byte longer than the + // direct call(jump). + view[-2] = 0x90; + // Insert the direct call (opcode 0xe8) or jmp (opcode 0xe9). + if (view[-1] == 0x15) + view[-1] = 0xe8; + else + view[-1] = 0xe9; + // Convert GOTPCREL to 32-bit pc relative reloc. + Reloc_funcs::pcrela32(view, object, psymval, addend, address); + } else { if (gsym != NULL)