[PATCH][x86_64] Convert indirect call via GOT to direct when possible

Sriraman Tallam tmsriram@google.com
Fri May 20 20:28:00 GMT 2016


Hi,

   GCC has an option, -fno-plt, which converts all extern calls to indirect
calls via the GOT to prevent the linker from generating any PLT stubs.
However, if the function ends up defined in the executable, this patch
will convert those indirect calls/jumps to direct ones.  Since the indirect
call is one byte longer than the direct call, an extra one-byte nop is
inserted before the direct call so the instruction length stays the same.

Here is a simple example:

main.c
---------
extern int foo();
int main() {
  return foo();
}

deffoo.c
-----------
int foo() {
  return 0;
}

$ gcc -fno-plt main.c deffoo.c
$ objdump -d a.out

0000000000400626 <main>:
  ...
  40062a:       ff 15 28 14 00 00       callq  *0x1428(%rip)        # 401a58 <_DYNAMIC+0x1d8>

The call is indirect even though foo is defined in the executable.

With this patch,
0000000000400606 <main>:
  ...
  40060a:       90                      nop
  40060b:       e8 03 00 00 00          callq  400613 <foo>

The call is now direct with an extra nop.

   Please review.

Thanks
Sri

* x86_64.cc (can_convert_callq_to_direct): New function.
(Target_x86_64<size>::Scan::global): Check if an indirect call via
GOT can be converted to direct.
(Target_x86_64<size>::Relocate::relocate): Change any indirect call
via GOT that can be converted.
* testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
* testsuite/Makefile.in: Regenerate.
* testsuite/x86_64_indirect_call_to_direct1.s: New file.
* testsuite/x86_64_indirect_jump_to_direct1.s: New file.
-------------- next part --------------
	* x86_64.cc (can_convert_callq_to_direct): New function.
	(Target_x86_64<size>::Scan::global): Check if an indirect call via
	GOT can be converted to direct.
	(Target_x86_64<size>::Relocate::relocate): Change any indirect call
	via GOT that can be converted.
	* testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
	* testsuite/Makefile.in: Regenerate.
	* testsuite/x86_64_indirect_call_to_direct1.s: New file.
	* testsuite/x86_64_indirect_jump_to_direct1.s: New file.


diff --git a/gold/testsuite/Makefile.am b/gold/testsuite/Makefile.am
index bf222c3..797c6b0 100644
--- a/gold/testsuite/Makefile.am
+++ b/gold/testsuite/Makefile.am
@@ -1084,6 +1084,25 @@ x86_64_mov_to_lea13.stdout: x86_64_mov_to_lea13
 x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14
 	$(TEST_OBJDUMP) -dw $< > $@
 
+check_SCRIPTS += x86_64_indirect_call_to_direct.sh
+check_DATA += x86_64_indirect_call_to_direct1.stdout \
+	x86_64_indirect_jump_to_direct1.stdout
+MOSTLYCLEANFILES += x86_64_indirect_call_to_direct1 \
+	x86_64_indirect_jump_to_direct1
+
+x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s
+	$(TEST_AS) --64 -o $@ $<
+x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld
+	gcctestdir/ld -o $@ $<
+x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1
+	$(TEST_OBJDUMP) -dw $< > $@
+x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s
+	$(TEST_AS) --64 -o $@ $<
+x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld
+	gcctestdir/ld -o $@ $<
+x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1
+	$(TEST_OBJDUMP) -dw $< > $@
+
 check_SCRIPTS += x86_64_overflow_pc32.sh
 check_DATA += x86_64_overflow_pc32.err
 MOSTLYCLEANFILES += x86_64_overflow_pc32.err
diff --git a/gold/testsuite/x86_64_indirect_call_to_direct1.s b/gold/testsuite/x86_64_indirect_call_to_direct1.s
index e69de29..5ca2e38 100644
--- a/gold/testsuite/x86_64_indirect_call_to_direct1.s
+++ b/gold/testsuite/x86_64_indirect_call_to_direct1.s
@@ -0,0 +1,12 @@
+	.text
+	.globl	foo
+	.type	foo, @function
+foo:
+	ret
+	.size	foo, .-foo
+	.globl	main
+	.type	main, @function
+main:
+	call	*foo@GOTPCREL(%rip)
+	ret
+	.size	main, .-main
diff --git a/gold/testsuite/x86_64_indirect_jump_to_direct1.s b/gold/testsuite/x86_64_indirect_jump_to_direct1.s
index e69de29..b817e34 100644
--- a/gold/testsuite/x86_64_indirect_jump_to_direct1.s
+++ b/gold/testsuite/x86_64_indirect_jump_to_direct1.s
@@ -0,0 +1,11 @@
+	.text
+	.globl	foo
+	.type	foo, @function
+foo:
+	ret
+	.size	foo, .-foo
+	.globl	main
+	.type	main, @function
+main:
+	jmp	*foo@GOTPCREL(%rip)
+	.size	main, .-main
diff --git a/gold/x86_64.cc b/gold/x86_64.cc
index 81126ef..59b7de5 100644
--- a/gold/x86_64.cc
+++ b/gold/x86_64.cc
@@ -891,6 +891,21 @@ class Target_x86_64 : public Sized_target<size, false>
 	    && strcmp(gsym->name(), "_DYNAMIC") != 0);
   }
 
+  // Check if relocation against this symbol is a candidate for
+  // conversion from
+  // (callq|jmpq) *foo@GOTPCREL(%rip) to
+  // nop
+  // (callq|jmpq) foo
+  static bool
+  can_convert_callq_to_direct(const Symbol* gsym)
+  {
+    gold_assert(gsym != NULL);
+    return (gsym->type() == elfcpp::STT_FUNC
+	    && !gsym->is_undefined ()
+	    && !gsym->is_from_dynobj()
+	    && !gsym->is_preemptible());
+  }
+
   // Adjust TLS relocation type based on the options and whether this
   // is a local symbol.
   static tls::Tls_optimization
@@ -2931,17 +2946,31 @@ Target_x86_64<size>::Scan::global(Symbol_table* symtab,
 	// If we convert this from
 	// mov foo@GOTPCREL(%rip), %reg
 	// to lea foo(%rip), %reg.
+	// OR
+	// if we convert
+	// (callq|jmpq) *foo@GOTPCREL(%rip) to
+	// nop
+	// (callq|jmpq) foo
 	// in Relocate::relocate, then there is nothing to do here.
 	if ((r_type == elfcpp::R_X86_64_GOTPCREL
 	     || r_type == elfcpp::R_X86_64_GOTPCRELX
 	     || r_type == elfcpp::R_X86_64_REX_GOTPCRELX)
 	    && reloc.get_r_offset() >= 2
-	    && Target_x86_64<size>::can_convert_mov_to_lea(gsym))
+	    && (Target_x86_64<size>::can_convert_mov_to_lea(gsym)
+		|| Target_x86_64<size>::can_convert_callq_to_direct(gsym)))
 	  {
 	    section_size_type stype;
 	    const unsigned char* view = object->section_contents(data_shndx,
 								 &stype, true);
-	    if (view[reloc.get_r_offset() - 2] == 0x8b)
+	    if (Target_x86_64<size>::can_convert_mov_to_lea(gsym)
+		&& view[reloc.get_r_offset() - 2] == 0x8b)
+	      break;
+
+	    // Opcode for call is 0xff 0x15 and opcode for jmp is 0xff 0x25
+	    if (Target_x86_64<size>::can_convert_callq_to_direct(gsym)
+		&& view[reloc.get_r_offset() - 2] == 0xff
+		&& (view[reloc.get_r_offset() - 1] == 0x15
+		    || view[reloc.get_r_offset() - 1] == 0x25))
 	      break;
 	  }
 
@@ -3634,6 +3663,28 @@ Target_x86_64<size>::Relocate::relocate(
 	  view[-2] = 0x8d;
 	  Reloc_funcs::pcrela32(view, object, psymval, addend, address);
 	}
+      // Convert
+      // (callq|jmpq) *foo@GOTPCREL(%rip) to
+      // nop
+      // (callq|jmpq) foo
+      else if (rela.get_r_offset() >= 2
+	       && view[-2] == 0xff
+	       && (view [-1] == 0x15 || view [-1] == 0x25)
+	       && (gsym != NULL
+		   && Target_x86_64<size>::can_convert_callq_to_direct(gsym)))
+	{
+	  // Insert the 1-byte nop, whose opcode is 0x90.  This is needed
+	  // because the indirect call(jump) is one byte longer than the
+	  // direct call(jump).
+	  view[-2] = 0x90;
+	  // Insert the direct call (opcode 0xe8) or jmp (opcode 0xe9).
+	  if (view[-1] == 0x15)
+	    view[-1] = 0xe8;
+	  else
+	    view[-1] = 0xe9;
+	  // Convert GOTPCREL to 32-bit pc relative reloc.
+	  Reloc_funcs::pcrela32(view, object, psymval, addend, address);
+	}
       else
 	{
 	  if (gsym != NULL)


More information about the Binutils mailing list