[PATCH][x86_64] Convert indirect call via GOT to direct when possible

Sriraman Tallam tmsriram@google.com
Tue Jun 14 18:33:00 GMT 2016


On Fri, Jun 10, 2016 at 1:06 PM, Cary Coutant <ccoutant@gmail.com> wrote:
>>>> * x86_64.cc (can_convert_callq_to_direct): New function.
>>>> Target_x86_64<size>::Scan::global: Check if an indirect call via
>>>> GOT can be converted to direct.
>>>> Target_x86_64<size>::Relocate::relocate: Change any indirect call
>>>> via GOT that can be converted.
>>>> * testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
>>>> * testsuite/Makefile.in: Regenerate.
>>>> * testsuite/x86_64_indirect_call_to_direct1.s: New file.
>>>> * testsuite/x86_64_indirect_jump_to_direct1.s: New file.
>>>>
>>>
>>> Do you need to check R_X86_64_REX_GOTPCRELX for branch?
>>
>> Ok, patch changed to not check for this and refactored a bit.
>
> Thanks for the refactoring -- I was about to suggest that. There's
> still too much logic outside the can_convert_* functions that is
> duplicated in Scan::global() and in Relocate::relocate(), all because
> we don't want to fetch the section contents without doing a few
> preliminary checks. I'd like to move most of that logic into the
> can_convert_* functions, so let's start with a Lazy_view class, which
> will fetch the section contents only when needed:
>
> template<int size>
> class Lazy_view
> {
>  public:
>   Lazy_view(Sized_relobj_file<size, false>* object, unsigned int data_shndx)
>     : object_(object), data_shndx_(data_shndx), view_(NULL), view_size_(0)
>   { }
>
>   inline unsigned char
>   operator[](size_t offset)
>   {
>     if (this->view_ == NULL)
>       this->view_ = this->object_->section_contents(this->data_shndx_,
>                                                     &this->view_size_,
>                                                     true);
>     if (offset >= this->view_size_)
>       return 0;
>     return this->view_[offset];
>   }
>
>  private:
>   Sized_relobj_file<size, false>* object_;
>   unsigned int data_shndx_;
>   const unsigned char* view_;
>   section_size_type view_size_;
> };
>
> Now we can move (almost) all of that external logic into the
> can_convert_* routines, leaving us with this in Scan::global():
>
>         Lazy_view<size> view(object, data_shndx);
>         size_t r_offset = reloc.get_r_offset();
>         if (r_offset >= 2
>             && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
>                                                            r_offset, &view))
>           break;
>         if (r_offset >= 2
>             && Target_x86_64<size>::can_convert_callq_to_direct(gsym, r_type,
>
> r_offset, &view))
>           break;
>
> ... and this in Relocate::relocate():
>
>         if ((gsym == NULL
>              && rela.get_r_offset() >= 2
>              && view[-2] == 0x8b
>              && !psymval->is_ifunc_symbol())
>             || (gsym != NULL
>                 && rela.get_r_offset() >= 2
>                 && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
>                                                                0, &view)))
>           {
>             ...
>           }
>         else if (gsym != NULL
>                  && rela.get_r_offset() >= 2
>                  && Target_x86_64<size>::can_convert_callq_to_direct(gsym,
>                                                                      r_type,
>                                                                      0, &view))
>           {
>             if (view[-1] == 0x15)
>               {
>                 ...
>               }
>             else
>               {
>                 ...
>               }
>           }
>
> Folding all the external logic into the can_convert_* functions, and
> refactoring a bit gives this:
>
>   template<class View_type>
>   static inline bool
>   can_convert_mov_to_lea(const Symbol* gsym, unsigned int r_type,
>                          size_t r_offset, View_type* view)
>   {
>     gold_assert(gsym != NULL);
>     // We cannot do the conversion unless it's one of these relocations.
>     if (r_type != elfcpp::R_X86_64_GOTPCREL
>         && r_type != elfcpp::R_X86_64_GOTPCRELX
>         && r_type != elfcpp::R_X86_64_REX_GOTPCRELX)
>       return false;
>     // We cannot convert references to IFUNC symbols, or to symbols that
>     // are not local to the current module.
>     if (gsym->type() == elfcpp::STT_GNU_IFUNC
>         || gsym->is_undefined ()
>         || gsym->is_from_dynobj()
>         || gsym->is_preemptible())
>       return false;
>     if (parameters->options().shared()
>         && (gsym->visibility() == elfcpp::STV_DEFAULT
>             || gsym->visibility() == elfcpp::STV_PROTECTED)
>         && !parameters->options().Bsymbolic())
>       return false;
>     // We cannot convert references to the _DYNAMIC symbol.
>     if (strcmp(gsym->name(), "_DYNAMIC") == 0)
>       return false;
>     // Check for a MOV opcode.
>     return (*view)[r_offset - 2] == 0x8b;
>   }
>
>   template<class View_type>
>   static inline bool
>   can_convert_callq_to_direct(const Symbol* gsym, unsigned int r_type,
>                               size_t r_offset, View_type* view)
>   {
>     gold_assert(gsym != NULL);
>     // We cannot do the conversion unless it's a GOTPCRELX relocation.
>     if (r_type != elfcpp::R_X86_64_GOTPCRELX)
>       return false;
>     // We cannot convert references to IFUNC symbols, or to symbols that
>     // are not local to the current module.
>     if (gsym->type() == elfcpp::STT_GNU_IFUNC
>         || gsym->is_undefined ()
>         || gsym->is_from_dynobj()
>         || gsym->is_preemptible())
>       return false;
>     // Check for a CALLQ or JMPQ opcode.
>     return ((*view)[r_offset - 2] == 0xff
>             && ((*view)[r_offset - 1] == 0x15
>                 || (*view)[r_offset - 1] == 0x25));
>   }
>
> A couple of observations, though:
>
> 1. Sri, in your patch, you just test for sym type == STT_FUNC. Isn't
> it sufficient to test for sym type != STT_GNU_IFUNC (as in the
> convert-to-lea case)? I don't think it really matters -- if we see a
> jump to an STT_OBJECT or STT_NOTYPE symbol, why isn't the
> transformation just as valid?
>
> 2. HJ, given an R_X86_64_GOTPCRELX relocation, is it still necessary
> to check the opcode during Scan::global()? Doesn't the relocation
> guarantee that it's an appropriate instruction for the transformation?
> I think in both cases, we could skip fetching the section contents if
> we have this relocation.
>
> 3. This piece of can_convert_mov_to_lea has me a bit puzzled:
>
>     if (parameters->options().shared()
>         && (gsym->visibility() == elfcpp::STV_DEFAULT
>             || gsym->visibility() == elfcpp::STV_PROTECTED)
>         && !parameters->options().Bsymbolic())
>       return false;
>
> We've already tested for is_preemptible, which should take care of all
> of these cases. If I remove this code, however, I get a couple of
> failures that I need to investigate. At the very least, I suspect this
> part of the logic can be simplified. I want to investigate this
> further before committing the whole thing.

This case is needed if the shared object specifies "protected"
visibility using an asm directive instead of the attribute.  In this
case, the compiler prefers to use the GOT.  Here is the relevant code
from the test that fails if this condition is removed:

// The function f2 is used to test that the executable can see the
// same function address for a protected function in the executable
// and in the shared library.  We can't use the visibility attribute
// here, becaues that may cause gcc to generate a PC relative reloc;
// we need it to get the value from the GOT.  I'm not sure this is
// really useful, given that it doesn't work with the visibility
// attribute.  This test exists here mainly because the glibc
// testsuite has the same test, and we want to make sure that gold
// passes the glibc testsuite.

extern "C" int f2();
asm(".protected f2");


Like I said earlier, the condition can be simplified and I have done that.


I am attaching the patch after making all the changes mentioned.
Please take a look.

* x86_64.cc (Lazy_view): New class.
(can_convert_mov_to_lea): Templatize function.  Make the function
check for appropriate relocation types and use the view parameter
to get section contents.
(can_convert_callq_to_direct): New function.
(Target_x86_64<size>::Scan::global): Refactor.
(Target_x86_64<size>::Relocate::relocate): Refactor. Change any indirect
call via GOT that can be converted.
* testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
* testsuite/Makefile.in: Regenerate.
* testsuite/x86_64_indirect_call_to_direct1.s: New file.
* testsuite/x86_64_indirect_jump_to_direct1.s: New file.


Thanks
Sri



>
> -cary
-------------- next part --------------
	* x86_64.cc (Lazy_view): New class.
	(can_convert_mov_to_lea): Templatize function.  Make the function
	check for appropriate relocation types and use the view parameter
	to get section contents.
	(can_convert_callq_to_direct): New function.
	(Target_x86_64<size>::Scan::global): Refactor.
	(Target_x86_64<size>::Relocate::relocate): Refactor. Change any indirect
	call via GOT that can be converted.
	* testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
	* testsuite/Makefile.in: Regenerate.
	* testsuite/x86_64_indirect_call_to_direct1.s: New file.
	* testsuite/x86_64_indirect_jump_to_direct1.s: New file.


diff --git a/gold/testsuite/Makefile.am b/gold/testsuite/Makefile.am
index 01cae9f..f5cc0db 100644
--- a/gold/testsuite/Makefile.am
+++ b/gold/testsuite/Makefile.am
@@ -1096,6 +1096,25 @@ x86_64_mov_to_lea13.stdout: x86_64_mov_to_lea13
 x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14
 	$(TEST_OBJDUMP) -dw $< > $@
 
+check_SCRIPTS += x86_64_indirect_call_to_direct.sh
+check_DATA += x86_64_indirect_call_to_direct1.stdout \
+	x86_64_indirect_jump_to_direct1.stdout
+MOSTLYCLEANFILES += x86_64_indirect_call_to_direct1 \
+	x86_64_indirect_jump_to_direct1
+
+x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s
+	$(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld
+	gcctestdir/ld -o $@ $<
+x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1
+	$(TEST_OBJDUMP) -dw $< > $@
+x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s
+	$(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld
+	gcctestdir/ld -o $@ $<
+x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1
+	$(TEST_OBJDUMP) -dw $< > $@
+
 check_SCRIPTS += x86_64_overflow_pc32.sh
 check_DATA += x86_64_overflow_pc32.err
 MOSTLYCLEANFILES += x86_64_overflow_pc32.err
diff --git a/gold/testsuite/x86_64_indirect_call_to_direct.sh b/gold/testsuite/x86_64_indirect_call_to_direct.sh
index e69de29..916e1a3 100755
--- a/gold/testsuite/x86_64_indirect_call_to_direct.sh
+++ b/gold/testsuite/x86_64_indirect_call_to_direct.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+# x86_64_indirect_call_to_direct.sh -- a test for indirect call(jump) to direct
+# conversion.
+
+# Copyright (C) 2016 Free Software Foundation, Inc.
+# Written by Sriraman Tallam <tmsriram@google.com>
+
+# This file is part of gold.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+set -e
+
+grep -q "callq[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_call_to_direct1.stdout
+grep -q "jmpq[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_jump_to_direct1.stdout
diff --git a/gold/testsuite/x86_64_indirect_call_to_direct1.s b/gold/testsuite/x86_64_indirect_call_to_direct1.s
index e69de29..5ca2e38 100644
--- a/gold/testsuite/x86_64_indirect_call_to_direct1.s
+++ b/gold/testsuite/x86_64_indirect_call_to_direct1.s
@@ -0,0 +1,12 @@
+	.text
+	.globl	foo
+	.type	foo, @function
+foo:
+	ret
+	.size	foo, .-foo
+	.globl	main
+	.type	main, @function
+main:
+	call	*foo@GOTPCREL(%rip)
+	ret
+	.size	main, .-main
diff --git a/gold/testsuite/x86_64_indirect_jump_to_direct1.s b/gold/testsuite/x86_64_indirect_jump_to_direct1.s
index e69de29..b817e34 100644
--- a/gold/testsuite/x86_64_indirect_jump_to_direct1.s
+++ b/gold/testsuite/x86_64_indirect_jump_to_direct1.s
@@ -0,0 +1,11 @@
+	.text
+	.globl	foo
+	.type	foo, @function
+foo:
+	ret
+	.size	foo, .-foo
+	.globl	main
+	.type	main, @function
+main:
+	jmp	*foo@GOTPCREL(%rip)
+	.size	main, .-main
diff --git a/gold/x86_64.cc b/gold/x86_64.cc
index 81126ef..85a98da 100644
--- a/gold/x86_64.cc
+++ b/gold/x86_64.cc
@@ -403,6 +403,33 @@ class Output_data_plt_x86_64_standard : public Output_data_plt_x86_64<size>
   static const unsigned char plt_eh_frame_fde[plt_eh_frame_fde_size];
 };
 
+template<int size>
+class Lazy_view
+{
+ public:
+  Lazy_view(Sized_relobj_file<size, false>* object, unsigned int data_shndx)
+    : object_(object), data_shndx_(data_shndx), view_(NULL), view_size_(0)
+  { }
+
+  inline unsigned char
+  operator[](size_t offset)
+  {
+    if (this->view_ == NULL)
+      this->view_ = this->object_->section_contents(this->data_shndx_,
+                                                    &this->view_size_,
+                                                    true);
+    if (offset >= this->view_size_)
+      return 0;
+    return this->view_[offset];
+  }
+ 
+ private:
+  Sized_relobj_file<size, false>* object_;
+  unsigned int data_shndx_;
+  const unsigned char* view_;
+  section_size_type view_size_;
+};
+
 // The x86_64 target class.
 // See the ABI at
 //   http://www.x86-64.org/documentation/abi.pdf
@@ -876,19 +903,62 @@ class Target_x86_64 : public Sized_target<size, false>
   // conversion from
   // mov foo@GOTPCREL(%rip), %reg
   // to lea foo(%rip), %reg.
-  static bool
-  can_convert_mov_to_lea(const Symbol* gsym)
+  template<class View_type>
+  static inline bool
+  can_convert_mov_to_lea(const Symbol* gsym, unsigned int r_type,
+                         size_t r_offset, View_type* view)
+  {
+    gold_assert(gsym != NULL);
+    // We cannot do the conversion unless it's one of these relocations.
+    if (r_type != elfcpp::R_X86_64_GOTPCREL
+        && r_type != elfcpp::R_X86_64_GOTPCRELX
+        && r_type != elfcpp::R_X86_64_REX_GOTPCRELX)
+      return false;
+    // We cannot convert references to IFUNC symbols, or to symbols that
+    // are not local to the current module.
+    if (gsym->type() == elfcpp::STT_GNU_IFUNC
+        || gsym->is_undefined ()
+        || gsym->is_from_dynobj()
+        || gsym->is_preemptible())
+      return false;
+    // If we are building a shared object and the symbol is protected, we may
+    // need to go through the GOT.
+    if (parameters->options().shared()
+        && gsym->visibility() == elfcpp::STV_PROTECTED)
+      return false;
+    // We cannot convert references to the _DYNAMIC symbol.
+    if (strcmp(gsym->name(), "_DYNAMIC") == 0)
+      return false;
+    // Check for a MOV opcode.
+    return (*view)[r_offset - 2] == 0x8b;
+  }
+
+  // Convert
+  // callq *foo@GOTPCRELX(%rip) to
+  // addr32 callq foo
+  // and jmpq *foo@GOTPCRELX(%rip) to
+  // jmpq foo
+  // nop
+  template<class View_type>
+  static inline bool
+  can_convert_callq_to_direct(const Symbol* gsym, unsigned int r_type,
+			      size_t r_offset, View_type* view)
   {
     gold_assert(gsym != NULL);
-    return (gsym->type() != elfcpp::STT_GNU_IFUNC
-	    && !gsym->is_undefined ()
-	    && !gsym->is_from_dynobj()
-	    && !gsym->is_preemptible()
-	    && (!parameters->options().shared()
-		|| (gsym->visibility() != elfcpp::STV_DEFAULT
-		    && gsym->visibility() != elfcpp::STV_PROTECTED)
-		|| parameters->options().Bsymbolic())
-	    && strcmp(gsym->name(), "_DYNAMIC") != 0);
+    // We cannot do the conversion unless it's a GOTPCRELX relocation.
+    if (r_type != elfcpp::R_X86_64_GOTPCRELX)
+      return false;
+    // We cannot convert references to IFUNC symbols, or to symbols that
+    // are not local to the current module.
+    if (gsym->type() == elfcpp::STT_GNU_IFUNC
+        || gsym->is_undefined ()
+        || gsym->is_from_dynobj()
+        || gsym->is_preemptible())
+      return false;
+    // Check for a CALLQ or JMPQ opcode.
+    return ((*view)[r_offset - 2] == 0xff
+            && ((*view)[r_offset - 1] == 0x15
+                || (*view)[r_offset - 1] == 0x25));
   }
 
   // Adjust TLS relocation type based on the options and whether this
@@ -2931,19 +3001,23 @@ Target_x86_64<size>::Scan::global(Symbol_table* symtab,
 	// If we convert this from
 	// mov foo@GOTPCREL(%rip), %reg
 	// to lea foo(%rip), %reg.
+	// OR
+	// if we convert
+	// (callq|jmpq) *foo@GOTPCRELX(%rip) to
+	// (callq|jmpq) foo
 	// in Relocate::relocate, then there is nothing to do here.
-	if ((r_type == elfcpp::R_X86_64_GOTPCREL
-	     || r_type == elfcpp::R_X86_64_GOTPCRELX
-	     || r_type == elfcpp::R_X86_64_REX_GOTPCRELX)
-	    && reloc.get_r_offset() >= 2
-	    && Target_x86_64<size>::can_convert_mov_to_lea(gsym))
-	  {
-	    section_size_type stype;
-	    const unsigned char* view = object->section_contents(data_shndx,
-								 &stype, true);
-	    if (view[reloc.get_r_offset() - 2] == 0x8b)
-	      break;
-	  }
+
+	// If relocation type is R_X86_64_GOTPCRELX it is automatically a
+	// candidate for conversion.
+	if (r_type ==  elfcpp::R_X86_64_GOTPCRELX)
+	  break;
+
+        Lazy_view<size> view(object, data_shndx);
+        size_t r_offset = reloc.get_r_offset();
+        if (r_offset >= 2
+            && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
+                                                           r_offset, &view))
+          break;
 
 	if (gsym->final_value_is_known())
 	  {
@@ -3625,15 +3699,56 @@ Target_x86_64<size>::Relocate::relocate(
       // mov foo@GOTPCREL(%rip), %reg
       // to lea foo(%rip), %reg.
       // if possible.
-      if (rela.get_r_offset() >= 2
-	  && view[-2] == 0x8b
-	  && ((gsym == NULL && !psymval->is_ifunc_symbol())
-	      || (gsym != NULL
-		  && Target_x86_64<size>::can_convert_mov_to_lea(gsym))))
+       if ((gsym == NULL
+             && rela.get_r_offset() >= 2
+             && view[-2] == 0x8b
+             && !psymval->is_ifunc_symbol())
+            || (gsym != NULL
+                && rela.get_r_offset() >= 2
+                && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
+                                                               0, &view)))
 	{
 	  view[-2] = 0x8d;
 	  Reloc_funcs::pcrela32(view, object, psymval, addend, address);
 	}
+      // Convert
+      // callq *foo@GOTPCRELX(%rip) to
+      // addr32 callq foo
+      // and jmpq *foo@GOTPCRELX(%rip) to
+      // jmpq foo
+      // nop
+      else if (gsym != NULL
+	       && rela.get_r_offset() >= 2
+	       && Target_x86_64<size>::can_convert_callq_to_direct(gsym,
+								   r_type,
+								   0, &view))
+	{
+	  if (view[-1] == 0x15)
+	    {
+	      // Convert callq *foo@GOTPCRELX(%rip) to addr32 callq.
+	      // Opcode of addr32 is 0x67 and opcode of direct callq is 0xe8.
+	      view[-2] = 0x67;
+	      view[-1] = 0xe8;
+	      // Convert GOTPCRELX to 32-bit pc relative reloc.
+	      Reloc_funcs::pcrela32(view, object, psymval, addend, address);
+	    }
+	  else
+	    {
+	      // Convert jmpq *foo@GOTPCRELX(%rip) to
+	      // jmpq foo
+	      // nop
+	      // The opcode of direct jmpq is 0xe9.
+	      view[-2] = 0xe9;
+	      // The opcode of nop is 0x90.
+	      view[3] = 0x90;
+	      // Convert GOTPCRELX to 32-bit pc relative reloc.  jmpq is rip
+	      // relative and since the instruction following the jmpq is now
+	      // the nop, offset the address by 1 byte.  The start of the
+              // relocation also moves ahead by 1 byte.
+	      Reloc_funcs::pcrela32(&view[-1], object, psymval, addend,
+				    address - 1);
+	    }
+	}
       else
 	{
 	  if (gsym != NULL)


More information about the Binutils mailing list