This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFC: Improving the disassembly of relro binaries.


Hi Guys,

  I have been working on a patch to fix a problem reported by a Fedora
  user about objdump's less than helpful output when disassembling a
  dynamic executable linked with -z relro enabled.  (Red Hat Bugzilla
  bug 1370275, in case anyone is interested).  The problem can be
  demonstrated with a small test case:

  % cat main.c
  #include <stdio.h>
  int main(void) { printf("Hello World\n"); return 0; }

  % gcc -g -DPIE -fPIE main.c
  % objdump -d a.out
  [...]
  0000000000400526 <main>:
    400526:	55                   	push   %rbp
    400527:	48 89 e5             	mov    %rsp,%rbp
    40052a:	bf d0 05 40 00       	mov    $0x4005d0,%edi
    40052f:	e8 cc fe ff ff       	callq  400400 <printf@plt>
  [...]

  % gcc -g -DPIE -fPIE -Wl,-z,relro -Wl,-z,now main.c
  % objdump -d a.out
  [...]
  0000000000400516 <main>:
    400516:	55                   	push   %rbp
    400517:	48 89 e5             	mov    %rsp,%rbp
    40051a:	48 8d 3d 9f 00 00 00 	lea    0x9f(%rip),%rdi        # 4005c0 <__dso_handle+0x8>
    400521:	e8 da fe ff ff       	callq  400400 <_init+0x30>
  [...]

  Note how in the first objdump output the call instruction is annotated
  with the name of the function being called, whereas in the second the
  call goes to what appears to be a random address.

  I have developed a patch (attached) to change this behaviour so that
  objdump now shows:

  0000000000400516 <main>:
    400516:	55                   	push   %rbp
    400517:	48 89 e5             	mov    %rsp,%rbp
    40051a:	48 8d 3d 9f 00 00 00 	lea    0x9f(%rip),%rdi        # 4005c0 <__dso_handle+0x8>
    400521:	b8 00 00 00 00       	mov    $0x0,%eax
    400526:	e8 d5 fe ff ff       	callq  400400 <.plt.got>

  Which is slightly more helpful.  Plus the patch also now arranges for
  the PLT to be disassembled like this:

  0000000000400400 <.plt.got>:
    400400:	ff 25 e2 0b 20 00    	jmpq   *0x200be2(%rip)        # 600fe8 <printf@GLIBC_2.2.5 ?>
    400406:	66 90                	xchg   %ax,%ax
    400408:	ff 25 e2 0b 20 00    	jmpq   *0x200be2(%rip)        # 600ff0 <__libc_start_main@GLIBC_2.2.5 ?>
    40040e:	66 90                	xchg   %ax,%ax
    400410:	ff 25 e2 0b 20 00    	jmpq   *0x200be2(%rip)        # 600ff8 <__gmon_start__ ?>
    400416:	66 90                	xchg   %ax,%ax

  So the user can see that the first entry in the .plt.got section is a
  jump to the printf function.

  I added the ? character at the end of the interpreted addresses in
  order to indicate that these symbols are undefined and that, in theory,
  they could remain unresolved or even be replaced with some other function.

  Rather than just go ahead and apply this patch however, I thought that
  I would ask you guys first if you had any thoughts or suggestions on
  the enhancement.  Especially given that it changes objdump's output.
  So - any comments ?

Cheers
  Nick

Bug report: https://bugzilla.redhat.com/show_bug.cgi?id=1370275

Here is the body of the proposed patch.  The real patch is actually
quite a lot larger as there are a lot of linker tests that need to be
tweaked to match the new output from objdump.

diff --git a/binutils/objdump.c b/binutils/objdump.c
index 2d2bddb..1da05c7 100644
--- a/binutils/objdump.c
+++ b/binutils/objdump.c
@@ -615,6 +615,18 @@ slurp_dynamic_symtab (bfd *abfd)
   return sy;
 }
 
+/* Some symbol names are significant and should be kept in the
+   table of sorted symbol names, even if they are marked as
+   debugging/section symbols.  */
+
+static bfd_boolean
+is_significant_symbol_name (const char * name)
+{
+  return strcmp (name, ".plt") == 0
+    ||   strcmp (name, ".got") == 0
+    ||   strcmp (name, ".plt.got") == 0;
+}
+
 /* Filter out (in place) symbols that are useless for disassembly.
    COUNT is the number of elements in SYMBOLS.
    Return the number of useful symbols.  */
@@ -630,7 +642,8 @@ remove_useless_symbols (asymbol **symbols, long count)
 
       if (sym->name == NULL || sym->name[0] == '\0')
 	continue;
-      if (sym->flags & (BSF_DEBUGGING | BSF_SECTION_SYM))
+      if ((sym->flags & (BSF_DEBUGGING | BSF_SECTION_SYM))
+	  && ! is_significant_symbol_name (sym->name))
 	continue;
       if (bfd_is_und_section (sym->section)
 	  || bfd_is_com_section (sym->section))
@@ -1003,6 +1016,33 @@ find_symbol_for_address (bfd_vma vma,
 	return NULL;
     }
 
+  if (!want_section
+      && sorted_syms[thisplace]->value != vma
+      && ((struct objdump_disasm_info *) (inf->application_data))->dynrelbuf != NULL)
+    {
+      arelent **  rel_pp;
+      long        rel_count;
+
+      /* Possibly there is a dynamic reloc for the address we seek.  */
+      for (rel_count = ((struct objdump_disasm_info *) (inf->application_data))->dynrelcount,
+	     rel_pp = ((struct objdump_disasm_info *) (inf->application_data))->dynrelbuf;
+	   rel_count--;)
+	{
+	  if (rel_pp[rel_count]->address == vma
+	      && rel_pp[rel_count]->sym_ptr_ptr != NULL)
+	    {
+	      if (place != NULL)
+		* place = sorted_symcount;
+	      return * rel_pp[rel_count]->sym_ptr_ptr;
+	    }
+
+	  /* We are scanning backwards, so if we go below the target address
+	     we have failed.  */
+	  if (rel_pp[rel_count]->address < vma)
+	    break;
+	}
+    }
+
   if (place != NULL)
     *place = thisplace;
 
@@ -1041,7 +1081,15 @@ objdump_print_addr_with_sym (bfd *abfd, asection *sec, asymbol *sym,
     {
       (*inf->fprintf_func) (inf->stream, " <");
       objdump_print_symname (abfd, inf, sym);
-      if (bfd_asymbol_value (sym) > vma)
+      /* Undefined/common symbols do not have a value/address associated
+	 with them, so it does not make sense to display an offset relative
+	 to such a symbol.  Normally we would not be provided with this
+	 kind of symbol, but the target backend might choose to do so, and
+	 the code in find_symbol_for_address might return a symbol associated
+	 with a dynamic reloc.  */
+      if (bfd_is_und_section (sym->section) || bfd_is_com_section (sym->section))
+	(*inf->fprintf_func) (inf->stream, " ?");
+      else if (bfd_asymbol_value (sym) > vma)
 	{
 	  (*inf->fprintf_func) (inf->stream, "-0x");
 	  objdump_print_value (bfd_asymbol_value (sym) - vma, inf, TRUE);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]