[review] jit: enhance test suite

Simon Marchi (Code Review) gerrit@gnutoolchain-gerrit.osci.io
Wed Feb 5 03:27:00 GMT 2020


Simon Marchi has posted comments on this change.

Change URL: https://gnutoolchain-gerrit.osci.io/r/c/binutils-gdb/+/757
......................................................................


Patch Set 1:

(7 comments)

As mentioned in my commit in jit-elf.h, I don't think this is a very good way to go.  I'm afraid it will lead to a very fragile test.

What about this: the -Ttext-segment=org linker option allows us to define the base of the text segment of the generated binary.  We could use that to generate a binary whose text is specifically made to live at a certain address (say, 0x800000), and the generated DWARF information will automatically be generated based on that address.

I checked ld.bfd (GNU ld), ld.gold and ld.lld, and they all support this option, so it would be a relatively portable solution, I think.

| --- /dev/null
| +++ gdb/testsuite/gdb.base/jit-elf.h
| @@ -1,0 +39,19 @@ #else
| +#define WORDSIZE 32
| +#endif /* _LP64 || __LP64__  */
| +#define ElfW(type) _ElfW (Elf, WORDSIZE, type)
| +#define _ElfW(e, w, t) _ElfW_1 (e, w, _##t)
| +#define _ElfW_1(e, w, t) e##w##t
| +#endif /* !ElfW  */
| +
| +/* Update section addresses to use `addr` as a base.
| +   If `rename_num` is >= 0, look into string tables for entry
| +   "jit_function_XXXX" and update it to use the supplied number. */

PS1, Line 48:

We use two spaces after periods (even those at the end of the
comment).

| +static void
| +update_locations (ElfW (Addr) addr, int rename_num)
| +{
| +  const ElfW (Ehdr) *const ehdr = (ElfW (Ehdr) *) addr;
| +  ElfW (Shdr) *const shdr = (ElfW (Shdr) *) ((char *) addr + ehdr->e_shoff);
| +  ElfW (Phdr) *const phdr = (ElfW (Phdr) *) ((char *) addr + ehdr->e_phoff);
| +
| +  /* Update .p_vaddr and .sh_addr as if the code was JITted to ADDR.  */
| +

 ...

| @@ -1,0 +78,19 @@ update_locations (ElfW (Addr) addr, int rename_num)
| +
| +      if (shdr[i].sh_flags & SHF_ALLOC)
| +	shdr[i].sh_addr += addr;
| +    }
| +}
| +
| +/* Find symbol with the name `sym_name`, relocate it to
| +   use `addr` as a base and update all mentions to use the
| +   new address. */
| +static ElfW (Addr) load_symbol (ElfW (Addr) addr, const char *sym_name)

PS1, Line 87:

The function name should be at column 0, on the next line.

| +{
| +  const ElfW (Ehdr) *const ehdr = (ElfW (Ehdr) *) addr;
| +  ElfW (Shdr) *const shdr = (ElfW (Shdr) *) ((char *) addr + ehdr->e_shoff);
| +
| +  ElfW (Addr) sym_old_addr = 0;
| +  ElfW (Addr) sym_new_addr = 0;
| +
| +  /* Find `func_name` in symbol_table and adjust it from the addr */
| +

 ...

| @@ -1,0 +98,19 @@ static ElfW (Addr) load_symbol (ElfW (Addr) addr, const char *sym_name)
| +    {
| +      if (shdr[i].sh_type == SHT_SYMTAB)
| +	{
| +	  ElfW (Sym) *symtab = (ElfW (Sym) *) (addr + shdr[i].sh_offset);
| +	  ElfW (Sym) *symtab_end
| +	      = (ElfW (Sym) *) (addr + shdr[i].sh_offset + shdr[i].sh_size);
| +	  char *const strtab
| +	      = (char *) (addr + shdr[shdr[i].sh_link].sh_offset);
| +
| +	  for (ElfW (Sym) *p = symtab; p < symtab_end; p += 1)

PS1, Line 107:

p++?

| +	    {
| +	      const char *s = strtab + p->st_name;
| +	      if (strcmp (s, sym_name) == 0)
| +		{
| +		  sym_old_addr = p->st_value;
| +		  p->st_value += addr;
| +		  sym_new_addr = p->st_value;
| +		  break;
| +		}

 ...

| @@ -1,0 +122,28 @@ static ElfW (Addr) load_symbol (ElfW (Addr) addr, const char *sym_name)
| +    }
| +
| +  if (sym_new_addr == 0)
| +    {
| +      fprintf (stderr, "symbol '%s' not found\n", sym_name);
| +      exit (1);
| +    }
| +
| +  /* Find all mentions of `func_name` old address in debug sections and adjust
| +   * values */

PS1, Line 131:

End with a period (and two spaces).

| +
| +  for (int i = 0; i < ehdr->e_shnum; ++i)
| +    {
| +      if (shdr[i].sh_type == SHT_PROGBITS)
| +	{
| +	  size_t *start = (size_t *) (addr + shdr[i].sh_offset);
| +	  size_t *end
| +	      = (size_t *) (addr + shdr[i].sh_offset + shdr[i].sh_size);
| +	  for (size_t *p = start; p < end; ++p)

PS1, Line 140:

I'm not super comfortable with this.  On one hand, I understand that
it would be unthinkable to actually parse and modify the DWARF.
However, I'm afraid that blindly updating bytes like this may lead to
false positives and false negatives, leading to test instability
(remember that this runs on many architectures).

For example, we might update bytes that happen to match the address
but aren't actually the address.  Also, in DWARF, numbers aren't
always expressed directly as 4 or 8 consecutive bytes.  For example,
there's LEB128 (not sure if that is actually used to represent
addresses though).  Also, I don't think that the addresses you'll want
to replace will always end up aligned on a multiple of `size_t`, which
this code seems to assume.

| +	    if (*p == (size_t) sym_old_addr)
| +	      *p = (size_t) sym_new_addr;
| +	}
| +    }
| +
| +  return sym_new_addr;
| +}
| +
| +/* Open an elf binary file and memory map it
| --- gdb/testsuite/gdb.base/jit-main.c
| +++ gdb/testsuite/gdb.base/jit-main.c
| @@ -184,10 +78,11 @@ MAIN (int argc, char *argv[])
| -      struct jit_code_entry *const entry = calloc (1, sizeof (*entry));
| -
| -      if (addr == MAP_FAILED)
| -	{
| -	  fprintf (stderr, "mmap: %s\n", strerror (errno));
| -	  exit (1);
| -	}
| -
| +      size_t obj_size;
| +      ElfW (Addr) addr = load_elf (libname, &obj_size);

PS1, Line 79:

This is missing an argument:

 jit-main.c:79:26: error: too few arguments to function ‘load_elf’
    79 |       ElfW (Addr) addr = load_elf (libname, &obj_size);
       |                          ^~~~~~~~

|        update_locations (addr, i);
|  
| +      char name[32];
| +      sprintf (name, "jit_function_%04d", i);
| +      int (*jit_function) () = (int (*) ()) load_symbol (addr, name);
| +
| +      struct jit_code_entry *const entry
| +	  = (struct jit_code_entry *) calloc (1, sizeof (*entry));
| +

 ...

| @@ -200,13 +95,19 @@ MAIN (int argc, char *argv[])
|        if (entry->prev_entry != NULL)
|  	entry->prev_entry->next_entry = entry;
|        else
|  	__jit_debug_descriptor.first_entry = entry;
|  
|        /* Notify GDB.  */
|        __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
|        __jit_debug_register_code ();
| +
| +      if (jit_function() != 42)

PS1, Line 104:

Missing space before the parentheses.

| +	{
| +	  fprintf (stderr, "unexpected return value\n");
| +	  exit (1);
| +	}
|      }
|  
|    WAIT_FOR_GDB; i = 0;  /* gdb break here 1 */
|  
|    /* Now unregister them all in reverse order.  */

-- 
Gerrit-Project: binutils-gdb
Gerrit-Branch: master
Gerrit-Change-Id: I5f9d56e8eaaa7cf88881eda6a92a72962dd3f066
Gerrit-Change-Number: 757
Gerrit-PatchSet: 1
Gerrit-Owner: Mihails Strasuns <mihails.strasuns@intel.com>
Gerrit-Reviewer: Mihails Strasuns <mihails.strasuns@intel.com>
Gerrit-Reviewer: Simon Marchi <simon.marchi@polymtl.ca>
Gerrit-Comment-Date: Wed, 05 Feb 2020 03:27:34 +0000
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Gerrit-MessageType: comment



More information about the Gdb-patches mailing list