This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
[RFC] Improve amd64 prologue analysis
- From: "Pierre Muller" <pierre dot muller at ics-cnrs dot unistra dot fr>
- To: <gdb-patches at sourceware dot org>
- Date: Mon, 15 Nov 2010 18:27:41 +0100
- Subject: [RFC] Improve amd64 prologue analysis
Free Pascal for x86_64 (amd64) CPU generates lots of functions
that do not use RBP as a frame pointer.
I tried to improve amd64-tdep.c to better cope
with this case.
The patch does mainly two things:
1) Fix the subl val,%rsp
(the REX prefix was missing)
2) Add a new function called amd64_analyze_register_saves
that tries to support:
pushq %reg
movq %reg,ofs(%rsp)
and
movq %reg,ofs(%rbp)
It took me a while to understand the generated code,
but I hope that I don't get any false positives
concerning register saves.
The output for a Free Pascal compiled program
greatly improved with that patch.
On the other hand, testing on compiler farm
x86_64-unknown-linux-gnu showed no changes in the
testsuite results, which was a disappointment for me...
Comments most welcome.
Pierre Muller
GDB pascal language maintainer
2010-11-15 Pierre Muller <muller@ics.u-strasbg.fr>
* amd64-tdep.c (struct amd64_frame_cache): Add LOCALS field.
(amd64_init_frame_cache): Set LOCALS field to -1.
(amd64_analyze_register_saves): New function.
(amd64_analyze_prologue): Correct analysis of 'subl val,%rsp'
and call amd64_analyze_register_saves.
Index: src/gdb/amd64-tdep.c
===================================================================
RCS file: /cvs/src/src/gdb/amd64-tdep.c,v
retrieving revision 1.82
diff -u -p -r1.82 amd64-tdep.c
--- src/gdb/amd64-tdep.c 11 Sep 2010 19:09:34 -0000 1.82
+++ src/gdb/amd64-tdep.c 15 Nov 2010 16:55:55 -0000
@@ -1629,6 +1629,7 @@ struct amd64_frame_cache
/* Do we have a frame? */
int frameless_p;
+ long locals;
};
/* Initialize a frame cache. */
@@ -1642,6 +1643,7 @@ amd64_init_frame_cache (struct amd64_fra
cache->base = 0;
cache->sp_offset = -8;
cache->pc = 0;
+ cache->locals = -1;
/* Saved registers. We initialize these to -1 since zero is a valid
offset (that's where %rbp is supposed to be stored).
@@ -1824,6 +1826,123 @@ amd64_analyze_stack_align (CORE_ADDR pc,
return min (pc + offset + 2, current_pc);
}
+/* Check whether PC points at code that saves registers on the stack.
+ If so, it updates CACHE and returns the address of the first
+ instruction after the register saves or CURRENT_PC, whichever is
+ smaller. Otherwise, return PC. */
+
+static CORE_ADDR
+amd64_analyze_register_saves (CORE_ADDR pc, CORE_ADDR current_pc,
+ struct amd64_frame_cache *cache,
+ enum bfd_endian byte_order)
+{
+ CORE_ADDR offset = 0;
+ gdb_byte op, opa[5];
+ int i, loc, regnum, has_rex_prefix;
+ gdb_byte modrm, mod, reg1, reg2;
+
+ if (cache->locals > 0)
+ offset -= cache->locals;
+ /* pushq $reg */
+ for (i = 0; i < 16 && pc < current_pc; i++)
+ {
+ target_read_memory (pc, &op, 1);
+ regnum = 0;
+ has_rex_prefix = 0;
+ /* REX prefix might be used for r8-r15 registers. */
+ if (rex_prefix_p (op))
+ {
+ /* Typically 0x41 will be used.
+ Prefix for r8-r15 registers. */
+ regnum = (op & 1) ? 8 : 0;
+ target_read_memory (pc + 1, &op, 1);
+ has_rex_prefix = 1;
+ }
+ if (op < 0x50 || op > 0x57)
+ break;
+
+ offset -= 8;
+ regnum += op -0x50;
+ if (regnum < AMD64_NUM_SAVED_REGS)
+ cache->saved_regs[amd64_arch_regmap[regnum]] = offset;
+ cache->sp_offset += 8;
+ /* Skip 2 bytes for r8-r15, one otherwise. */
+ if (has_rex_prefix)
+ pc++;
+ pc++;
+ }
+
+ /* movq $reg,offset (%rsp/%rbp)
+ exists in two version, with 1byte or 4byte offset.
+ rbp versions are shorter. */
+ for (i = 0; i < 16 * 4 && pc + 4 < current_pc; i++)
+ {
+ target_read_memory (pc, opa, 5);
+ if ((opa[0] != 0x48 && opa[0] !=0x4c)
+ || opa[1] != 0x89)
+ break;
+ modrm = opa[2];
+ reg1 = modrm & 7;
+ reg2 = (modrm >> 3) & 7;
+ mod = modrm >> 6;
+ if (mod == 3) /* eliminate movq %reg1,%reg2 */
+ break;
+
+ /* %rsp base needs use of SIB byte. */
+ if ((reg1 == 4) /* SIB indicator */
+ && (opa[3] != 0x24))
+ break;
+ if ((reg1 != 5) && (reg1 != 4))
+ break;
+
+ if (opa[0] == 0x4c)
+ regnum = 8;
+ else
+ regnum = 0;
+
+ loc = 0;
+ if (reg1 == 5) /* Indicator of rbp base */
+ {
+ if (mod == 0) /* This is not rbp base. */
+ break;
+ if (mod == 2)
+ loc = read_memory_integer (pc + 3, 4, byte_order);
+ else
+ loc = opa[3];
+ }
+ else if ((reg1 == 4) && (opa[3] == 0x24)) /* rsp base */
+ {
+ /* 4byte flag is MSB of third byte */
+ if (mod == 2)
+ loc = read_memory_integer (pc + 4, 4, byte_order);
+ else if (mod == 1)
+ loc = opa[4];
+ else
+ loc = 0;
+ /* Add rsp to base offset. */
+ loc += offset;
+ pc++;
+ }
+
+ regnum += reg2;
+
+ if (regnum < AMD64_NUM_SAVED_REGS)
+ cache->saved_regs[amd64_arch_regmap[regnum]] = loc;
+
+ /* Advance to next instruction. */
+ if (mod == 2)
+ pc += 7;
+ else if (mod == 1)
+ pc += 4;
+ else
+ pc += 3;
+ }
+
+
+ return pc;
+}
+
+
/* Do a limited analysis of the prologue at PC and update CACHE
accordingly. Bail out early if CURRENT_PC is reached. Return the
address where the analysis stopped.
@@ -1867,13 +1986,45 @@ amd64_analyze_prologue (struct gdbarch *
/* Check for `movq %rsp, %rbp'. */
read_memory (pc + 1, buf, 3);
if (memcmp (buf, proto, 3) != 0)
- return pc + 1;
+ pc++;
+ else
+ {
+ /* OK, we actually have a frame. */
+ cache->frameless_p = 0;
+ pc += 4;
+ }
+ }
- /* OK, we actually have a frame. */
- cache->frameless_p = 0;
- return pc + 4;
+ op = read_memory_unsigned_integer (pc, 1, byte_order);
+
+ /* Check for stack adjustment
+
+ subl $XXX, %rsp
+
+ NOTE: You can't subtract a 16-bit immediate from a 64-bit
+ reg, so we don't have to worry about a data16 prefix.
+ A REX prefix is required for 64bit sub instruction. */
+ if (op == 0x48 && current_pc >= pc + 4) /* subl value,%rsp */
+ {
+ read_memory (pc + 1, buf, 2);
+ /* Pattern for a 4 byte signed value. */
+ if (buf[0] == 0x81 && buf[1] == 0xec)
+ {
+ cache->locals = read_memory_integer (pc + 3, 4, byte_order);
+ cache->sp_offset += cache->locals;
+ pc = pc + 7;
+ }
+ /* Pattern for a 1 byte signed value. */
+ else if (buf[0] == 0x83 && buf[1] == 0xec)
+ {
+ cache->locals = read_memory_integer (pc + 3, 1, byte_order);
+ cache->sp_offset += cache->locals;
+ pc = pc + 4;
+ }
}
+ pc = amd64_analyze_register_saves (pc, current_pc, cache, byte_order);
+
return pc;
}