This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
[PATCH 2/2] Read memory in multiple lines in dcache_xfer_memory.
- From: Yao Qi <yao at codesourcery dot com>
- To: <gdb-patches at sourceware dot org>
- Date: Fri, 18 Oct 2013 11:21:06 +0800
- Subject: [PATCH 2/2] Read memory in multiple lines in dcache_xfer_memory.
- Authentication-results: sourceware.org; auth=none
- References: <1382066466-2551-1-git-send-email-yao at codesourcery dot com>
Hi, this is an optimization to how dcache reads contents from target
memory. Currently, when GDB requests to read target memory and the
request goes through dcache, dcache reads in one cache line at a
time, regardless of the size of the requested data. If GDB reads a large
amount of data from the target, dcache reads from target memory
multiple times (one cache line per read). In remote debugging,
this means multiple RSP packets are needed to transfer the memory from
GDBserver, which is slow.
This patch teaches dcache to read as much contiguous target memory
as possible at one time, and to update multiple cache lines when the
contents are read in. This is done in several steps:
1. When GDB requests to read data [memaddr, memaddr + len), a
collection of ranges is created to record readable ranges, because
some memory may be marked as write-only.
2. Then, we'll check the cache state of these readable ranges. Some
of them are cached, and some are not. We record the uncached ranges.
3. Iterate the collection of uncached ranges, and issue target_read
to read these uncached ranges from the target memory and update cache
lines. For cached ranges, read from cache lines directly.
I am using the perf test case 'backtrace' to measure the speed-up of this
patch. For each run, I use 'set dcache line-size N' and
'set dcache size 4096 * 64 / N', to make sure the total size of dcache
is unchanged.
With this patch, the number of 'm' RSP packets is reduced
dramatically:
cache line size: Original Patched
2 4657894 31224
4 2317896 28616
8 158948 21462
16 579474 14308
32 293314 14308
64 150234 14308
128 78694 10738
256 42938 8960
512 25046 8064
1024 16100 7616
2048 9184 7392
Performance comparison:
cache line size Patched Original
backtrace cpu_time 2 4.44 33.83
backtrace cpu_time 4 3.88 14.27
backtrace cpu_time 8 3.1 7.92
backtrace cpu_time 16 2.48 4.79
backtrace cpu_time 32 2.25 2.51
backtrace cpu_time 64 1.16 1.93
backtrace cpu_time 128 1.02 1.69
backtrace cpu_time 256 1.06 1.37
backtrace cpu_time 512 1.11 1.17
backtrace cpu_time 1024 1.1 1.22
backtrace cpu_time 2048 1.13 1.17
backtrace wall_time 2 5.49653506279 74.0839848518
backtrace wall_time 4 4.70916986465 29.94830513
backtrace wall_time 8 4.11279582977 15.6743021011
backtrace wall_time 16 3.68633985519 8.83114910126
backtrace wall_time 32 3.63511800766 5.79059791565
backtrace wall_time 64 1.61371517181 3.67003703117
backtrace wall_time 128 1.50599694252 2.60381913185
backtrace wall_time 256 1.47533297539 2.05611109734
backtrace wall_time 512 1.48193001747 1.80505800247
backtrace wall_time 1024 1.50955080986 1.69646501541
backtrace wall_time 2048 1.54235315323 1.61461496353
backtrace vmsize 2 104568 104576
backtrace vmsize 4 100556 102388
backtrace vmsize 8 95384 97540
backtrace vmsize 16 94092 94092
backtrace vmsize 32 93348 93276
backtrace vmsize 64 93148 92928
backtrace vmsize 128 93148 93100
backtrace vmsize 256 93148 93100
backtrace vmsize 512 93148 93100
backtrace vmsize 1024 93148 93100
backtrace vmsize 2048 93148 93100
gdb:
2013-10-18 Yao Qi <yao@codesourcery.com>
* dcache.c: Include "memrange.h".
Update comments.
(dcache_read_line): Remove.
(dcache_peek_byte): Remove.
(dcache_ranges_readable): New function.
(dcache_ranges_uncached): New function.
(dcache_xfer_memory): Read multiple cache lines from target
memory in one time.
---
gdb/dcache.c | 331 ++++++++++++++++++++++++++++++++++++++++++---------------
1 files changed, 244 insertions(+), 87 deletions(-)
diff --git a/gdb/dcache.c b/gdb/dcache.c
index 316f3dd..65bbad1 100644
--- a/gdb/dcache.c
+++ b/gdb/dcache.c
@@ -25,6 +25,7 @@
#include "target.h"
#include "inferior.h"
#include "splay-tree.h"
+#include "memrange.h"
/* Commands with a prefix of `{set,show} dcache'. */
static struct cmd_list_element *dcache_set_list = NULL;
@@ -60,8 +61,8 @@ static struct cmd_list_element *dcache_show_list = NULL;
/* NOTE: Interaction of dcache and memory region attributes
As there is no requirement that memory region attributes be aligned
- to or be a multiple of the dcache page size, dcache_read_line() and
- dcache_write_line() must break up the page by memory region. If a
+ to or be a multiple of the dcache page size, dcache_xfer_memory must
+ break up the page by memory region. If a
chunk does not have the cache attribute set, an invalid memory type
is set, etc., then the chunk is skipped. Those chunks are handled
in target_xfer_memory() (or target_xfer_memory_partial()).
@@ -122,8 +123,6 @@ typedef void (block_func) (struct dcache_block *block, void *param);
static struct dcache_block *dcache_hit (DCACHE *dcache, CORE_ADDR addr);
-static int dcache_read_line (DCACHE *dcache, struct dcache_block *db);
-
static struct dcache_block *dcache_alloc (DCACHE *dcache, CORE_ADDR addr);
static void dcache_info (char *exp, int tty);
@@ -305,56 +304,6 @@ dcache_hit (DCACHE *dcache, CORE_ADDR addr)
return db;
}
-/* Fill a cache line from target memory.
- The result is 1 for success, 0 if the (entire) cache line
- wasn't readable. */
-
-static int
-dcache_read_line (DCACHE *dcache, struct dcache_block *db)
-{
- CORE_ADDR memaddr;
- gdb_byte *myaddr;
- int len;
- int res;
- int reg_len;
- struct mem_region *region;
-
- len = dcache->line_size;
- memaddr = db->addr;
- myaddr = db->data;
-
- while (len > 0)
- {
- /* Don't overrun if this block is right at the end of the region. */
- region = lookup_mem_region (memaddr);
- if (region->hi == 0 || memaddr + len < region->hi)
- reg_len = len;
- else
- reg_len = region->hi - memaddr;
-
- /* Skip non-readable regions. The cache attribute can be ignored,
- since we may be loading this for a stack access. */
- if (region->attrib.mode == MEM_WO)
- {
- memaddr += reg_len;
- myaddr += reg_len;
- len -= reg_len;
- continue;
- }
-
- res = target_read (&current_target, TARGET_OBJECT_RAW_MEMORY,
- NULL, myaddr, memaddr, reg_len);
- if (res < reg_len)
- return 0;
-
- memaddr += res;
- myaddr += res;
- len -= res;
- }
-
- return 1;
-}
-
/* Get a free cache block, put or keep it on the valid list,
and return its address. */
@@ -395,28 +344,6 @@ dcache_alloc (DCACHE *dcache, CORE_ADDR addr)
return db;
}
-/* Using the data cache DCACHE, store in *PTR the contents of the byte at
- address ADDR in the remote machine.
-
- Returns 1 for success, 0 for error. */
-
-static int
-dcache_peek_byte (DCACHE *dcache, CORE_ADDR addr, gdb_byte *ptr)
-{
- struct dcache_block *db = dcache_hit (dcache, addr);
-
- if (!db)
- {
- db = dcache_alloc (dcache, addr);
-
- if (!dcache_read_line (dcache, db))
- return 0;
- }
-
- *ptr = db->data[XFORM (dcache, addr)];
- return 1;
-}
-
/* Write the byte at PTR into ADDR in the data cache.
The caller is responsible for also promptly writing the data
@@ -473,6 +400,105 @@ dcache_init (void)
return dcache;
}
+/* Check the readability of memory range [MEMADDR, MEMADDR + LEN) and
+ return the readable ranges.  The caller is responsible for releasing them. */
+
+static VEC(mem_range_s) *
+dcache_ranges_readable (CORE_ADDR memaddr, int len)
+{
+ VEC(mem_range_s) *readable_memory = NULL;
+
+ while (len > 0)
+ {
+ struct mem_range *r;
+ int reg_len;
+ /* Don't overrun if this block is right at the end of the region. */
+ struct mem_region *region = lookup_mem_region (memaddr);
+
+ if (region->hi == 0 || memaddr + len < region->hi)
+ reg_len = len;
+ else
+ reg_len = region->hi - memaddr;
+
+ /* Skip non-readable regions. The cache attribute can be ignored,
+ since we may be loading this for a stack access. */
+ if (region->attrib.mode == MEM_WO)
+ {
+ memaddr += reg_len;
+ len -= reg_len;
+ continue;
+ }
+
+ r = VEC_safe_push (mem_range_s, readable_memory, NULL);
+ r->start = memaddr;
+ r->length = reg_len;
+
+ memaddr += reg_len;
+ len -= reg_len;
+ }
+
+ return readable_memory;
+}
+
+/* Return the uncached ranges from RANGES. */
+
+static VEC(mem_range_s) *
+dcache_ranges_uncached (DCACHE *dcache, VEC(mem_range_s) *ranges)
+{
+ int b;
+ struct mem_range *rb;
+ VEC(mem_range_s) *uncached = NULL;
+
+ for (b = 0; VEC_iterate (mem_range_s, ranges, b, rb); b++)
+ {
+ CORE_ADDR memaddr_start = rb->start;
+ CORE_ADDR memaddr_end = rb->start;
+
+ while (memaddr_end < rb->start + rb->length)
+ {
+ struct dcache_block *db = dcache_hit (dcache, memaddr_end);
+
+ if (db != NULL)
+ {
+ /* Set MEMADDR_END to the start address of this cache line. */
+ memaddr_end = align_down (memaddr_end, dcache->line_size);
+
+ if (memaddr_end > memaddr_start)
+ {
+ struct mem_range *r;
+
+ r = VEC_safe_push (mem_range_s, uncached, NULL);
+ r->start = memaddr_start;
+ r->length = memaddr_end - memaddr_start;
+ }
+ }
+
+ /* Increase memaddr_end to a dcache->line_size-aligned value. */
+ if (memaddr_end < align_up (memaddr_end, dcache->line_size))
+ memaddr_end = align_up (memaddr_end, dcache->line_size);
+ else
+ memaddr_end += dcache->line_size;
+
+ if (db != NULL)
+ memaddr_start = memaddr_end;
+ }
+
+ if (memaddr_end > rb->start + rb->length)
+ memaddr_end = rb->start + rb->length;
+
+ if (memaddr_start < memaddr_end)
+ {
+ struct mem_range *r;
+
+ r = VEC_safe_push (mem_range_s, uncached, NULL);
+
+ r->start = memaddr_start;
+ r->length = memaddr_end - memaddr_start;
+ }
+ }
+
+ return uncached;
+}
/* Read or write LEN bytes from inferior memory at MEMADDR, transferring
to or from debugger address MYADDR. Write to inferior if SHOULD_WRITE is
@@ -489,9 +515,6 @@ dcache_xfer_memory (struct target_ops *ops, DCACHE *dcache,
CORE_ADDR memaddr, gdb_byte *myaddr,
int len, int should_write)
{
- int i;
- int res;
-
/* If this is a different inferior from what we've recorded,
flush the cache. */
@@ -506,8 +529,10 @@ dcache_xfer_memory (struct target_ops *ops, DCACHE *dcache,
if (should_write)
{
- res = target_write (ops, TARGET_OBJECT_RAW_MEMORY,
- NULL, myaddr, memaddr, len);
+ int res = target_write (ops, TARGET_OBJECT_RAW_MEMORY,
+ NULL, myaddr, memaddr, len);
+ int i;
+
if (res <= 0)
return res;
/* Update LEN to what was actually written. */
@@ -527,16 +552,148 @@ dcache_xfer_memory (struct target_ops *ops, DCACHE *dcache,
}
else
{
- for (i = 0; i < len; i++)
+ int i;
+ struct mem_range *r;
+ /* The starting address of each cached range. */
+ CORE_ADDR cached_addr = memaddr;
+
+ VEC(mem_range_s) *memory;
+ VEC(mem_range_s) *uncached = NULL;
+
+ /* Find readable ranges in range [MEMADDR, MEMADDR + LEN),
+ supposing write-only regions are wo1 and wo2. Then,
+ readable ranges are r1, r2 and r3.
+
+ MEMADDR MEMADDR + LEN
+ |<------------------------------------------------->|
+ |<-- wo1 -->| |<-- wo2 -->|
+
+ |<-r1->| |<--r2-->| |<---r3---->| */
+ memory = dcache_ranges_readable (memaddr, len);
+
+ /* GDB will read from these three readable ranges, r1, r2 and r3.
+ GDB has to check the corresponding cache lines' state (cached
+ or uncached) to determine whether to read from the target
+ memory or the cache lines.
+
+ MEMADDR MEMADDR + LEN
+ |<------------------------------------------------->|
+ |<-- wo1 -->| |<-- wo2 -->|
+
+ |<-r1->| |<--r2-->| |<---r3---->|
+
+ -u-|-----c----|-----u----|-----c----|-----c----|--u--
+ 'u' stands for uncached, 'c' stands for cached.
+
+ |u1|-c1-| | u2 |c2| |--c3--| u3 |
+
+ Uncached ranges are u1, u2 and u3, and cached ranges are c1,
+ c2 and c3. */
+ uncached = dcache_ranges_uncached (dcache, memory);
+
+ VEC_free (mem_range_s, memory);
+
+ /* Iterate each uncached range. Read memory from cache lines if
+ memory address is not within the uncached range, otherwise, read
+ from the target memory and update corresponding cache lines. */
+
+ for (i = 0; VEC_iterate (mem_range_s, uncached, i, r); i++)
{
- if (!dcache_peek_byte (dcache, memaddr + i, myaddr + i))
+ int j;
+
+ if (cached_addr < r->start)
{
- /* That failed. Discard its cache line so we don't have a
- partially read line. */
- dcache_invalidate_line (dcache, memaddr + i);
- return i;
+ /* Read memory [cached_addr, MIN (r->start, MEMADDR + LEN))
+ from cache lines. */
+
+ for (; cached_addr < r->start && cached_addr < (memaddr + len);
+ cached_addr++)
+ {
+ struct dcache_block *db = dcache_hit (dcache, cached_addr);
+
+ gdb_assert (db != NULL);
+
+ myaddr[cached_addr - memaddr]
+ = db->data[XFORM (dcache, cached_addr)];
+ }
+ }
+ cached_addr = r->start + r->length;
+
+ /* Part of the memory range [MEMADDR, MEMADDR + LEN) is
+ not cached. */
+ if (r->start < len + memaddr)
+ {
+ /* MEMADDR_START and MEMADDR_END are aligned on
+ dcache->line_size, because dcache->line_size is the
+ minimal unit to update cache and fetch from the target
+ memory. */
+ CORE_ADDR memaddr_start
+ = align_down (r->start, dcache->line_size);
+ CORE_ADDR memaddr_end
+ = align_up (r->start + r->length, dcache->line_size);
+ int res;
+ int len1 = memaddr_end - memaddr_start;
+ int len2;
+ gdb_byte *buf = xmalloc (len1);
+
+ /* Read multiple cache lines to cover memory range
+ [r->start, r->start + MIN (r->length,
+ LEN + MEMADDR - r->start)) from target. */
+
+ res = target_read (&current_target, TARGET_OBJECT_RAW_MEMORY,
+ NULL, buf, memaddr_start, len1);
+
+ if (res == -1)
+ {
+ VEC_free (mem_range_s, uncached);
+ xfree (buf);
+ return r->start - memaddr;
+ }
+
+ /* Copy contents to MYADDR. */
+ len2 = r->length;
+ if (len2 > len + memaddr - r->start)
+ len2 = len + memaddr - r->start;
+
+ memcpy ((r->start - memaddr) + myaddr,
+ buf + (r->start - memaddr_start),
+ len2);
+
+ /* Update cache lines in range
+ [MEMADDR_START, MEMADDR_START + LEN1). */
+ for (j = 0; j < (len1 / dcache->line_size); j++)
+ {
+ struct dcache_block *db
+ = dcache_hit (dcache, memaddr_start + j * dcache->line_size);
+
+ gdb_assert (db == NULL);
+
+ db = dcache_alloc (dcache, memaddr_start + j * dcache->line_size);
+
+ memcpy (db->data, &buf[j * dcache->line_size], dcache->line_size);
+ }
+
+ xfree (buf);
+
+ if (res < len1)
+ {
+ VEC_free (mem_range_s, uncached);
+ return r->start - memaddr + res;
+ }
}
}
+
+ VEC_free (mem_range_s, uncached);
+
+ for (; cached_addr < (memaddr + len); cached_addr++)
+ {
+ struct dcache_block *db = dcache_hit (dcache, cached_addr);
+
+ gdb_assert (db != NULL);
+
+ myaddr[cached_addr - memaddr]
+ = db->data[XFORM (dcache, cached_addr)];
+ }
}
return len;
--
1.7.7.6