[PATCH, RFC v2 8/8] libc: xtensa: fix PSRAM cache bug
Alexey Lapshin
alexey.lapshin@espressif.com
Mon May 15 13:55:35 GMT 2023
newlib:
* libc/machine/xtensa/Makefile.am (PSRAM_FLAG): New variable; expands to -DPSRAM_FIX=1 when -mfix-esp32-psram-cache-issue is present in $(CCAS) or $(INCLUDES).
(AM_CCASFLAGS): Append $(PSRAM_FLAG).
* libc/machine/xtensa/Makefile.in: Likewise.
* libc/machine/xtensa/memcpy.S: Add workaround code guarded by #ifdef PSRAM_FIX.
* libc/machine/xtensa/memset.S: Likewise.
* libc/machine/xtensa/strcpy.S: Likewise.
* libc/machine/xtensa/strncpy.S: Likewise.
---
newlib/libc/machine/xtensa/Makefile.am | 3 +-
newlib/libc/machine/xtensa/Makefile.in | 3 +-
newlib/libc/machine/xtensa/memcpy.S | 54 ++++++++++++++++++++++++++
newlib/libc/machine/xtensa/memset.S | 23 +++++++++++
newlib/libc/machine/xtensa/strcpy.S | 39 +++++++++++++++++++
newlib/libc/machine/xtensa/strncpy.S | 20 +++++++++-
6 files changed, 138 insertions(+), 4 deletions(-)
diff --git a/newlib/libc/machine/xtensa/Makefile.am b/newlib/libc/machine/xtensa/Makefile.am
index 9307cd871..16f593523 100644
--- a/newlib/libc/machine/xtensa/Makefile.am
+++ b/newlib/libc/machine/xtensa/Makefile.am
@@ -3,8 +3,9 @@
AUTOMAKE_OPTIONS = cygnus
INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
+PSRAM_FLAG = $(if $(filter -mfix-esp32-psram-cache-issue,$(CCAS) $(INCLUDES)),-DPSRAM_FIX=1,)
-AM_CCASFLAGS = $(INCLUDES)
+AM_CCASFLAGS = $(INCLUDES) $(PSRAM_FLAG)
noinst_LIBRARIES = lib.a
diff --git a/newlib/libc/machine/xtensa/Makefile.in b/newlib/libc/machine/xtensa/Makefile.in
index ef546441b..4bf06cb3c 100644
--- a/newlib/libc/machine/xtensa/Makefile.in
+++ b/newlib/libc/machine/xtensa/Makefile.in
@@ -173,7 +173,8 @@ top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = cygnus
INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
-AM_CCASFLAGS = $(INCLUDES)
+PSRAM_FLAG = $(if $(filter -mfix-esp32-psram-cache-issue,$(CCAS) $(INCLUDES)),-DPSRAM_FIX=1,)
+AM_CCASFLAGS = $(INCLUDES) $(PSRAM_FLAG)
noinst_LIBRARIES = lib.a
lib_a_SOURCES = setjmp.S memcpy.S memset.S strcmp.S strcpy.S strncpy.S strlen.S
lib_a_CCASFLAGS = $(AM_CCASFLAGS)
diff --git a/newlib/libc/machine/xtensa/memcpy.S b/newlib/libc/machine/xtensa/memcpy.S
index 7cc8ed0fb..eefa64324 100644
--- a/newlib/libc/machine/xtensa/memcpy.S
+++ b/newlib/libc/machine/xtensa/memcpy.S
@@ -72,8 +72,16 @@ __memcpy_aux:
#endif
1: l8ui a6, a3, 0
addi a3, a3, 1
+#ifdef PSRAM_FIX
+ nop
+ nop
+ nop
+#endif
s8i a6, a5, 0
addi a5, a5, 1
+#ifdef PSRAM_FIX
+ memw
+#endif
#if !XCHAL_HAVE_LOOPS
bltu a3, a7, 1b
#endif
@@ -93,6 +101,9 @@ __memcpy_aux:
addi a3, a3, 1
addi a4, a4, -1
s8i a6, a5, 0
+#ifdef PSRAM_FIX
+ memw
+#endif
addi a5, a5, 1
/* Return to main algorithm if dst is now aligned. */
@@ -110,6 +121,9 @@ __memcpy_aux:
addi a4, a4, -2
s8i a6, a5, 0
s8i a7, a5, 1
+#ifdef PSRAM_FIX
+ memw
+#endif
addi a5, a5, 2
/* dst is now aligned; return to main algorithm. */
@@ -143,6 +157,9 @@ memcpy:
slli a8, a7, 4
add a8, a8, a3 // a8 = end of last 16B source chunk
#endif
+
+#ifndef PSRAM_FIX
+
1: l32i a6, a3, 0
l32i a7, a3, 4
s32i a6, a5, 0
@@ -153,6 +170,25 @@ memcpy:
addi a3, a3, 16
s32i a7, a5, 12
addi a5, a5, 16
+
+#else
+1: l32i a6, a3, 0
+ l32i a7, a3, 4
+ s32i a6, a5, 0
+ s32i a7, a5, 4
+ memw
+ l32i a6, a3, 8
+ l32i a7, a3, 12
+ s32i a6, a5, 8
+ s32i a7, a5, 12
+ memw
+
+ addi a3, a3, 16
+ addi a5, a5, 16
+
+#endif
+
+
#if !XCHAL_HAVE_LOOPS
bltu a3, a8, 1b
#endif
@@ -171,6 +207,9 @@ memcpy:
3: bbsi.l a4, 2, 4f
bbsi.l a4, 1, 5f
bbsi.l a4, 0, 6f
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
.align 4
@@ -181,6 +220,9 @@ memcpy:
addi a5, a5, 4
bbsi.l a4, 1, 5f
bbsi.l a4, 0, 6f
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
/* Copy 2 bytes. */
@@ -189,6 +231,9 @@ memcpy:
s16i a6, a5, 0
addi a5, a5, 2
bbsi.l a4, 0, 6f
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
/* Copy 1 byte. */
@@ -196,6 +241,9 @@ memcpy:
s8i a6, a5, 0
.Ldone:
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
@@ -277,11 +325,17 @@ memcpy:
s8i a7, a5, 1
addi a5, a5, 2
bbsi.l a4, 0, 6f
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
/* Copy 1 byte. */
6: l8ui a6, a3, 0
s8i a6, a5, 0
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
.end schedule
diff --git a/newlib/libc/machine/xtensa/memset.S b/newlib/libc/machine/xtensa/memset.S
index dbfbe1a0c..a4480d5e7 100644
--- a/newlib/libc/machine/xtensa/memset.S
+++ b/newlib/libc/machine/xtensa/memset.S
@@ -59,6 +59,9 @@ __memset_aux:
add a6, a5, a4 // a6 = ending address
#endif
1: s8i a3, a5, 0
+#ifdef PSRAM_FIX
+ memw
+#endif
addi a5, a5, 1
#if !XCHAL_HAVE_LOOPS
bltu a5, a6, 1b
@@ -79,6 +82,9 @@ __memset_aux:
s8i a3, a5, 0
addi a5, a5, 1
addi a4, a4, -1
+#ifdef PSRAM_FIX
+ memw
+#endif
/* Now retest if dst is aligned. */
_bbci.l a5, 1, .Ldstaligned
@@ -92,6 +98,9 @@ __memset_aux:
s16i a3, a5, 0
addi a5, a5, 2
addi a4, a4, -2
+#ifdef PSRAM_FIX
+ memw
+#endif
/* dst is now aligned; return to main algorithm */
j .Ldstaligned
@@ -121,6 +130,14 @@ memset:
/* Get number of loop iterations with 16B per iteration. */
srli a7, a4, 4
+#ifdef PSRAM_FIX
+ // Skip the priming store below when there is no full 16-byte iteration to run.
+ beqz a7, 2f
+ // Store to the first destination word ahead of the loop; this seems to work to prefetch the cache line.
+ s32i a3, a5, 0
+ nop
+#endif
+
/* Destination is word-aligned. */
#if XCHAL_HAVE_LOOPS
loopnez a7, 2f
@@ -158,11 +175,17 @@ memset:
/* Set 2 bytes. */
s16i a3, a5, 0
addi a5, a5, 2
+#ifdef PSRAM_FIX
+ memw
+#endif
5: bbci.l a4, 0, 6f
/* Set 1 byte. */
s8i a3, a5, 0
+#ifdef PSRAM_FIX
+ memw
+#endif
6: leaf_return
.end schedule
diff --git a/newlib/libc/machine/xtensa/strcpy.S b/newlib/libc/machine/xtensa/strcpy.S
index 167aa9e08..a4e07e3ac 100644
--- a/newlib/libc/machine/xtensa/strcpy.S
+++ b/newlib/libc/machine/xtensa/strcpy.S
@@ -52,6 +52,9 @@ strcpy:
l8ui a8, a3, 0 // get byte 0
addi a3, a3, 1 // advance src pointer
s8i a8, a10, 0 // store byte 0
+#ifdef PSRAM_FIX
+ memw
+#endif
beqz a8, 1f // if byte 0 is zero
addi a10, a10, 1 // advance dst pointer
bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned
@@ -60,11 +63,17 @@ strcpy:
l8ui a8, a3, 0 // get byte 0
/* 1-cycle interlock */
s8i a8, a10, 0 // store byte 0
+#ifdef PSRAM_FIX
+ memw
+#endif
beqz a8, 1f // if byte 0 is zero
l8ui a8, a3, 1 // get byte 0
addi a3, a3, 2 // advance src pointer
s8i a8, a10, 1 // store byte 0
addi a10, a10, 2 // advance dst pointer
+#ifdef PSRAM_FIX
+ memw
+#endif
bnez a8, .Lsrcaligned
1: leaf_return
@@ -93,6 +102,10 @@ strcpy:
bnone a8, a5, .Lz1 // if byte 1 is zero
bnone a8, a6, .Lz2 // if byte 2 is zero
s32i a8, a10, 0 // store word to dst
+#ifdef PSRAM_FIX
+ l32i a8, a10, 0
+ s32i a8, a10, 0
+#endif
bnone a8, a7, .Lz3 // if byte 3 is zero
addi a10, a10, 4 // advance dst pointer
@@ -106,6 +119,11 @@ strcpy:
bnone a8, a5, .Lz1 // if byte 1 is zero
bnone a8, a6, .Lz2 // if byte 2 is zero
s32i a8, a10, 0 // store word to dst
+#ifdef PSRAM_FIX
+ l32i a8, a10, 0
+ s32i a8, a10, 0
+#endif
+
bany a8, a7, 1b // if byte 3 is zero
#endif /* !XCHAL_HAVE_LOOPS */
@@ -117,6 +135,9 @@ strcpy:
movi a8, 0
#endif
s8i a8, a10, 0
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
.Lz1: /* Byte 1 is zero. */
@@ -124,6 +145,9 @@ strcpy:
extui a8, a8, 16, 16
#endif
s16i a8, a10, 0
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
.Lz2: /* Byte 2 is zero. */
@@ -133,6 +157,9 @@ strcpy:
s16i a8, a10, 0
movi a8, 0
s8i a8, a10, 2
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
#if 1
@@ -162,6 +189,9 @@ strcpy:
addi a3, a3, 1
s8i a8, a10, 0
addi a10, a10, 1
+#ifdef PSRAM_FIX
+ memw
+#endif
#if XCHAL_HAVE_LOOPS
beqz a8, 2f
#else
@@ -214,6 +244,9 @@ strcpy:
movi a8, 0
#endif
s8i a8, a10, 0
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
.Lu1: /* Byte 1 is zero. */
@@ -221,12 +254,18 @@ strcpy:
extui a8, a8, 16, 16
#endif
s16i a8, a10, 0
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
.Lu2: /* Byte 2 is zero. */
s16i a8, a10, 0
movi a8, 0
s8i a8, a10, 2
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
#endif /* 0 */
diff --git a/newlib/libc/machine/xtensa/strncpy.S b/newlib/libc/machine/xtensa/strncpy.S
index dc9363c2a..55f57d267 100644
--- a/newlib/libc/machine/xtensa/strncpy.S
+++ b/newlib/libc/machine/xtensa/strncpy.S
@@ -56,6 +56,9 @@ __strncpy_aux:
j .Lfill
.Lret:
+#ifdef PSRAM_FIX
+ memw
+#endif
leaf_return
@@ -122,7 +125,11 @@ strncpy:
addi a10, a10, 1
bnez a4, .Lfillcleanup
-2: leaf_return
+2:
+#ifdef PSRAM_FIX
+ memw
+#endif
+ leaf_return
.Lfill1mod2: // dst address is odd
s8i a9, a10, 0 // store byte 0
@@ -241,6 +248,11 @@ strncpy:
#endif
1: l8ui a8, a3, 0
addi a3, a3, 1
+#ifdef PSRAM_FIX
+ nop
+ nop
+ nop
+#endif
s8i a8, a10, 0
addi a4, a4, -1
beqz a4, 3f
@@ -252,7 +264,11 @@ strncpy:
#endif
2: j .Lfill
-3: leaf_return
+3:
+#ifdef PSRAM_FIX
+ memw
+#endif
+ leaf_return
.end schedule
.size strncpy, . - strncpy
--
2.34.1
More information about the Newlib mailing list