[PATCH, RFC v2 8/8] libc: xtensa: fix PSRAM cache bug

Alexey Lapshin alexey.lapshin@espressif.com
Mon May 15 13:55:35 GMT 2023


newlib:
        * libc/machine/xtensa/Makefile.am: Add PSRAM_FIX flag to AM_CCASFLAGS.
        * libc/machine/xtensa/Makefile.in: Likewise.
        * libc/machine/xtensa/memcpy.S: Add PSRAM_FIX workaround.
        * libc/machine/xtensa/memset.S: Likewise.
        * libc/machine/xtensa/strcpy.S: Likewise.
        * libc/machine/xtensa/strncpy.S: Likewise.
---
 newlib/libc/machine/xtensa/Makefile.am |  3 +-
 newlib/libc/machine/xtensa/Makefile.in |  3 +-
 newlib/libc/machine/xtensa/memcpy.S    | 54 ++++++++++++++++++++++++++
 newlib/libc/machine/xtensa/memset.S    | 23 +++++++++++
 newlib/libc/machine/xtensa/strcpy.S    | 39 +++++++++++++++++++
 newlib/libc/machine/xtensa/strncpy.S   | 20 +++++++++-
 6 files changed, 138 insertions(+), 4 deletions(-)
---
 newlib/libc/machine/xtensa/Makefile.am |  3 +-
 newlib/libc/machine/xtensa/Makefile.in |  3 +-
 newlib/libc/machine/xtensa/memcpy.S    | 54 ++++++++++++++++++++++++++
 newlib/libc/machine/xtensa/memset.S    | 23 +++++++++++
 newlib/libc/machine/xtensa/strcpy.S    | 39 +++++++++++++++++++
 newlib/libc/machine/xtensa/strncpy.S   | 20 +++++++++-
 6 files changed, 138 insertions(+), 4 deletions(-)

diff --git a/newlib/libc/machine/xtensa/Makefile.am b/newlib/libc/machine/xtensa/Makefile.am
index 9307cd871..16f593523 100644
--- a/newlib/libc/machine/xtensa/Makefile.am
+++ b/newlib/libc/machine/xtensa/Makefile.am
@@ -3,8 +3,9 @@
 AUTOMAKE_OPTIONS = cygnus
 
 INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
+PSRAM_FLAG = $(if $(filter -mfix-esp32-psram-cache-issue,$(CCAS) $(INCLUDES)),-DPSRAM_FIX=1,)
 
-AM_CCASFLAGS = $(INCLUDES)
+AM_CCASFLAGS = $(INCLUDES) $(PSRAM_FLAG)
 
 noinst_LIBRARIES = lib.a
 
diff --git a/newlib/libc/machine/xtensa/Makefile.in b/newlib/libc/machine/xtensa/Makefile.in
index ef546441b..4bf06cb3c 100644
--- a/newlib/libc/machine/xtensa/Makefile.in
+++ b/newlib/libc/machine/xtensa/Makefile.in
@@ -173,7 +173,8 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 AUTOMAKE_OPTIONS = cygnus
 INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
-AM_CCASFLAGS = $(INCLUDES)
+PSRAM_FLAG = $(if $(filter -mfix-esp32-psram-cache-issue,$(CCAS) $(INCLUDES)),-DPSRAM_FIX=1,)
+AM_CCASFLAGS = $(INCLUDES) $(PSRAM_FLAG)
 noinst_LIBRARIES = lib.a
 lib_a_SOURCES = setjmp.S memcpy.S memset.S strcmp.S strcpy.S strncpy.S strlen.S
 lib_a_CCASFLAGS = $(AM_CCASFLAGS)
diff --git a/newlib/libc/machine/xtensa/memcpy.S b/newlib/libc/machine/xtensa/memcpy.S
index 7cc8ed0fb..eefa64324 100644
--- a/newlib/libc/machine/xtensa/memcpy.S
+++ b/newlib/libc/machine/xtensa/memcpy.S
@@ -72,8 +72,16 @@ __memcpy_aux:
 #endif
 1:	l8ui	a6, a3, 0
 	addi	a3, a3, 1
+#ifdef PSRAM_FIX
+	nop
+	nop
+	nop
+#endif
 	s8i	a6, a5, 0
 	addi	a5, a5, 1
+#ifdef PSRAM_FIX
+	memw
+#endif
 #if !XCHAL_HAVE_LOOPS
 	bltu	a3, a7, 1b
 #endif
@@ -93,6 +101,9 @@ __memcpy_aux:
 	addi	a3, a3, 1
 	addi	a4, a4, -1
 	s8i	a6, a5, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	addi	a5, a5, 1
 
 	/* Return to main algorithm if dst is now aligned.  */
@@ -110,6 +121,9 @@ __memcpy_aux:
 	addi	a4, a4, -2
 	s8i	a6, a5, 0
 	s8i	a7, a5, 1
+#ifdef PSRAM_FIX
+	memw
+#endif
 	addi	a5, a5, 2
 
 	/* dst is now aligned; return to main algorithm.  */
@@ -143,6 +157,9 @@ memcpy:
 	slli	a8, a7, 4
 	add	a8, a8, a3	// a8 = end of last 16B source chunk
 #endif
+
+#ifndef PSRAM_FIX
+
 1:	l32i	a6, a3, 0
 	l32i	a7, a3, 4
 	s32i	a6, a5, 0
@@ -153,6 +170,25 @@ memcpy:
 	addi	a3, a3, 16
 	s32i	a7, a5, 12
 	addi	a5, a5, 16
+
+#else
+1:	l32i	a6, a3, 0
+	l32i	a7, a3, 4
+	s32i	a6, a5, 0
+	s32i	a7, a5, 4
+	memw
+	l32i	a6, a3, 8
+	l32i	a7, a3, 12
+	s32i	a6, a5, 8
+	s32i	a7, a5, 12
+	memw
+
+	addi	a3, a3, 16
+	addi	a5, a5, 16
+
+#endif
+
+
 #if !XCHAL_HAVE_LOOPS
 	bltu	a3, a8, 1b
 #endif
@@ -171,6 +207,9 @@ memcpy:
 3:	bbsi.l	a4, 2, 4f
 	bbsi.l	a4, 1, 5f
 	bbsi.l	a4, 0, 6f
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	.align 4
@@ -181,6 +220,9 @@ memcpy:
 	addi	a5, a5, 4
 	bbsi.l	a4, 1, 5f
 	bbsi.l	a4, 0, 6f
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	/* Copy 2 bytes.  */
@@ -189,6 +231,9 @@ memcpy:
 	s16i	a6, a5, 0
 	addi	a5, a5, 2
 	bbsi.l	a4, 0, 6f
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	/* Copy 1 byte.  */
@@ -196,6 +241,9 @@ memcpy:
 	s8i	a6, a5, 0
 
 .Ldone:
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 
@@ -277,11 +325,17 @@ memcpy:
 	s8i	a7, a5, 1
 	addi	a5, a5, 2
 	bbsi.l	a4, 0, 6f
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	/* Copy 1 byte.  */
 6:	l8ui	a6, a3, 0
 	s8i	a6, a5, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 	.end schedule
diff --git a/newlib/libc/machine/xtensa/memset.S b/newlib/libc/machine/xtensa/memset.S
index dbfbe1a0c..a4480d5e7 100644
--- a/newlib/libc/machine/xtensa/memset.S
+++ b/newlib/libc/machine/xtensa/memset.S
@@ -59,6 +59,9 @@ __memset_aux:
 	add	a6, a5, a4	// a6 = ending address
 #endif
 1:	s8i	a3, a5, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	addi	a5, a5, 1
 #if !XCHAL_HAVE_LOOPS
 	bltu	a5, a6, 1b
@@ -79,6 +82,9 @@ __memset_aux:
 	s8i	a3, a5, 0
 	addi	a5, a5, 1
 	addi	a4, a4, -1
+#ifdef PSRAM_FIX
+	memw
+#endif
 
 	/* Now retest if dst is aligned.  */
 	_bbci.l	a5, 1, .Ldstaligned
@@ -92,6 +98,9 @@ __memset_aux:
 	s16i	a3, a5, 0
 	addi	a5, a5, 2
 	addi	a4, a4, -2
+#ifdef PSRAM_FIX
+	memw
+#endif
 
 	/* dst is now aligned; return to main algorithm */
 	j	.Ldstaligned
@@ -121,6 +130,14 @@ memset:
 	/* Get number of loop iterations with 16B per iteration.  */
 	srli	a7, a4, 4
 
+#ifdef PSRAM_FIX
+	// Skip the extra store below when the 16-byte loop would run zero times.
+	beqz	a7, 2f
+	// Empirical workaround: storing the first word early seems to prefetch the cache line.
+	s32i	a3, a5, 0
+	nop
+#endif
+
 	/* Destination is word-aligned.  */
 #if XCHAL_HAVE_LOOPS
 	loopnez	a7, 2f
@@ -158,11 +175,17 @@ memset:
 	/* Set 2 bytes.  */
 	s16i	a3, a5, 0
 	addi	a5, a5, 2
+#ifdef PSRAM_FIX
+	memw
+#endif
 
 5:	bbci.l	a4, 0, 6f
 
 	/* Set 1 byte.  */
 	s8i	a3, a5, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 6:	leaf_return
 
 	.end schedule
diff --git a/newlib/libc/machine/xtensa/strcpy.S b/newlib/libc/machine/xtensa/strcpy.S
index 167aa9e08..a4e07e3ac 100644
--- a/newlib/libc/machine/xtensa/strcpy.S
+++ b/newlib/libc/machine/xtensa/strcpy.S
@@ -52,6 +52,9 @@ strcpy:
 	l8ui	a8, a3, 0	// get byte 0
 	addi	a3, a3, 1	// advance src pointer
 	s8i	a8, a10, 0	// store byte 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	beqz	a8, 1f		// if byte 0 is zero
 	addi	a10, a10, 1	// advance dst pointer
 	bbci.l	a3, 1, .Lsrcaligned // if src is now word-aligned
@@ -60,11 +63,17 @@ strcpy:
 	l8ui	a8, a3, 0	// get byte 0
 	/* 1-cycle interlock */
 	s8i	a8, a10, 0	// store byte 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	beqz	a8, 1f		// if byte 0 is zero
 	l8ui	a8, a3, 1	// get byte 0
 	addi	a3, a3, 2	// advance src pointer
 	s8i	a8, a10, 1	// store byte 0
 	addi	a10, a10, 2	// advance dst pointer
+#ifdef PSRAM_FIX
+	memw
+#endif
 	bnez	a8, .Lsrcaligned
 1:	leaf_return
 
@@ -93,6 +102,10 @@ strcpy:
 	bnone	a8, a5, .Lz1	// if byte 1 is zero
 	bnone	a8, a6, .Lz2	// if byte 2 is zero
 	s32i	a8, a10, 0	// store word to dst
+#ifdef PSRAM_FIX
+	l32i	a8, a10, 0
+	s32i	a8, a10, 0
+#endif
 	bnone	a8, a7, .Lz3	// if byte 3 is zero
 	addi	a10, a10, 4	// advance dst pointer
 
@@ -106,6 +119,11 @@ strcpy:
 	bnone	a8, a5, .Lz1	// if byte 1 is zero
 	bnone	a8, a6, .Lz2	// if byte 2 is zero
 	s32i	a8, a10, 0	// store word to dst
+#ifdef PSRAM_FIX
+	l32i	a8, a10, 0
+	s32i	a8, a10, 0
+#endif
+
 	bany	a8, a7, 1b	// if byte 3 is zero
 #endif /* !XCHAL_HAVE_LOOPS */
 
@@ -117,6 +135,9 @@ strcpy:
 	movi	a8, 0
 #endif
 	s8i	a8, a10, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 .Lz1:	/* Byte 1 is zero.  */
@@ -124,6 +145,9 @@ strcpy:
         extui   a8, a8, 16, 16
 #endif
 	s16i	a8, a10, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 .Lz2:	/* Byte 2 is zero.  */
@@ -133,6 +157,9 @@ strcpy:
 	s16i	a8, a10, 0
 	movi	a8, 0
 	s8i	a8, a10, 2
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 #if 1
@@ -162,6 +189,9 @@ strcpy:
 	addi	a3, a3, 1
 	s8i	a8, a10, 0
 	addi	a10, a10, 1
+#ifdef PSRAM_FIX
+	memw
+#endif
 #if XCHAL_HAVE_LOOPS
 	beqz	a8, 2f
 #else
@@ -214,6 +244,9 @@ strcpy:
 	movi	a8, 0
 #endif
 	s8i	a8, a10, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 .Lu1:	/* Byte 1 is zero.  */
@@ -221,12 +254,18 @@ strcpy:
         extui   a8, a8, 16, 16
 #endif
 	s16i	a8, a10, 0
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 .Lu2:	/* Byte 2 is zero.  */
 	s16i	a8, a10, 0
 	movi	a8, 0
 	s8i	a8, a10, 2
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 #endif /* 0 */
diff --git a/newlib/libc/machine/xtensa/strncpy.S b/newlib/libc/machine/xtensa/strncpy.S
index dc9363c2a..55f57d267 100644
--- a/newlib/libc/machine/xtensa/strncpy.S
+++ b/newlib/libc/machine/xtensa/strncpy.S
@@ -56,6 +56,9 @@ __strncpy_aux:
 	j	.Lfill
 
 .Lret:
+#ifdef PSRAM_FIX
+	memw
+#endif
 	leaf_return
 
 
@@ -122,7 +125,11 @@ strncpy:
 	addi	a10, a10, 1
 	bnez    a4, .Lfillcleanup
 
-2:	leaf_return
+2:
+#ifdef PSRAM_FIX
+	memw
+#endif
+	leaf_return
 
 .Lfill1mod2: // dst address is odd
 	s8i	a9, a10, 0	// store byte 0
@@ -241,6 +248,11 @@ strncpy:
 #endif
 1:	l8ui	a8, a3, 0
 	addi	a3, a3, 1
+#ifdef PSRAM_FIX
+	nop
+	nop
+	nop
+#endif
 	s8i	a8, a10, 0
 	addi	a4, a4, -1
 	beqz	a4, 3f
@@ -252,7 +264,11 @@ strncpy:
 #endif
 2:	j	.Lfill
 
-3:	leaf_return
+3:
+#ifdef PSRAM_FIX
+	memw
+#endif
+	leaf_return
 .end schedule
 
 	.size	strncpy, . - strncpy
-- 
2.34.1



More information about the Newlib mailing list