This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH][SPARC] supporting more than 2g in niagara4 memcpy and memset
- From: jose dot marchesi at oracle dot com (Jose E. Marchesi)
- To: libc-alpha at sourceware dot org
- Date: Tue, 11 Nov 2014 15:09:13 +0100
- Subject: [PATCH][SPARC] supporting more than 2g in niagara4 memcpy and memset
- Authentication-results: sourceware.org; auth=none
Hi.
The niagara4-specific versions of memcpy and memset do signed 32bit
compares with the size of the buffer to copy/set. Anything larger than
2G will have the 32st bit set and will be treated as negative.
The problem is easy to reproduce in a T4, T5 or T7 machine:
p1 = (char*) mmap (NULL, 0xe0000000,
PROT_READ | PROT_WRITE, MAP_PRIVATE |
MAP_ANONYMOUS, -1, 0);
p2 = (char*) mmap (NULL, 0xe0000000,
PROT_READ | PROT_WRITE, MAP_PRIVATE |
MAP_ANONYMOUS, -1, 0);
memset (p1, 0xc, 0xe0000000); /* Only 3 bytes are set. */
dst = memcpy (p2, p1, 0xe0000000); /* Only 3 bytes are copied. */
The patch below fixes the issue by using the XCC condition codes in the
proper branch instructions.
Tested in sparc64-unknown-linux-gnu.
No regressions observed.
Thanks.
2014-11-11 Jose E. Marchesi <jose.marchesi@oracle.com>
* sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S: Use the XCC
condition codes when comparing buffer sizes.
* sysdeps/sparc/sparc64/multiarch/memset-niagara4.S: Likewise.
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S
index 63e0d83..d183dcd 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S
@@ -46,6 +46,11 @@
#define STORE(type,src,addr) type src, [addr]
#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
#if !defined NOT_IN_libc
.register %g2,#scratch
@@ -69,12 +74,12 @@ ENTRY(__memcpy_niagara4)
#endif
brz,pn %o2, .Lexit
cmp %o2, 3
- ble,pn %icc, .Ltiny
+ ble,pn %XCC, .Ltiny
cmp %o2, 19
- ble,pn %icc, .Lsmall
+ ble,pn %XCC, .Lsmall
or %o0, %o1, %g2
cmp %o2, 128
- bl,pn %icc, .Lmedium
+ bl,pn %XCC, .Lmedium
nop
.Llarge:/* len >= 0x80 */
@@ -152,14 +157,14 @@ ENTRY(__memcpy_niagara4)
add %o0, 0x08, %o0
EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
add %o0, 0x08, %o0
- bne,pt %icc, 1b
+ bne,pt %XCC, 1b
LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
membar #StoreLoad | #StoreStore
brz,pn %o2, .Lexit
cmp %o2, 19
- ble,pn %icc, .Lsmall_unaligned
+ ble,pn %XCC, .Lsmall_unaligned
nop
ba,a,pt %icc, .Lmedium_noprefetch
@@ -200,7 +205,7 @@ ENTRY(__memcpy_niagara4)
EX_ST(STORE(std, %f28, %o0 + 0x30))
EX_ST(STORE(std, %f30, %o0 + 0x38))
add %o0, 0x40, %o0
- bne,pt %icc, 1b
+ bne,pt %XCC, 1b
LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
VISExitHalf
diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
index c090c50..15c931a 100644
--- a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
+++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
@@ -21,6 +21,11 @@
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
#if !defined NOT_IN_libc
.register %g2, #scratch
@@ -46,7 +51,7 @@ END(__memset_niagara4)
ENTRY(__bzero_niagara4)
clr %o4
1: cmp %o1, 16
- ble %icc, .Ltiny
+ ble %XCC, .Ltiny
mov %o0, %o3
sub %g0, %o0, %g1
and %g1, 0x7, %g1
@@ -58,7 +63,7 @@ ENTRY(__bzero_niagara4)
add %o0, 1, %o0
.Laligned8:
cmp %o1, 64 + (64 - 8)
- ble .Lmedium
+ ble %XCC, .Lmedium
sub %g0, %o0, %g1
andcc %g1, (64 - 1), %g1
brz,pn %g1, .Laligned64
@@ -75,7 +80,7 @@ ENTRY(__bzero_niagara4)
1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
subcc %g1, 0x40, %g1
stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
- bne,pt %icc, 1b
+ bne,pt %XCC, 1b
add %o0, 0x40, %o0
.Lpostloop:
cmp %o1, 8
@@ -116,7 +121,7 @@ ENTRY(__bzero_niagara4)
stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
- bne,pt %icc, 1b
+ bne,pt %XCC, 1b
add %o0, 0x30, %o0
ba,a,pt %icc, .Lpostloop
END(__bzero_niagara4)