sourceware.org Git - glibc.git/commitdiff
powerpc: Avoid misaligned stores in memset
author: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Tue, 19 Sep 2017 08:25:49 +0000 (13:55 +0530)
committer: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Tue, 19 Sep 2017 08:25:49 +0000 (13:55 +0530)
As per the section "3.1.4.2 Alignment Interrupts" of the "POWER8 Processor
User's Manual for the Single-Chip Module", an alignment interrupt is reported
for misaligned stores in caching-inhibited storage.  As memset is used in
some drivers for DMA (like xorg), this patch avoids misaligned stores for
sizes less than 8 in memset.

ChangeLog
sysdeps/powerpc/powerpc64/power8/memset.S

index 757462f14eb1ed702042eb97b2fb28b91d41b4c0..25b05f25673ec765315a86948a589447e4e855a8 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2017-09-19  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
+
+       * sysdeps/powerpc/powerpc64/power8/memset.S: Avoid misaligned stores.
+
 2017-09-18  Joseph Myers  <joseph@codesourcery.com>
 
        * sysdeps/ieee754/ldbl-opt/w_exp10l_compat.c [LIBM_SVID_COMPAT &&
index 369b95894a7b128662ebec575d730b55203e647e..54828017177dbc6ee15e5e95d8409d40277dc440 100644 (file)
@@ -377,7 +377,10 @@ L(write_LT_32):
        subf    r5,r0,r5
 
 2:     bf      30,1f
-       sth     r4,0(r10)
+       /* Use stb instead of sth because it doesn't generate
+          alignment interrupts on cache-inhibited storage.  */
+       stb     r4,0(r10)
+       stb     r4,1(r10)
        addi    r10,r10,2
 
 1:     bf      31,L(end_4bytes_alignment)
@@ -437,11 +440,74 @@ L(tail5):
        /* Handles copies of 0~8 bytes.  */
        .align  4
 L(write_LE_8):
-       bne     cr6,L(tail4)
+       bne     cr6,L(LE7_tail4)
+       /* If input is word aligned, use stw, else use stb.  */
+       andi.   r0,r10,3
+       bne     L(8_unalign)
 
        stw     r4,0(r10)
        stw     r4,4(r10)
        blr
+
+       /* Unaligned input and size is 8.  */
+       .align  4
+L(8_unalign):
+       andi.   r0,r10,1
+       beq     L(8_hwalign)
+       stb     r4,0(r10)
+       sth     r4,1(r10)
+       sth     r4,3(r10)
+       sth     r4,5(r10)
+       stb     r4,7(r10)
+       blr
+
+       /* Halfword aligned input and size is 8.  */
+       .align  4
+L(8_hwalign):
+       sth     r4,0(r10)
+       sth     r4,2(r10)
+       sth     r4,4(r10)
+       sth     r4,6(r10)
+       blr
+
+       .align  4
+       /* Copies 4~7 bytes.  */
+L(LE7_tail4):
+       /* Use stb instead of sth because it doesn't generate
+          alignment interrupts on cache-inhibited storage.  */
+       bf      29,L(LE7_tail2)
+       stb     r4,0(r10)
+       stb     r4,1(r10)
+       stb     r4,2(r10)
+       stb     r4,3(r10)
+       bf      30,L(LE7_tail5)
+       stb     r4,4(r10)
+       stb     r4,5(r10)
+       bflr    31
+       stb     r4,6(r10)
+       blr
+
+       .align  4
+       /* Copies 2~3 bytes.  */
+L(LE7_tail2):
+       bf      30,1f
+       stb     r4,0(r10)
+       stb     r4,1(r10)
+       bflr    31
+       stb     r4,2(r10)
+       blr
+
+       .align  4
+L(LE7_tail5):
+       bflr    31
+       stb     r4,4(r10)
+       blr
+
+       .align  4
+1:     bflr    31
+       stb     r4,0(r10)
+       blr
+
 END_GEN_TB (MEMSET,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
 
This page took 0.080613 seconds and 5 git commands to generate.