This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFA] MIPS 24K Errata Patch


Hi Richard,

This patch is from the MIPS source base and addresses errata for the 24K. The errata is described in commentary in tc-mips.c. I've rewritten the patch to conform to the current gas structure for nop-insertion. Tests have successfully completed for mips-linux and mips-sde-elf. Does this look okay to install?

Thanks,
Catherine

gas ChangeLog entry:

2011-04-13  Catherine Moore  <clm@codesourcery.com>
            David Ung <davidu@mips.com>

        * config/tc-mips.c (BASE_REG_EQ): New.
        (fix_24k_offset): New.
        (fix_24k_align_to): New.
        (nops_for_24k): New.
        (nops_for_insn): Add address_expr argument.
        Update all callers.  Call nops_for_24k.
        (nops_for_insn_or_target): Add address_expr argument.
        Update all callers.
        (append_insn): Check for stores and mfix-24k.


gas/testsuite ChangeLog entry:

2011-04-13  Catherine Moore  <clm@codesourcery.com>
            David Ung <davidu@mips.com>

	* gas/mips/24k-branch-delay-1.d: New.
	* gas/mips/24k-branch-delay-1.s: New.
	* gas/mips/24k-triple-stores-1.d: New.
	* gas/mips/24k-triple-stores-1.s: New.
	* gas/mips/24k-triple-stores-2.d: New.
	* gas/mips/24k-triple-stores-2.s: New.
	* gas/mips/24k-triple-stores-3.d: New.
	* gas/mips/24k-triple-stores-3.s: New.
	* gas/mips/24k-triple-stores-5.d: New.
	* gas/mips/24k-triple-stores-5.s: New.
	* gas/mips/24k-triple-stores-6.d: New.
	* gas/mips/24k-triple-stores-6.s: New.
	* gas/mips/24k-triple-stores-7.d: New.
	* gas/mips/24k-triple-stores-7.s: New.
	* gas/mips/24k-triple-stores-8.d: New.
	* gas/mips/24k-triple-stores-8.s: New.
	* gas/mips/24k-triple-stores-9.d: New.
	* gas/mips/24k-triple-stores-9.s: New.
	* gas/mips/mips.exp: Invoke new tests.

Index: 24k-branch-delay-1.d
===================================================================
RCS file: 24k-branch-delay-1.d
diff -N 24k-branch-delay-1.d
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-branch-delay-1.d	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,16 ----
+ #objdump: -dr -mmips:isa32r2
+ #as: -march=24kfx -mfix-24k
+ #name: 24K: delay slot filling
+ 
+ .*: +file format .*mips.*
+ 
+ Disassembly of section .text:
+ 00000000 <func>:
+    0:	24620005 	addiu	v0,v1,5
+    4:	8c440000 	lw	a0,0\(v0\)
+    8:	ac430004 	sw	v1,4\(v0\)
+    c:	10600002 	beqz	v1,18 <func\+0x18>
+   10:	00000000 	nop
+   14:	8c430008 	lw	v1,8\(v0\)
+   18:	8c450010 	lw	a1,16\(v0\)
+   1c:	00000000 	nop
Index: 24k-branch-delay-1.s
===================================================================
RCS file: 24k-branch-delay-1.s
diff -N 24k-branch-delay-1.s
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-branch-delay-1.s	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,12 ----
+ # Test that we don't move store into delay slots
+ 
+ 	.text
+ func:
+ 	addiu   $2,$3,5        
+ 	lw      $4,0($2)
+ 	sw      $3,4($2)
+ 	beq     $3,0,.L1
+ 	lw      $3,8($2)
+ .L1:
+ 	lw      $5,16($2)
+ 	.p2align        4
Index: 24k-triple-stores-1.d
===================================================================
RCS file: 24k-triple-stores-1.d
diff -N 24k-triple-stores-1.d
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-1.d	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,101 ----
+ #objdump: -dr -mmips:isa32r2
+ #as: -march=24kfx -mfix-24k
+ #name: 24K: triple store (opcode check)
+ 
+ .*: +file format .*mips.*
+ 
+ Disassembly of section .text:
+ 00000000 <.text>:
+    0:	a3a20000 	sb	v0,0\(sp\)
+    4:	a3a30008 	sb	v1,8\(sp\)
+    8:	00000000 	nop
+    c:	a3a40010 	sb	a0,16\(sp\)
+   10:	a3a50018 	sb	a1,24\(sp\)
+   14:	00000000 	nop
+   18:	a3a60020 	sb	a2,32\(sp\)
+   1c:	a7a20000 	sh	v0,0\(sp\)
+   20:	a7a30008 	sh	v1,8\(sp\)
+   24:	00000000 	nop
+   28:	a7a40010 	sh	a0,16\(sp\)
+   2c:	a7a50018 	sh	a1,24\(sp\)
+   30:	00000000 	nop
+   34:	a7a60020 	sh	a2,32\(sp\)
+   38:	afa20000 	sw	v0,0\(sp\)
+   3c:	afa30008 	sw	v1,8\(sp\)
+   40:	00000000 	nop
+   44:	afa40010 	sw	a0,16\(sp\)
+   48:	afa50018 	sw	a1,24\(sp\)
+   4c:	00000000 	nop
+   50:	afa60020 	sw	a2,32\(sp\)
+   54:	bba20000 	swr	v0,0\(sp\)
+   58:	bba30008 	swr	v1,8\(sp\)
+   5c:	00000000 	nop
+   60:	bba40010 	swr	a0,16\(sp\)
+   64:	bba50018 	swr	a1,24\(sp\)
+   68:	00000000 	nop
+   6c:	bba60020 	swr	a2,32\(sp\)
+   70:	aba20000 	swl	v0,0\(sp\)
+   74:	aba30008 	swl	v1,8\(sp\)
+   78:	00000000 	nop
+   7c:	aba40010 	swl	a0,16\(sp\)
+   80:	aba50018 	swl	a1,24\(sp\)
+   84:	00000000 	nop
+   88:	aba60020 	swl	a2,32\(sp\)
+   8c:	e3a20000 	sc	v0,0\(sp\)
+   90:	e3a30008 	sc	v1,8\(sp\)
+   94:	00000000 	nop
+   98:	e3a40010 	sc	a0,16\(sp\)
+   9c:	e3a50018 	sc	a1,24\(sp\)
+   a0:	00000000 	nop
+   a4:	e3a60020 	sc	a2,32\(sp\)
+   a8:	e7a20000 	swc1	\$f2,0\(sp\)
+   ac:	e7a30008 	swc1	\$f3,8\(sp\)
+   b0:	00000000 	nop
+   b4:	e7a40010 	swc1	\$f4,16\(sp\)
+   b8:	e7a50018 	swc1	\$f5,24\(sp\)
+   bc:	00000000 	nop
+   c0:	e7a60020 	swc1	\$f6,32\(sp\)
+   c4:	eba20000 	swc2	\$2,0\(sp\)
+   c8:	eba30008 	swc2	\$3,8\(sp\)
+   cc:	00000000 	nop
+   d0:	eba40010 	swc2	\$4,16\(sp\)
+   d4:	eba50018 	swc2	\$5,24\(sp\)
+   d8:	00000000 	nop
+   dc:	eba60020 	swc2	\$6,32\(sp\)
+   e0:	f7a20000 	sdc1	\$f2,0\(sp\)
+   e4:	f7a30008 	sdc1	\$f3,8\(sp\)
+   e8:	00000000 	nop
+   ec:	f7a40010 	sdc1	\$f4,16\(sp\)
+   f0:	f7a50018 	sdc1	\$f5,24\(sp\)
+   f4:	00000000 	nop
+   f8:	f7a60020 	sdc1	\$f6,32\(sp\)
+   fc:	fba20000 	sdc2	\$2,0\(sp\)
+  100:	fba30008 	sdc2	\$3,8\(sp\)
+  104:	00000000 	nop
+  108:	fba40010 	sdc2	\$4,16\(sp\)
+  10c:	fba50018 	sdc2	\$5,24\(sp\)
+  110:	00000000 	nop
+  114:	fba60020 	sdc2	\$6,32\(sp\)
+  118:	4d090008 	swxc1	\$f0,t1\(t0\)
+  11c:	00000000 	nop
+  120:	4d0a0808 	swxc1	\$f1,t2\(t0\)
+  124:	4d0b1008 	swxc1	\$f2,t3\(t0\)
+  128:	00000000 	nop
+  12c:	4d0c1808 	swxc1	\$f3,t4\(t0\)
+  130:	4d0d2008 	swxc1	\$f4,t5\(t0\)
+  134:	00000000 	nop
+  138:	4d090009 	sdxc1	\$f0,t1\(t0\)
+  13c:	4d0a1009 	sdxc1	\$f2,t2\(t0\)
+  140:	00000000 	nop
+  144:	4d0b2009 	sdxc1	\$f4,t3\(t0\)
+  148:	4d0c3009 	sdxc1	\$f6,t4\(t0\)
+  14c:	00000000 	nop
+  150:	4d0d4009 	sdxc1	\$f8,t5\(t0\)
+  154:	4d09000d 	suxc1	\$f0,t1\(t0\)
+  158:	00000000 	nop
+  15c:	4d0a100d 	suxc1	\$f2,t2\(t0\)
+  160:	4d0b200d 	suxc1	\$f4,t3\(t0\)
+  164:	00000000 	nop
+  168:	4d0c300d 	suxc1	\$f6,t4\(t0\)
+  16c:	4d0d400d 	suxc1	\$f8,t5\(t0\)
+ 	\.\.\.
Index: 24k-triple-stores-1.s
===================================================================
RCS file: 24k-triple-stores-1.s
diff -N 24k-triple-stores-1.s
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-1.s	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,85 ----
+ # integer stores
+ 
+ 	sb      $2,0($sp)
+ 	sb      $3,8($sp)
+ 	sb      $4,16($sp)
+ 	sb      $5,24($sp)
+ 	sb      $6,32($sp)
+ 
+ 	sh      $2,0($sp)
+ 	sh      $3,8($sp)
+ 	sh      $4,16($sp)
+ 	sh      $5,24($sp)
+ 	sh      $6,32($sp)
+ 
+ 	sw      $2,0($sp)
+ 	sw      $3,8($sp)
+ 	sw      $4,16($sp)
+ 	sw      $5,24($sp)
+ 	sw      $6,32($sp)
+ 
+ 	swr     $2,0($sp)
+ 	swr     $3,8($sp)
+ 	swr     $4,16($sp)
+ 	swr     $5,24($sp)
+ 	swr     $6,32($sp)
+ 
+ 	swl     $2,0($sp)
+ 	swl     $3,8($sp)
+ 	swl     $4,16($sp)
+ 	swl     $5,24($sp)
+ 	swl     $6,32($sp)
+ 
+ 	sc      $2,0($sp)
+ 	sc      $3,8($sp)
+ 	sc      $4,16($sp)
+ 	sc      $5,24($sp)
+ 	sc      $6,32($sp)
+ 
+ # floating point stores
+ 
+ 	swc1    $2,0($sp)
+ 	swc1    $3,8($sp)
+ 	swc1    $4,16($sp)
+ 	swc1    $5,24($sp)
+ 	swc1    $6,32($sp)
+ 
+ 	swc2    $2,0($sp)
+ 	swc2    $3,8($sp)
+ 	swc2    $4,16($sp)
+ 	swc2    $5,24($sp)
+ 	swc2    $6,32($sp)
+ 
+ 	sdc1    $2,0($sp)
+ 	sdc1    $3,8($sp)
+ 	sdc1    $4,16($sp)
+ 	sdc1    $5,24($sp)
+ 	sdc1    $6,32($sp)
+ 
+ 	sdc2    $2,0($sp)
+ 	sdc2    $3,8($sp)
+ 	sdc2    $4,16($sp)
+ 	sdc2    $5,24($sp)
+ 	sdc2    $6,32($sp)
+ 
+ 	swxc1   $f0,$9($8)
+ 	swxc1   $f1,$10($8)
+ 	swxc1   $f2,$11($8)
+ 	swxc1   $f3,$12($8)
+ 	swxc1   $f4,$13($8)
+ 
+ 	sdxc1   $f0,$9($8)
+ 	sdxc1   $f2,$10($8)
+ 	sdxc1   $f4,$11($8)
+ 	sdxc1   $f6,$12($8)
+ 	sdxc1   $f8,$13($8)
+ 
+ 	suxc1   $f0,$9($8)
+ 	suxc1   $f2,$10($8)
+ 	suxc1   $f4,$11($8)
+ 	suxc1   $f6,$12($8)
+ 	suxc1   $f8,$13($8)
+ 
+ # Force at least 8 (non-delay-slot) zero bytes, to make 'objdump' print ...
+ 	.align	2
+ 	.space	8
Index: 24k-triple-stores-2.d
===================================================================
RCS file: 24k-triple-stores-2.d
diff -N 24k-triple-stores-2.d
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-2.d	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,41 ----
+ #objdump: -dr -mmips:isa32r2
+ #as: -march=24kfx -mfix-24k
+ #name: 24K: triple store (range check)
+ 
+ .*: +file format .*mips.*
+ 
+ Disassembly of section .text:
+ 00000000 <.text>:
+    0:	a3a20000 	sb	v0,0\(sp\)
+    4:	a3a3000a 	sb	v1,10\(sp\)
+    8:	00000000 	nop
+    c:	a3a4001f 	sb	a0,31\(sp\)
+   10:	0000000d 	break
+   14:	a7a20000 	sh	v0,0\(sp\)
+   18:	a7a3fff0 	sh	v1,-16\(sp\)
+   1c:	a7a4ffe0 	sh	a0,-32\(sp\)
+   20:	0000000d 	break
+   24:	afa20000 	sw	v0,0\(sp\)
+   28:	afa3fff8 	sw	v1,-8\(sp\)
+   2c:	00000000 	nop
+   30:	afa40008 	sw	a0,8\(sp\)
+   34:	0000000d 	break
+   38:	bba20000 	swr	v0,0\(sp\)
+   3c:	bba3fff0 	swr	v1,-16\(sp\)
+   40:	bba40010 	swr	a0,16\(sp\)
+   44:	0000000d 	break
+   48:	aba20000 	swl	v0,0\(sp\)
+   4c:	aba30008 	swl	v1,8\(sp\)
+   50:	00000000 	nop
+   54:	aba40010 	swl	a0,16\(sp\)
+   58:	aba50018 	swl	a1,24\(sp\)
+   5c:	00000000 	nop
+   60:	aba60000 	swl	a2,0\(sp\)
+   64:	0000000d 	break
+   68:	e3a20020 	sc	v0,32\(sp\)
+   6c:	e3a30008 	sc	v1,8\(sp\)
+   70:	e3a4fff8 	sc	a0,-8\(sp\)
+   74:	00000000 	nop
+   78:	e3a50000 	sc	a1,0\(sp\)
+   7c:	e3a60020 	sc	a2,32\(sp\)
+ 	\.\.\.
Index: 24k-triple-stores-2.s
===================================================================
RCS file: 24k-triple-stores-2.s
diff -N 24k-triple-stores-2.s
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-2.s	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,38 ----
+ # Check for range
+ 
+ 	sb      $2,0($sp)
+ 	sb      $3,10($sp)
+ 	sb      $4,31($sp)
+ 	break
+ 
+ 	sh      $2,0($sp)
+ 	sh      $3,-16($sp)
+ 	sh      $4,-32($sp)
+ 	break
+ 
+ 	sw      $2,0($sp)
+ 	sw      $3,-8($sp)
+ 	sw      $4,8($sp)
+ 	break
+ 
+ 	swr      $2,0($sp)
+ 	swr      $3,-16($sp)
+ 	swr      $4,16($sp)
+ 	break
+ 
+ 	swl      $2,0($sp)
+ 	swl      $3,8($sp)
+ 	swl      $4,16($sp)
+ 	swl      $5,24($sp)
+ 	swl      $6,0($sp)
+ 	break
+ 
+ 	sc      $2,32($sp)
+ 	sc      $3,8($sp)
+ 	sc      $4,-8($sp)
+ 	sc      $5,0($sp)
+ 	sc      $6,32($sp)
+ 
+ # Force at least 8 (non-delay-slot) zero bytes, to make 'objdump' print ...
+ 	.align	2
+ 	.space	8
Index: 24k-triple-stores-3.d
===================================================================
RCS file: 24k-triple-stores-3.d
diff -N 24k-triple-stores-3.d
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-3.d	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,85 ----
+ #objdump: -dr -mmips:isa32r2
+ #as: -march=24kfx -mfix-24k
+ #name: 24K: triple store (double-word check)
+ 
+ .*: +file format .*mips.*
+ 
+ Disassembly of section .text:
+ 00000000 <.text>:
+    0:	a3a2000b 	sb	v0,11\(sp\)
+    4:	a3a3000b 	sb	v1,11\(sp\)
+    8:	a3a40004 	sb	a0,4\(sp\)
+    c:	0000000d 	break
+   10:	a3a20000 	sb	v0,0\(sp\)
+   14:	a3a3000b 	sb	v1,11\(sp\)
+   18:	a3a40005 	sb	a0,5\(sp\)
+   1c:	0000000d 	break
+   20:	a3a20007 	sb	v0,7\(sp\)
+   24:	a3a3000b 	sb	v1,11\(sp\)
+   28:	00000000 	nop
+   2c:	a3a40010 	sb	a0,16\(sp\)
+   30:	0000000d 	break
+   34:	a1020000 	sb	v0,0\(t0\)
+   38:	a1030008 	sb	v1,8\(t0\)
+   3c:	00000000 	nop
+   40:	a1040009 	sb	a0,9\(t0\)
+   44:	0000000d 	break
+   48:	a7a20000 	sh	v0,0\(sp\)
+   4c:	a7a3ffe1 	sh	v1,-31\(sp\)
+   50:	a7a4ffe2 	sh	a0,-30\(sp\)
+   54:	0000000d 	break
+   58:	a7a20006 	sh	v0,6\(sp\)
+   5c:	a7a30008 	sh	v1,8\(sp\)
+   60:	00000000 	nop
+   64:	a7a40010 	sh	a0,16\(sp\)
+   68:	0000000d 	break
+   6c:	a5020001 	sh	v0,1\(t0\)
+   70:	a5030003 	sh	v1,3\(t0\)
+   74:	00000000 	nop
+   78:	a504000b 	sh	a0,11\(t0\)
+   7c:	0000000d 	break
+   80:	afa20008 	sw	v0,8\(sp\)
+   84:	afa3fff8 	sw	v1,-8\(sp\)
+   88:	afa40008 	sw	a0,8\(sp\)
+   8c:	0000000d 	break
+   90:	afa20004 	sw	v0,4\(sp\)
+   94:	afa30008 	sw	v1,8\(sp\)
+   98:	00000000 	nop
+   9c:	afa40010 	sw	a0,16\(sp\)
+   a0:	0000000d 	break
+   a4:	ad020003 	sw	v0,3\(t0\)
+   a8:	ad030007 	sw	v1,7\(t0\)
+   ac:	00000000 	nop
+   b0:	ad04000f 	sw	a0,15\(t0\)
+   b4:	0000000d 	break
+   b8:	aba20004 	swl	v0,4\(sp\)
+   bc:	aba3000a 	swl	v1,10\(sp\)
+   c0:	00000000 	nop
+   c4:	aba40011 	swl	a0,17\(sp\)
+   c8:	0000000d 	break
+   cc:	aba20007 	swl	v0,7\(sp\)
+   d0:	aba3000c 	swl	v1,12\(sp\)
+   d4:	00000000 	nop
+   d8:	aba40010 	swl	a0,16\(sp\)
+   dc:	0000000d 	break
+   e0:	aba20000 	swl	v0,0\(sp\)
+   e4:	aba3000c 	swl	v1,12\(sp\)
+   e8:	00000000 	nop
+   ec:	aba40017 	swl	a0,23\(sp\)
+   f0:	0000000d 	break
+   f4:	a9020003 	swl	v0,3\(t0\)
+   f8:	a9030008 	swl	v1,8\(t0\)
+   fc:	00000000 	nop
+  100:	a904000c 	swl	a0,12\(t0\)
+  104:	0000000d 	break
+  108:	aba20000 	swl	v0,0\(sp\)
+  10c:	aba3000c 	swl	v1,12\(sp\)
+  110:	00000000 	nop
+  114:	bba40017 	swr	a0,23\(sp\)
+  118:	0000000d 	break
+  11c:	a9020005 	swl	v0,5\(t0\)
+  120:	a9030011 	swl	v1,17\(t0\)
+  124:	00000000 	nop
+  128:	b904001c 	swr	a0,28\(t0\)
+  12c:	0000000d 	break
+ 	\.\.\.
Index: 24k-triple-stores-3.s
===================================================================
RCS file: 24k-triple-stores-3.s
diff -N 24k-triple-stores-3.s
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-3.s	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,99 ----
+ # Assume to be on the same line (within 32bytes)
+ # Check for individual different double words
+ 
+ 	# safe
+ 	sb      $2,11($sp)
+ 	sb      $3,11($sp)
+ 	sb      $4,4($sp)
+ 	break
+ 
+ 	# safe
+ 	sb      $2,0($sp)
+ 	sb      $3,11($sp)
+ 	sb      $4,5($sp)
+ 	break
+ 	
+ 	# edge case
+ 	sb      $2,7($sp)
+ 	sb      $3,11($sp)
+ 	sb      $4,16($sp)
+ 	break
+ 
+ 	# edge case (unaligned)
+ 	sb      $2,0($8)
+ 	sb      $3,8($8)
+ 	sb      $4,9($8)	
+ 	break
+ 
+ 	sh      $2,0($sp)
+ 	sh      $3,-31($sp)
+ 	sh      $4,-30($sp)
+ 	break
+ 
+ 	# edge case
+ 	sh      $2,6($sp)
+ 	sh      $3,8($sp)
+ 	sh      $4,16($sp)
+ 	break
+ 
+ 	# edge case (unaligned)
+ 	sh      $2,1($8)
+ 	sh      $3,3($8)
+ 	sh      $4,11($8)	
+ 	break
+ 
+ 	sw      $2,8($sp)
+ 	sw      $3,-8($sp)
+ 	sw      $4,8($sp)
+ 	break
+ 
+ 	# edge case
+ 	sw      $2,4($sp)
+ 	sw      $3,8($sp)
+ 	sw      $4,16($sp)
+ 	break
+ 
+ 	# edge case (unaligned)
+ 	sw      $2,3($8)
+ 	sw      $3,7($8)
+ 	sw      $4,15($8)	
+ 	break
+ 
+ 	swl      $2,4($sp)
+ 	swl      $3,10($sp)
+ 	swl      $4,17($sp)
+ 	break
+ 
+ 	# edge case
+ 	swl      $2,7($sp)
+ 	swl      $3,12($sp)
+ 	swl      $4,16($sp)
+ 	break
+ 
+ 	# edge case
+ 	swl      $2,0($sp)
+ 	swl      $3,12($sp)
+ 	swl      $4,23($sp)
+ 	break
+ 
+ 	# edge case (unaligned)
+ 	swl      $2,3($8)
+ 	swl      $3,8($8)
+ 	swl      $4,12($8)
+ 	break
+ 
+ 	# mix swl & swr
+ 	swl      $2,0($sp)
+ 	swl      $3,12($sp)
+ 	swr      $4,23($sp)
+ 	break
+ 
+ 	# mix swl & swr
+ 	swl      $2,5($8)
+ 	swl      $3,17($8)
+ 	swr      $4,28($8)
+ 	break		
+ 
+ # Force at least 8 (non-delay-slot) zero bytes, to make 'objdump' print ...
+ 	.align	2
+ 	.space	8
Index: 24k-triple-stores-5.d
===================================================================
RCS file: 24k-triple-stores-5.d
diff -N 24k-triple-stores-5.d
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-5.d	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,36 ----
+ #objdump: -dr -mmips:isa32r2
+ #as: -march=24kfx -mfix-24k
+ #name: 24K: triple store (Mix byte/half/word size check)
+ 
+ .*: +file format .*mips.*
+ 
+ Disassembly of section .text:
+ 00000000 <.text>:
+    0:	a5020007 	sh	v0,7\(t0\)
+    4:	a1030000 	sb	v1,0\(t0\)
+    8:	ad040001 	sw	a0,1\(t0\)
+    c:	0000000d 	break
+   10:	a5020016 	sh	v0,22\(t0\)
+   14:	a103000f 	sb	v1,15\(t0\)
+   18:	00000000 	nop
+   1c:	ad040018 	sw	a0,24\(t0\)
+   20:	0000000d 	break
+   24:	a5020000 	sh	v0,0\(t0\)
+   28:	a1030009 	sb	v1,9\(t0\)
+   2c:	ad040002 	sw	a0,2\(t0\)
+   30:	0000000d 	break
+   34:	a5020006 	sh	v0,6\(t0\)
+   38:	a1030010 	sb	v1,16\(t0\)
+   3c:	00000000 	nop
+   40:	ad04000c 	sw	a0,12\(t0\)
+   44:	0000000d 	break
+   48:	a502000a 	sh	v0,10\(t0\)
+   4c:	a103000f 	sb	v1,15\(t0\)
+   50:	ad040004 	sw	a0,4\(t0\)
+   54:	0000000d 	break
+   58:	a502000a 	sh	v0,10\(t0\)
+   5c:	a1030010 	sb	v1,16\(t0\)
+   60:	00000000 	nop
+   64:	ad040004 	sw	a0,4\(t0\)
+   68:	0000000d 	break
+ 	\.\.\.
Index: 24k-triple-stores-5.s
===================================================================
RCS file: 24k-triple-stores-5.s
diff -N 24k-triple-stores-5.s
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-5.s	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,41 ----
+ # Mix byte/half/word sizes with arbitrary base register.
+ 
+ 	# safe
+ 	sh      $2,7($8)
+ 	sb      $3,0($8)
+ 	sw      $4,1($8)
+ 	break
+ 
+ 	# nop
+ 	sh      $2,22($8)
+ 	sb      $3,15($8)
+ 	sw      $4,24($8)
+ 	break
+ 
+ 	# safe
+ 	sh      $2,0($8)
+ 	sb      $3,9($8)
+ 	sw      $4,2($8)
+ 	break
+ 
+ 	# nop
+ 	sh      $2,6($8)
+ 	sb      $3,16($8)
+ 	sw      $4,12($8)
+ 	break
+ 
+ 	# safe
+ 	sh      $2,10($8)
+ 	sb      $3,15($8)
+ 	sw      $4,4($8)
+ 	break
+ 
+ 	# nop
+ 	sh      $2,10($8)
+ 	sb      $3,16($8)
+ 	sw      $4,4($8)
+ 	break
+ 
+ # Force at least 8 (non-delay-slot) zero bytes, to make 'objdump' print ...
+ 	.align	2
+ 	.space	8
Index: 24k-triple-stores-6.d
===================================================================
RCS file: 24k-triple-stores-6.d
diff -N 24k-triple-stores-6.d
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-6.d	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,36 ----
+ #objdump: -dr -mmips:isa32r2
+ #as: -march=24kfx -mfix-24k
+ #name: 24K: triple store (store macro check)
+ 
+ .*: +file format .*mips.*
+ 
+ Disassembly of section .text:
+ 00000000 <.text>:
+    0:	abbf0050 	swl	ra,80\(sp\)
+    4:	bbbf0053 	swr	ra,83\(sp\)
+    8:	abb30058 	swl	s3,88\(sp\)
+    c:	bbb3005b 	swr	s3,91\(sp\)
+   10:	abbe0060 	swl	s8,96\(sp\)
+   14:	bbbe0063 	swr	s8,99\(sp\)
+   18:	0000000d 	break
+   1c:	a3bf0051 	sb	ra,81\(sp\)
+   20:	001f0a02 	srl	at,ra,0x8
+   24:	a3a10050 	sb	at,80\(sp\)
+   28:	a3b30059 	sb	s3,89\(sp\)
+   2c:	00130a02 	srl	at,s3,0x8
+   30:	a3a10058 	sb	at,88\(sp\)
+   34:	a3be0061 	sb	s8,97\(sp\)
+   38:	001e0a02 	srl	at,s8,0x8
+   3c:	a3a10060 	sb	at,96\(sp\)
+   40:	0000000d 	break
+   44:	e7a00050 	swc1	\$f0,80\(sp\)
+   48:	e7a20058 	swc1	\$f2,88\(sp\)
+   4c:	00000000 	nop
+   50:	e7a40060 	swc1	\$f4,96\(sp\)
+   54:	0000000d 	break
+   58:	f7a00050 	sdc1	\$f0,80\(sp\)
+   5c:	f7a20058 	sdc1	\$f2,88\(sp\)
+   60:	00000000 	nop
+   64:	f7a40060 	sdc1	\$f4,96\(sp\)
+   68:	0000000d 	break
+ 	\.\.\.
Index: 24k-triple-stores-6.s
===================================================================
RCS file: 24k-triple-stores-6.s
diff -N 24k-triple-stores-6.s
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-6.s	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,27 ----
+ 	# Store macros
+ 
+ 	usw      $ra,80($sp)
+ 	usw      $s3,88($sp)
+ 	usw      $s8,96($sp)
+ 	break
+ 
+ 	ush      $ra,80($sp)
+ 	ush      $s3,88($sp)
+ 	ush      $s8,96($sp)
+ 	break
+ 
+ 	# swc1 macro
+ 	s.s      $f0,80($sp)
+ 	s.s      $f2,88($sp)
+ 	s.s      $f4,96($sp)
+ 	break
+ 
+         # sdc1 macro
+ 	s.d      $f0,80($sp)
+ 	s.d      $f2,88($sp)
+ 	s.d      $f4,96($sp)
+ break
+ 
+ # Force at least 8 (non-delay-slot) zero bytes, to make 'objdump' print ...
+ 	.align	2
+ 	.space	8
Index: 24k-triple-stores-7.d
===================================================================
RCS file: 24k-triple-stores-7.d
diff -N 24k-triple-stores-7.d
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-7.d	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,80 ----
+ #objdump: -dr -mmips:isa32r2
+ #as: -march=24kfx -mfix-24k
+ #name: 24K: triple store (extended range check)
+ 
+ .*: +file format .*mips.*
+ 
+ Disassembly of section .text:
+ 00000000 <.text>:
+    0:	a1130004 	sb	s3,4\(t0\)
+    4:	ad130008 	sw	s3,8\(t0\)
+    8:	a113000f 	sb	s3,15\(t0\)
+    c:	0000000d 	break
+   10:	a1130003 	sb	s3,3\(t0\)
+   14:	ad130008 	sw	s3,8\(t0\)
+   18:	00000000 	nop
+   1c:	a113000f 	sb	s3,15\(t0\)
+   20:	0000000d 	break
+   24:	ad13001c 	sw	s3,28\(t0\)
+   28:	ad130008 	sw	s3,8\(t0\)
+   2c:	a113001f 	sb	s3,31\(t0\)
+   30:	0000000d 	break
+   34:	a1130005 	sb	s3,5\(t0\)
+   38:	ad130009 	sw	s3,9\(t0\)
+   3c:	a1130010 	sb	s3,16\(t0\)
+   40:	0000000d 	break
+   44:	a1130004 	sb	s3,4\(t0\)
+   48:	ad130009 	sw	s3,9\(t0\)
+   4c:	00000000 	nop
+   50:	a1130010 	sb	s3,16\(t0\)
+   54:	0000000d 	break
+   58:	a1130006 	sb	s3,6\(t0\)
+   5c:	a5130008 	sh	s3,8\(t0\)
+   60:	a113000f 	sb	s3,15\(t0\)
+   64:	0000000d 	break
+   68:	a1130005 	sb	s3,5\(t0\)
+   6c:	a5130008 	sh	s3,8\(t0\)
+   70:	00000000 	nop
+   74:	a113000f 	sb	s3,15\(t0\)
+   78:	0000000d 	break
+   7c:	a513001e 	sh	s3,30\(t0\)
+   80:	a5130008 	sh	s3,8\(t0\)
+   84:	a113001f 	sb	s3,31\(t0\)
+   88:	0000000d 	break
+   8c:	a1130007 	sb	s3,7\(t0\)
+   90:	a5130009 	sh	s3,9\(t0\)
+   94:	a1130010 	sb	s3,16\(t0\)
+   98:	0000000d 	break
+   9c:	a1130006 	sb	s3,6\(t0\)
+   a0:	a5130009 	sh	s3,9\(t0\)
+   a4:	00000000 	nop
+   a8:	a1130010 	sb	s3,16\(t0\)
+   ac:	0000000d 	break
+   b0:	a1130007 	sb	s3,7\(t0\)
+   b4:	f5000008 	sdc1	\$f0,8\(t0\)
+   b8:	a113000f 	sb	s3,15\(t0\)
+   bc:	0000000d 	break
+   c0:	a1130007 	sb	s3,7\(t0\)
+   c4:	f5000008 	sdc1	\$f0,8\(t0\)
+   c8:	00000000 	nop
+   cc:	a1130010 	sb	s3,16\(t0\)
+   d0:	0000000d 	break
+   d4:	a1130010 	sb	s3,16\(t0\)
+   d8:	f5000008 	sdc1	\$f0,8\(t0\)
+   dc:	a1130017 	sb	s3,23\(t0\)
+   e0:	0000000d 	break
+   e4:	a1130010 	sb	s3,16\(t0\)
+   e8:	f5000008 	sdc1	\$f0,8\(t0\)
+   ec:	00000000 	nop
+   f0:	a1130018 	sb	s3,24\(t0\)
+   f4:	0000000d 	break
+   f8:	a1130008 	sb	s3,8\(t0\)
+   fc:	f5000009 	sdc1	\$f0,9\(t0\)
+  100:	a1130010 	sb	s3,16\(t0\)
+  104:	0000000d 	break
+  108:	a113fffd 	sb	s3,-3\(t0\)
+  10c:	f500fffe 	sdc1	\$f0,-2\(t0\)
+  110:	00000000 	nop
+  114:	a1130006 	sb	s3,6\(t0\)
+  118:	0000000d 	break
+ 	\.\.\.
Index: 24k-triple-stores-7.s
===================================================================
RCS file: 24k-triple-stores-7.s
diff -N 24k-triple-stores-7.s
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-7.s	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,92 ----
+ 	# range check
+ 	sb       $s3,4($t0)
+ 	sw       $s3,8($t0)
+ 	sb       $s3,15($t0)
+ 	break
+ 
+ 	sb       $s3,3($t0)
+ 	sw       $s3,8($t0)
+ 	sb       $s3,15($t0)
+ 	break
+ 
+ 	# overlap (same word)
+ 	sw       $s3,28($t0)
+ 	sw       $s3,8($t0)
+ 	sb       $s3,31($t0)
+ 	break
+ 
+ 	# unaligned
+ 	sb       $s3,5($t0)
+ 	sw       $s3,9($t0)
+ 	sb       $s3,16($t0)
+ 	break
+ 
+ 	sb       $s3,4($t0)
+ 	sw       $s3,9($t0)
+ 	sb       $s3,16($t0)
+ 	break
+ 	
+ 	# range check
+ 	sb       $s3,6($t0)
+ 	sh       $s3,8($t0)
+ 	sb       $s3,15($t0)
+ 	break
+ 
+ 	sb       $s3,5($t0)
+ 	sh       $s3,8($t0)
+ 	sb       $s3,15($t0)
+ 	break
+ 
+ 	# overlap (same hword)
+ 	sh       $s3,30($t0)
+ 	sh       $s3,8($t0)
+ 	sb       $s3,31($t0)
+ 	break
+ 
+ 	# unaligned
+ 	sb       $s3,7($t0)
+ 	sh       $s3,9($t0)
+ 	sb       $s3,16($t0)
+ 	break
+ 
+ 	sb       $s3,6($t0)
+ 	sh       $s3,9($t0)
+ 	sb       $s3,16($t0)
+ 	break
+ 	
+ 	# range check
+ 	sb       $s3,7($t0)
+ 	sdc1     $f0,8($t0)
+ 	sb       $s3,15($t0)
+ 	break
+ 
+ 	sb       $s3,7($t0)
+ 	sdc1     $f0,8($t0)
+ 	sb       $s3,16($t0)
+ 	break
+ 
+ 	# overlap (same dword)
+ 	sb       $s3,16($t0)
+ 	sdc1     $f0,8($t0)
+ 	sb       $s3,23($t0)
+ 	break
+ 
+ 	sb       $s3,16($t0)
+ 	sdc1     $f0,8($t0)
+ 	sb       $s3,24($t0)
+ 	break
+ 
+ 	# unaligned
+ 	sb       $s3,8($t0)
+ 	sdc1     $f0,9($t0)
+ 	sb       $s3,16($t0)
+ 	break
+ 
+ 	sb       $s3,-3($t0)
+ 	sdc1     $f0,-2($t0)
+ 	sb       $s3,6($t0)
+ 	break
+ 
+ # Force at least 8 (non-delay-slot) zero bytes, to make 'objdump' print ...
+ 	.align	2
+ 	.space	8
Index: 24k-triple-stores-8.d
===================================================================
RCS file: 24k-triple-stores-8.d
diff -N 24k-triple-stores-8.d
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-8.d	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,61 ----
+ #objdump: -dr -mmips:isa32r2
+ #as: -march=24kfx -mfix-24k
+ #name: 24K: triple store (Range check >= 24)
+ 
+ .*: +file format .*mips.*
+ 
+ Disassembly of section .text:
+ 00000000 <.text>:
+    0:	a1130000 	sb	s3,0\(t0\)
+    4:	a1130001 	sb	s3,1\(t0\)
+    8:	00000000 	nop
+    c:	a1130018 	sb	s3,24\(t0\)
+   10:	0000000d 	break
+   14:	a1130000 	sb	s3,0\(t0\)
+   18:	a1130001 	sb	s3,1\(t0\)
+   1c:	a1130019 	sb	s3,25\(t0\)
+   20:	0000000d 	break
+   24:	a1130001 	sb	s3,1\(t0\)
+   28:	a1130019 	sb	s3,25\(t0\)
+   2c:	a113001a 	sb	s3,26\(t0\)
+   30:	0000000d 	break
+   34:	a1130000 	sb	s3,0\(t0\)
+   38:	a5130003 	sh	s3,3\(t0\)
+   3c:	00000000 	nop
+   40:	a113001a 	sb	s3,26\(t0\)
+   44:	0000000d 	break
+   48:	a5130000 	sh	s3,0\(t0\)
+   4c:	a1130003 	sb	s3,3\(t0\)
+   50:	a113001a 	sb	s3,26\(t0\)
+   54:	0000000d 	break
+   58:	a1130023 	sb	s3,35\(t0\)
+   5c:	a5130020 	sh	s3,32\(t0\)
+   60:	a1130009 	sb	s3,9\(t0\)
+   64:	0000000d 	break
+   68:	a1130001 	sb	s3,1\(t0\)
+   6c:	a5130019 	sh	s3,25\(t0\)
+   70:	a113001b 	sb	s3,27\(t0\)
+   74:	0000000d 	break
+   78:	a1130000 	sb	s3,0\(t0\)
+   7c:	ad130007 	sw	s3,7\(t0\)
+   80:	00000000 	nop
+   84:	a113001c 	sb	s3,28\(t0\)
+   88:	0000000d 	break
+   8c:	a1130000 	sb	s3,0\(t0\)
+   90:	a1130007 	sb	s3,7\(t0\)
+   94:	ad13001c 	sw	s3,28\(t0\)
+   98:	0000000d 	break
+   9c:	a1130040 	sb	s3,64\(t0\)
+   a0:	ad13003b 	sw	s3,59\(t0\)
+   a4:	00000000 	nop
+   a8:	ad130025 	sw	s3,37\(t0\)
+   ac:	0000000d 	break
+   b0:	ad130040 	sw	s3,64\(t0\)
+   b4:	a113003d 	sb	s3,61\(t0\)
+   b8:	a1130027 	sb	s3,39\(t0\)
+   bc:	0000000d 	break
+   c0:	a1130001 	sb	s3,1\(t0\)
+   c4:	ad130019 	sw	s3,25\(t0\)
+   c8:	a113001d 	sb	s3,29\(t0\)
+   cc:	0000000d 	break
+ 	\.\.\.
Index: 24k-triple-stores-8.s
===================================================================
RCS file: 24k-triple-stores-8.s
diff -N 24k-triple-stores-8.s
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-8.s	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,56 ----
+ 	# Range check after alignment between adjacent offsets >= 24 ??
+ 	
+ 	sb       $s3,0($t0)
+ 	sb       $s3,1($t0)
+ 	sb       $s3,24($t0)
+ 	break
+ 	sb       $s3,0($t0)
+ 	sb       $s3,1($t0)
+ 	sb       $s3,25($t0)
+ 	break
+ 	sb       $s3,1($t0)
+ 	sb       $s3,25($t0)
+ 	sb       $s3,26($t0)
+ 	break
+ 	
+ 	sb       $s3,0($t0)
+ 	sh       $s3,3($t0)
+ 	sb       $s3,26($t0)
+ 	break
+ 	sh       $s3,0($t0)
+ 	sb       $s3,3($t0)
+ 	sb       $s3,26($t0)
+ 	break
+ 	sb       $s3,35($t0)
+ 	sh       $s3,32($t0)
+ 	sb       $s3,9($t0)
+ 	break
+ 	sb       $s3,1($t0)
+ 	sh       $s3,25($t0)
+ 	sb       $s3,27($t0)
+ 	break
+ 	
+ 	sb       $s3,0($t0)
+ 	sw       $s3,7($t0)
+ 	sb       $s3,28($t0)
+ 	break
+ 	sb       $s3,0($t0)
+ 	sb       $s3,7($t0)
+ 	sw       $s3,28($t0)
+ 	break
+ 	sb       $s3,64($t0)
+ 	sw       $s3,59($t0)
+ 	sw       $s3,37($t0)
+ 	break
+ 	sw       $s3,64($t0)
+ 	sb       $s3,61($t0)
+ 	sb       $s3,39($t0)
+ 	break
+ 	sb       $s3,1($t0)
+ 	sw       $s3,25($t0)
+ 	sb       $s3,29($t0)
+ 	break
+ 
+ # Force at least 8 (non-delay-slot) zero bytes, to make 'objdump' print ...
+ 	.align	2
+ 	.space	8
Index: 24k-triple-stores-9.d
===================================================================
RCS file: 24k-triple-stores-9.d
diff -N 24k-triple-stores-9.d
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-9.d	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,63 ----
+ #objdump: -dr -mmips:isa32r2
+ #as: -march=24kfx -mfix-24k
+ #name: 24K: triple store (Range check >= 32)
+ 
+ .*: +file format .*mips.*
+ 
+ Disassembly of section .text:
+ 00000000 <.text>:
+    0:	a113000a 	sb	s3,10\(t0\)
+    4:	a5130001 	sh	s3,1\(t0\)
+    8:	00000000 	nop
+    c:	a1130020 	sb	s3,32\(t0\)
+   10:	0000000d 	break
+   14:	a113000a 	sb	s3,10\(t0\)
+   18:	a1130001 	sb	s3,1\(t0\)
+   1c:	a5130020 	sh	s3,32\(t0\)
+   20:	0000000d 	break
+   24:	a1130021 	sb	s3,33\(t0\)
+   28:	a5130037 	sh	s3,55\(t0\)
+   2c:	00000000 	nop
+   30:	a1130040 	sb	s3,64\(t0\)
+   34:	0000000d 	break
+   38:	a1130021 	sb	s3,33\(t0\)
+   3c:	a1130037 	sb	s3,55\(t0\)
+   40:	a5130040 	sh	s3,64\(t0\)
+   44:	0000000d 	break
+   48:	a113000c 	sb	s3,12\(t0\)
+   4c:	ad130001 	sw	s3,1\(t0\)
+   50:	00000000 	nop
+   54:	a1130020 	sb	s3,32\(t0\)
+   58:	0000000d 	break
+   5c:	a113000c 	sb	s3,12\(t0\)
+   60:	a1130001 	sb	s3,1\(t0\)
+   64:	ad130020 	sw	s3,32\(t0\)
+   68:	0000000d 	break
+   6c:	a1130023 	sb	s3,35\(t0\)
+   70:	ad130037 	sw	s3,55\(t0\)
+   74:	00000000 	nop
+   78:	a1130040 	sb	s3,64\(t0\)
+   7c:	0000000d 	break
+   80:	a1130023 	sb	s3,35\(t0\)
+   84:	a1130037 	sb	s3,55\(t0\)
+   88:	ad130040 	sw	s3,64\(t0\)
+   8c:	0000000d 	break
+   90:	a1130010 	sb	s3,16\(t0\)
+   94:	f5000001 	sdc1	\$f0,1\(t0\)
+   98:	00000000 	nop
+   9c:	a1130020 	sb	s3,32\(t0\)
+   a0:	0000000d 	break
+   a4:	a1130010 	sb	s3,16\(t0\)
+   a8:	a1130001 	sb	s3,1\(t0\)
+   ac:	f5000020 	sdc1	\$f0,32\(t0\)
+   b0:	0000000d 	break
+   b4:	a1130027 	sb	s3,39\(t0\)
+   b8:	f5000037 	sdc1	\$f0,55\(t0\)
+   bc:	00000000 	nop
+   c0:	a1130040 	sb	s3,64\(t0\)
+   c4:	0000000d 	break
+   c8:	a1130027 	sb	s3,39\(t0\)
+   cc:	a1130037 	sb	s3,55\(t0\)
+   d0:	f5000040 	sdc1	\$f0,64\(t0\)
+   d4:	0000000d 	break
+ 	\.\.\.
Index: 24k-triple-stores-9.s
===================================================================
RCS file: 24k-triple-stores-9.s
diff -N 24k-triple-stores-9.s
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 24k-triple-stores-9.s	13 Apr 2011 22:33:34 -0000
***************
*** 0 ****
--- 1,56 ----
+ 	# Range check for safe case after alignment its range >= 32.
+ 	
+ 	sb       $s3,10($t0)
+ 	sh       $s3,1($t0)
+ 	sb       $s3,32($t0)
+ 	break
+ 	sb       $s3,10($t0)
+ 	sb       $s3,1($t0)
+ 	sh       $s3,32($t0)
+ 	break
+ 	sb       $s3,33($t0)
+ 	sh       $s3,55($t0)
+ 	sb       $s3,64($t0)
+ 	break
+ 	sb       $s3,33($t0)
+ 	sb       $s3,55($t0)
+ 	sh       $s3,64($t0)
+ 	break
+ 	
+ 	sb       $s3,12($t0)
+ 	sw       $s3,1($t0)
+ 	sb       $s3,32($t0)
+ 	break
+ 	sb       $s3,12($t0)
+ 	sb       $s3,1($t0)
+ 	sw       $s3,32($t0)
+ 	break
+ 	sb       $s3,35($t0)
+ 	sw       $s3,55($t0)
+ 	sb       $s3,64($t0)
+ 	break
+ 	sb       $s3,35($t0)
+ 	sb       $s3,55($t0)
+ 	sw       $s3,64($t0)
+ 	break
+ 	
+ 	sb       $s3,16($t0)
+ 	sdc1     $f0,1($t0)
+ 	sb       $s3,32($t0)
+ 	break
+ 	sb       $s3,16($t0)
+ 	sb       $s3,1($t0)
+ 	sdc1     $f0,32($t0)
+ 	break
+ 	sb       $s3,39($t0)
+ 	sdc1     $f0,55($t0)
+ 	sb       $s3,64($t0)
+ 	break
+ 	sb       $s3,39($t0)
+ 	sb       $s3,55($t0)
+ 	sdc1     $f0,64($t0)
+ 	break
+ 
+ # Force at least 8 (non-delay-slot) zero bytes, to make 'objdump' print ...
+ 	.align	2
+ 	.space	8
Index: mips.exp
===================================================================
RCS file: /cvs/src/src/gas/testsuite/gas/mips/mips.exp,v
retrieving revision 1.182
diff -p -r1.182 mips.exp
*** mips.exp	28 Feb 2011 16:34:38 -0000	1.182
--- mips.exp	13 Apr 2011 22:33:34 -0000
*************** if { [istarget mips*-*-vxworks*] } {
*** 479,484 ****
--- 479,493 ----
      run_dump_test "eret-1"
      run_dump_test "eret-2"
      run_dump_test "eret-3"
+     run_dump_test "24k-branch-delay-1"
+     run_dump_test "24k-triple-stores-1"
+     run_dump_test "24k-triple-stores-2"
+     run_dump_test "24k-triple-stores-3"
+     run_dump_test "24k-triple-stores-5"
+     run_dump_test "24k-triple-stores-6"
+     run_dump_test "24k-triple-stores-7"
+     run_dump_test "24k-triple-stores-8"
+     run_dump_test "24k-triple-stores-9"
  
      if $elf {
  	run_dump_test_arches "jal-svr4pic" \
Index: tc-mips.c
===================================================================
RCS file: /cvs/src/src/gas/config/tc-mips.c,v
retrieving revision 1.461
diff -u -r1.461 tc-mips.c
--- tc-mips.c	28 Feb 2011 16:26:45 -0000	1.461
+++ tc-mips.c	13 Apr 2011 22:42:59 -0000
@@ -2680,6 +2680,212 @@
   return 0;
 }
 
+#define BASE_REG_EQ(INSN1, INSN2) 	\
+  ((((INSN1) >> OP_SH_RS) & OP_MASK_RS) \
+      == (((INSN2) >> OP_SH_RS) & OP_MASK_RS))
+
+/* Return the offset of store OPCODE (entry MO); swxc1/suxc1/sdxc1 yield 0.  */
+static int
+fix_24k_offset (unsigned long opcode, const struct mips_opcode *mo)
+{
+  if (!strcmp (mo->name, "swxc1")
+      || !strcmp (mo->name, "suxc1")
+      || !strcmp (mo->name, "sdxc1"))
+    return 0;
+  /* Otherwise extract the immediate field as-is (no sign extension here).  */
+  return (opcode >> OP_SH_IMMEDIATE) & OP_MASK_IMMEDIATE;
+}
+
+/* Return the minimum alignment, in bytes, for the store described by MO.  */
+static int
+fix_24k_align_to (const struct mips_opcode *mo)
+{
+  /* The store is classified purely by its mnemonic.  */
+  if (!strcmp (mo->name, "sh"))
+    return 2;
+
+  if (!strcmp (mo->name, "swc1")
+      || !strcmp (mo->name, "swc2")
+      || !strcmp (mo->name, "sw")
+      || !strcmp (mo->name, "sc"))
+    return 4;
+
+  if (!strcmp (mo->name, "sdc1")
+       || !strcmp (mo->name, "sdc2"))
+    return 8;
+
+  /* Any other mnemonic (e.g. sb, swl, swr) gets byte alignment.  */
+  return 1;
+}
+
+/* 24K Errata: Lost Data on Stores During Refill. 
+  
+  Problem: The FSB (fetch store buffer) acts as an intermediate buffer
+  for the data cache refills and store data. The following describes
+  the scenario where the store data could be lost.
+  
+  * A data cache miss, due to either a load or a store, causing fill
+    data to be supplied by the memory subsystem
+  * The first three doublewords of fill data are returned and written
+    into the cache
+  * A sequence of four stores occurs in consecutive cycles around the
+    final doubleword of the fill:
+  * Store A
+  * Store B
+  * Store C
+  * Zero, One or more instructions
+  * Store D
+  
+  The four stores A-D must be to different doublewords of the line that
+  is being filled. The fourth instruction in the sequence above permits
+  the fill of the final doubleword to be transferred from the FSB into
+  the cache. In the sequence above, the stores may be either integer
+  (sb, sh, sw, swr, swl, sc) or coprocessor (swc1/swc2, sdc1/sdc2,
+  swxc1, sdxc1, suxc1) stores, as long as the four stores are to
+  different doublewords on the line. If the floating point unit is
+  running in 1:2 mode, it is not possible to create the sequence above
+  using only floating point store instructions.  In this case, the cache
+  line being filled is incorrectly marked invalid, thereby losing the
+  data from any store to the line that occurs between the original miss
+  and the completion of the five cycle sequence shown above.
+  The workarounds are:
+  * Run the data cache in write-through mode.
+  * Insert a non-store instruction between
+    Store A and Store B or Store B and Store C.  */
+  
+/* Return the number of nops (0 or 1) needed before INSN to avoid the 24K
+   store errata described above.  HIST[0] and HIST[1] are the two preceding
+   instructions; ADDRESS_EXPR gives INSN's offset and may be null.  */
+static int
+nops_for_24k (const struct mips_cl_insn *hist,
+	      const struct mips_cl_insn *insn,
+	      expressionS *address_expr)
+{
+  struct store_info
+    {
+      /* Immediate offset, if any, for this store instruction.  */
+      short off;
+      /* Alignment required by this store instruction.  */
+      int align_to;
+    } pos[3], tmp;
+
+  int align, i, align_insn;
+  int range;
+  int new_off[3];
+
+  /* Three stores in a row are required to trigger the errata.  */
+  if (!insn
+      || !(insn->insn_mo->pinfo & INSN_STORE_MEMORY)
+      || !(hist[0].insn_mo->pinfo & INSN_STORE_MEMORY)
+      || !(hist[1].insn_mo->pinfo & INSN_STORE_MEMORY))
+    return 0;
+
+  /* Assume the worst (request a nop) when the three stores cannot be
+     compared: they are not all in the current frag, they use different
+     base registers, or INSN's offset expression is unavailable.  */
+  if (frag_now != hist[0].frag
+      || frag_now != hist[1].frag)
+    return 1;
+
+  if (!BASE_REG_EQ (insn->insn_opcode, hist[0].insn_opcode)
+      || !BASE_REG_EQ (insn->insn_opcode, hist[1].insn_opcode))
+    return 1;
+
+  if (!address_expr)
+    return 1;
+
+  pos[0].off = address_expr->X_add_number;
+  pos[1].off = fix_24k_offset (hist[0].insn_opcode, hist[0].insn_mo);
+  pos[2].off = fix_24k_offset (hist[1].insn_opcode, hist[1].insn_mo);
+
+  pos[0].align_to = fix_24k_align_to (insn->insn_mo);
+  pos[1].align_to = fix_24k_align_to (hist[0].insn_mo);
+  pos[2].align_to = fix_24k_align_to (hist[1].insn_mo);
+
+  /* Sort pos from smallest to largest.  */
+  if (pos[1].off < pos[0].off)
+    {
+      tmp = pos[0];
+      pos[0] = pos[1];
+      pos[1] = tmp;
+    }
+  if (pos[2].off < pos[1].off)
+    {
+      tmp = pos[2];
+      pos[2] = pos[1];
+      pos[1] = tmp;
+      if (pos[1].off < pos[0].off)
+	{
+	  tmp = pos[0];
+	  pos[0] = pos[1];
+	  pos[1] = tmp;
+	}
+    }
+
+  /* Do a quick check to see if the range is more than 32 bytes.
+     If so, we are definitely not on the same cache line.
+     There may be a chance that range after alignment adjustment
+     is >= 32, so we recheck this again later on.  */
+  if (pos[2].off - pos[0].off >= 32)
+    return 0;
+
+  /* Check for different double-words.  We also check for
+     corner cases (including unaligned addresses).
+     Depending on the type of store, a max distance of
+     (pos[2] - pos[0]) < X will guarantee one double-word
+     overlap.  */
+  align_insn = 0;
+
+  /* The value of X depends on the insn at pos[i] for the
+     alignment, where i is the widest insn of store type.  */
+  if (((insn->insn_opcode >> OP_SH_RS) & OP_MASK_RS) == SP)
+    align = 8;
+  else
+    {
+      align = 1;
+      for (i = 0; i < 3; i++)
+	{
+	  if (align < pos[i].align_to)
+	    {
+	      align = pos[i].align_to;
+	      align_insn = i;
+	    }
+	}
+    }
+
+  /* Align everything using align_insn's alignment.
+     1. Change align_insn's offset to 0.
+     2. Align all insns to "align".  */
+  new_off[0] = pos[0].off;
+  new_off[1] = pos[1].off;
+  new_off[2] = pos[2].off;
+
+  if (((insn->insn_opcode >> OP_SH_RS) & OP_MASK_RS) != SP)
+    {
+      /* Take care of unaligned offsets.  */
+      new_off[0] -= pos[align_insn].off;
+      new_off[1] -= pos[align_insn].off;
+      new_off[2] -= pos[align_insn].off;
+    }
+
+  /* Determine max range using align_insn's offset.  */
+  range = 8 + align;
+  new_off[0] &= ~align + 1;
+  new_off[1] &= ~align + 1;
+  new_off[2] &= ~align + 1;
+
+  if (new_off[0] == new_off[1]
+      || new_off[0] == new_off[2]
+      || new_off[1] == new_off[2]
+      || new_off[2] - new_off[0] < range
+      || new_off[2] - new_off[1] >= 24
+      || new_off[1] - new_off[0] >= 24
+      || new_off[2] - new_off[0] >= 32)
+   return 0;
+
+  return 1;
+}
+
 /* Return the number of nops that would be needed if instruction INSN
    immediately followed the MAX_NOPS instructions given by HIST,
    where HIST[0] is the most recent instruction.  If INSN is null,
@@ -2687,7 +2893,8 @@
 
 static int
 nops_for_insn (const struct mips_cl_insn *hist,
-	       const struct mips_cl_insn *insn)
+	       const struct mips_cl_insn *insn,
+	       expressionS *address_expr)
 {
   int i, nops, tmp_nops;
 
@@ -2706,6 +2913,14 @@
 	nops = tmp_nops;
     }
 
+  if (mips_fix_24k)
+    {
+      tmp_nops = nops_for_24k (hist, insn, address_expr);
+      if (tmp_nops > nops)
+	nops = tmp_nops;
+    }
+
+
   return nops;
 }
 
@@ -2727,7 +2942,7 @@
   while (cursor > buffer)
     *--cursor = *va_arg (args, const struct mips_cl_insn *);
 
-  nops = nops_for_insn (buffer, NULL);
+  nops = nops_for_insn (buffer, NULL, NULL);
   va_end (args);
   return nops;
 }
@@ -2737,11 +2952,12 @@
 
 static int
 nops_for_insn_or_target (const struct mips_cl_insn *hist,
-			 const struct mips_cl_insn *insn)
+			 const struct mips_cl_insn *insn,
+			 expressionS *address_expr)
 {
   int nops, tmp_nops;
 
-  nops = nops_for_insn (hist, insn);
+  nops = nops_for_insn (hist, insn, address_expr);
   if (insn->insn_mo->pinfo & (INSN_UNCOND_BRANCH_DELAY
 			      | INSN_COND_BRANCH_DELAY
 			      | INSN_COND_BRANCH_LIKELY))
@@ -2846,8 +3062,8 @@
 	 benefit hand written assembly code, and does not seem worth
 	 it.  */
       int nops = (mips_optimize == 0
-		  ? nops_for_insn (history, NULL)
-		  : nops_for_insn_or_target (history, ip));
+		  ? nops_for_insn (history, NULL, address_expr)
+		  : nops_for_insn_or_target (history, ip, address_expr));
       if (nops > 0)
 	{
 	  fragS *old_frag;
@@ -2885,7 +3101,7 @@
   else if (mips_relax.sequence != 2 && prev_nop_frag != NULL)
     {
       /* Work out how many nops in prev_nop_frag are needed by IP.  */
-      int nops = nops_for_insn_or_target (history, ip);
+      int nops = nops_for_insn_or_target (history, ip, address_expr);
       gas_assert (nops <= prev_nop_frag_holds);
 
       /* Enforce NOPS as a minimum.  */
@@ -3273,7 +3489,7 @@
 		  && prev_insn_frag_type == rs_machine_dependent)
 	      /* Check for conflicts between the branch and the instructions
 		 before the candidate delay slot.  */
-	      || nops_for_insn (history + 1, ip) > 0
+	      || nops_for_insn (history + 1, ip, NULL) > 0
 	      /* Check for conflicts between the swapped sequence and the
 		 target of the branch.  */
 	      || nops_for_sequence (2, history + 1, ip, history) > 0
@@ -3374,7 +3590,10 @@
 	      /* If the previous instruction is an ERET or
 		 DERET, avoid the swap.  */
               || (history[0].insn_opcode == INSN_ERET)
-              || (history[0].insn_opcode == INSN_DERET))
+              || (history[0].insn_opcode == INSN_DERET)
+  	      /* Don't swap if -mfix-24k and previous insn is a store.  */
+  	      || (mips_fix_24k
+ 		  && (prev_pinfo & INSN_STORE_MEMORY)))
 	    {
 	      if (mips_opts.mips16
 		  && (pinfo & INSN_UNCOND_BRANCH_DELAY)
@@ -3475,7 +3694,7 @@
 {
   if (! mips_opts.noreorder)
     {
-      int nops = nops_for_insn (history, NULL);
+      int nops = nops_for_insn (history, NULL, NULL);
       if (nops > 0)
 	{
 	  while (nops-- > 0)
@@ -3503,7 +3722,7 @@
       /* Insert any nops that might be needed between the .set noreorder
 	 block and the previous instructions.  We will later remove any
 	 nops that turn out not to be needed.  */
-      nops = nops_for_insn (history, NULL);
+      nops = nops_for_insn (history, NULL, NULL);
       if (nops > 0)
 	{
 	  if (mips_optimize != 0)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]