[PATCH] Reorder MSA branches
Chao-Ying Fu
Chao-Ying.Fu@imgtec.com
Fri Oct 18 00:18:00 GMT 2013
Hi All,
I forgot to support the reordering of MSA branches.
Here are two alternative versions of the patch.
Version 1. To be conservative, we simply disable the reordering of MSA branches.
2013-10-17 Chao-ying Fu <Chao-ying.Fu@imgtec.com>
* config/tc-mips.c (can_swap_branch_p): Don't swap MSA branches.
Index: gas/config/tc-mips.c
===================================================================
RCS file: /cvs/src/src/gas/config/tc-mips.c,v
retrieving revision 1.594
diff -u -p -r1.594 tc-mips.c
--- gas/config/tc-mips.c 14 Oct 2013 18:50:54 -0000 1.594
+++ gas/config/tc-mips.c 17 Oct 2013 23:43:58 -0000
@@ -6144,6 +6144,10 @@ can_swap_branch_p (struct mips_cl_insn *
if (gpr_read & prev_gpr_write)
return FALSE;
+ /* If the branch reads MSA registers, we won't swap conservatively. */
+ if (insn_reg_mask (ip, 1 << OP_REG_MSA, insn_read_mask (ip->insn_mo)))
+ return FALSE;
+
/* If the branch writes a register that the previous
instruction sets, we can not swap. */
gpr_write = gpr_write_mask (ip);
Version 2.
Because the MSA registers are shared with the FP registers (if the FPU is present),
we check FP and MSA register dependences together.
2013-10-17 Chao-ying Fu <Chao-ying.Fu@imgtec.com>
* config/tc-mips.c (fpr_read_mask): Test MSA registers.
(fpr_write_mask): Test MSA registers.
(can_swap_branch_p): Check fpr write followed by fpr read.
Index: gas/config/tc-mips.c
===================================================================
RCS file: /cvs/src/src/gas/config/tc-mips.c,v
retrieving revision 1.594
diff -u -p -r1.594 tc-mips.c
--- gas/config/tc-mips.c 14 Oct 2013 18:50:54 -0000 1.594
+++ gas/config/tc-mips.c 17 Oct 2013 23:38:59 -0000
@@ -4161,7 +4161,8 @@ fpr_read_mask (const struct mips_cl_insn
unsigned long pinfo;
unsigned int mask;
- mask = insn_reg_mask (ip, (1 << OP_REG_FP) | (1 << OP_REG_VEC),
+ mask = insn_reg_mask (ip, (1 << OP_REG_FP) | (1 << OP_REG_VEC)
+ | (1 << OP_REG_MSA),
insn_read_mask (ip->insn_mo));
pinfo = ip->insn_mo->pinfo;
/* Conservatively treat all operands to an FP_D instruction are doubles.
@@ -4179,7 +4180,8 @@ fpr_write_mask (const struct mips_cl_ins
unsigned long pinfo;
unsigned int mask;
- mask = insn_reg_mask (ip, (1 << OP_REG_FP) | (1 << OP_REG_VEC),
+ mask = insn_reg_mask (ip, (1 << OP_REG_FP) | (1 << OP_REG_VEC)
+ | (1 << OP_REG_MSA),
insn_write_mask (ip->insn_mo));
pinfo = ip->insn_mo->pinfo;
/* Conservatively treat all operands to an FP_D instruction are doubles.
@@ -6070,6 +6072,7 @@ can_swap_branch_p (struct mips_cl_insn *
{
unsigned long pinfo, pinfo2, prev_pinfo, prev_pinfo2;
unsigned int gpr_read, gpr_write, prev_gpr_read, prev_gpr_write;
+ unsigned int fpr_read, prev_fpr_write;
/* -O2 and above is required for this optimization. */
if (mips_optimize < 2)
@@ -6144,6 +6147,11 @@ can_swap_branch_p (struct mips_cl_insn *
if (gpr_read & prev_gpr_write)
return FALSE;
+ fpr_read = fpr_read_mask (ip);
+ prev_fpr_write = fpr_write_mask (&history[0]);
+ if (fpr_read & prev_fpr_write)
+ return FALSE;
+
/* If the branch writes a register that the previous
instruction sets, we can not swap. */
gpr_write = gpr_write_mask (ip);
# Testing
# cat r.s
.set reorder
test:
fsune.d $w0,$w1,$w2
bz.d $w0, test
fsune.d $w0,$w1,$w2
bz.d $w1, test
fsune.d $w0,$w1,$w2
bz.d $w2, test
add.s $f0,$f1,$f2
bz.d $w0, test
add.s $f0,$f1,$f2
bz.d $w1, test
add.s $f0,$f1,$f2
bz.d $w2, test
add.d $f0,$f2,$f4
bz.d $w0, test
add.d $f0,$f2,$f4
bz.d $w1, test
add.d $f0,$f2,$f4
bz.d $w2, test
# as-new r.s -o r.o -mmsa -mips32r2 -mfp64
# objdump -d r.o
00000000 <test>:
0: 7aa2081c fsune.d $w0,$w1,$w2
4: 4760fffe bz.d $w0,0 <test>
8: 00000000 nop
c: 4761fffc bz.d $w1,0 <test>
10: 7aa2081c fsune.d $w0,$w1,$w2
14: 4762fffa bz.d $w2,0 <test>
18: 7aa2081c fsune.d $w0,$w1,$w2
1c: 46020800 add.s $f0,$f1,$f2
20: 4760fff7 bz.d $w0,0 <test>
24: 00000000 nop
28: 4761fff5 bz.d $w1,0 <test>
2c: 46020800 add.s $f0,$f1,$f2
30: 4762fff3 bz.d $w2,0 <test>
34: 46020800 add.s $f0,$f1,$f2
38: 46241000 add.d $f0,$f2,$f4
3c: 4760fff0 bz.d $w0,0 <test>
40: 00000000 nop
44: 4761ffee bz.d $w1,0 <test>
48: 46241000 add.d $f0,$f2,$f4
4c: 4762ffec bz.d $w2,0 <test>
50: 46241000 add.d $f0,$f2,$f4
Any feedback? Which version is better? Thanks a lot!
Regards,
Chao-ying
More information about the Binutils
mailing list