This is the mail archive of the
binutils@sources.redhat.com
mailing list for the binutils project.
Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
- From: "H. J. Lu" <hjl at lucon dot org>
- To: binutils at sources dot redhat dot com
- Date: Thu, 12 May 2005 09:37:32 -0700
- Subject: Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
- References: <20050512161141.GA5705@lucon.org>
On Thu, May 12, 2005 at 09:11:41AM -0700, H. J. Lu wrote:
> This patch will try to turn br into brl when it is possible.
>
Slot 0 has to be a NOP only in the BBB case. Here is an updated patch.
H.J.
----
2005-05-12 H.J. Lu <hongjiu.lu@intel.com>
PR 834
* elfxx-ia64.c (elfNN_ia64_relax_br): New.
(elfNN_ia64_relax_brl): Use it.
--- bfd/elfxx-ia64.c.brl 2005-05-07 06:58:12.000000000 -0700
+++ bfd/elfxx-ia64.c 2005-05-12 09:34:52.000000000 -0700
@@ -681,6 +681,114 @@ bfd_elfNN_ia64_after_parse (int itanium)
oor_branch_size = itanium ? sizeof (oor_ip) : sizeof (oor_brl);
}
+/* Try to rewrite the IP-relative branch addressed by CONTENTS + OFF into
+   a long branch (brl) by converting its 16-byte bundle into an MLX
+   bundle.
+
+   The low two bits of OFF select the slot (0, 1 or 2) holding the
+   branch; the remaining bits give the offset of the bundle itself.
+   The bundle is read and written as four little-endian 32-bit words
+   t0..t3: bit 0 of t0 is the stop bit, bits 1-4 the template, slot 0
+   starts at bit 5, slot 1 at bit 46 (bit 14 of t1) and slot 2 at
+   bit 87 (bit 23 of t2).
+
+   Return TRUE if the bundle was rewritten, in which case the brl ends
+   up in the last slot and the caller must retarget the relocation.
+   Return FALSE, leaving CONTENTS untouched, if the other slots are not
+   NOPs of the expected kind or the instruction is not a branch form
+   that has a brl counterpart.  */
+
+static bfd_boolean
+elfNN_ia64_relax_br (bfd_byte *contents, bfd_vma off)
+{
+  unsigned int template, t0, t1, t2, t3, br_code1, br_code2, mlx;
+  long br_slot;
+  bfd_byte *hit_addr;
+
+  /* The slot number is encoded in the low two bits of the offset, so
+     take it from OFF rather than from the host address of the buffer:
+     the latter is only right when CONTENTS happens to be 4-byte
+     aligned, and casting a pointer to long may truncate it.  */
+  br_slot = (long) (off & 0x3);
+  hit_addr = contents + off - br_slot;
+  t0 = bfd_getl32 (hit_addr + 0);
+  t1 = bfd_getl32 (hit_addr + 4);
+  t2 = bfd_getl32 (hit_addr + 8);
+  t3 = bfd_getl32 (hit_addr + 12);
+
+  /* Check if we can turn br into brl.  A label is always at the start
+     of the bundle.  Even if there are predicates on NOPs, we still
+     perform this optimization.  */
+  template = t0 & 0x1e;
+  switch (br_slot)
+    {
+    case 0:
+      /* Check if slot 1 and slot 2 are NOPs.  Possible template is
+	 BBB.  We only need to check nop.b.  */
+      if (!((t2 & 0x787e00) == 0x100000
+	    && (t3 & 0xf0fc0000) == 0x20000000))
+	return FALSE;
+      /* br_code1 receives the low 32 bits of the 41-bit slot and
+	 br_code2 the remaining high 9 bits.  */
+      br_code1 = (t0 & 0xffffffe0) >> 5 | (t1 & 0x1f) << 27;
+      br_code2 = (t1 & 0x3fe0) >> 5;
+      break;
+    case 1:
+      /* Check if slot 2 is NOP.  Possible templates are MBB and BBB.
+	 For BBB, slot 0 also has to be nop.b.  */
+      if (!((template == 0x12				/* MBB */
+	     && (t3 & 0xf0fc0000) == 0x20000000)
+	    || (template == 0x16			/* BBB */
+		&& (t1 & 0x3c3f) == 0x800
+		&& (t3 & 0xf0fc0000) == 0x20000000)))
+	return FALSE;
+      br_code1 = (t1 & 0xffffc000) >> 14 | (t2 & 0x3fff) << 18;
+      br_code2 = (t2 & 0x7fc000) >> 14;
+      break;
+    case 2:
+      /* Check if slot 1 is NOP.  Possible templates are MIB, MBB, BBB,
+	 MMB and MFB.  For BBB, slot 0 also has to be nop.b.  */
+      if (!((template == 0x10				/* MIB */
+	     && (t2 & 0x7bff00) == 0x200)
+	    || (template == 0x12			/* MBB */
+		&& (t2 & 0x787e00) == 0x100000)
+	    || (template == 0x16			/* BBB */
+		&& (t1 & 0x3c3f) == 0x800
+		&& (t2 & 0x787e00) == 0x100000)
+	    || (template == 0x18			/* MMB */
+		&& (t2 & 0x7bff00) == 0x200)
+	    || (template == 0x1c			/* MFB */
+		&& (t2 & 0x78ff00) == 0x200)))
+	return FALSE;
+      br_code1 = (t2 & 0xff800000) >> 23 | (t3 & 0x7fffff) << 9;
+      br_code2 = (t3 & 0xff800000) >> 23;
+      break;
+    default:
+      /* It should never happen.  */
+      abort ();
+    }
+
+  /* Check if we can turn br into brl.  Only two encodings qualify:
+     instruction bits 37-40 equal to 5, or equal to 4 with bits 6-8
+     all zero (presumably br.call and br.cond; see the Itanium
+     instruction formats).  */
+  if (!(((br_code2 & 0x1e0) == 0xa0)
+	|| ((br_code1 & 0x1c0) == 0 && (br_code2 & 0x1e0) == 0x80)))
+    return FALSE;
+
+  /* Turn br into brl by setting bit 40.  */
+  br_code2 |= 0x100;
+
+  /* Turn the old bundle into a MLX bundle with the same stop-bit
+     variety.  */
+  if (t0 & 0x1)
+    mlx = 0x5;
+  else
+    mlx = 0x4;
+
+  if (template == 0x16)
+    {
+      /* For BBB, we need to put nop.m in slot 0 and keep the original
+	 predicate.  Only the six predicate bits of slot 0 survive in
+	 t0; the slot-0 bits held in t1 are cleared and its lowest bit
+	 is then set to form the nop.m encoding.
+	 NOTE(review): when br_slot is 0 the predicate kept here is
+	 the one of the branch itself -- confirm that is intended.
+	 NOTE(review): the upper 18 bits of t1 (old slot 1) are left
+	 as they were, whereas the non-BBB path below clears them --
+	 confirm the later relocation overwrites them.  */
+      t0 &= 0x7e0;
+      t1 &= ~0x3fff;
+      t1 |= 0x1;
+    }
+  else
+    {
+      /* Keep the original instruction in slot 0.  */
+      t0 &= 0xffffffe0;
+      t1 &= 0x3fff;
+    }
+
+  t0 |= mlx;
+
+  /* Put brl in slot 1.  The 41 instruction bits are stored starting
+     at bit 23 of t2, i.e. in the last 41 bits of the bundle; the low
+     23 bits of t2 are zeroed.  */
+  t2 = br_code1 << 23;
+  t3 = (br_code1 >> 9) | (br_code2 << 23);
+
+  bfd_putl32 (t0, hit_addr);
+  bfd_putl32 (t1, hit_addr + 4);
+  bfd_putl32 (t2, hit_addr + 8);
+  bfd_putl32 (t3, hit_addr + 12);
+  return TRUE;
+}
+
static void
elfNN_ia64_relax_brl (bfd_byte *contents, bfd_vma off)
{
@@ -985,6 +1093,16 @@ elfNN_ia64_relax_section (abfd, sec, lin
}
else if (r_type == R_IA64_PCREL60B)
continue;
+ else if (elfNN_ia64_relax_br (contents, roff))
+ {
+ irel->r_info
+ = ELFNN_R_INFO (ELFNN_R_SYM (irel->r_info),
+ R_IA64_PCREL60B);
+
+ /* Make the relocation offset point to slot 1. */
+ irel->r_offset = (irel->r_offset & ~((bfd_vma) 0x3)) + 1;
+ continue;
+ }
/* We can't put a trampoline in a .init/.fini section. Issue
an error. */