This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Separate macro-fusible instructions from not for -mbranches-within-32B-boundaries


On Mon, Mar 2, 2020 at 4:32 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 02.03.2020 05:05, Hongtao Liu wrote:
> > Update patch.
>
> >+enum mf_jcc_kind
> >+  {
> >+    mf_jcc_jo = 0,  /* base opcode 0x70  */
>
> Is there a reason for the " = 0" here? Without is, the standard
> still mandates the value to be zero.
>
Yes.
I saw some other places in tc-i386.c where first member of enum is
assigned as 0, and also some without initializer, so i guess when the
enum variable is used as left value such as calculated by (baseopcode
& 0x0e ) >>1, it should be explicitly assigned, otherwise not.
Just my guess.
> >+enum mf_cmp_kind
> >+  {
> >+    mf_cmp_test,  /* test */
> >+    mf_cmp_cmp,  /* cmp */
> >+    mf_cmp_and,  /* and */
> >+    mf_cmp_alu,  /* add/sub */
> >+    mf_cmp_incdec  /* inc/dec */
> >+  };
>
> I saw your reply to my question of why this is 5 instead of 3
> enumerators. Yet I think this being a workaround for something
> that hopefully will be fixed in newer hardware suggests to go
> with the minimal necessary set first, and split enumerators
> only if indeed needed down the road.
Done.
>
> >@@ -8503,6 +8557,9 @@ add_branch_padding_frag_p (enum align_branch_kind *branch_p)
> >       }
> >       else
> >       {
> >+        /* Because J<cc> and JN<cc> share same group in macro-fusible table,
> >+           igore the lowest bit.  */
> >+        *mf_jcc_p = (i.tm.base_opcode - 0x70) >> 1;
>
> May I suggest to use (i.tm.base_opcode & 0x0e) >> 1, to be independent
> of the insn variant presently in i.tm (after all we do dynamically
> update it after copying from the template)?
>
Done.

Update my patch.

> Thanks for doing the other adjustments.
>
> Jan



--
BR,
Hongtao
From 1c33bac7687fd2b56dcc67a5d8f523151f45d42d Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Wed, 26 Feb 2020 14:57:04 +0800
Subject: [PATCH] According to intel SDM manual, not all compare flag-modifying
 instructions are marcro-fusible with subsequent jcc instruction. For those
 not, -mbranches-within-32B-boundaries need't align them as FUSED_JCC_PADDING,
 only jcc itself need to be aligned.

Here are 2 restrictions which separate macro-fusible instruction from not

Restriction 1:
If TEST/AND/CMP/ADD/SUB/INC/DEC is one of the following format:

   cmp m, imm
   add m, imm
   sub m, imm
   test m, imm
   and m, imm
   inc m
   dec m

it is unfusible with any jcc instruction.

Restriction 2:
/* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
   Note it also works for Skylake and Cascadelake.
---------------------------------------------------------------------
|   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
| ------  | ----------- | ------- | -------- |
|   Jo    |      N      |    N    |     Y    |
|   Jno   |      N      |    N    |     Y    |
|  Jc/Jb  |      Y      |    N    |     Y    |
| Jae/Jnb |      Y      |    N    |     Y    |
|  Je/Jz  |      Y      |    Y    |     Y    |
| Jne/Jnz |      Y      |    Y    |     Y    |
| Jna/Jbe |      Y      |    N    |     Y    |
| Ja/Jnbe |      Y      |    N    |     Y    |
|   Js    |      N      |    N    |     Y    |
|   Jns   |      N      |    N    |     Y    |
|  Jp/Jpe |      N      |    N    |     Y    |
| Jnp/Jpo |      N      |    N    |     Y    |
| Jl/Jnge |      Y      |    Y    |     Y    |
| Jge/Jnl |      Y      |    Y    |     Y    |
| Jle/Jng |      Y      |    Y    |     Y    |
| Jg/Jnle |      Y      |    Y    |     Y    |

Update maybe_fused_with_jcc_p to check if operands of CMP like instructions
can be fused with condition jump.

	* gas/config/tc-i386.h (i386_tc_frag_data): Add member mf_type.
	(TC_FRAG_INIT): Init mf_type.
	* gas/config/tc-i386.c (enum mf_jcc_kind): New enum.
	(enum mf_cmp_kind): Ditto.
	(maybe_fused_with_jcc_p): Add argument mf_cmp_p to get
	mf_type of corresponding instructons, exclude unfusible
	instructions.
	(add_fused_jcc_padding_frag_p): Likewise.
	(add_branch_padding_frag_p): Likewise.
	(output_insn): Record mf_type for corresponding instructions.
	(i386_macro_fusible_p): New function.
	(i386_next_fusible_jcc_frag): Rename from i386_next_jcc_frag,
	add argument cmp_fragP to return next fusible jcc frag only.
	(i386_classify_machine_dependant_frag): Seperate macro-fusible
	instructions from condition jump.
	* gas/testsuite/gas/i386/align-branch-9.s: New file.
	* gas/testsuite/gas/i386/align-branch-9.d: Ditto.
	* gas/testsuite/gas/i386/x86-64-align-branch-9.s: Ditto.
	* gas/testsuite/gas/i386/x86-64-align-branch-9.d: Ditto.
	* gas/testsuite/gas/i386/i386.exp: Run new tests.
---
 gas/config/tc-i386.c                          | 165 ++++++++++++++----
 gas/config/tc-i386.h                          |   2 +
 gas/testsuite/gas/i386/align-branch-9.d       |  78 +++++++++
 gas/testsuite/gas/i386/align-branch-9.s       |  74 ++++++++
 gas/testsuite/gas/i386/i386.exp               |   2 +
 .../gas/i386/x86-64-align-branch-9.d          |  46 +++++
 .../gas/i386/x86-64-align-branch-9.s          |  43 +++++
 7 files changed, 378 insertions(+), 32 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/align-branch-9.d
 create mode 100644 gas/testsuite/gas/i386/align-branch-9.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-align-branch-9.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-align-branch-9.s

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 62b7cfbe6c..dc745aa7d2 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -687,6 +687,27 @@ static unsigned int align_branch = (align_branch_jcc_bit
 				    | align_branch_fused_bit
 				    | align_branch_jmp_bit);
 
+/* Types of condition jump used by macro-fusion.  */
+enum mf_jcc_kind
+  {
+    mf_jcc_jo = 0,  /* base opcode 0x70  */
+    mf_jcc_jc,      /* base opcode 0x72  */
+    mf_jcc_je,      /* base opcode 0x74  */
+    mf_jcc_jna,     /* base opcode 0x76  */
+    mf_jcc_js,      /* base opcode 0x78  */
+    mf_jcc_jp,      /* base opcode 0x7a  */
+    mf_jcc_jl,      /* base opcode 0x7c  */
+    mf_jcc_jle,     /* base opcode 0x7e  */
+  };
+
+/* Types of compare flag-modifying insntructions used by macro-fusion.  */
+enum mf_cmp_kind
+  {
+    mf_cmp_test_and,  /* test/cmp */
+    mf_cmp_alu_cmp,  /* add/sub/cmp */
+    mf_cmp_incdec  /* inc/dec */
+  };
+
 /* The maximum padding size for fused jcc.  CMP like instruction can
    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
    prefixes.   */
@@ -8374,10 +8395,22 @@ encoding_length (const fragS *start_frag, offsetT start_off,
 }
 
 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
-   be macro-fused with conditional jumps.  */
+   be macro-fused with conditional jumps.
+   NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
+   or is one of the following format:
+
+    cmp m, imm
+    add m, imm
+    sub m, imm
+   test m, imm
+    and m, imm
+    inc m
+    dec m
+
+   it is unfusible.  */
 
 static int
-maybe_fused_with_jcc_p (void)
+maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
 {
   /* No RIP address.  */
   if (i.base_reg && i.base_reg->reg_num == RegIP)
@@ -8387,36 +8420,54 @@ maybe_fused_with_jcc_p (void)
   if (is_any_vex_encoding (&i.tm))
     return 0;
 
-  /* and, add, sub with destination register.  */
-  if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
-      || i.tm.base_opcode <= 5
+  /* add, sub without add/sub m, imm.  */
+  if (i.tm.base_opcode <= 5
       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
       || ((i.tm.base_opcode | 3) == 0x83
-	  && ((i.tm.extension_opcode | 1) == 0x5
+	  && (i.tm.extension_opcode == 0x5
 	      || i.tm.extension_opcode == 0x0)))
-    return (i.types[1].bitfield.class == Reg
-	    || i.types[1].bitfield.instance == Accum);
+    {
+      *mf_cmp_p = mf_cmp_alu_cmp;
+      return !(i.mem_operands && i.imm_operands);
+    }
 
-  /* test, cmp with any register.  */
+  /* and without and m, imm.  */
+  if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
+      || ((i.tm.base_opcode | 3) == 0x83
+	  && i.tm.extension_opcode == 0x4))
+    {
+      *mf_cmp_p = mf_cmp_test_and;
+      return !(i.mem_operands && i.imm_operands);
+    }
+
+  /* test without test m imm.  */
   if ((i.tm.base_opcode | 1) == 0x85
       || (i.tm.base_opcode | 1) == 0xa9
       || ((i.tm.base_opcode | 1) == 0xf7
-	  && i.tm.extension_opcode == 0)
-      || (i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
+	  && i.tm.extension_opcode == 0))
+    {
+      *mf_cmp_p = mf_cmp_test_and;
+      return !(i.mem_operands && i.imm_operands);
+    }
+
+  /* cmp without cmp m, imm.  */
+  if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
       || ((i.tm.base_opcode | 3) == 0x83
 	  && (i.tm.extension_opcode == 0x7)))
-    return (i.types[0].bitfield.class == Reg
-	    || i.types[0].bitfield.instance == Accum
-	    || i.types[1].bitfield.class == Reg
-	    || i.types[1].bitfield.instance == Accum);
+    {
+      *mf_cmp_p = mf_cmp_alu_cmp;
+      return !(i.mem_operands && i.imm_operands);
+    }
 
-  /* inc, dec with any register.   */
+  /* inc, dec without inc/dec m.   */
   if ((i.tm.cpu_flags.bitfield.cpuno64
        && (i.tm.base_opcode | 0xf) == 0x4f)
       || ((i.tm.base_opcode | 1) == 0xff
 	  && i.tm.extension_opcode <= 0x1))
-    return (i.types[0].bitfield.class == Reg
-	    || i.types[0].bitfield.instance == Accum);
+    {
+      *mf_cmp_p = mf_cmp_incdec;
+      return !i.mem_operands;
+    }
 
   return 0;
 }
@@ -8424,7 +8475,7 @@ maybe_fused_with_jcc_p (void)
 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
 
 static int
-add_fused_jcc_padding_frag_p (void)
+add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
 {
   /* NB: Don't work with COND_JUMP86 without i386.  */
   if (!align_branch_power
@@ -8433,7 +8484,7 @@ add_fused_jcc_padding_frag_p (void)
       || !(align_branch & align_branch_fused_bit))
     return 0;
 
-  if (maybe_fused_with_jcc_p ())
+  if (maybe_fused_with_jcc_p (mf_cmp_p))
     {
       if (last_insn.kind == last_insn_other
 	  || last_insn.seg != now_seg)
@@ -8481,7 +8532,8 @@ add_branch_prefix_frag_p (void)
 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
 
 static int
-add_branch_padding_frag_p (enum align_branch_kind *branch_p)
+add_branch_padding_frag_p (enum align_branch_kind *branch_p,
+			   enum mf_jcc_kind *mf_jcc_p)
 {
   int add_padding;
 
@@ -8503,6 +8555,9 @@ add_branch_padding_frag_p (enum align_branch_kind *branch_p)
 	}
       else
 	{
+	  /* Because J<cc> and JN<cc> share same group in macro-fusible table,
+	     igore the lowest bit.  */
+	  *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
 	  *branch_p = align_branch_jcc;
 	  if ((align_branch & align_branch_jcc_bit))
 	    add_padding = 1;
@@ -8573,6 +8628,10 @@ output_insn (void)
   offsetT insn_start_off;
   fragS *fragP = NULL;
   enum align_branch_kind branch = align_branch_none;
+  /* The initializer is arbitrary just to avoid uninitialized error.
+     it's actually either assigned in add_branch_padding_frag_p
+     or never be used.  */
+  enum mf_jcc_kind mf_jcc = mf_jcc_jo;
 
 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
   if (IS_ELF && x86_used_note)
@@ -8665,7 +8724,7 @@ output_insn (void)
   insn_start_frag = frag_now;
   insn_start_off = frag_now_fix ();
 
-  if (add_branch_padding_frag_p (&branch))
+  if (add_branch_padding_frag_p (&branch, &mf_jcc))
     {
       char *p;
       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
@@ -8686,6 +8745,7 @@ output_insn (void)
 		ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
 		NULL, 0, p);
 
+      fragP->tc_frag_data.mf_type = mf_jcc;
       fragP->tc_frag_data.branch_type = branch;
       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
     }
@@ -8705,6 +8765,7 @@ output_insn (void)
       unsigned char *q;
       unsigned int j;
       unsigned int prefix;
+      enum mf_cmp_kind mf_cmp;
 
       if (avoid_fence
 	  && (i.tm.base_opcode == 0xfaee8
@@ -8731,7 +8792,7 @@ output_insn (void)
       if (branch)
 	/* Skip if this is a branch.  */
 	;
-      else if (add_fused_jcc_padding_frag_p ())
+      else if (add_fused_jcc_padding_frag_p (&mf_cmp))
 	{
 	  /* Make room for padding.  */
 	  frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
@@ -8743,6 +8804,7 @@ output_insn (void)
 		    ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
 		    NULL, 0, p);
 
+	  fragP->tc_frag_data.mf_type = mf_cmp;
 	  fragP->tc_frag_data.branch_type = align_branch_fused;
 	  fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
 	}
@@ -10948,6 +11010,42 @@ elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
 }
 #endif
 
+/* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
+   Note also work for Skylake and Cascadelake.
+---------------------------------------------------------------------
+|   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
+| ------  | ----------- | ------- | -------- |
+|   Jo    |      N      |    N    |     Y    |
+|   Jno   |      N      |    N    |     Y    |
+|  Jc/Jb  |      Y      |    N    |     Y    |
+| Jae/Jnb |      Y      |    N    |     Y    |
+|  Je/Jz  |      Y      |    Y    |     Y    |
+| Jne/Jnz |      Y      |    Y    |     Y    |
+| Jna/Jbe |      Y      |    N    |     Y    |
+| Ja/Jnbe |      Y      |    N    |     Y    |
+|   Js    |      N      |    N    |     Y    |
+|   Jns   |      N      |    N    |     Y    |
+|  Jp/Jpe |      N      |    N    |     Y    |
+| Jnp/Jpo |      N      |    N    |     Y    |
+| Jl/Jnge |      Y      |    Y    |     Y    |
+| Jge/Jnl |      Y      |    Y    |     Y    |
+| Jle/Jng |      Y      |    Y    |     Y    |
+| Jg/Jnle |      Y      |    Y    |     Y    |
+---------------------------------------------------------------------  */
+static int
+i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
+{
+  if (mf_cmp == mf_cmp_alu_cmp)
+    return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
+	    || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
+  if (mf_cmp == mf_cmp_incdec)
+    return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
+	    || mf_jcc == mf_jcc_jle);
+  if (mf_cmp == mf_cmp_test_and)
+    return 1;
+  return 0;
+}
+
 /* Return the next non-empty frag.  */
 
 static fragS *
@@ -10967,20 +11065,23 @@ i386_next_non_empty_frag (fragS *fragP)
 /* Return the next jcc frag after BRANCH_PADDING.  */
 
 static fragS *
-i386_next_jcc_frag (fragS *fragP)
+i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
 {
-  if (!fragP)
+  fragS *branch_fragP;
+  if (!pad_fragP)
     return NULL;
 
-  if (fragP->fr_type == rs_machine_dependent
-      && (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
+  if (pad_fragP->fr_type == rs_machine_dependent
+      && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
 	  == BRANCH_PADDING))
     {
-      fragP = i386_next_non_empty_frag (fragP);
-      if (fragP->fr_type != rs_machine_dependent)
+      branch_fragP = i386_next_non_empty_frag (pad_fragP);
+      if (branch_fragP->fr_type != rs_machine_dependent)
 	return NULL;
-      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == COND_JUMP)
-	return fragP;
+      if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
+	  && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
+				   pad_fragP->tc_frag_data.mf_type))
+	return branch_fragP;
     }
 
   return NULL;
@@ -11025,7 +11126,7 @@ i386_classify_machine_dependent_frag (fragS *fragP)
 	       */
 	    cmp_fragP = i386_next_non_empty_frag (next_fragP);
 	    pad_fragP = i386_next_non_empty_frag (cmp_fragP);
-	    branch_fragP = i386_next_jcc_frag (pad_fragP);
+	    branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
 	    if (branch_fragP)
 	      {
 		/* The BRANCH_PADDING frag is merged with the
diff --git a/gas/config/tc-i386.h b/gas/config/tc-i386.h
index 845b3901d4..93678c2282 100644
--- a/gas/config/tc-i386.h
+++ b/gas/config/tc-i386.h
@@ -273,6 +273,7 @@ struct i386_tc_frag_data
   unsigned char prefix_length;
   unsigned char default_prefix;
   unsigned char cmp_size;
+  unsigned int mf_type : 3;
   unsigned int classified : 1;
   unsigned int branch_type : 3;
 };
@@ -299,6 +300,7 @@ struct i386_tc_frag_data
      (FRAGP)->tc_frag_data.cmp_size = 0;			\
      (FRAGP)->tc_frag_data.classified = 0;			\
      (FRAGP)->tc_frag_data.branch_type = 0;			\
+     (FRAGP)->tc_frag_data.mf_type = 0;			\
    }								\
  while (0)
 
diff --git a/gas/testsuite/gas/i386/align-branch-9.d b/gas/testsuite/gas/i386/align-branch-9.d
new file mode 100644
index 0000000000..6340817d04
--- /dev/null
+++ b/gas/testsuite/gas/i386/align-branch-9.d
@@ -0,0 +1,78 @@
+#as: -mbranches-within-32B-boundaries
+#objdump: -dw
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <foo>:
+   0:	65 a3 01 00 00 00    	mov    %eax,%gs:0x1
+   6:	55                   	push   %ebp
+   7:	55                   	push   %ebp
+   8:	55                   	push   %ebp
+   9:	55                   	push   %ebp
+   a:	89 e5                	mov    %esp,%ebp
+   c:	89 7d f8             	mov    %edi,-0x8\(%ebp\)
+   f:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  12:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  15:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  18:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  1b:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  1e:	39 c5                	cmp    %eax,%ebp
+  20:	70 62                	jo     84 <foo\+0x84>
+  22:	89 73 f4             	mov    %esi,-0xc\(%ebx\)
+  25:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  28:	89 7d f8             	mov    %edi,-0x8\(%ebp\)
+  2b:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  2e:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  31:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  34:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  37:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  3a:	5d                   	pop    %ebp
+  3b:	5d                   	pop    %ebp
+  3c:	5d                   	pop    %ebp
+  3d:	74 45                	je     84 <foo\+0x84>
+  3f:	5d                   	pop    %ebp
+  40:	74 42                	je     84 <foo\+0x84>
+  42:	89 44 24 fc          	mov    %eax,-0x4\(%esp\)
+  46:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  49:	89 7d f8             	mov    %edi,-0x8\(%ebp\)
+  4c:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  4f:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  52:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  55:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  58:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  5b:	5d                   	pop    %ebp
+  5c:	eb 2c                	jmp    8a <foo\+0x8a>
+  5e:	66 90                	xchg   %ax,%ax
+  60:	eb 28                	jmp    8a <foo\+0x8a>
+  62:	eb 26                	jmp    8a <foo\+0x8a>
+  64:	89 45 fc             	mov    %eax,-0x4\(%ebp\)
+  67:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  6a:	89 7d f8             	mov    %edi,-0x8\(%ebp\)
+  6d:	5d                   	pop    %ebp
+  6e:	5d                   	pop    %ebp
+  6f:	40                   	inc    %eax
+  70:	72 12                	jb     84 <foo\+0x84>
+  72:	36 36 89 45 fc       	ss mov %eax,%ss:-0x4\(%ebp\)
+  77:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  7a:	89 7d f8             	mov    %edi,-0x8\(%ebp\)
+  7d:	89 75 f4             	mov    %esi,-0xc\(%ebp\)
+  80:	21 c3                	and    %eax,%ebx
+  82:	7c 06                	jl     8a <foo\+0x8a>
+  84:	8b 45 f4             	mov    -0xc\(%ebp\),%eax
+  87:	89 45 fc             	mov    %eax,-0x4\(%ebp\)
+  8a:	89 b5 50 fb ff ff    	mov    %esi,-0x4b0\(%ebp\)
+  90:	89 b5 50 fb ff ff    	mov    %esi,-0x4b0\(%ebp\)
+  96:	89 b5 50 fb ff ff    	mov    %esi,-0x4b0\(%ebp\)
+  9c:	89 b5 50 fb ff ff    	mov    %esi,-0x4b0\(%ebp\)
+  a2:	89 75 0c             	mov    %esi,0xc\(%ebp\)
+  a5:	e9 fc ff ff ff       	jmp    a6 <foo\+0xa6>
+  aa:	89 b5 50 fb ff ff    	mov    %esi,-0x4b0\(%ebp\)
+  b0:	89 b5 50 fb ff ff    	mov    %esi,-0x4b0\(%ebp\)
+  b6:	89 b5 50 fb ff ff    	mov    %esi,-0x4b0\(%ebp\)
+  bc:	89 b5 50 fb ff ff    	mov    %esi,-0x4b0\(%ebp\)
+  c2:	89 75 00             	mov    %esi,0x0\(%ebp\)
+  c5:	74 c3                	je     8a <foo\+0x8a>
+  c7:	74 c1                	je     8a <foo\+0x8a>
+#pass
diff --git a/gas/testsuite/gas/i386/align-branch-9.s b/gas/testsuite/gas/i386/align-branch-9.s
new file mode 100644
index 0000000000..357abe30f9
--- /dev/null
+++ b/gas/testsuite/gas/i386/align-branch-9.s
@@ -0,0 +1,74 @@
+  .text
+  .globl  foo
+  .p2align  4
+foo:
+  movl	%eax, %gs:0x1
+  pushl  %ebp
+  pushl  %ebp
+  pushl  %ebp
+  pushl  %ebp
+  movl  %esp, %ebp
+  movl  %edi, -8(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  cmp  %eax, %ebp
+  jo  .L_2
+  movl  %esi, -12(%ebx)
+  movl  %esi, -12(%ebp)
+  movl  %edi, -8(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  popl  %ebp
+  popl  %ebp
+  popl  %ebp
+  je  .L_2
+  popl  %ebp
+  je  .L_2
+  movl  %eax, -4(%esp)
+  movl  %esi, -12(%ebp)
+  movl  %edi, -8(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %esi, -12(%ebp)
+  popl  %ebp
+  jmp  .L_3
+  jmp  .L_3
+  jmp  .L_3
+  movl  %eax, -4(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %edi, -8(%ebp)
+  popl  %ebp
+  popl  %ebp
+  inc  %eax
+  jc  .L_2
+  movl  %eax, -4(%ebp)
+  movl  %esi, -12(%ebp)
+  movl  %edi, -8(%ebp)
+  movl  %esi, -12(%ebp)
+  and  %eax, %ebx
+  jl  .L_3
+.L_2:
+  movl  -12(%ebp), %eax
+  movl  %eax, -4(%ebp)
+.L_3:
+  movl  %esi, -1200(%ebp)
+  movl  %esi, -1200(%ebp)
+  movl  %esi, -1200(%ebp)
+  movl  %esi, -1200(%ebp)
+  movl  %esi, 12(%ebp)
+  jmp  bar
+  movl  %esi, -1200(%ebp)
+  movl  %esi, -1200(%ebp)
+  movl  %esi, -1200(%ebp)
+  movl  %esi, -1200(%ebp)
+  movl  %esi, (%ebp)
+  je .L_3
+  je .L_3
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index 685e62ea72..8fc621f2bb 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -525,6 +525,7 @@ if [expr ([istarget "i*86-*-*"] ||  [istarget "x86_64-*-*"]) && [gas_32_check]]
     run_dump_test "align-branch-6"
     run_dump_test "align-branch-7"
     run_dump_test "align-branch-8"
+    run_dump_test "align-branch-9"
 
     # These tests require support for 8 and 16 bit relocs,
     # so we only run them for ELF and COFF targets.
@@ -1100,6 +1101,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t
     run_dump_test "x86-64-align-branch-6"
     run_dump_test "x86-64-align-branch-7"
     run_dump_test "x86-64-align-branch-8"
+    run_dump_test "x86-64-align-branch-9"
 
     if { ![istarget "*-*-aix*"]
       && ![istarget "*-*-beos*"]
diff --git a/gas/testsuite/gas/i386/x86-64-align-branch-9.d b/gas/testsuite/gas/i386/x86-64-align-branch-9.d
new file mode 100644
index 0000000000..1041fd0483
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-align-branch-9.d
@@ -0,0 +1,46 @@
+#as: -mbranches-within-32B-boundaries
+#objdump: -dw
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <foo>:
+   0:	c1 e9 02             	shr    \$0x2,%ecx
+   3:	c1 e9 02             	shr    \$0x2,%ecx
+   6:	c1 e9 02             	shr    \$0x2,%ecx
+   9:	89 d1                	mov    %edx,%ecx
+   b:	31 c0                	xor    %eax,%eax
+   d:	c1 e9 02             	shr    \$0x2,%ecx
+  10:	c1 e9 02             	shr    \$0x2,%ecx
+  13:	c1 e9 02             	shr    \$0x2,%ecx
+  16:	c1 e9 02             	shr    \$0x2,%ecx
+  19:	c1 e9 02             	shr    \$0x2,%ecx
+  1c:	c1 e9 02             	shr    \$0x2,%ecx
+  1f:	80 fa 02             	cmp    \$0x2,%dl
+  22:	70 df                	jo     3 <foo\+0x3>
+  24:	2e 2e 2e 2e 31 c0    	cs cs cs cs xor %eax,%eax
+  2a:	c1 e9 02             	shr    \$0x2,%ecx
+  2d:	c1 e9 02             	shr    \$0x2,%ecx
+  30:	c1 e9 02             	shr    \$0x2,%ecx
+  33:	89 d1                	mov    %edx,%ecx
+  35:	31 c0                	xor    %eax,%eax
+  37:	c1 e9 02             	shr    \$0x2,%ecx
+  3a:	c1 e9 02             	shr    \$0x2,%ecx
+  3d:	c1 e9 02             	shr    \$0x2,%ecx
+  40:	f6 c2 02             	test   \$0x2,%dl
+  43:	75 e8                	jne    2d <foo\+0x2d>
+  45:	31 c0                	xor    %eax,%eax
+  47:	c1 e9 02             	shr    \$0x2,%ecx
+  4a:	c1 e9 02             	shr    \$0x2,%ecx
+  4d:	89 d1                	mov    %edx,%ecx
+  4f:	c1 e9 02             	shr    \$0x2,%ecx
+  52:	c1 e9 02             	shr    \$0x2,%ecx
+  55:	89 d1                	mov    %edx,%ecx
+  57:	c1 e9 02             	shr    \$0x2,%ecx
+  5a:	89 d1                	mov    %edx,%ecx
+  5c:	31 c0                	xor    %eax,%eax
+  5e:	ff c0                	inc    %eax
+  60:	76 cb                	jbe    2d <foo\+0x2d>
+  62:	31 c0                	xor    %eax,%eax
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-align-branch-9.s b/gas/testsuite/gas/i386/x86-64-align-branch-9.s
new file mode 100644
index 0000000000..917579bda4
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-align-branch-9.s
@@ -0,0 +1,43 @@
+	.text
+	.p2align 4,,15
+foo:
+	shrl	$2, %ecx
+.L1:
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	movl	%edx, %ecx
+	xorl	%eax, %eax
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	cmpb	$2, %dl
+	jo	.L1
+	xorl	%eax, %eax
+	shrl	$2, %ecx
+.L2:
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	movl	%edx, %ecx
+	xorl	%eax, %eax
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	testb	$2, %dl
+	jne	.L2
+	xorl	%eax, %eax
+.L3:
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	movl	%edx, %ecx
+	shrl	$2, %ecx
+	shrl	$2, %ecx
+	movl	%edx, %ecx
+	shrl	$2, %ecx
+	movl	%edx, %ecx
+	xorl	%eax, %eax
+	inc	%eax
+	jbe	.L2
+	xorl	%eax, %eax
-- 
2.18.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]