[PATCH 1/2] i386: Generate lfence with load/indirect branch/ret [CVE-2020-0551]

Hongtao Liu <crazylht@gmail.com>
Wed Mar 25 09:27:48 GMT 2020


On Thu, Mar 12, 2020 at 12:17 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Mar 11, 2020 at 3:55 AM Jan Beulich <jbeulich@suse.com> wrote:
> >
> > On 10.03.2020 17:05, H.J. Lu wrote:
> > > @@ -4311,6 +4333,291 @@ optimize_encoding (void)
> > >      }
> > >  }
> > >
> > > +/* Return non-zero for load instruction.  */
> > > +
> > > +static int
> > > +load_insn_p (void)
> > > +{
> > > +  unsigned int dest;
> > > +  int any_vex_p = is_any_vex_encoding (&i.tm);
> > > +
> > > +  if (!any_vex_p)
> > > +    {
> > > +      /* lea  */
> > > +      if (i.tm.base_opcode == 0x8d)
> > > +     return 0;
> >
> > Also include INVLPG, CLFLUSH etc, and maybe some prefetches here?
> > (I'll mention the LEA-like MPX insns as well, but I think I can
> > predict your reply.)
>
> Hongtao, can you look into it?
>
> > > +      /* pop  */
> > > +      if ((i.tm.base_opcode & 0xfffffff8) == 0x58
> >
> > Mind using ~7 instead?
>
> Changed.
>
> > > +       || (i.tm.base_opcode == 0x8f && i.tm.extension_opcode == 0))
> > > +     return 1;
> >
> > What about segment register POPs, POPF, POPA, ENTER, and LEAVE?
>
> We decided that ENTER and LEAVE are safe.  Hongtao, can you look into others?
>
> > > +      /* movs, cmps, lods, scas.  */
> > > +      if ((i.tm.base_opcode >= 0xa4 && i.tm.base_opcode <= 0xa7)
> > > +       || (i.tm.base_opcode >= 0xac && i.tm.base_opcode <= 0xaf))
> >
> > This can be had with a single comparison:
> >
> >       if ((i.tm.base_opcode | 0xb) == 0xaf)
>
> Changed.
>
> > > +     return 1;
> > > +
> > > +      /* outs */
> > > +      if (i.tm.base_opcode == 0x6e || i.tm.base_opcode == 0x6f)
> >
> > And here:
> >
> >       if ((i.tm.base_opcode | 1) == 0x6f)
> >
> > Similar folding of comparisons may also be desirable further down.
>
> Changed.
>
> > Also, what about XLATB? What about implicit memory accesses done by
>
> Hongtao, can you look into it?
>
> > e.g. segment register loads? As to AMD-specific insns with implicit
> > memory operands (often accessed through rAX), should the doc
> > perhaps mention they're intentionally not covered?
>
> Yes, AMD specific insns are skipped.  Hongtao, can you look into it?
>
> > > +     return 1;
> > > +    }
> > > +
> > > +  /* No memory operand.  */
> > > +  if (!i.mem_operands)
> > > +    return 0;
> > > +
> > > +  if (any_vex_p)
> > > +    {
> > > +      /* vldmxcsr.  */
> > > +      if (i.tm.base_opcode == 0xae
> > > +       && i.tm.opcode_modifier.vex
> > > +       && i.tm.opcode_modifier.vexopcode == VEX0F
> > > +       && i.tm.extension_opcode == 2)
> > > +     return 1;
> > > +    }
> > > +  else
> > > +    {
> > > +      /* test, not, neg, mul, imul, div, idiv.  */
> > > +      if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
> > > +       && i.tm.extension_opcode != 1)
> > > +     return 1;
> > > +
> > > +      /* inc, dec.  */
> > > +      if ((i.tm.base_opcode == 0xfe || i.tm.base_opcode == 0xff)
> > > +       && i.tm.extension_opcode <= 1)
> > > +     return 1;
> > > +
> > > +      /* add, or, adc, sbb, and, sub, xor, cmp.  */
> > > +      if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
> > > +     return 1;
> > > +
> > > +      /* bt, bts, btr, btc.  */
> > > +      if (i.tm.base_opcode == 0xfba
> > > +       && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
> > > +     return 1;
> > > +
> > > +      /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
> > > +      if ((i.tm.base_opcode == 0xc0
> > > +        || i.tm.base_opcode == 0xc1
> > > +        || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
> > > +       && i.tm.extension_opcode != 6)
> > > +     return 1;
> > > +
> > > +      /* cmpxchg8b, cmpxchg16b, xrstors.  */
> > > +      if (i.tm.base_opcode == 0xfc7
> > > +       && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3))
> > > +     return 1;
> > > +
> > > +      /* fxrstor, ldmxcsr, xrstor.  */
> > > +      if (i.tm.base_opcode == 0xfae
> > > +       && (i.tm.extension_opcode == 1
> > > +           || i.tm.extension_opcode == 2
> > > +           || i.tm.extension_opcode == 5))
> > > +     return 1;
> > > +
> > > +      /* lgdt, lidt, lmsw.  */
> > > +      if (i.tm.base_opcode == 0xf01
> > > +       && (i.tm.extension_opcode == 2
> > > +           || i.tm.extension_opcode == 3
> > > +           || i.tm.extension_opcode == 6))
> > > +     return 1;
> > > +
> > > +      /* vmptrld */
> > > +      if (i.tm.base_opcode == 0xfc7
> > > +       && i.tm.extension_opcode == 6)
> > > +     return 1;
> > > +
> > > +      /* Check for x87 instructions.  */
> > > +      if (i.tm.base_opcode >= 0xd8 && i.tm.base_opcode <= 0xdf)
> > > +     {
> > > +       /* Skip fst, fstp, fstenv, fstcw.  */
> > > +       if (i.tm.base_opcode == 0xd9
> > > +           && (i.tm.extension_opcode == 2
> > > +               || i.tm.extension_opcode == 3
> > > +               || i.tm.extension_opcode == 6
> > > +               || i.tm.extension_opcode == 7))
> > > +         return 0;
> > > +
> > > +       /* Skip fisttp, fist, fistp, fstp.  */
> > > +       if (i.tm.base_opcode == 0xdb
> > > +           && (i.tm.extension_opcode == 1
> > > +               || i.tm.extension_opcode == 2
> > > +               || i.tm.extension_opcode == 3
> > > +               || i.tm.extension_opcode == 7))
> > > +         return 0;
> > > +
> > > +       /* Skip fisttp, fst, fstp, fsave, fstsw.  */
> > > +       if (i.tm.base_opcode == 0xdd
> > > +           && (i.tm.extension_opcode == 1
> > > +               || i.tm.extension_opcode == 2
> > > +               || i.tm.extension_opcode == 3
> > > +               || i.tm.extension_opcode == 6
> > > +               || i.tm.extension_opcode == 7))
> > > +         return 0;
> > > +
> > > +       /* Skip fisttp, fist, fistp, fbstp, fistp.  */
> > > +       if (i.tm.base_opcode == 0xdf
> > > +           && (i.tm.extension_opcode == 1
> > > +               || i.tm.extension_opcode == 2
> > > +               || i.tm.extension_opcode == 3
> > > +               || i.tm.extension_opcode == 6
> > > +               || i.tm.extension_opcode == 7))
> > > +         return 0;
> > > +
> > > +       return 1;
> > > +     }
> > > +    }
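
As an illustration of the x87 handling above: with -mlfence-after-load=yes,

    flds  (%rax)            # d9 /0, loads from memory

gets an lfence inserted after it, while

    fstps (%rax)            # d9 /3, pure store, on the skip list above

does not.
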
> > > +
> > > +  dest = i.operands - 1;
> > > +
> > > +  /* Check fake imm8 operand and 3 source operands.  */
> > > +  if ((i.tm.opcode_modifier.immext
> > > +       || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
> > > +      && i.types[dest].bitfield.imm8)
> > > +    dest--;
> > > +
> > > +  /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg, xadd  */
> > > +  if (!any_vex_p
> > > +      && (i.tm.base_opcode == 0x0
> > > +       || i.tm.base_opcode == 0x1
> > > +       || i.tm.base_opcode == 0x8
> > > +       || i.tm.base_opcode == 0x9
> > > +       || i.tm.base_opcode == 0x10
> > > +       || i.tm.base_opcode == 0x11
> > > +       || i.tm.base_opcode == 0x18
> > > +       || i.tm.base_opcode == 0x19
> > > +       || i.tm.base_opcode == 0x20
> > > +       || i.tm.base_opcode == 0x21
> > > +       || i.tm.base_opcode == 0x28
> > > +       || i.tm.base_opcode == 0x29
> > > +       || i.tm.base_opcode == 0x30
> > > +       || i.tm.base_opcode == 0x31
> > > +       || i.tm.base_opcode == 0x38
> > > +       || i.tm.base_opcode == 0x39
> > > +       || (i.tm.base_opcode >= 0x84 && i.tm.base_opcode <= 0x87)
> > > +       || i.tm.base_opcode == 0xfc0
> > > +       || i.tm.base_opcode == 0xfc1))
> > > +    return 1;
> >
> > Don't quite a few of these fit very well with ...
> >
>
> Changed.
>
> > > +  /* Check for load instruction.  */
> > > +  return (i.types[dest].bitfield.class != ClassNone
> > > +       || i.types[dest].bitfield.instance == Accum);
> >
> > ... this generic expression? It would seem to me that only TEST
> > and XCHG need special casing, for allowing either operand order.
> > Same seems to apply to quite a few of the special cases in the
> > big "else" block further up, and even its if() [vldmxcsr] part.
>
> Hongtao, can you look into it?
>

Many instructions take a memory operand both as input and output, and
those also need to be handled. They don't fit well into the generic
expression, because they either have only one operand, or the memory
operand is in the destination position.
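
An illustrative case:

    addl %eax, (%rbx)       # opcode 0x01, memory operand is the destination

reads and rewrites (%rbx), so it must count as a load, but the generic
check on the destination operand's class can't see that - hence the
explicit opcode list.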

> > > +static void
> > > +insert_lfence_before (void)
> > > +{
> > > +  char *p;
> > > +
> > > +  if (i.tm.base_opcode == 0xff
> > > +      && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
> >
> > Also exclude VEX- and alike encoded insn here again?
>
> I changed to:
>
> static void
> insert_lfence_before (void)
> {
>   char *p;
>
>   if (is_any_vex_encoding (&i.tm))
>     return;
>
> > > +    {
> > > +      /* Insert lfence before indirect branch if needed.  */
> > > +
> > > +      if (lfence_before_indirect_branch == lfence_branch_none)
> > > +     return;
> > > +
> > > +      if (i.operands != 1)
> > > +     abort ();
> > > +
> > > +      if (i.reg_operands == 1)
> > > +     {
> > > +       /* Indirect branch via register.  Don't insert lfence with
> > > +          -mlfence-after-load=yes.  */
> > > +       if (lfence_after_load
> > > +           || lfence_before_indirect_branch == lfence_branch_memory)
> > > +         return;
> > > +     }
> > > +      else if (i.mem_operands == 1
> > > +            && lfence_before_indirect_branch != lfence_branch_register)
> > > +     {
> > > +       as_warn (_("indirect branch `%s` over memory should be avoided"),
> > > +                i.tm.name);
> >
> > Perhaps drop "branch" and replace "over memory" by "with memory operand"?
>
> Changed.
>
> > > +       return;
> > > +     }
> > > +      else
> > > +     return;
> > > +
> > > +      if (last_insn.kind != last_insn_other
> > > +       && last_insn.seg == now_seg)
> > > +     {
> > > +       as_warn_where (last_insn.file, last_insn.line,
> > > +                      _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
> > > +                      last_insn.name, i.tm.name);
> > > +       return;
> > > +     }
> > > +
> > > +      p = frag_more (3);
> > > +      *p++ = 0xf;
> > > +      *p++ = 0xae;
> > > +      *p = 0xe8;
> > > +      return;
> > > +    }
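
(For reference: 0f ae e8 above is the encoding of lfence, so with
-mlfence-before-indirect-branch=all,

    jmp *%rax

is assembled as

    lfence
    jmp *%rax

unless one of the skip cases above applies.)
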
> > > +
> > > +  /* Output orl/notl and lfence before ret.  */
> >
> > May I suggest to either drop the insn suffixes here (and below),
> > or make them correctly reflect the code below (which may also
> > produce q- or w-suffixed insns)?
>
> Changed.
>
> > > +  if (lfence_before_ret != lfence_before_ret_none
> > > +      && (i.tm.base_opcode == 0xc2
> > > +       || i.tm.base_opcode == 0xc3
> > > +       || i.tm.base_opcode == 0xca
> > > +       || i.tm.base_opcode == 0xcb))
> > > +    {
> > > +      if (last_insn.kind != last_insn_other
> > > +       && last_insn.seg == now_seg)
> > > +     {
> > > +       as_warn_where (last_insn.file, last_insn.line,
> > > +                      _("`%s` skips -mlfence-before-ret on `%s`"),
> > > +                      last_insn.name, i.tm.name);
> > > +       return;
> > > +     }
> > > +      if (lfence_before_ret == lfence_before_ret_or)
> > > +     {
> > > +       /* orl: 0x830c2400.  */
> > > +       p = frag_more ((flag_code == CODE_64BIT ? 1 : 0) + 4 + 3);
> > > +       if (flag_code == CODE_64BIT)
> > > +         *p++ = 0x48;
> >
> > Shouldn't this depend on RET's operand size? Likewise wouldn't you
> > also need to insert 0x66/0x67 in certain cases?
>
> Hongtao, can you look into it?
>

I suppose you mean OR's operand size?
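
For reference, the fixed sequence emitted in 64-bit mode decodes as

    48 83 0c 24 00          orq $0x0, (%rsp)

i.e. the OR always uses the 64-bit stack-slot operand size, regardless
of any 0x66 prefix on the ret itself.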

> > > +       *p++ = 0x83;
> > > +       *p++ = 0xc;
> > > +       *p++ = 0x24;
> > > +       *p++ = 0x0;
> > > +     }
> > > +      else
> > > +     {
> > > +       p = frag_more ((flag_code == CODE_64BIT ? 2 : 0) + 6 + 3);
> > > +       /* notl: 0xf71424.  */
> > > +       if (flag_code == CODE_64BIT)
> > > +         *p++ = 0x48;
> > > +       *p++ = 0xf7;
> > > +       *p++ = 0x14;
> > > +       *p++ = 0x24;
> > > +       if (flag_code == CODE_64BIT)
> > > +         *p++ = 0x48;
> > > +       /* notl: 0xf71424.  */
> > > +       *p++ = 0xf7;
> > > +       *p++ = 0x14;
> > > +       *p++ = 0x24;
> >
> > When reading the description I was wondering about the use of NOT.
> > I think the doc should mention that it's _two_ NOTs that get inserted,
> > as this is even more growth of code size than the OR variant. Is
> > there a performance reason for having this extra, more expensive (in
> > terms of code size) variant? Or is it rather because of the OR
> > variant clobbering EFLAGS (which ought to be called out in the doc)?
> > In which case - was it considered to use e.g. SHL with an immediate
> > of zero, thus having smaller code _and_ untouched EFLAGS (but of
> > course requiring at least an 80186, albeit the addressing mode
> > used requires a 386 anyway, which you don't seem to be checking
> > anywhere)?
>
> This is a very good suggestion.  I will talk to our people.  In the meantime,
> I'd like to keep it as is since this version has been tested extensively.
> We can change it to SHL 0 later.
>
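
(Illustrative encoding for the SHL idea, in 64-bit mode:

    48 c1 24 24 00          shlq $0x0, (%rsp)

5 bytes versus 8 for the two NOTs, and a shift count of 0 leaves EFLAGS
unmodified.)
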
> > Also I guess the last comment above would better move two lines up?
>
> Changed.
>
> > > @@ -12668,6 +12986,41 @@ md_parse_option (int c, const char *arg)
> > >          as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
> > >        break;
> > >
> > > +    case OPTION_MLFENCE_AFTER_LOAD:
> > > +      if (strcasecmp (arg, "yes") == 0)
> > > +     lfence_after_load = 1;
> > > +      else if (strcasecmp (arg, "no") == 0)
> > > +     lfence_after_load = 0;
> > > +      else
> > > +        as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
> > > +      break;
> > > +
> > > +    case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
> > > +      if (strcasecmp (arg, "all") == 0)
> > > +     lfence_before_indirect_branch = lfence_branch_all;
> >
> > I wonder whether this shouldn't also enable a safe lfence_before_ret
> > mode (i.e. not the OR one), for RET also being an indirect branch. Of
> > course care would need to be taken to avoid clobbering an already set
> > lfence_before_ret mode.
>
> Hongtao, can you look into it?
>
> > > @@ -13254,6 +13616,10 @@ i386_cons_align (int ignore ATTRIBUTE_UNUSED)
> > >        last_insn.kind = last_insn_directive;
> > >        last_insn.name = "constant directive";
> > >        last_insn.file = as_where (&last_insn.line);
> > > +      if (lfence_before_ret != lfence_before_ret_none)
> > > +     as_warn (_("constant directive skips -mlfence-before-ret"));
> > > +      if (lfence_before_indirect_branch != lfence_branch_none)
> > > +     as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
> >
> > Could these be folded into a single warning, to avoid getting overly
> > verbose?
> >
>
> Changed.
>
> This is the patch I am checking in.
>
> --
> H.J.

For the other parts, I'm working on fixing/changing them.
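
For reference, the new options can be combined, e.g. (illustrative):

    as -mlfence-after-load=yes \
       -mlfence-before-indirect-branch=all \
       -mlfence-before-ret=or \
       -o foo.o foo.s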

-- 
BR,
Hongtao


